Move core matching factors to the config file

Bonus changes here were making the config a singleton, fixing some more tests, and rewriting the stress test because it was pissing me off.
Marc Di Luzio 2024-08-11 22:07:43 +01:00
parent a5d7dae851
commit 9043615498
6 changed files with 254 additions and 86 deletions

View file

@@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d
 ## Config
 Matchy is configured by a `config.json` file that takes this format:
-```
+```json
 {
-    "version": 1,
-    "token": "<<github bot token>>",
+    "version" : 1,
+    "token" : "<<github bot token>>",
+    "match" : {
+        "score_factors": {
+            "repeat_role" : 4,
+            "repeat_match" : 8,
+            "extra_member" : 32,
+            "upper_threshold" : 64
+        }
+    }
 }
 ```
+Only token and version are required. See [`py/config.py`](py/config.py) for explanations of any of these.
 ## TODO
 * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
-* Move more constants to the config
 * Add scheduling functionality
 * Fix logging in some sub files (doesn't seem to actually be output?)
 * Improve the weirdo

View file

@@ -1,5 +1,5 @@
 """Very simple config loading library"""
-from schema import Schema, And, Use
+from schema import Schema, And, Use, Optional
 import files
 import os
 import logging
@@ -13,10 +13,18 @@ _FILE = "config.json"
 _VERSION = 1


-class _Keys():
+class _Key():
     TOKEN = "token"
     VERSION = "version"
+    MATCH = "match"
+    SCORE_FACTORS = "score_factors"
+    REPEAT_ROLE = "repeat_role"
+    REPEAT_MATCH = "repeat_match"
+    EXTRA_MEMBER = "extra_member"
+    UPPER_THRESHOLD = "upper_threshold"

     # Removed
     OWNERS = "owners"
@@ -24,10 +32,21 @@ class _Keys():
 _SCHEMA = Schema(
     {
         # The current version
-        _Keys.VERSION: And(Use(int)),
+        _Key.VERSION: And(Use(int)),
         # Discord bot token
-        _Keys.TOKEN: And(Use(str)),
+        _Key.TOKEN: And(Use(str)),
+        # Settings for the match algorithm, see matching.py for explanations on usage
+        Optional(_Key.MATCH): {
+            Optional(_Key.SCORE_FACTORS): {
+                Optional(_Key.REPEAT_ROLE): And(Use(int)),
+                Optional(_Key.REPEAT_MATCH): And(Use(int)),
+                Optional(_Key.EXTRA_MEMBER): And(Use(int)),
+                Optional(_Key.UPPER_THRESHOLD): And(Use(int)),
+            }
+        }
     }
 )
@@ -35,7 +54,7 @@ _SCHEMA = Schema(
 def _migrate_to_v1(d: dict):
     # Owners moved to History in v1
     # Note: owners will be required to be re-added to the state.json
-    owners = d.pop(_Keys.OWNERS)
+    owners = d.pop(_Key.OWNERS)
     logger.warn(
         "Migration removed owners from config, these must be re-added to the state.json")
     logger.warn("Owners: %s", owners)
@@ -47,7 +66,29 @@ _MIGRATIONS = [
 ]


-class Config():
+class _ScoreFactors():
+    def __init__(self, data: dict):
+        """Initialise and validate the config"""
+        self._dict = data
+
+    @property
+    def repeat_role(self) -> int:
+        return self._dict.get(_Key.REPEAT_ROLE, None)
+
+    @property
+    def repeat_match(self) -> int:
+        return self._dict.get(_Key.REPEAT_MATCH, None)
+
+    @property
+    def extra_member(self) -> int:
+        return self._dict.get(_Key.EXTRA_MEMBER, None)
+
+    @property
+    def upper_threshold(self) -> int:
+        return self._dict.get(_Key.UPPER_THRESHOLD, None)
+
+
+class _Config():
     def __init__(self, data: dict):
         """Initialise and validate the config"""
         _SCHEMA.validate(data)
@@ -57,6 +98,10 @@ class Config():
     def token(self) -> str:
         return self._dict["token"]

+    @property
+    def score_factors(self) -> _ScoreFactors:
+        return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {}))
+

 def _migrate(dict: dict):
     """Migrate a dict through versions"""
@@ -66,7 +111,7 @@ def _migrate(dict: dict):
     dict["version"] = _VERSION


-def load_from_file(file: str = _FILE) -> Config:
+def _load_from_file(file: str = _FILE) -> _Config:
     """
     Load the state from a file
     Apply any required migrations
@@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config:
     assert os.path.isfile(file)
     loaded = files.load(file)
     _migrate(loaded)
-    return Config(loaded)
+    return _Config(loaded)
+
+
+# Core config for users to use
+# Singleton as there should only be one, and it's global
+Config = _load_from_file()
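
The `_ScoreFactors` properties above deliberately return `None` rather than raising when a key is missing. A tiny standalone sketch (hypothetical values, not part of the commit) of that `dict.get` pattern:

```python
# A partially filled "score_factors" block, as _ScoreFactors would receive it.
factors = {"repeat_role": 4}

# Present keys come back as configured; absent optional keys come back as None,
# which callers treat as "use the built-in default".
assert factors.get("repeat_role", None) == 4
assert factors.get("upper_threshold", None) is None
```

Because `Config` is now built at import time, callers such as `matching.py` simply `import config` and read `config.Config.score_factors` with no explicit load step.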

View file

@@ -3,20 +3,24 @@ import logging
 from datetime import datetime, timedelta
 from typing import Protocol, runtime_checkable
 import state
+import config

-# Number of days to step forward from the start of history for each match attempt
-_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)


 class _ScoreFactors(int):
-    """Various eligability scoring factors for group meetups"""
-    REPEAT_ROLE = 2**2
-    REPEAT_MATCH = 2**3
-    EXTRA_MEMBER = 2**5
-    # Scores higher than this are fully rejected
-    UPPER_THRESHOLD = 2**6
+    """
+    Score factors used when trying to build up "best fit" groups
+    Matchees are sequentially placed into the lowest scoring available group
+    """
+    # Added for each role the matchee has that another group member has
+    REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2
+    # Added for each member in the group that the matchee has already matched with
+    REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3
+    # Added for each additional member over the set "per group" value
+    EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5
+    # Upper threshold, if the user scores higher than this they will not be placed in that group
+    UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6


 logger = logging.getLogger("matching")
@@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member,
         return rating

     # Add score based on prior matchups of this user
-    rating += sum(m.id in prior_matches for m in group) * \
-        _ScoreFactors.REPEAT_MATCH
+    num_prior = sum(m.id in prior_matches for m in group)
+    rating += num_prior * _ScoreFactors.REPEAT_MATCH

     # Calculate the number of roles that match
     all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr)
@@ -159,7 +163,7 @@ def iterate_all_shifts(list: list):
 def members_to_groups(matchees: list[Member],
-                      hist: state.State = state.State(),
+                      st: state.State = state.State(),
                       per_group: int = 3,
                       allow_fallback: bool = False) -> list[list[Member]]:
     """Generate the groups from the set of matchees"""
@@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member],
     if not matchees:
         return []

-    # Grab the oldest timestamp
-    history_start = hist.get_oldest_timestamp() or datetime.now()
-
-    # Walk from the start of time until now using the timestep increment
-    for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
+    # Walk from the start of history until now trying to match up groups
+    # Or if there's no
+    for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]:

         # Attempt with each starting matchee
         for shifted_matchees in iterate_all_shifts(matchees):

             attempts += 1
             groups = attempt_create_groups(
-                shifted_matchees, hist, oldest_relevant_datetime, per_group)
+                shifted_matchees, st, oldest_relevant_datetime, per_group)

             # Fail the match if our groups aren't big enough
             if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)):
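
Since the class attributes above use `or` for the fallback, a missing (i.e. `None`) config value keeps the previous hard-coded power of two. A minimal sketch of that precedence, using a hypothetical `resolve_factor` helper that is not in the codebase:

```python
def resolve_factor(configured, default):
    """Return the configured score factor, or the legacy default when unset."""
    # Note: because this mirrors the `or` in matching.py, an explicit 0 in the
    # config would also fall back to the default.
    return configured or default


assert resolve_factor(None, 2**3) == 8   # no score_factors entry in config.json
assert resolve_factor(12, 2**3) == 12    # value supplied in config.json
```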

View file

@@ -6,6 +6,8 @@ import pytest
 import random
 import matching
 import state
+import copy
+import itertools

 from datetime import datetime, timedelta
@@ -40,12 +42,16 @@ class Member():
     def roles(self) -> list[Role]:
         return self._roles

+    @roles.setter
+    def roles(self, roles: list[Role]):
+        self._roles = roles
+
     @property
     def id(self) -> int:
         return self._id


-def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int):
+def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int):
     """Inner function to validate the main output of the groups function"""
     groups = matching.members_to_groups(matchees, tmp_state, per_group)
@@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St
 def test_members_to_groups_no_history(matchees, per_group):
     """Test simple group matching works"""
     tmp_state = state.State()
-    inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    members_to_groups_validate(matchees, tmp_state, per_group)


 def items_found_in_lists(list_of_lists, items):
@@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items):
         [
             # Nothing else
         ]
+    ),
+    # Another weird one pulled out of the stress test
+    (
+        [
+            # print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data])
+            {"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [
+                [Member(m) for m in group] for group in groups]}
+            for (ts, groups) in
+            [
+                (
+                    '2024-07-07 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+                        [1, 2, 3, 4, 5, 6, 7, 8]
+                    ]
+                ),
+                (
+                    '2024-07-13 20:25:56.313993',
+                    [
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                        [1]
+                    ]
+                ),
+                (
+                    '2024-06-29 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+                    ]
+                ),
+                (
+                    '2024-06-25 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2]
+                    ]
+                ),
+                (
+                    '2024-07-04 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1, 2, 3, 4, 5, 6, 7]
+                    ]
+                ),
+                (
+                    '2024-07-16 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+                    ]
+                )
+            ]
+        ],
+        [
+            # print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below
+            Member(i, [Role(r) for r in roles]) for (i, roles) in
+            [
+                (10, [1, 2, 3]),
+                (4, [1, 2, 3]),
+                (5, [1, 2]),
+                (13, [1, 2]),
+                (3, [1, 2, 3, 4]),
+                (14, [1]),
+                (6, [1, 2, 3, 4]),
+                (11, [1]),
+                (9, [1]),
+                (1, [1, 2, 3]),
+                (16, [1, 2]),
+                (15, [1, 2]),
+                (2, [1, 2, 3]),
+                (7, [1, 2, 3]),
+                (12, [1, 2]),
+                (8, [1, 2, 3, 4])
+            ]
+        ],
+        5,
+        [
+            # Nothing
+        ]
     )
-], ids=['simple_history', 'fallback', 'example_1', 'example_2'])
+], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3'])
 def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
     """Test more advanced group matching works"""
     tmp_state = state.State()
@@ -215,66 +326,65 @@ test_members_to_groups_with_history(history_data, matchees, per_group, check
     for d in history_data:
         tmp_state.log_groups(d["groups"], d["ts"])

-    groups = inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    groups = members_to_groups_validate(matchees, tmp_state, per_group)

     # Run the custom validate functions
     for check in checks:
         assert check(groups)


-# Allows controling of the scale of the stress test
-# Try and keep it under 10s when committed, but otherwise these numbers can be fudged
-# Especially to test a wide range of weird situations
-_STRESS_TEST_RANGE_PER_GROUP = range(2, 6)
-_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5)
-_STRESS_TEST_RANGE_NUM_HISTORIES = range(8)
-
-
-def test_members_to_groups_stress_test():
-    """stress test firing significant random data at the code"""
-
-    # Use a stable rand, feel free to adjust this if needed but this lets the test be stable
-    rand = random.Random(123)
-
-    # Slowly ramp up the group size
-    for per_group in _STRESS_TEST_RANGE_PER_GROUP:
-
-        # Slowly ramp a randomized shuffled list of members with randomised roles
-        for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS:
-            matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))])
-                        for i in range(1, rand.randint(2, num_members*10 + 1))]
-            rand.shuffle(matchees)
-
-            for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES:
-
-                # Generate some super random history
-                # Start some time from now to the past
-                time = datetime.now() - timedelta(days=rand.randint(0, num_history*5))
-                history_data = []
-                for _ in range(0, num_history):
-                    run = {
-                        "ts": time
-                    }
-                    groups = []
-                    for y in range(1, num_history):
-                        groups.append([Member(i)
-                                       for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))])
-                    run["groups"] = groups
-                    history_data.append(run)
-
-                    # Step some time backwards in time
-                    time -= timedelta(days=rand.randint(1, num_history))
-
-                # No guarantees on history data order so make it a little harder for matchy
-                rand.shuffle(history_data)
-
-                # Replay the history
-                tmp_state = state.State()
-                for d in history_data:
-                    tmp_state.log_groups(d["groups"], d["ts"])
-
-                inner_validate_members_to_groups(
-                    matchees, tmp_state, per_group)
+def random_chunk(li, min_chunk, max_chunk, rand):
+    """
+    "Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks
+    """
+    it = iter(li)
+    while True:
+        nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk)))
+        if nxt:
+            yield nxt
+        else:
+            break
+
+
+# Generate a large set of "interesting" tests that replay a fake history onto random people
+# Increase these numbers for some extreme programming
+@pytest.mark.parametrize("per_group, num_members, num_history", (
+    (per_group, num_members, num_history)
+    for per_group in range(2, 4)
+    for num_members in range(6, 24, 3)
+    for num_history in range(0, 4)))
+def test_stess_random_groups(per_group, num_members, num_history):
+    """Run a randomised test based on the input"""
+
+    # Seed the random based on the inputs paired with primes
+    # Ensures the test has interesting fake data, but is stable
+    rand = random.Random(per_group*3 + num_members*5 + num_history*7)
+
+    # Start with a list of all possible members
+    possible_members = [Member(i) for i in range(num_members*2)]
+    for member in possible_members:
+        # Give each member 3 random roles from 1-7
+        member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)]
+
+    # Grab a subset for our members
+    rand.shuffle(possible_members)
+    members = copy.deepcopy(possible_members[:num_members])
+
+    history_data = {}
+    for i in range(num_history):
+        possible_members = copy.deepcopy(possible_members)
+        rand.shuffle(possible_members)
+        history_data[datetime.now() - timedelta(days=i)] = [
+            chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand)
+        ]
+
+    replay_state = state.State()
+
+    # Replay the history
+    for ts, groups in history_data.items():
+        replay_state.log_groups(groups, ts)
+
+    members_to_groups_validate(members, replay_state, per_group)


 def test_auth_scopes():
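
As a quick standalone sanity check (not part of the commit), the parametrize grid above expands to 2 × 6 × 4 = 48 generated cases:

```python
import itertools

# Mirror the three ranges used in the @pytest.mark.parametrize generator.
cases = list(itertools.product(range(2, 4), range(6, 24, 3), range(0, 4)))
assert len(cases) == 48
```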

View file

@@ -12,9 +12,7 @@ import re
 STATE_FILE = "state.json"
-CONFIG_FILE = "config.json"

-Config = config.load_from_file(CONFIG_FILE)
 State = state.load_from_file(STATE_FILE)

 logger = logging.getLogger("matchy")
@@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int):
 if __name__ == "__main__":
     handler = logging.StreamHandler()
-    bot.run(Config.token, log_handler=handler, root_logger=True)
+    bot.run(config.Config.token, log_handler=handler, root_logger=True)

View file

@@ -130,10 +130,9 @@ class State():
         dict = self._dict
         _SCHEMA.validate(dict)

-    def get_oldest_timestamp(self) -> datetime:
-        """Grab the oldest timestamp in history"""
-        times = (ts_to_datetime(dt) for dt in self._history.keys())
-        return next(times, None)
+    def get_history_timestamps(self) -> list[datetime]:
+        """Grab all timestamps in the history"""
+        return sorted([ts_to_datetime(dt) for dt in self._history.keys()])

     def get_user_matches(self, id: int) -> list[int]:
         return self._users.get(str(id), {}).get(_Key.MATCHES, {})
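
For context on how `matching.py` consumes this, a standalone illustration (fabricated timestamps, not the real `State` class) of the cutoff sequence the new accessor feeds into `members_to_groups`: every recorded history timestamp sorted oldest-first, plus `datetime.now()` as a final attempt.

```python
from datetime import datetime, timedelta

# Pretend these came back from State.get_history_timestamps() (already sorted).
recorded = sorted(datetime.now() - timedelta(days=d) for d in (14, 7, 1))

# matching.py appends now() so there is always at least one cutoff to try.
for cutoff in recorded + [datetime.now()]:
    pass  # each cutoff would be handed to attempt_create_groups(...)
```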