From 9043615498ba5e14b66743235f4e237d5b5be31a Mon Sep 17 00:00:00 2001 From: Marc Di Luzio Date: Sun, 11 Aug 2024 22:07:43 +0100 Subject: [PATCH] Move core matching factors to the config file Bonus changes here were making the config a singleton, fixing some more tests and then re-writing the stress test because it was pissing me off. --- README.md | 17 +++- py/config.py | 66 ++++++++++++-- py/matching.py | 40 +++++---- py/matching_test.py | 206 +++++++++++++++++++++++++++++++++----------- py/matchy.py | 4 +- py/state.py | 7 +- 6 files changed, 254 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index f3f6dde..2d1e418 100644 --- a/README.md +++ b/README.md @@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d ## Config Matchy is configured by a `config.json` file that takes this format: -``` +```json { - "version": 1, - "token": "<>", + "version" : 1, + "token" : "<>", + + "match" : { + "score_factors": { + "repeat_role" : 4, + "repeat_match" : 8, + "extra_member" : 32, + "upper_threshold" : 64 + } + } } ``` +Only token and version are required. See [`py/config.py`](for explanations for any of these) ## TODO * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html) -* Move more constants to the config * Add scheduling functionality * Fix logging in some sub files (doesn't seem to actually be output?) * Improve the weirdo \ No newline at end of file diff --git a/py/config.py b/py/config.py index 2719d73..c6a4ca9 100644 --- a/py/config.py +++ b/py/config.py @@ -1,5 +1,5 @@ """Very simple config loading library""" -from schema import Schema, And, Use +from schema import Schema, And, Use, Optional import files import os import logging @@ -13,10 +13,18 @@ _FILE = "config.json" _VERSION = 1 -class _Keys(): +class _Key(): TOKEN = "token" VERSION = "version" + MATCH = "match" + + SCORE_FACTORS = "score_factors" + REPEAT_ROLE = "repeat_role" + REPEAT_MATCH = "repeat_match" + EXTRA_MEMBER = "extra_member" + UPPER_THRESHOLD = "upper_threshold" + # Removed OWNERS = "owners" @@ -24,10 +32,21 @@ class _Keys(): _SCHEMA = Schema( { # The current version - _Keys.VERSION: And(Use(int)), + _Key.VERSION: And(Use(int)), # Discord bot token - _Keys.TOKEN: And(Use(str)), + _Key.TOKEN: And(Use(str)), + + # Settings for the match algorithmn, see matching.py for explanations on usage + Optional(_Key.MATCH): { + Optional(_Key.SCORE_FACTORS): { + + Optional(_Key.REPEAT_ROLE): And(Use(int)), + Optional(_Key.REPEAT_MATCH): And(Use(int)), + Optional(_Key.EXTRA_MEMBER): And(Use(int)), + Optional(_Key.UPPER_THRESHOLD): And(Use(int)), + } + } } ) @@ -35,7 +54,7 @@ _SCHEMA = Schema( def _migrate_to_v1(d: dict): # Owners moved to History in v1 # Note: owners will be required to be re-added to the state.json - owners = d.pop(_Keys.OWNERS) + owners = d.pop(_Key.OWNERS) logger.warn( "Migration removed owners from config, these must be re-added to the state.json") logger.warn("Owners: %s", owners) @@ -47,7 +66,29 @@ _MIGRATIONS = [ ] -class Config(): +class _ScoreFactors(): + def __init__(self, data: dict): + """Initialise and validate the config""" + self._dict = data + + @property + def repeat_role(self) -> int: + return self._dict.get(_Key.REPEAT_ROLE, None) + + @property + def repeat_match(self) -> int: + return self._dict.get(_Key.REPEAT_MATCH, None) + + @property + def extra_member(self) -> int: + return self._dict.get(_Key.EXTRA_MEMBER, None) + + @property + def upper_threshold(self) -> int: + return self._dict.get(_Key.UPPER_THRESHOLD, None) + + +class _Config(): def __init__(self, data: dict): """Initialise and validate the config""" _SCHEMA.validate(data) @@ -57,6 +98,10 @@ class Config(): def token(self) -> str: return self._dict["token"] + @property + def score_factors(self) -> _ScoreFactors: + return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {})) + def _migrate(dict: dict): """Migrate a dict through versions""" @@ -66,7 +111,7 @@ def _migrate(dict: dict): dict["version"] = _VERSION -def load_from_file(file: str = _FILE) -> Config: +def _load_from_file(file: str = _FILE) -> _Config: """ Load the state from a file Apply any required migrations @@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config: assert os.path.isfile(file) loaded = files.load(file) _migrate(loaded) - return Config(loaded) + return _Config(loaded) + + +# Core config for users to use +# Singleton as there should only be one, and it's global +Config = _load_from_file() diff --git a/py/matching.py b/py/matching.py index 4f9b31d..cef715e 100644 --- a/py/matching.py +++ b/py/matching.py @@ -3,20 +3,24 @@ import logging from datetime import datetime, timedelta from typing import Protocol, runtime_checkable import state - - -# Number of days to step forward from the start of history for each match attempt -_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7) +import config class _ScoreFactors(int): - """Various eligability scoring factors for group meetups""" - REPEAT_ROLE = 2**2 - REPEAT_MATCH = 2**3 - EXTRA_MEMBER = 2**5 + """ + Score factors used when trying to build up "best fit" groups + Matchees are sequentially placed into the lowest scoring available group + """ - # Scores higher than this are fully rejected - UPPER_THRESHOLD = 2**6 + # Added for each role the matchee has that another group member has + REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2 + # Added for each member in the group that the matchee has already matched with + REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3 + # Added for each additional member over the set "per group" value + EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5 + + # Upper threshold, if the user scores higher than this they will not be placed in that group + UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6 logger = logging.getLogger("matching") @@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member, return rating # Add score based on prior matchups of this user - rating += sum(m.id in prior_matches for m in group) * \ - _ScoreFactors.REPEAT_MATCH + num_prior = sum(m.id in prior_matches for m in group) + rating += num_prior * _ScoreFactors.REPEAT_MATCH # Calculate the number of roles that match all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr) @@ -159,7 +163,7 @@ def iterate_all_shifts(list: list): def members_to_groups(matchees: list[Member], - hist: state.State = state.State(), + st: state.State = state.State(), per_group: int = 3, allow_fallback: bool = False) -> list[list[Member]]: """Generate the groups from the set of matchees""" @@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member], if not matchees: return [] - # Grab the oldest timestamp - history_start = hist.get_oldest_timestamp() or datetime.now() - - # Walk from the start of time until now using the timestep increment - for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()): + # Walk from the start of history until now trying to match up groups + # Or if there's no + for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]: # Attempt with each starting matchee for shifted_matchees in iterate_all_shifts(matchees): attempts += 1 groups = attempt_create_groups( - shifted_matchees, hist, oldest_relevant_datetime, per_group) + shifted_matchees, st, oldest_relevant_datetime, per_group) # Fail the match if our groups aren't big enough if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)): diff --git a/py/matching_test.py b/py/matching_test.py index 742d3a6..fa06560 100644 --- a/py/matching_test.py +++ b/py/matching_test.py @@ -6,6 +6,8 @@ import pytest import random import matching import state +import copy +import itertools from datetime import datetime, timedelta @@ -40,12 +42,16 @@ class Member(): def roles(self) -> list[Role]: return self._roles + @roles.setter + def roles(self, roles: list[Role]): + self._roles = roles + @property def id(self) -> int: return self._id -def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int): +def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int): """Inner function to validate the main output of the groups function""" groups = matching.members_to_groups(matchees, tmp_state, per_group) @@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St def test_members_to_groups_no_history(matchees, per_group): """Test simple group matching works""" tmp_state = state.State() - inner_validate_members_to_groups(matchees, tmp_state, per_group) + members_to_groups_validate(matchees, tmp_state, per_group) def items_found_in_lists(list_of_lists, items): @@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items): [ # Nothing else ] + ), + # Another weird one pulled out of the stress test + ( + [ + # print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data]) + {"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [ + [Member(m) for m in group] for group in groups]} + for (ts, groups) in + [ + ( + '2024-07-07 20:25:56.313993', + [ + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + [1], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + [1, 2, 3, 4, 5, 6, 7, 8] + ] + ), + ( + '2024-07-13 20:25:56.313993', + [ + [1, 2], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1] + ] + ), + ( + '2024-06-29 20:25:56.313993', + [ + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5, 6, 7], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20] + ] + ), + ( + '2024-06-25 20:25:56.313993', + [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18], + [1, 2], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + [1, 2] + ] + ), + ( + '2024-07-04 20:25:56.313993', + [ + [1, 2, 3, 4, 5], + [1, 2, 3], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + [1, 2, 3, 4, 5, 6, 7] + ] + ), + ( + '2024-07-16 20:25:56.313993', + [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + [1, 2, 3, 4, 5, 6, 7, 8, 9], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20], + [1, 2, 3, 4, 5, 6], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18] + ] + ) + ] + ], + [ + # print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below + Member(i, [Role(r) for r in roles]) for (i, roles) in + [ + (10, [1, 2, 3]), + (4, [1, 2, 3]), + (5, [1, 2]), + (13, [1, 2]), + (3, [1, 2, 3, 4]), + (14, [1]), + (6, [1, 2, 3, 4]), + (11, [1]), + (9, [1]), + (1, [1, 2, 3]), + (16, [1, 2]), + (15, [1, 2]), + (2, [1, 2, 3]), + (7, [1, 2, 3]), + (12, [1, 2]), + (8, [1, 2, 3, 4]) + ] + ], + 5, + [ + # Nothing + ] ) -], ids=['simple_history', 'fallback', 'example_1', 'example_2']) +], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3']) def test_members_to_groups_with_history(history_data, matchees, per_group, checks): """Test more advanced group matching works""" tmp_state = state.State() @@ -215,66 +326,65 @@ def test_members_to_groups_with_history(history_data, matchees, per_group, check for d in history_data: tmp_state.log_groups(d["groups"], d["ts"]) - groups = inner_validate_members_to_groups(matchees, tmp_state, per_group) + groups = members_to_groups_validate(matchees, tmp_state, per_group) # Run the custom validate functions for check in checks: assert check(groups) -# Allows controling of the scale of the stress test -# Try and keep it under 10s when committed, but otherwise these numbers can be fudged -# Especially to test a wide range of weird situations -_STRESS_TEST_RANGE_PER_GROUP = range(2, 6) -_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5) -_STRESS_TEST_RANGE_NUM_HISTORIES = range(8) +def random_chunk(li, min_chunk, max_chunk, rand): + """ + "Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks + """ + it = iter(li) + while True: + nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk))) + if nxt: + yield nxt + else: + break -def test_members_to_groups_stress_test(): - """stress test firing significant random data at the code""" +# Generate a large set of "interesting" tests that replay a fake history onto random people +# Increase these numbers for some extreme programming +@pytest.mark.parametrize("per_group, num_members, num_history", ( + (per_group, num_members, num_history) + for per_group in range(2, 4) + for num_members in range(6, 24, 3) + for num_history in range(0, 4))) +def test_stess_random_groups(per_group, num_members, num_history): + """Run a randomised test based on the input""" - # Use a stable rand, feel free to adjust this if needed but this lets the test be stable - rand = random.Random(123) + # Seed the random based on the inputs paird with primes + # Ensures the test has interesting fake data, but is stable + rand = random.Random(per_group*3 + num_members*5 + num_history*7) - # Slowly ramp up the group size - for per_group in _STRESS_TEST_RANGE_PER_GROUP: + # Start with a list of all possible members + possible_members = [Member(i) for i in range(num_members*2)] + for member in possible_members: + # Give each member 3 random roles from 1-7 + member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)] - # Slowly ramp a randomized shuffled list of members with randomised roles - for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS: - matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))]) - for i in range(1, rand.randint(2, num_members*10 + 1))] - rand.shuffle(matchees) + # Grab a subset for our members + rand.shuffle(possible_members) + members = copy.deepcopy(possible_members[:num_members]) - for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES: + history_data = {} + for i in range(num_history): + possible_members = copy.deepcopy(possible_members) + rand.shuffle(possible_members) + history_data[datetime.now() - timedelta(days=i)] = [ + chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand) + ] - # Generate some super random history - # Start some time from now to the past - time = datetime.now() - timedelta(days=rand.randint(0, num_history*5)) - history_data = [] - for _ in range(0, num_history): - run = { - "ts": time - } - groups = [] - for y in range(1, num_history): - groups.append([Member(i) - for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))]) - run["groups"] = groups - history_data.append(run) + replay_state = state.State() - # Step some time backwards in time - time -= timedelta(days=rand.randint(1, num_history)) + # Replay the history + for ts, groups in history_data.items(): + replay_state.log_groups(groups, ts) - # No guarantees on history data order so make it a little harder for matchy - rand.shuffle(history_data) - - # Replay the history - tmp_state = state.State() - for d in history_data: - tmp_state.log_groups(d["groups"], d["ts"]) - - inner_validate_members_to_groups( - matchees, tmp_state, per_group) + members_to_groups_validate(members, replay_state, per_group) def test_auth_scopes(): diff --git a/py/matchy.py b/py/matchy.py index f43d1f9..fe5fd9c 100755 --- a/py/matchy.py +++ b/py/matchy.py @@ -12,9 +12,7 @@ import re STATE_FILE = "state.json" -CONFIG_FILE = "config.json" -Config = config.load_from_file(CONFIG_FILE) State = state.load_from_file(STATE_FILE) logger = logging.getLogger("matchy") @@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int): if __name__ == "__main__": handler = logging.StreamHandler() - bot.run(Config.token, log_handler=handler, root_logger=True) + bot.run(config.Config.token, log_handler=handler, root_logger=True) diff --git a/py/state.py b/py/state.py index 859b999..f75be41 100644 --- a/py/state.py +++ b/py/state.py @@ -130,10 +130,9 @@ class State(): dict = self._dict _SCHEMA.validate(dict) - def get_oldest_timestamp(self) -> datetime: - """Grab the oldest timestamp in history""" - times = (ts_to_datetime(dt) for dt in self._history.keys()) - return next(times, None) + def get_history_timestamps(self) -> list[datetime]: + """Grab all timestamps in the history""" + return sorted([ts_to_datetime(dt) for dt in self._history.keys()]) def get_user_matches(self, id: int) -> list[int]: return self._users.get(str(id), {}).get(_Key.MATCHES, {})