Move core matching factors to the config file

Bonus changes here were making the config a singleton, fixing some more tests, and rewriting the stress test because it was pissing me off.
Marc Di Luzio 2024-08-11 22:07:43 +01:00
parent a5d7dae851
commit 9043615498
6 changed files with 254 additions and 86 deletions

View file

@@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d
 ## Config
 Matchy is configured by a `config.json` file that takes this format:
-```
+```json
 {
-    "version": 1,
-    "token": "<<github bot token>>",
+    "version" : 1,
+    "token" : "<<github bot token>>",
+    "match" : {
+        "score_factors": {
+            "repeat_role" : 4,
+            "repeat_match" : 8,
+            "extra_member" : 32,
+            "upper_threshold" : 64
+        }
+    }
 }
 ```
+Only token and version are required. See [`py/config.py`](py/config.py) for explanations of any of these.
 ## TODO
 * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
-* Move more constants to the config
 * Add scheduling functionality
 * Fix logging in some sub files (doesn't seem to actually be output?)
 * Improve the weirdo

View file

@@ -1,5 +1,5 @@
 """Very simple config loading library"""
-from schema import Schema, And, Use
+from schema import Schema, And, Use, Optional
 import files
 import os
 import logging
@@ -13,10 +13,18 @@ _FILE = "config.json"
 _VERSION = 1


-class _Keys():
+class _Key():
     TOKEN = "token"
     VERSION = "version"
+    MATCH = "match"
+    SCORE_FACTORS = "score_factors"
+    REPEAT_ROLE = "repeat_role"
+    REPEAT_MATCH = "repeat_match"
+    EXTRA_MEMBER = "extra_member"
+    UPPER_THRESHOLD = "upper_threshold"

     # Removed
     OWNERS = "owners"
@@ -24,10 +32,21 @@ class _Keys():
 _SCHEMA = Schema(
     {
         # The current version
-        _Keys.VERSION: And(Use(int)),
+        _Key.VERSION: And(Use(int)),
         # Discord bot token
-        _Keys.TOKEN: And(Use(str)),
+        _Key.TOKEN: And(Use(str)),
+        # Settings for the match algorithm, see matching.py for explanations on usage
+        Optional(_Key.MATCH): {
+            Optional(_Key.SCORE_FACTORS): {
+                Optional(_Key.REPEAT_ROLE): And(Use(int)),
+                Optional(_Key.REPEAT_MATCH): And(Use(int)),
+                Optional(_Key.EXTRA_MEMBER): And(Use(int)),
+                Optional(_Key.UPPER_THRESHOLD): And(Use(int)),
+            }
+        }
     }
 )
@@ -35,7 +54,7 @@ _SCHEMA = Schema(
 def _migrate_to_v1(d: dict):
     # Owners moved to History in v1
     # Note: owners will be required to be re-added to the state.json
-    owners = d.pop(_Keys.OWNERS)
+    owners = d.pop(_Key.OWNERS)
     logger.warn(
         "Migration removed owners from config, these must be re-added to the state.json")
     logger.warn("Owners: %s", owners)
@@ -47,7 +66,29 @@ _MIGRATIONS = [
 ]


-class Config():
+class _ScoreFactors():
+    def __init__(self, data: dict):
+        """Initialise and validate the config"""
+        self._dict = data
+
+    @property
+    def repeat_role(self) -> int:
+        return self._dict.get(_Key.REPEAT_ROLE, None)
+
+    @property
+    def repeat_match(self) -> int:
+        return self._dict.get(_Key.REPEAT_MATCH, None)
+
+    @property
+    def extra_member(self) -> int:
+        return self._dict.get(_Key.EXTRA_MEMBER, None)
+
+    @property
+    def upper_threshold(self) -> int:
+        return self._dict.get(_Key.UPPER_THRESHOLD, None)
+
+
+class _Config():
     def __init__(self, data: dict):
         """Initialise and validate the config"""
         _SCHEMA.validate(data)
@@ -57,6 +98,10 @@ class Config():
     def token(self) -> str:
         return self._dict["token"]

+    @property
+    def score_factors(self) -> _ScoreFactors:
+        return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {}))
+

 def _migrate(dict: dict):
     """Migrate a dict through versions"""
@@ -66,7 +111,7 @@ def _migrate(dict: dict):
     dict["version"] = _VERSION


-def load_from_file(file: str = _FILE) -> Config:
+def _load_from_file(file: str = _FILE) -> _Config:
     """
     Load the state from a file
     Apply any required migrations
@@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config:
     assert os.path.isfile(file)
     loaded = files.load(file)
     _migrate(loaded)
-    return Config(loaded)
+    return _Config(loaded)
+
+
+# Core config for users to use
+# Singleton as there should only be one, and it's global
+Config = _load_from_file()
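
The `_ScoreFactors` properties above deliberately return `None` rather than raising when a key is missing. A tiny standalone sketch (hypothetical values, not part of the commit) of that `dict.get` pattern:

```python
# A partially filled "score_factors" block, as _ScoreFactors would receive it.
factors = {"repeat_role": 4}

# Present keys come back as configured; absent optional keys come back as None,
# which callers treat as "use the built-in default".
assert factors.get("repeat_role", None) == 4
assert factors.get("upper_threshold", None) is None
```

Because `Config` is now built at import time, callers such as `matching.py` simply `import config` and read `config.Config.score_factors` with no explicit load step.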

View file

@@ -3,20 +3,24 @@ import logging
 from datetime import datetime, timedelta
 from typing import Protocol, runtime_checkable
 import state
+import config

-# Number of days to step forward from the start of history for each match attempt
-_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)


 class _ScoreFactors(int):
-    """Various eligability scoring factors for group meetups"""
-    REPEAT_ROLE = 2**2
-    REPEAT_MATCH = 2**3
-    EXTRA_MEMBER = 2**5
-    # Scores higher than this are fully rejected
-    UPPER_THRESHOLD = 2**6
+    """
+    Score factors used when trying to build up "best fit" groups
+    Matchees are sequentially placed into the lowest scoring available group
+    """
+    # Added for each role the matchee has that another group member has
+    REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2
+    # Added for each member in the group that the matchee has already matched with
+    REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3
+    # Added for each additional member over the set "per group" value
+    EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5
+    # Upper threshold, if the user scores higher than this they will not be placed in that group
+    UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6


 logger = logging.getLogger("matching")
@@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member,
         return rating

     # Add score based on prior matchups of this user
-    rating += sum(m.id in prior_matches for m in group) * \
-        _ScoreFactors.REPEAT_MATCH
+    num_prior = sum(m.id in prior_matches for m in group)
+    rating += num_prior * _ScoreFactors.REPEAT_MATCH

     # Calculate the number of roles that match
     all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr)
@@ -159,7 +163,7 @@ def iterate_all_shifts(list: list):
 def members_to_groups(matchees: list[Member],
-                      hist: state.State = state.State(),
+                      st: state.State = state.State(),
                       per_group: int = 3,
                       allow_fallback: bool = False) -> list[list[Member]]:
     """Generate the groups from the set of matchees"""
@@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member],
     if not matchees:
         return []

-    # Grab the oldest timestamp
-    history_start = hist.get_oldest_timestamp() or datetime.now()
-
-    # Walk from the start of time until now using the timestep increment
-    for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
+    # Walk from the start of history until now trying to match up groups
+    # Or if there's no
+    for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]:

         # Attempt with each starting matchee
         for shifted_matchees in iterate_all_shifts(matchees):

             attempts += 1
             groups = attempt_create_groups(
-                shifted_matchees, hist, oldest_relevant_datetime, per_group)
+                shifted_matchees, st, oldest_relevant_datetime, per_group)

             # Fail the match if our groups aren't big enough
             if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)):
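
Since the class attributes above use `or` for the fallback, a missing (i.e. `None`) config value keeps the previous hard-coded power of two. A minimal sketch of that precedence, using a hypothetical `resolve_factor` helper that is not in the codebase:

```python
def resolve_factor(configured, default):
    """Return the configured score factor, or the legacy default when unset."""
    # Note: because this mirrors the `or` in matching.py, an explicit 0 in the
    # config would also fall back to the default.
    return configured or default


assert resolve_factor(None, 2**3) == 8   # no score_factors entry in config.json
assert resolve_factor(12, 2**3) == 12    # value supplied in config.json
```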

View file

@@ -6,6 +6,8 @@ import pytest
 import random
 import matching
 import state
+import copy
+import itertools

 from datetime import datetime, timedelta
@@ -40,12 +42,16 @@ class Member():
     def roles(self) -> list[Role]:
         return self._roles

+    @roles.setter
+    def roles(self, roles: list[Role]):
+        self._roles = roles
+
     @property
     def id(self) -> int:
         return self._id


-def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int):
+def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int):
     """Inner function to validate the main output of the groups function"""
     groups = matching.members_to_groups(matchees, tmp_state, per_group)
@@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St
 def test_members_to_groups_no_history(matchees, per_group):
     """Test simple group matching works"""
     tmp_state = state.State()
-    inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    members_to_groups_validate(matchees, tmp_state, per_group)


 def items_found_in_lists(list_of_lists, items):
@@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items):
         [
             # Nothing else
         ]
+    ),
+    # Another weird one pulled out of the stress test
+    (
+        [
+            # print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data])
+            {"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [
+                [Member(m) for m in group] for group in groups]}
+            for (ts, groups) in
+            [
+                (
+                    '2024-07-07 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+                        [1, 2, 3, 4, 5, 6, 7, 8]
+                    ]
+                ),
+                (
+                    '2024-07-13 20:25:56.313993',
+                    [
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                        [1]
+                    ]
+                ),
+                (
+                    '2024-06-29 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+                    ]
+                ),
+                (
+                    '2024-06-25 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2]
+                    ]
+                ),
+                (
+                    '2024-07-04 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1, 2, 3, 4, 5, 6, 7]
+                    ]
+                ),
+                (
+                    '2024-07-16 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+                    ]
+                )
+            ]
+        ],
+        [
+            # print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below
+            Member(i, [Role(r) for r in roles]) for (i, roles) in
+            [
+                (10, [1, 2, 3]),
+                (4, [1, 2, 3]),
+                (5, [1, 2]),
+                (13, [1, 2]),
+                (3, [1, 2, 3, 4]),
+                (14, [1]),
+                (6, [1, 2, 3, 4]),
+                (11, [1]),
+                (9, [1]),
+                (1, [1, 2, 3]),
+                (16, [1, 2]),
+                (15, [1, 2]),
+                (2, [1, 2, 3]),
+                (7, [1, 2, 3]),
+                (12, [1, 2]),
+                (8, [1, 2, 3, 4])
+            ]
+        ],
+        5,
+        [
+            # Nothing
+        ]
     )
-], ids=['simple_history', 'fallback', 'example_1', 'example_2'])
+], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3'])
 def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
     """Test more advanced group matching works"""
     tmp_state = state.State()
@@ -215,66 +326,65 @@ test_members_to_groups_with_history(history_data, matchees, per_group, check
     for d in history_data:
         tmp_state.log_groups(d["groups"], d["ts"])

-    groups = inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    groups = members_to_groups_validate(matchees, tmp_state, per_group)

     # Run the custom validate functions
     for check in checks:
         assert check(groups)


-# Allows controling of the scale of the stress test
-# Try and keep it under 10s when committed, but otherwise these numbers can be fudged
-# Especially to test a wide range of weird situations
-_STRESS_TEST_RANGE_PER_GROUP = range(2, 6)
-_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5)
-_STRESS_TEST_RANGE_NUM_HISTORIES = range(8)
-
-
-def test_members_to_groups_stress_test():
-    """stress test firing significant random data at the code"""
-
-    # Use a stable rand, feel free to adjust this if needed but this lets the test be stable
-    rand = random.Random(123)
-
-    # Slowly ramp up the group size
-    for per_group in _STRESS_TEST_RANGE_PER_GROUP:
-
-        # Slowly ramp a randomized shuffled list of members with randomised roles
-        for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS:
-            matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))])
-                        for i in range(1, rand.randint(2, num_members*10 + 1))]
-            rand.shuffle(matchees)
-
-            for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES:
-
-                # Generate some super random history
-                # Start some time from now to the past
-                time = datetime.now() - timedelta(days=rand.randint(0, num_history*5))
-                history_data = []
-                for _ in range(0, num_history):
-                    run = {
-                        "ts": time
-                    }
-                    groups = []
-                    for y in range(1, num_history):
-                        groups.append([Member(i)
-                                       for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))])
-                    run["groups"] = groups
-                    history_data.append(run)
-
-                    # Step some time backwards in time
-                    time -= timedelta(days=rand.randint(1, num_history))
-
-                # No guarantees on history data order so make it a little harder for matchy
-                rand.shuffle(history_data)
-
-                # Replay the history
-                tmp_state = state.State()
-                for d in history_data:
-                    tmp_state.log_groups(d["groups"], d["ts"])
-
-                inner_validate_members_to_groups(
-                    matchees, tmp_state, per_group)
+def random_chunk(li, min_chunk, max_chunk, rand):
+    """
+    "Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks
+    """
+    it = iter(li)
+    while True:
+        nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk)))
+        if nxt:
+            yield nxt
+        else:
+            break
+
+
+# Generate a large set of "interesting" tests that replay a fake history onto random people
+# Increase these numbers for some extreme programming
+@pytest.mark.parametrize("per_group, num_members, num_history", (
+    (per_group, num_members, num_history)
+    for per_group in range(2, 4)
+    for num_members in range(6, 24, 3)
+    for num_history in range(0, 4)))
+def test_stess_random_groups(per_group, num_members, num_history):
+    """Run a randomised test based on the input"""
+
+    # Seed the random based on the inputs paired with primes
+    # Ensures the test has interesting fake data, but is stable
+    rand = random.Random(per_group*3 + num_members*5 + num_history*7)
+
+    # Start with a list of all possible members
+    possible_members = [Member(i) for i in range(num_members*2)]
+    for member in possible_members:
+        # Give each member 3 random roles from 1-7
+        member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)]
+
+    # Grab a subset for our members
+    rand.shuffle(possible_members)
+    members = copy.deepcopy(possible_members[:num_members])
+
+    history_data = {}
+    for i in range(num_history):
+        possible_members = copy.deepcopy(possible_members)
+        rand.shuffle(possible_members)
+        history_data[datetime.now() - timedelta(days=i)] = [
+            chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand)
+        ]
+
+    replay_state = state.State()
+
+    # Replay the history
+    for ts, groups in history_data.items():
+        replay_state.log_groups(groups, ts)
+
+    members_to_groups_validate(members, replay_state, per_group)


 def test_auth_scopes():
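
As a quick standalone sanity check (not part of the commit), the parametrize grid above expands to 2 × 6 × 4 = 48 generated cases:

```python
import itertools

# Mirror the three ranges used in the @pytest.mark.parametrize generator.
cases = list(itertools.product(range(2, 4), range(6, 24, 3), range(0, 4)))
assert len(cases) == 48
```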

View file

@@ -12,9 +12,7 @@ import re
 STATE_FILE = "state.json"
-CONFIG_FILE = "config.json"

-Config = config.load_from_file(CONFIG_FILE)
 State = state.load_from_file(STATE_FILE)

 logger = logging.getLogger("matchy")
@@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int):
 if __name__ == "__main__":
     handler = logging.StreamHandler()
-    bot.run(Config.token, log_handler=handler, root_logger=True)
+    bot.run(config.Config.token, log_handler=handler, root_logger=True)

View file

@@ -130,10 +130,9 @@ class State():
         dict = self._dict
         _SCHEMA.validate(dict)

-    def get_oldest_timestamp(self) -> datetime:
-        """Grab the oldest timestamp in history"""
-        times = (ts_to_datetime(dt) for dt in self._history.keys())
-        return next(times, None)
+    def get_history_timestamps(self) -> list[datetime]:
+        """Grab all timestamps in the history"""
+        return sorted([ts_to_datetime(dt) for dt in self._history.keys()])

     def get_user_matches(self, id: int) -> list[int]:
         return self._users.get(str(id), {}).get(_Key.MATCHES, {})
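
For context on how `matching.py` consumes this, a standalone illustration (fabricated timestamps, not the real `State` class) of the cutoff sequence the new accessor feeds into `members_to_groups`: every recorded history timestamp sorted oldest-first, plus `datetime.now()` as a final attempt.

```python
from datetime import datetime, timedelta

# Pretend these came back from State.get_history_timestamps() (already sorted).
recorded = sorted(datetime.now() - timedelta(days=d) for d in (14, 7, 1))

# matching.py appends now() so there is always at least one cutoff to try.
for cutoff in recorded + [datetime.now()]:
    pass  # each cutoff would be handed to attempt_create_groups(...)
```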