From 9043615498ba5e14b66743235f4e237d5b5be31a Mon Sep 17 00:00:00 2001
From: Marc Di Luzio <marc.diluzio@gmail.com>
Date: Sun, 11 Aug 2024 22:07:43 +0100
Subject: [PATCH] Move core matching factors to the config file

Bonus changes here were making the config a singleton, fixing some more tests and then re-writing the stress test because it was pissing me off.
---
 README.md           |  17 +++-
 py/config.py        |  66 ++++++++++++--
 py/matching.py      |  40 +++++----
 py/matching_test.py | 206 +++++++++++++++++++++++++++++++++-----------
 py/matchy.py        |   4 +-
 py/state.py         |   7 +-
 6 files changed, 254 insertions(+), 86 deletions(-)
diff --git a/README.md b/README.md
index f3f6dde..2d1e418 100644
--- a/README.md
+++ b/README.md
@@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d
 
 ## Config
 Matchy is configured by a `config.json` file that takes this format:
-```
+```json
 {
-    "version": 1,
-    "token": "<<github bot token>>",
+    "version" : 1,
+    "token" : "<<github bot token>>",
+
+    "match" : {
+        "score_factors": {
+            "repeat_role" : 4,
+            "repeat_match" : 8,
+            "extra_member" : 32,
+            "upper_threshold" : 64
+        }
+    }
 }
 ```
+Only token and version are required. See [`py/config.py`](for explanations for any of these)
 
 ## TODO
 * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
-* Move more constants to the config
 * Add scheduling functionality
 * Fix logging in some sub files (doesn't seem to actually be output?)
 * Improve the weirdo
\ No newline at end of file
diff --git a/py/config.py b/py/config.py
index 2719d73..c6a4ca9 100644
--- a/py/config.py
+++ b/py/config.py
@@ -1,5 +1,5 @@
 """Very simple config loading library"""
-from schema import Schema, And, Use
+from schema import Schema, And, Use, Optional
 import files
 import os
 import logging
@@ -13,10 +13,18 @@ _FILE = "config.json"
 _VERSION = 1
 
 
-class _Keys():
+class _Key():
     TOKEN = "token"
     VERSION = "version"
 
+    MATCH = "match"
+
+    SCORE_FACTORS = "score_factors"
+    REPEAT_ROLE = "repeat_role"
+    REPEAT_MATCH = "repeat_match"
+    EXTRA_MEMBER = "extra_member"
+    UPPER_THRESHOLD = "upper_threshold"
+
     # Removed
     OWNERS = "owners"
 
@@ -24,10 +32,21 @@ class _Keys():
 _SCHEMA = Schema(
     {
         # The current version
-        _Keys.VERSION: And(Use(int)),
+        _Key.VERSION: And(Use(int)),
 
         # Discord bot token
-        _Keys.TOKEN: And(Use(str)),
+        _Key.TOKEN: And(Use(str)),
+
+        # Settings for the match algorithmn, see matching.py for explanations on usage
+        Optional(_Key.MATCH): {
+            Optional(_Key.SCORE_FACTORS): {
+
+                Optional(_Key.REPEAT_ROLE): And(Use(int)),
+                Optional(_Key.REPEAT_MATCH): And(Use(int)),
+                Optional(_Key.EXTRA_MEMBER): And(Use(int)),
+                Optional(_Key.UPPER_THRESHOLD): And(Use(int)),
+            }
+        }
     }
 )
 
@@ -35,7 +54,7 @@ _SCHEMA = Schema(
 def _migrate_to_v1(d: dict):
     # Owners moved to History in v1
     # Note: owners will be required to be re-added to the state.json
-    owners = d.pop(_Keys.OWNERS)
+    owners = d.pop(_Key.OWNERS)
     logger.warn(
         "Migration removed owners from config, these must be re-added to the state.json")
     logger.warn("Owners: %s", owners)
@@ -47,7 +66,29 @@ _MIGRATIONS = [
 ]
 
 
-class Config():
+class _ScoreFactors():
+    def __init__(self, data: dict):
+        """Initialise and validate the config"""
+        self._dict = data
+
+    @property
+    def repeat_role(self) -> int:
+        return self._dict.get(_Key.REPEAT_ROLE, None)
+
+    @property
+    def repeat_match(self) -> int:
+        return self._dict.get(_Key.REPEAT_MATCH, None)
+
+    @property
+    def extra_member(self) -> int:
+        return self._dict.get(_Key.EXTRA_MEMBER, None)
+
+    @property
+    def upper_threshold(self) -> int:
+        return self._dict.get(_Key.UPPER_THRESHOLD, None)
+
+
+class _Config():
     def __init__(self, data: dict):
         """Initialise and validate the config"""
         _SCHEMA.validate(data)
@@ -57,6 +98,10 @@ class Config():
     def token(self) -> str:
         return self._dict["token"]
 
+    @property
+    def score_factors(self) -> _ScoreFactors:
+        return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {}))
+
 
 def _migrate(dict: dict):
     """Migrate a dict through versions"""
@@ -66,7 +111,7 @@ def _migrate(dict: dict):
         dict["version"] = _VERSION
 
 
-def load_from_file(file: str = _FILE) -> Config:
+def _load_from_file(file: str = _FILE) -> _Config:
     """
     Load the state from a file
     Apply any required migrations
@@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config:
     assert os.path.isfile(file)
     loaded = files.load(file)
     _migrate(loaded)
-    return Config(loaded)
+    return _Config(loaded)
+
+
+# Core config for users to use
+# Singleton as there should only be one, and it's global
+Config = _load_from_file()
diff --git a/py/matching.py b/py/matching.py
index 4f9b31d..cef715e 100644
--- a/py/matching.py
+++ b/py/matching.py
@@ -3,20 +3,24 @@ import logging
 from datetime import datetime, timedelta
 from typing import Protocol, runtime_checkable
 import state
-
-
-# Number of days to step forward from the start of history for each match attempt
-_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)
+import config
 
 
 class _ScoreFactors(int):
-    """Various eligability scoring factors for group meetups"""
-    REPEAT_ROLE = 2**2
-    REPEAT_MATCH = 2**3
-    EXTRA_MEMBER = 2**5
+    """
+    Score factors used when trying to build up "best fit" groups
+    Matchees are sequentially placed into the lowest scoring available group
+    """
 
-    # Scores higher than this are fully rejected
-    UPPER_THRESHOLD = 2**6
+    # Added for each role the matchee has that another group member has
+    REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2
+    # Added for each member in the group that the matchee has already matched with
+    REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3
+    # Added for each additional member over the set "per group" value
+    EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5
+
+    # Upper threshold, if the user scores higher than this they will not be placed in that group
+    UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6
 
 
 logger = logging.getLogger("matching")
@@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member,
         return rating
 
     # Add score based on prior matchups of this user
-    rating += sum(m.id in prior_matches for m in group) * \
-        _ScoreFactors.REPEAT_MATCH
+    num_prior = sum(m.id in prior_matches for m in group)
+    rating += num_prior * _ScoreFactors.REPEAT_MATCH
 
     # Calculate the number of roles that match
     all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr)
@@ -159,7 +163,7 @@ def iterate_all_shifts(list: list):
 
 
 def members_to_groups(matchees: list[Member],
-                      hist: state.State = state.State(),
+                      st: state.State = state.State(),
                       per_group: int = 3,
                       allow_fallback: bool = False) -> list[list[Member]]:
     """Generate the groups from the set of matchees"""
@@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member],
     if not matchees:
         return []
 
-    # Grab the oldest timestamp
-    history_start = hist.get_oldest_timestamp() or datetime.now()
-
-    # Walk from the start of time until now using the timestep increment
-    for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
+    # Walk from the start of history until now trying to match up groups
+    # Or if there's no 
+    for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]:
 
         # Attempt with each starting matchee
         for shifted_matchees in iterate_all_shifts(matchees):
 
             attempts += 1
             groups = attempt_create_groups(
-                shifted_matchees, hist, oldest_relevant_datetime, per_group)
+                shifted_matchees, st, oldest_relevant_datetime, per_group)
 
             # Fail the match if our groups aren't big enough
             if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)):
diff --git a/py/matching_test.py b/py/matching_test.py
index 742d3a6..fa06560 100644
--- a/py/matching_test.py
+++ b/py/matching_test.py
@@ -6,6 +6,8 @@ import pytest
 import random
 import matching
 import state
+import copy
+import itertools
 from datetime import datetime, timedelta
 
 
@@ -40,12 +42,16 @@ class Member():
     def roles(self) -> list[Role]:
         return self._roles
 
+    @roles.setter
+    def roles(self, roles: list[Role]):
+        self._roles = roles
+
     @property
     def id(self) -> int:
         return self._id
 
 
-def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int):
+def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int):
     """Inner function to validate the main output of the groups function"""
     groups = matching.members_to_groups(matchees, tmp_state, per_group)
 
@@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St
 def test_members_to_groups_no_history(matchees, per_group):
     """Test simple group matching works"""
     tmp_state = state.State()
-    inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    members_to_groups_validate(matchees, tmp_state, per_group)
 
 
 def items_found_in_lists(list_of_lists, items):
@@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items):
         [
             # Nothing else
         ]
+    ),
+    # Another weird one pulled out of the stress test
+    (
+        [
+            # print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data])
+            {"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [
+                [Member(m) for m in group] for group in groups]}
+            for (ts, groups) in
+            [
+                (
+                    '2024-07-07 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+                        [1, 2, 3, 4, 5, 6, 7, 8]
+                    ]
+                ),
+                (
+                    '2024-07-13 20:25:56.313993',
+                    [
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                        [1]
+                    ]
+                ),
+                (
+                    '2024-06-29 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3, 4, 5, 6, 7],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                            13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20]
+                    ]
+                ),
+                (
+                    '2024-06-25 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18],
+                        [1, 2],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+                        [1, 2]
+                    ]
+                ),
+                (
+                    '2024-07-04 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5],
+                        [1, 2, 3],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        [1, 2, 3, 4, 5, 6, 7]
+                    ]
+                ),
+                (
+                    '2024-07-16 20:25:56.313993',
+                    [
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                            13, 14, 15, 16, 17, 18, 19, 20],
+                        [1, 2, 3, 4, 5, 6],
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18]
+                    ]
+                )
+            ]
+        ],
+        [
+            # print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below
+            Member(i, [Role(r) for r in roles]) for (i, roles) in
+            [
+                (10, [1, 2, 3]),
+                (4, [1, 2, 3]),
+                (5, [1, 2]),
+                (13, [1, 2]),
+                (3, [1, 2, 3, 4]),
+                (14, [1]),
+                (6, [1, 2, 3, 4]),
+                (11, [1]),
+                (9, [1]),
+                (1, [1, 2, 3]),
+                (16, [1, 2]),
+                (15, [1, 2]),
+                (2, [1, 2, 3]),
+                (7, [1, 2, 3]),
+                (12, [1, 2]),
+                (8, [1, 2, 3, 4])
+            ]
+        ],
+        5,
+        [
+            # Nothing
+        ]
     )
-], ids=['simple_history', 'fallback', 'example_1', 'example_2'])
+], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3'])
 def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
     """Test more advanced group matching works"""
     tmp_state = state.State()
@@ -215,66 +326,65 @@ def test_members_to_groups_with_history(history_data, matchees, per_group, check
     for d in history_data:
         tmp_state.log_groups(d["groups"], d["ts"])
 
-    groups = inner_validate_members_to_groups(matchees, tmp_state, per_group)
+    groups = members_to_groups_validate(matchees, tmp_state, per_group)
 
     # Run the custom validate functions
     for check in checks:
         assert check(groups)
 
 
-# Allows controling of the scale of the stress test
-# Try and keep it under 10s when committed, but otherwise these numbers can be fudged
-# Especially to test a wide range of weird situations
-_STRESS_TEST_RANGE_PER_GROUP = range(2, 6)
-_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5)
-_STRESS_TEST_RANGE_NUM_HISTORIES = range(8)
+def random_chunk(li, min_chunk, max_chunk, rand):
+    """
+    "Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks
+    """
+    it = iter(li)
+    while True:
+        nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk)))
+        if nxt:
+            yield nxt
+        else:
+            break
 
 
-def test_members_to_groups_stress_test():
-    """stress test firing significant random data at the code"""
+# Generate a large set of "interesting" tests that replay a fake history onto random people
+# Increase these numbers for some extreme programming
+@pytest.mark.parametrize("per_group, num_members, num_history", (
+    (per_group, num_members, num_history)
+    for per_group in range(2, 4)
+    for num_members in range(6, 24, 3)
+    for num_history in range(0, 4)))
+def test_stess_random_groups(per_group, num_members, num_history):
+    """Run a randomised test based on the input"""
 
-    # Use a stable rand, feel free to adjust this if needed but this lets the test be stable
-    rand = random.Random(123)
+    # Seed the random based on the inputs paird with primes
+    # Ensures the test has interesting fake data, but is stable
+    rand = random.Random(per_group*3 + num_members*5 + num_history*7)
 
-    # Slowly ramp up the group size
-    for per_group in _STRESS_TEST_RANGE_PER_GROUP:
+    # Start with a list of all possible members
+    possible_members = [Member(i) for i in range(num_members*2)]
+    for member in possible_members:
+        # Give each member 3 random roles from 1-7
+        member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)]
 
-        # Slowly ramp a randomized shuffled list of members with randomised roles
-        for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS:
-            matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))])
-                        for i in range(1, rand.randint(2, num_members*10 + 1))]
-            rand.shuffle(matchees)
+    # Grab a subset for our members
+    rand.shuffle(possible_members)
+    members = copy.deepcopy(possible_members[:num_members])
 
-            for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES:
+    history_data = {}
+    for i in range(num_history):
+        possible_members = copy.deepcopy(possible_members)
+        rand.shuffle(possible_members)
+        history_data[datetime.now() - timedelta(days=i)] = [
+            chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand)
+        ]
 
-                # Generate some super random history
-                # Start some time from now to the past
-                time = datetime.now() - timedelta(days=rand.randint(0, num_history*5))
-                history_data = []
-                for _ in range(0, num_history):
-                    run = {
-                        "ts": time
-                    }
-                    groups = []
-                    for y in range(1, num_history):
-                        groups.append([Member(i)
-                                       for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))])
-                    run["groups"] = groups
-                    history_data.append(run)
+    replay_state = state.State()
 
-                    # Step some time backwards in time
-                    time -= timedelta(days=rand.randint(1, num_history))
+    # Replay the history
+    for ts, groups in history_data.items():
+        replay_state.log_groups(groups, ts)
 
-                # No guarantees on history data order so make it a little harder for matchy
-                rand.shuffle(history_data)
-
-                # Replay the history
-                tmp_state = state.State()
-                for d in history_data:
-                    tmp_state.log_groups(d["groups"], d["ts"])
-
-                inner_validate_members_to_groups(
-                    matchees, tmp_state, per_group)
+    members_to_groups_validate(members, replay_state, per_group)
 
 
 def test_auth_scopes():
diff --git a/py/matchy.py b/py/matchy.py
index f43d1f9..fe5fd9c 100755
--- a/py/matchy.py
+++ b/py/matchy.py
@@ -12,9 +12,7 @@ import re
 
 
 STATE_FILE = "state.json"
-CONFIG_FILE = "config.json"
 
-Config = config.load_from_file(CONFIG_FILE)
 State = state.load_from_file(STATE_FILE)
 
 logger = logging.getLogger("matchy")
@@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int):
 
 if __name__ == "__main__":
     handler = logging.StreamHandler()
-    bot.run(Config.token, log_handler=handler, root_logger=True)
+    bot.run(config.Config.token, log_handler=handler, root_logger=True)
diff --git a/py/state.py b/py/state.py
index 859b999..f75be41 100644
--- a/py/state.py
+++ b/py/state.py
@@ -130,10 +130,9 @@ class State():
             dict = self._dict
         _SCHEMA.validate(dict)
 
-    def get_oldest_timestamp(self) -> datetime:
-        """Grab the oldest timestamp in history"""
-        times = (ts_to_datetime(dt) for dt in self._history.keys())
-        return next(times, None)
+    def get_history_timestamps(self) -> list[datetime]:
+        """Grab all timestamps in the history"""
+        return sorted([ts_to_datetime(dt) for dt in self._history.keys()])
 
     def get_user_matches(self, id: int) -> list[int]:
         return self._users.get(str(id), {}).get(_Key.MATCHES, {})