From 874a24dd1d5fc7a773dc53f2ceb9f91837f78cb6 Mon Sep 17 00:00:00 2001 From: Marc Di Luzio Date: Sat, 10 Aug 2024 15:12:14 +0100 Subject: [PATCH] Implement a history-based matching algorythm The bot will attempt to keep producing groups with entirely unique matches based on the full history of matches until it can't. It'll then step forward and ignore a week of history and try again, ignoring more history until no history is left --- .github/workflows/pytest.yml | 2 +- README.md | 2 +- config.py | 30 +++---- files.py | 10 ++- history.py | 145 +++++++++++++++++++++------------ matching.py | 154 +++++++++++++++++++++++++++++------ matching_test.py | 147 +++++++++++++++++++++++++++++++-- matchy.py | 2 +- 8 files changed, 388 insertions(+), 104 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c6a4ac6..eff1511 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -23,4 +23,4 @@ jobs: flake8 --max-line-length 120 $(git ls-files '*.py') - name: Run tests with pytest run: | - pytest + pytest --timeout=60 diff --git a/README.md b/README.md index 255c780..36f1665 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,6 @@ User IDs can be grabbed by turning on Discord's developer mode and right clickin ## TODO * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html) -* Add tracking of past groups to ensure unique pairings +* Add matching based on unique rolls? * Add scheduling functionality * Improve the weirdo \ No newline at end of file diff --git a/config.py b/config.py index 66fb52e..9882d99 100644 --- a/config.py +++ b/config.py @@ -2,11 +2,22 @@ from schema import Schema, And, Use import files -FILE = "config.json" +_FILE = "config.json" +_SCHEMA = Schema( + { + # Discord bot token + "token": And(Use(str)), + + # ids of owners authorised to use owner-only commands + "owners": And(Use(list[int])), + } +) class Config(): def __init__(self, data: dict): + """Initialise and validate the config""" + _SCHEMA.validate(data) self.__dict__ = data @property @@ -16,23 +27,12 @@ class Config(): @property def owners(self) -> list[int]: return self.__dict__["owners"] - + def reload(self) -> None: """Reload the config back into the dict""" self.__dict__ = load().__dict__ def load() -> Config: - """Load the config and validate it""" - config = files.load(FILE) - Schema( - { - # Discord bot token - "token": And(Use(str)), - - # ids of owners authorised to use owner-only commands - "owners": And(Use(list[int])), - } - ).validate(config) - - return Config(config) + """Load the config""" + return Config(files.load(_FILE)) diff --git a/files.py b/files.py index 3a76b6b..a076286 100644 --- a/files.py +++ b/files.py @@ -1,5 +1,6 @@ """File operation helpers""" import json +import shutil def load(file: str) -> dict: @@ -9,6 +10,11 @@ def load(file: str) -> dict: def save(file: str, content: dict): - """Save out a content dictionary to a file""" - with open(file, "w") as f: + """ + Save out a content dictionary to a file + Stores it in an intermediary file first incase the dump fails + """ + intermediate = file + ".nxt" + with open(intermediate, "w") as f: json.dump(content, f, indent=4) + shutil.move(intermediate, file) diff --git a/history.py b/history.py index f0dbfbc..06b77e4 100644 --- a/history.py +++ b/history.py @@ -1,11 +1,43 @@ """Store matching history""" import os -import time +from datetime import datetime from schema import Schema, And, Use, Optional from typing import Protocol import files +import copy -FILE = "history.json" +_FILE = "history.json" + +# Warning: Changing any of the below needs proper thought to ensure backwards compatibility +_DEFAULT_DICT = { + "history": {}, + "matchees": {} +} +_TIME_FORMAT = "%a %b %d %H:%M:%S %Y" +_SCHEMA = Schema( + { + Optional("history"): { + Optional(str): { # a datetime + "groups": [ + { + "members": [ + # The ID of each matchee in the match + And(Use(int)) + ] + } + ] + } + }, + Optional("matchees"): { + Optional(str): { + Optional("matches"): { + # Matchee ID and Datetime pair + Optional(str): And(Use(str)) + } + } + } + } +) class Member(Protocol): @@ -14,13 +46,25 @@ class Member(Protocol): pass +def ts_to_datetime(ts: str) -> datetime: + """Convert a ts to datetime using the history format""" + return datetime.strptime(ts, _TIME_FORMAT) + + +def validate(dict: dict): + """Initialise and validate the history""" + _SCHEMA.validate(dict) + + class History(): - def __init__(self, data: dict): - self.__dict__ = data + def __init__(self, data: dict = _DEFAULT_DICT): + """Initialise and validate the history""" + validate(data) + self.__dict__ = copy.deepcopy(data) @property - def groups(self) -> list[dict]: - return self.__dict__["groups"] + def history(self) -> list[dict]: + return self.__dict__["history"] @property def matchees(self) -> dict[str, dict]: @@ -28,55 +72,54 @@ class History(): def save(self) -> None: """Save out the history""" - files.save(FILE, self.__dict__) + files.save(_FILE, self.__dict__) + + def oldest(self) -> datetime: + """Grab the oldest timestamp in history""" + if not self.history: + return None + times = (ts_to_datetime(dt) for dt in self.history.keys()) + return sorted(times)[0] + + def log_groups_to_history(self, groups: list[list[Member]], ts: datetime = datetime.now()) -> None: + """Log the groups""" + tmp_history = History(self.__dict__) + ts = datetime.strftime(ts, _TIME_FORMAT) + + # Grab or create the hitory item for this set of groups + history_item = {} + tmp_history.history[ts] = history_item + history_item_groups = [] + history_item["groups"] = history_item_groups + + for group in groups: + + # Add the group data + history_item_groups.append({ + "members": list(m.id for m in group) + }) + + # Update the matchee data with the matches + for m in group: + matchee = tmp_history.matchees.get(str(m.id), {}) + matchee_matches = matchee.get("matches", {}) + + for o in (o for o in group if o.id != m.id): + matchee_matches[str(o.id)] = ts + + matchee["matches"] = matchee_matches + tmp_history.matchees[str(m.id)] = matchee + + # Validate before storing the result + validate(self.__dict__) + self.__dict__ = tmp_history.__dict__ def save_groups_to_history(self, groups: list[list[Member]]) -> None: """Save out the groups to the history file""" - ts = time.time() - for group in groups: - # Add the group - self.groups.append({ - "ts": ts, - "matchees": list(m.id for m in group) - }) - # Add the matches to the matchee data - for m in group: - matchee = self.matchees.get(str(m.id), {"matches": []}) - for o in (o for o in group if o.id != m.id): - matchee["matches"].append({"ts": ts, "id": o.id}) - self.matchees[str(m.id)] = matchee - + self.log_groups_to_history(groups) self.save() def load() -> History: - """Load the history and validate it""" - history = files.load(FILE) if os.path.isfile(FILE) else { - "groups": [], - "matchees": {} - } - Schema( - { - Optional("groups"): [ - { - "ts": And(Use(str)), - "matchees": [ - And(Use(int)) - ] - } - ], - Optional("matchees"): { - Optional(str): { - "matches": [ - { - "ts": And(Use(str)), - "id": And(Use(int)), - } - ] - } - - } - } - ).validate(history) - - return History(history) + """Load the history""" + return History(files.load(_FILE) if os.path.isfile(_FILE) else _DEFAULT_DICT) diff --git a/matching.py b/matching.py index d203b6c..f493221 100644 --- a/matching.py +++ b/matching.py @@ -1,27 +1,145 @@ """Utility functions for matchy""" -import random -from typing import Protocol +import logging +from datetime import datetime, timedelta +from typing import Protocol, runtime_checkable +import history +# Number of days to step forward from the start of history for each match attempt +_ATTEMPT_RELEVANCY_STEP = timedelta(days=7) + +# Attempts for each of those time periods +_ATTEMPTS_PER_TIME = 3 + +# Mamum attempts worth taking +_MAX_ATTEMPTS = _ATTEMPTS_PER_TIME*10 + +logger = logging.getLogger("matching") +logger.setLevel(logging.INFO) + + +@runtime_checkable class Member(Protocol): + @property + def mention(self) -> str: + pass + @property def id(self) -> int: pass -def members_to_groups(matchees: list[Member], - per_group: int) -> list[list[Member]]: - """Generate the groups from the set of matchees""" - random.shuffle(matchees) - num_groups = max(len(matchees)//per_group, 1) +@runtime_checkable +class Role(Protocol): + @property + def name(self) -> str: + pass + + +@runtime_checkable +class Guild(Protocol): + @property + def roles(self) -> list[Role]: + pass + + +def members_to_groups_simple(matchees: list[Member], num_groups: int) -> tuple[bool, list[list[Member]]]: + """Super simple group matching, literally no logic""" return [matchees[i::num_groups] for i in range(num_groups)] -class Member(Protocol): - """Protocol for the type of Member""" - @property - def mention(self) -> str: - pass +def circular_iterator(lst, start_index): + for i in range(start_index, len(lst)): + yield i, lst[i] + for i in range(0, start_index): + yield i, lst[i] + + +def attempt_create_groups(matchees: list[Member], + hist: history.History, + oldest_relevant_ts: datetime, + num_groups: int) -> tuple[bool, list[list[Member]]]: + """History aware group matching""" + + # Set up the groups in place + groups = list([] for _ in range(num_groups)) + + matchees_left = matchees.copy() + + # Sequentially try and fit each matchy into groups one by one + current_group = 0 + while matchees_left: + # Get the next matchee to place + matchee = matchees_left.pop() + matchee_matches = hist.matchees.get( + str(matchee.id), {}).get("matches", {}) + relevant_matches = list(int(id) for id, ts in matchee_matches.items() + if history.ts_to_datetime(ts) >= oldest_relevant_ts) + + # Try every single group from the current group onwards + # Progressing through the groups like this ensures we slowly fill them up with compatible people + added = False + for idx, group in circular_iterator(groups, current_group): + current_group = idx # Track the current group + + # Current compatibilty is simply whether or not the group has any members with previous matches in it + if not any(m.id in relevant_matches for m in group): + group.append(matchee) + added = True + break + + # If we failed to add this matchee, bail on the group creation as it could not be done + if not added: + return None + + # Move on to the next group + current_group += 1 + if current_group >= len(groups): + current_group = 0 + + return groups + + +def members_to_groups(matchees: list[Member], + hist: history.History = history.History(), + per_group: int = 3, max_attempts: int = _MAX_ATTEMPTS) -> list[list[Member]]: + """Generate the groups from the set of matchees""" + num_groups = max(len(matchees)//per_group, 1) + attempts = max_attempts + + # Only both with the complicated matching if we have a history + # TODO: When matching takes into account more than history this should change + if not hist.history: + logger.info("No history so matched groups with simple method") + return members_to_groups_simple(matchees, num_groups) + + # Grab the oldest timestamp + oldest_relevant_datetime = hist.oldest() + + # Loop until we find a valid set of groups + while attempts: + attempts -= 1 + + groups = attempt_create_groups( + matchees, hist, oldest_relevant_datetime, num_groups) + + if groups: + logger.info("Matched groups after %s attempt(s)", + _MAX_ATTEMPTS - attempts) + return groups + + # In case we still don't have groups we should progress and + # walk the oldest relevant timestamp forward a week + # Stop bothering if we've gone beyond today + if attempts % _ATTEMPTS_PER_TIME == 0: + oldest_relevant_datetime += _ATTEMPT_RELEVANCY_STEP + if oldest_relevant_datetime > datetime.now(): + break + + # If we've still failed, just use the simple method + logger.info("Fell back to simple groups after %s attempt(s)", + _MAX_ATTEMPTS - attempts) + return members_to_groups_simple(matchees, num_groups) def group_to_message(group: list[Member]) -> str: @@ -34,18 +152,6 @@ def group_to_message(group: list[Member]) -> str: return f"Matched up {mentions}!" -class Role(Protocol): - @property - def name(self) -> str: - pass - - -class Guild(Protocol): - @property - def roles(self) -> list[Role]: - pass - - def get_role_from_guild(guild: Guild, role: str) -> Role: """Find a role in a guild""" return next((r for r in guild.roles if r.name == role), None) diff --git a/matching_test.py b/matching_test.py index c59a7ec..405a1f5 100644 --- a/matching_test.py +++ b/matching_test.py @@ -4,18 +4,147 @@ import discord import pytest import matching +import history +from datetime import datetime, timedelta + + +def test_protocols(): + """Verify the protocols we're using match the discord ones""" + assert isinstance(discord.Member, matching.Member) + assert isinstance(discord.Guild, matching.Guild) + assert isinstance(discord.Role, matching.Role) + + +class TestMember(): + def __init__(self, id: int): + self._id = id + + @property + def mention(self) -> str: + return f"<@{self._id}>" + + @property + def id(self) -> int: + return self._id + + @id.setter + def id(self, value): + self._id = value @pytest.mark.parametrize("matchees, per_group", [ - ([discord.Member.__new__(discord.Member)] * 100, 3), - ([discord.Member.__new__(discord.Member)] * 12, 5), - ([discord.Member.__new__(discord.Member)] * 11, 2), - ([discord.Member.__new__(discord.Member)] * 356, 8), + # Simplest test possible + ([TestMember(1)], 1), + + # More requested than we have + ([TestMember(1)], 2), + + # A selection of hyper-simple checks to validate core functionality + ([TestMember(1)] * 100, 3), + ([TestMember(1)] * 12, 5), + ([TestMember(1)] * 11, 2), + ([TestMember(1)] * 356, 8), ]) -def test_matchees_to_groups(matchees, per_group): +def test_matchees_to_groups_no_history(matchees, per_group): """Test simple group matching works""" - groups = matching.members_to_groups(matchees, per_group) + hist = history.History() + core_validate_members_to_groups(matchees, hist, per_group) + + +def items_found_in_lists(list_of_lists, items): + """validates if any sets of items are found in individual lists""" + for sublist in list_of_lists: + if all(item in sublist for item in items): + return True + return False + + +@pytest.mark.parametrize("history_data, matchees, per_group, checks", [ + # Slightly more difficult test + # Describe a history where we previously matched up some people and ensure they don't get rematched + ( + [ + { + "ts": datetime.now() - timedelta(days=1), + "groups": [ + [TestMember(1), TestMember(2)], + [TestMember(3), TestMember(4)], + ] + } + ], + [ + TestMember(1), + TestMember(2), + TestMember(3), + TestMember(4), + ], + 2, + [ + lambda groups: not items_found_in_lists( + groups, [TestMember(1), TestMember(2)]), + lambda groups: not items_found_in_lists( + groups, [TestMember(3), TestMember(4)]) + ] + ), + # Feed the system an "impossible" test + # The function should fall back to ignoring history and still give us something + ( + [ + { + "ts": datetime.now() - timedelta(days=1), + "groups": [ + [TestMember(1), TestMember(2), TestMember(3)], + [TestMember(4), TestMember(5), TestMember(6)], + ] + } + ], + [ + TestMember(1), + TestMember(2), + TestMember(3), + TestMember(4), + TestMember(5), + TestMember(6), + ], + 3, + [ + # Nothing specific to validate + ] + ), + +]) +def test_matchees_to_groups_with_history(history_data, matchees, per_group, checks): + """Test simple group matching works""" + hist = history.History() + + # Replay the history + for d in history_data: + hist.log_groups_to_history(d["groups"], d["ts"]) + + groups = core_validate_members_to_groups(matchees, hist, per_group) + + # Run the custom validate functions + for check in checks: + assert check(groups) + + +def core_validate_members_to_groups(matchees: list[TestMember], hist: history.History, per_group: int): + # Convert members to groups + groups = matching.members_to_groups(matchees, hist, per_group) + + # We should always have one group + assert len(groups) + + # Log the groups to history + # This will validate the internals + hist.log_groups_to_history(groups) + + # Ensure each group contains within the bounds of expected members for group in groups: - # Ensure the group contains the right number of members - assert len(group) >= per_group - assert len(group) < per_group*2 + if len(matchees) >= per_group: + assert len(group) >= per_group + else: + assert len(group) == len(matchees) + assert len(group) < per_group*2 # TODO: We could be more strict here + + return groups diff --git a/matchy.py b/matchy.py index 12977e3..8a83537 100755 --- a/matchy.py +++ b/matchy.py @@ -91,7 +91,7 @@ async def match(interaction: discord.Interaction, group_min: int = None, matchee # Create our groups! matchees = list( m for m in interaction.channel.members if matchee in m.roles) - groups = matching.members_to_groups(matchees, group_min) + groups = matching.members_to_groups(matchees, History, group_min) # Post about all the groups with a button to send to the channel msg = '\n'.join(matching.group_to_message(g) for g in groups)