Implement a history-based matching algorythm

The bot will attempt to keep producing groups with entirely unique matches based on the full history of matches until it can't. It'll then step forward and ignore a week of history and try again, ignoring more history until no history is left
This commit is contained in:
Marc Di Luzio 2024-08-10 15:12:14 +01:00
parent ed2375386b
commit 874a24dd1d
8 changed files with 388 additions and 104 deletions

View file

@ -23,4 +23,4 @@ jobs:
flake8 --max-line-length 120 $(git ls-files '*.py') flake8 --max-line-length 120 $(git ls-files '*.py')
- name: Run tests with pytest - name: Run tests with pytest
run: | run: |
pytest pytest --timeout=60

View file

@ -27,6 +27,6 @@ User IDs can be grabbed by turning on Discord's developer mode and right clickin
## TODO ## TODO
* Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html) * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
* Add tracking of past groups to ensure unique pairings * Add matching based on unique rolls?
* Add scheduling functionality * Add scheduling functionality
* Improve the weirdo * Improve the weirdo

View file

@ -2,11 +2,22 @@
from schema import Schema, And, Use from schema import Schema, And, Use
import files import files
FILE = "config.json" _FILE = "config.json"
_SCHEMA = Schema(
{
# Discord bot token
"token": And(Use(str)),
# ids of owners authorised to use owner-only commands
"owners": And(Use(list[int])),
}
)
class Config(): class Config():
def __init__(self, data: dict): def __init__(self, data: dict):
"""Initialise and validate the config"""
_SCHEMA.validate(data)
self.__dict__ = data self.__dict__ = data
@property @property
@ -23,16 +34,5 @@ class Config():
def load() -> Config: def load() -> Config:
"""Load the config and validate it""" """Load the config"""
config = files.load(FILE) return Config(files.load(_FILE))
Schema(
{
# Discord bot token
"token": And(Use(str)),
# ids of owners authorised to use owner-only commands
"owners": And(Use(list[int])),
}
).validate(config)
return Config(config)

View file

@ -1,5 +1,6 @@
"""File operation helpers""" """File operation helpers"""
import json import json
import shutil
def load(file: str) -> dict: def load(file: str) -> dict:
@ -9,6 +10,11 @@ def load(file: str) -> dict:
def save(file: str, content: dict): def save(file: str, content: dict):
"""Save out a content dictionary to a file""" """
with open(file, "w") as f: Save out a content dictionary to a file
Stores it in an intermediary file first incase the dump fails
"""
intermediate = file + ".nxt"
with open(intermediate, "w") as f:
json.dump(content, f, indent=4) json.dump(content, f, indent=4)
shutil.move(intermediate, file)

View file

@ -1,11 +1,43 @@
"""Store matching history""" """Store matching history"""
import os import os
import time from datetime import datetime
from schema import Schema, And, Use, Optional from schema import Schema, And, Use, Optional
from typing import Protocol from typing import Protocol
import files import files
import copy
FILE = "history.json" _FILE = "history.json"
# Warning: Changing any of the below needs proper thought to ensure backwards compatibility
_DEFAULT_DICT = {
"history": {},
"matchees": {}
}
_TIME_FORMAT = "%a %b %d %H:%M:%S %Y"
_SCHEMA = Schema(
{
Optional("history"): {
Optional(str): { # a datetime
"groups": [
{
"members": [
# The ID of each matchee in the match
And(Use(int))
]
}
]
}
},
Optional("matchees"): {
Optional(str): {
Optional("matches"): {
# Matchee ID and Datetime pair
Optional(str): And(Use(str))
}
}
}
}
)
class Member(Protocol): class Member(Protocol):
@ -14,13 +46,25 @@ class Member(Protocol):
pass pass
def ts_to_datetime(ts: str) -> datetime:
"""Convert a ts to datetime using the history format"""
return datetime.strptime(ts, _TIME_FORMAT)
def validate(dict: dict):
"""Initialise and validate the history"""
_SCHEMA.validate(dict)
class History(): class History():
def __init__(self, data: dict): def __init__(self, data: dict = _DEFAULT_DICT):
self.__dict__ = data """Initialise and validate the history"""
validate(data)
self.__dict__ = copy.deepcopy(data)
@property @property
def groups(self) -> list[dict]: def history(self) -> list[dict]:
return self.__dict__["groups"] return self.__dict__["history"]
@property @property
def matchees(self) -> dict[str, dict]: def matchees(self) -> dict[str, dict]:
@ -28,55 +72,54 @@ class History():
def save(self) -> None: def save(self) -> None:
"""Save out the history""" """Save out the history"""
files.save(FILE, self.__dict__) files.save(_FILE, self.__dict__)
def oldest(self) -> datetime:
"""Grab the oldest timestamp in history"""
if not self.history:
return None
times = (ts_to_datetime(dt) for dt in self.history.keys())
return sorted(times)[0]
def log_groups_to_history(self, groups: list[list[Member]], ts: datetime = datetime.now()) -> None:
"""Log the groups"""
tmp_history = History(self.__dict__)
ts = datetime.strftime(ts, _TIME_FORMAT)
# Grab or create the hitory item for this set of groups
history_item = {}
tmp_history.history[ts] = history_item
history_item_groups = []
history_item["groups"] = history_item_groups
for group in groups:
# Add the group data
history_item_groups.append({
"members": list(m.id for m in group)
})
# Update the matchee data with the matches
for m in group:
matchee = tmp_history.matchees.get(str(m.id), {})
matchee_matches = matchee.get("matches", {})
for o in (o for o in group if o.id != m.id):
matchee_matches[str(o.id)] = ts
matchee["matches"] = matchee_matches
tmp_history.matchees[str(m.id)] = matchee
# Validate before storing the result
validate(self.__dict__)
self.__dict__ = tmp_history.__dict__
def save_groups_to_history(self, groups: list[list[Member]]) -> None: def save_groups_to_history(self, groups: list[list[Member]]) -> None:
"""Save out the groups to the history file""" """Save out the groups to the history file"""
ts = time.time() self.log_groups_to_history(groups)
for group in groups:
# Add the group
self.groups.append({
"ts": ts,
"matchees": list(m.id for m in group)
})
# Add the matches to the matchee data
for m in group:
matchee = self.matchees.get(str(m.id), {"matches": []})
for o in (o for o in group if o.id != m.id):
matchee["matches"].append({"ts": ts, "id": o.id})
self.matchees[str(m.id)] = matchee
self.save() self.save()
def load() -> History: def load() -> History:
"""Load the history and validate it""" """Load the history"""
history = files.load(FILE) if os.path.isfile(FILE) else { return History(files.load(_FILE) if os.path.isfile(_FILE) else _DEFAULT_DICT)
"groups": [],
"matchees": {}
}
Schema(
{
Optional("groups"): [
{
"ts": And(Use(str)),
"matchees": [
And(Use(int))
]
}
],
Optional("matchees"): {
Optional(str): {
"matches": [
{
"ts": And(Use(str)),
"id": And(Use(int)),
}
]
}
}
}
).validate(history)
return History(history)

View file

@ -1,27 +1,145 @@
"""Utility functions for matchy""" """Utility functions for matchy"""
import random import logging
from typing import Protocol from datetime import datetime, timedelta
from typing import Protocol, runtime_checkable
import history
# Number of days to step forward from the start of history for each match attempt
_ATTEMPT_RELEVANCY_STEP = timedelta(days=7)
# Attempts for each of those time periods
_ATTEMPTS_PER_TIME = 3
# Mamum attempts worth taking
_MAX_ATTEMPTS = _ATTEMPTS_PER_TIME*10
logger = logging.getLogger("matching")
logger.setLevel(logging.INFO)
@runtime_checkable
class Member(Protocol): class Member(Protocol):
@property
def mention(self) -> str:
pass
@property @property
def id(self) -> int: def id(self) -> int:
pass pass
def members_to_groups(matchees: list[Member], @runtime_checkable
per_group: int) -> list[list[Member]]: class Role(Protocol):
"""Generate the groups from the set of matchees""" @property
random.shuffle(matchees) def name(self) -> str:
num_groups = max(len(matchees)//per_group, 1) pass
@runtime_checkable
class Guild(Protocol):
@property
def roles(self) -> list[Role]:
pass
def members_to_groups_simple(matchees: list[Member], num_groups: int) -> tuple[bool, list[list[Member]]]:
"""Super simple group matching, literally no logic"""
return [matchees[i::num_groups] for i in range(num_groups)] return [matchees[i::num_groups] for i in range(num_groups)]
class Member(Protocol): def circular_iterator(lst, start_index):
"""Protocol for the type of Member""" for i in range(start_index, len(lst)):
@property yield i, lst[i]
def mention(self) -> str: for i in range(0, start_index):
pass yield i, lst[i]
def attempt_create_groups(matchees: list[Member],
hist: history.History,
oldest_relevant_ts: datetime,
num_groups: int) -> tuple[bool, list[list[Member]]]:
"""History aware group matching"""
# Set up the groups in place
groups = list([] for _ in range(num_groups))
matchees_left = matchees.copy()
# Sequentially try and fit each matchy into groups one by one
current_group = 0
while matchees_left:
# Get the next matchee to place
matchee = matchees_left.pop()
matchee_matches = hist.matchees.get(
str(matchee.id), {}).get("matches", {})
relevant_matches = list(int(id) for id, ts in matchee_matches.items()
if history.ts_to_datetime(ts) >= oldest_relevant_ts)
# Try every single group from the current group onwards
# Progressing through the groups like this ensures we slowly fill them up with compatible people
added = False
for idx, group in circular_iterator(groups, current_group):
current_group = idx # Track the current group
# Current compatibilty is simply whether or not the group has any members with previous matches in it
if not any(m.id in relevant_matches for m in group):
group.append(matchee)
added = True
break
# If we failed to add this matchee, bail on the group creation as it could not be done
if not added:
return None
# Move on to the next group
current_group += 1
if current_group >= len(groups):
current_group = 0
return groups
def members_to_groups(matchees: list[Member],
hist: history.History = history.History(),
per_group: int = 3, max_attempts: int = _MAX_ATTEMPTS) -> list[list[Member]]:
"""Generate the groups from the set of matchees"""
num_groups = max(len(matchees)//per_group, 1)
attempts = max_attempts
# Only both with the complicated matching if we have a history
# TODO: When matching takes into account more than history this should change
if not hist.history:
logger.info("No history so matched groups with simple method")
return members_to_groups_simple(matchees, num_groups)
# Grab the oldest timestamp
oldest_relevant_datetime = hist.oldest()
# Loop until we find a valid set of groups
while attempts:
attempts -= 1
groups = attempt_create_groups(
matchees, hist, oldest_relevant_datetime, num_groups)
if groups:
logger.info("Matched groups after %s attempt(s)",
_MAX_ATTEMPTS - attempts)
return groups
# In case we still don't have groups we should progress and
# walk the oldest relevant timestamp forward a week
# Stop bothering if we've gone beyond today
if attempts % _ATTEMPTS_PER_TIME == 0:
oldest_relevant_datetime += _ATTEMPT_RELEVANCY_STEP
if oldest_relevant_datetime > datetime.now():
break
# If we've still failed, just use the simple method
logger.info("Fell back to simple groups after %s attempt(s)",
_MAX_ATTEMPTS - attempts)
return members_to_groups_simple(matchees, num_groups)
def group_to_message(group: list[Member]) -> str: def group_to_message(group: list[Member]) -> str:
@ -34,18 +152,6 @@ def group_to_message(group: list[Member]) -> str:
return f"Matched up {mentions}!" return f"Matched up {mentions}!"
class Role(Protocol):
@property
def name(self) -> str:
pass
class Guild(Protocol):
@property
def roles(self) -> list[Role]:
pass
def get_role_from_guild(guild: Guild, role: str) -> Role: def get_role_from_guild(guild: Guild, role: str) -> Role:
"""Find a role in a guild""" """Find a role in a guild"""
return next((r for r in guild.roles if r.name == role), None) return next((r for r in guild.roles if r.name == role), None)

View file

@ -4,18 +4,147 @@
import discord import discord
import pytest import pytest
import matching import matching
import history
from datetime import datetime, timedelta
def test_protocols():
"""Verify the protocols we're using match the discord ones"""
assert isinstance(discord.Member, matching.Member)
assert isinstance(discord.Guild, matching.Guild)
assert isinstance(discord.Role, matching.Role)
class TestMember():
def __init__(self, id: int):
self._id = id
@property
def mention(self) -> str:
return f"<@{self._id}>"
@property
def id(self) -> int:
return self._id
@id.setter
def id(self, value):
self._id = value
@pytest.mark.parametrize("matchees, per_group", [ @pytest.mark.parametrize("matchees, per_group", [
([discord.Member.__new__(discord.Member)] * 100, 3), # Simplest test possible
([discord.Member.__new__(discord.Member)] * 12, 5), ([TestMember(1)], 1),
([discord.Member.__new__(discord.Member)] * 11, 2),
([discord.Member.__new__(discord.Member)] * 356, 8), # More requested than we have
([TestMember(1)], 2),
# A selection of hyper-simple checks to validate core functionality
([TestMember(1)] * 100, 3),
([TestMember(1)] * 12, 5),
([TestMember(1)] * 11, 2),
([TestMember(1)] * 356, 8),
]) ])
def test_matchees_to_groups(matchees, per_group): def test_matchees_to_groups_no_history(matchees, per_group):
"""Test simple group matching works""" """Test simple group matching works"""
groups = matching.members_to_groups(matchees, per_group) hist = history.History()
core_validate_members_to_groups(matchees, hist, per_group)
def items_found_in_lists(list_of_lists, items):
"""validates if any sets of items are found in individual lists"""
for sublist in list_of_lists:
if all(item in sublist for item in items):
return True
return False
@pytest.mark.parametrize("history_data, matchees, per_group, checks", [
# Slightly more difficult test
# Describe a history where we previously matched up some people and ensure they don't get rematched
(
[
{
"ts": datetime.now() - timedelta(days=1),
"groups": [
[TestMember(1), TestMember(2)],
[TestMember(3), TestMember(4)],
]
}
],
[
TestMember(1),
TestMember(2),
TestMember(3),
TestMember(4),
],
2,
[
lambda groups: not items_found_in_lists(
groups, [TestMember(1), TestMember(2)]),
lambda groups: not items_found_in_lists(
groups, [TestMember(3), TestMember(4)])
]
),
# Feed the system an "impossible" test
# The function should fall back to ignoring history and still give us something
(
[
{
"ts": datetime.now() - timedelta(days=1),
"groups": [
[TestMember(1), TestMember(2), TestMember(3)],
[TestMember(4), TestMember(5), TestMember(6)],
]
}
],
[
TestMember(1),
TestMember(2),
TestMember(3),
TestMember(4),
TestMember(5),
TestMember(6),
],
3,
[
# Nothing specific to validate
]
),
])
def test_matchees_to_groups_with_history(history_data, matchees, per_group, checks):
"""Test simple group matching works"""
hist = history.History()
# Replay the history
for d in history_data:
hist.log_groups_to_history(d["groups"], d["ts"])
groups = core_validate_members_to_groups(matchees, hist, per_group)
# Run the custom validate functions
for check in checks:
assert check(groups)
def core_validate_members_to_groups(matchees: list[TestMember], hist: history.History, per_group: int):
# Convert members to groups
groups = matching.members_to_groups(matchees, hist, per_group)
# We should always have one group
assert len(groups)
# Log the groups to history
# This will validate the internals
hist.log_groups_to_history(groups)
# Ensure each group contains within the bounds of expected members
for group in groups: for group in groups:
# Ensure the group contains the right number of members if len(matchees) >= per_group:
assert len(group) >= per_group assert len(group) >= per_group
assert len(group) < per_group*2 else:
assert len(group) == len(matchees)
assert len(group) < per_group*2 # TODO: We could be more strict here
return groups

View file

@ -91,7 +91,7 @@ async def match(interaction: discord.Interaction, group_min: int = None, matchee
# Create our groups! # Create our groups!
matchees = list( matchees = list(
m for m in interaction.channel.members if matchee in m.roles) m for m in interaction.channel.members if matchee in m.roles)
groups = matching.members_to_groups(matchees, group_min) groups = matching.members_to_groups(matchees, History, group_min)
# Post about all the groups with a button to send to the channel # Post about all the groups with a button to send to the channel
msg = '\n'.join(matching.group_to_message(g) for g in groups) msg = '\n'.join(matching.group_to_message(g) for g in groups)