Move core matching factors to the config file

Bonus changes here were making the config a singleton, fixing some more tests and then re-writing the stress test because it was pissing me off.
This commit is contained in:
Marc Di Luzio 2024-08-11 22:07:43 +01:00
parent a5d7dae851
commit 9043615498
6 changed files with 254 additions and 86 deletions

View file

@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d
## Config
Matchy is configured by a `config.json` file that takes this format:
```
```json
{
"version": 1,
"token": "<<github bot token>>",
"version" : 1,
"token" : "<<github bot token>>",
"match" : {
"score_factors": {
"repeat_role" : 4,
"repeat_match" : 8,
"extra_member" : 32,
"upper_threshold" : 64
}
}
}
```
Only token and version are required. See [`py/config.py`](for explanations for any of these)
## TODO
* Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
* Move more constants to the config
* Add scheduling functionality
* Fix logging in some sub files (doesn't seem to actually be output?)
* Improve the weirdo

View file

@ -1,5 +1,5 @@
"""Very simple config loading library"""
from schema import Schema, And, Use
from schema import Schema, And, Use, Optional
import files
import os
import logging
@ -13,10 +13,18 @@ _FILE = "config.json"
_VERSION = 1
class _Keys():
class _Key():
TOKEN = "token"
VERSION = "version"
MATCH = "match"
SCORE_FACTORS = "score_factors"
REPEAT_ROLE = "repeat_role"
REPEAT_MATCH = "repeat_match"
EXTRA_MEMBER = "extra_member"
UPPER_THRESHOLD = "upper_threshold"
# Removed
OWNERS = "owners"
@ -24,10 +32,21 @@ class _Keys():
_SCHEMA = Schema(
{
# The current version
_Keys.VERSION: And(Use(int)),
_Key.VERSION: And(Use(int)),
# Discord bot token
_Keys.TOKEN: And(Use(str)),
_Key.TOKEN: And(Use(str)),
# Settings for the match algorithmn, see matching.py for explanations on usage
Optional(_Key.MATCH): {
Optional(_Key.SCORE_FACTORS): {
Optional(_Key.REPEAT_ROLE): And(Use(int)),
Optional(_Key.REPEAT_MATCH): And(Use(int)),
Optional(_Key.EXTRA_MEMBER): And(Use(int)),
Optional(_Key.UPPER_THRESHOLD): And(Use(int)),
}
}
}
)
@ -35,7 +54,7 @@ _SCHEMA = Schema(
def _migrate_to_v1(d: dict):
# Owners moved to History in v1
# Note: owners will be required to be re-added to the state.json
owners = d.pop(_Keys.OWNERS)
owners = d.pop(_Key.OWNERS)
logger.warn(
"Migration removed owners from config, these must be re-added to the state.json")
logger.warn("Owners: %s", owners)
@ -47,7 +66,29 @@ _MIGRATIONS = [
]
class Config():
class _ScoreFactors():
def __init__(self, data: dict):
"""Initialise and validate the config"""
self._dict = data
@property
def repeat_role(self) -> int:
return self._dict.get(_Key.REPEAT_ROLE, None)
@property
def repeat_match(self) -> int:
return self._dict.get(_Key.REPEAT_MATCH, None)
@property
def extra_member(self) -> int:
return self._dict.get(_Key.EXTRA_MEMBER, None)
@property
def upper_threshold(self) -> int:
return self._dict.get(_Key.UPPER_THRESHOLD, None)
class _Config():
def __init__(self, data: dict):
"""Initialise and validate the config"""
_SCHEMA.validate(data)
@ -57,6 +98,10 @@ class Config():
def token(self) -> str:
return self._dict["token"]
@property
def score_factors(self) -> _ScoreFactors:
return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {}))
def _migrate(dict: dict):
"""Migrate a dict through versions"""
@ -66,7 +111,7 @@ def _migrate(dict: dict):
dict["version"] = _VERSION
def load_from_file(file: str = _FILE) -> Config:
def _load_from_file(file: str = _FILE) -> _Config:
"""
Load the state from a file
Apply any required migrations
@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config:
assert os.path.isfile(file)
loaded = files.load(file)
_migrate(loaded)
return Config(loaded)
return _Config(loaded)
# Core config for users to use
# Singleton as there should only be one, and it's global
Config = _load_from_file()

View file

@ -3,20 +3,24 @@ import logging
from datetime import datetime, timedelta
from typing import Protocol, runtime_checkable
import state
# Number of days to step forward from the start of history for each match attempt
_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)
import config
class _ScoreFactors(int):
"""Various eligability scoring factors for group meetups"""
REPEAT_ROLE = 2**2
REPEAT_MATCH = 2**3
EXTRA_MEMBER = 2**5
"""
Score factors used when trying to build up "best fit" groups
Matchees are sequentially placed into the lowest scoring available group
"""
# Scores higher than this are fully rejected
UPPER_THRESHOLD = 2**6
# Added for each role the matchee has that another group member has
REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2
# Added for each member in the group that the matchee has already matched with
REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3
# Added for each additional member over the set "per group" value
EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5
# Upper threshold, if the user scores higher than this they will not be placed in that group
UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6
logger = logging.getLogger("matching")
@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member,
return rating
# Add score based on prior matchups of this user
rating += sum(m.id in prior_matches for m in group) * \
_ScoreFactors.REPEAT_MATCH
num_prior = sum(m.id in prior_matches for m in group)
rating += num_prior * _ScoreFactors.REPEAT_MATCH
# Calculate the number of roles that match
all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr)
@ -159,7 +163,7 @@ def iterate_all_shifts(list: list):
def members_to_groups(matchees: list[Member],
hist: state.State = state.State(),
st: state.State = state.State(),
per_group: int = 3,
allow_fallback: bool = False) -> list[list[Member]]:
"""Generate the groups from the set of matchees"""
@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member],
if not matchees:
return []
# Grab the oldest timestamp
history_start = hist.get_oldest_timestamp() or datetime.now()
# Walk from the start of time until now using the timestep increment
for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
# Walk from the start of history until now trying to match up groups
# Or if there's no
for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]:
# Attempt with each starting matchee
for shifted_matchees in iterate_all_shifts(matchees):
attempts += 1
groups = attempt_create_groups(
shifted_matchees, hist, oldest_relevant_datetime, per_group)
shifted_matchees, st, oldest_relevant_datetime, per_group)
# Fail the match if our groups aren't big enough
if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)):

View file

@ -6,6 +6,8 @@ import pytest
import random
import matching
import state
import copy
import itertools
from datetime import datetime, timedelta
@ -40,12 +42,16 @@ class Member():
def roles(self) -> list[Role]:
return self._roles
@roles.setter
def roles(self, roles: list[Role]):
self._roles = roles
@property
def id(self) -> int:
return self._id
def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int):
def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int):
"""Inner function to validate the main output of the groups function"""
groups = matching.members_to_groups(matchees, tmp_state, per_group)
@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St
def test_members_to_groups_no_history(matchees, per_group):
"""Test simple group matching works"""
tmp_state = state.State()
inner_validate_members_to_groups(matchees, tmp_state, per_group)
members_to_groups_validate(matchees, tmp_state, per_group)
def items_found_in_lists(list_of_lists, items):
@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items):
[
# Nothing else
]
),
# Another weird one pulled out of the stress test
(
[
# print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data])
{"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [
[Member(m) for m in group] for group in groups]}
for (ts, groups) in
[
(
'2024-07-07 20:25:56.313993',
[
[1, 2, 3, 4, 5],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[1],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
[1, 2, 3, 4, 5, 6, 7, 8]
]
),
(
'2024-07-13 20:25:56.313993',
[
[1, 2],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[1]
]
),
(
'2024-06-29 20:25:56.313993',
[
[1, 2, 3, 4, 5],
[1, 2, 3, 4, 5, 6, 7],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20]
]
),
(
'2024-06-25 20:25:56.313993',
[
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18],
[1, 2],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
[1, 2]
]
),
(
'2024-07-04 20:25:56.313993',
[
[1, 2, 3, 4, 5],
[1, 2, 3],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[1, 2, 3, 4, 5, 6, 7]
]
),
(
'2024-07-16 20:25:56.313993',
[
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
[1, 2, 3, 4, 5, 6, 7, 8, 9],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20],
[1, 2, 3, 4, 5, 6],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18]
]
)
]
],
[
# print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below
Member(i, [Role(r) for r in roles]) for (i, roles) in
[
(10, [1, 2, 3]),
(4, [1, 2, 3]),
(5, [1, 2]),
(13, [1, 2]),
(3, [1, 2, 3, 4]),
(14, [1]),
(6, [1, 2, 3, 4]),
(11, [1]),
(9, [1]),
(1, [1, 2, 3]),
(16, [1, 2]),
(15, [1, 2]),
(2, [1, 2, 3]),
(7, [1, 2, 3]),
(12, [1, 2]),
(8, [1, 2, 3, 4])
]
],
5,
[
# Nothing
]
)
], ids=['simple_history', 'fallback', 'example_1', 'example_2'])
], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3'])
def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
"""Test more advanced group matching works"""
tmp_state = state.State()
@ -215,66 +326,65 @@ def test_members_to_groups_with_history(history_data, matchees, per_group, check
for d in history_data:
tmp_state.log_groups(d["groups"], d["ts"])
groups = inner_validate_members_to_groups(matchees, tmp_state, per_group)
groups = members_to_groups_validate(matchees, tmp_state, per_group)
# Run the custom validate functions
for check in checks:
assert check(groups)
# Allows controling of the scale of the stress test
# Try and keep it under 10s when committed, but otherwise these numbers can be fudged
# Especially to test a wide range of weird situations
_STRESS_TEST_RANGE_PER_GROUP = range(2, 6)
_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5)
_STRESS_TEST_RANGE_NUM_HISTORIES = range(8)
def random_chunk(li, min_chunk, max_chunk, rand):
"""
"Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks
"""
it = iter(li)
while True:
nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk)))
if nxt:
yield nxt
else:
break
def test_members_to_groups_stress_test():
"""stress test firing significant random data at the code"""
# Generate a large set of "interesting" tests that replay a fake history onto random people
# Increase these numbers for some extreme programming
@pytest.mark.parametrize("per_group, num_members, num_history", (
(per_group, num_members, num_history)
for per_group in range(2, 4)
for num_members in range(6, 24, 3)
for num_history in range(0, 4)))
def test_stess_random_groups(per_group, num_members, num_history):
"""Run a randomised test based on the input"""
# Use a stable rand, feel free to adjust this if needed but this lets the test be stable
rand = random.Random(123)
# Seed the random based on the inputs paird with primes
# Ensures the test has interesting fake data, but is stable
rand = random.Random(per_group*3 + num_members*5 + num_history*7)
# Slowly ramp up the group size
for per_group in _STRESS_TEST_RANGE_PER_GROUP:
# Start with a list of all possible members
possible_members = [Member(i) for i in range(num_members*2)]
for member in possible_members:
# Give each member 3 random roles from 1-7
member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)]
# Slowly ramp a randomized shuffled list of members with randomised roles
for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS:
matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))])
for i in range(1, rand.randint(2, num_members*10 + 1))]
rand.shuffle(matchees)
# Grab a subset for our members
rand.shuffle(possible_members)
members = copy.deepcopy(possible_members[:num_members])
for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES:
history_data = {}
for i in range(num_history):
possible_members = copy.deepcopy(possible_members)
rand.shuffle(possible_members)
history_data[datetime.now() - timedelta(days=i)] = [
chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand)
]
# Generate some super random history
# Start some time from now to the past
time = datetime.now() - timedelta(days=rand.randint(0, num_history*5))
history_data = []
for _ in range(0, num_history):
run = {
"ts": time
}
groups = []
for y in range(1, num_history):
groups.append([Member(i)
for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))])
run["groups"] = groups
history_data.append(run)
replay_state = state.State()
# Step some time backwards in time
time -= timedelta(days=rand.randint(1, num_history))
# Replay the history
for ts, groups in history_data.items():
replay_state.log_groups(groups, ts)
# No guarantees on history data order so make it a little harder for matchy
rand.shuffle(history_data)
# Replay the history
tmp_state = state.State()
for d in history_data:
tmp_state.log_groups(d["groups"], d["ts"])
inner_validate_members_to_groups(
matchees, tmp_state, per_group)
members_to_groups_validate(members, replay_state, per_group)
def test_auth_scopes():

View file

@ -12,9 +12,7 @@ import re
STATE_FILE = "state.json"
CONFIG_FILE = "config.json"
Config = config.load_from_file(CONFIG_FILE)
State = state.load_from_file(STATE_FILE)
logger = logging.getLogger("matchy")
@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int):
if __name__ == "__main__":
handler = logging.StreamHandler()
bot.run(Config.token, log_handler=handler, root_logger=True)
bot.run(config.Config.token, log_handler=handler, root_logger=True)

View file

@ -130,10 +130,9 @@ class State():
dict = self._dict
_SCHEMA.validate(dict)
def get_oldest_timestamp(self) -> datetime:
"""Grab the oldest timestamp in history"""
times = (ts_to_datetime(dt) for dt in self._history.keys())
return next(times, None)
def get_history_timestamps(self) -> list[datetime]:
"""Grab all timestamps in the history"""
return sorted([ts_to_datetime(dt) for dt in self._history.keys()])
def get_user_matches(self, id: int) -> list[int]:
return self._users.get(str(id), {}).get(_Key.MATCHES, {})