Move core matching factors to the config file
Bonus changes here: making the config a singleton, fixing some more tests, and rewriting the stress test because it was getting on my nerves.
parent a5d7dae851 · commit 9043615498

6 changed files with 254 additions and 86 deletions

README.md | 17
@@ -24,16 +24,25 @@ Only usable by `OWNER` users, reloads the config and syncs commands, or closes d

## Config

Matchy is configured by a `config.json` file that takes this format:

```json
{
    "version" : 1,
    "token" : "<<github bot token>>",

    "match" : {
        "score_factors": {
            "repeat_role" : 4,
            "repeat_match" : 8,
            "extra_member" : 32,
            "upper_threshold" : 64
        }
    }
}
```

Only token and version are required. See [`py/config.py`](py/config.py) for explanations of any of these.

## TODO
* Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
* Move more constants to the config
* Add scheduling functionality
* Fix logging in some sub files (doesn't seem to actually be output?)
* Improve the weirdo
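As an aside on the config section documented above: each `score_factors` entry is optional, and the matching code in this commit falls back to a built-in default when a factor is omitted. A minimal standalone sketch of that lookup (the `score_factor` helper is illustrative, not part of the repo; the defaults mirror the `2**n` fallbacks visible in the matching diff below):

```python
import json

# Assumed defaults, mirroring the fallbacks used by the matching code (2**2, 2**3, 2**5, 2**6)
_DEFAULTS = {"repeat_role": 4, "repeat_match": 8,
             "extra_member": 32, "upper_threshold": 64}


def score_factor(cfg: dict, name: str) -> int:
    """Hypothetical helper: read one factor from a parsed config.json, else use the default."""
    return cfg.get("match", {}).get("score_factors", {}).get(name, _DEFAULTS[name])


cfg = json.loads('{"version": 1, "token": "example"}')
assert score_factor(cfg, "repeat_role") == 4  # omitted in the file, so the default applies
```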

py/config.py | 66
@@ -1,5 +1,5 @@
"""Very simple config loading library"""
from schema import Schema, And, Use
from schema import Schema, And, Use, Optional
import files
import os
import logging
@@ -13,10 +13,18 @@ _FILE = "config.json"
_VERSION = 1


class _Keys():
class _Key():
    TOKEN = "token"
    VERSION = "version"

    MATCH = "match"

    SCORE_FACTORS = "score_factors"
    REPEAT_ROLE = "repeat_role"
    REPEAT_MATCH = "repeat_match"
    EXTRA_MEMBER = "extra_member"
    UPPER_THRESHOLD = "upper_threshold"

    # Removed
    OWNERS = "owners"
@@ -24,10 +32,21 @@ class _Keys():
_SCHEMA = Schema(
    {
        # The current version
        _Keys.VERSION: And(Use(int)),
        _Key.VERSION: And(Use(int)),

        # Discord bot token
        _Keys.TOKEN: And(Use(str)),
        _Key.TOKEN: And(Use(str)),

        # Settings for the match algorithm, see matching.py for explanations on usage
        Optional(_Key.MATCH): {
            Optional(_Key.SCORE_FACTORS): {

                Optional(_Key.REPEAT_ROLE): And(Use(int)),
                Optional(_Key.REPEAT_MATCH): And(Use(int)),
                Optional(_Key.EXTRA_MEMBER): And(Use(int)),
                Optional(_Key.UPPER_THRESHOLD): And(Use(int)),
            }
        }
    }
)
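For context on the `Optional` wrappers above, here is a small self-contained sketch using the same `schema` library pattern (plain strings stand in for the `_Key` constants; this is not the project's actual schema): a config with no `match` block still validates, while a supplied factor is checked and coerced to an int.

```python
from schema import And, Optional, Schema, Use

sketch = Schema({
    "version": And(Use(int)),
    "token": And(Use(str)),
    Optional("match"): {
        Optional("score_factors"): {
            Optional("repeat_role"): And(Use(int)),
        },
    },
})

sketch.validate({"version": 1, "token": "abc"})  # fine: "match" is optional
sketch.validate({"version": 1, "token": "abc",
                 "match": {"score_factors": {"repeat_role": 4}}})  # fine: factor coerced to int
```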
@@ -35,7 +54,7 @@ _SCHEMA = Schema(
def _migrate_to_v1(d: dict):
    # Owners moved to History in v1
    # Note: owners will be required to be re-added to the state.json
    owners = d.pop(_Keys.OWNERS)
    owners = d.pop(_Key.OWNERS)
    logger.warn(
        "Migration removed owners from config, these must be re-added to the state.json")
    logger.warn("Owners: %s", owners)
@@ -47,7 +66,29 @@ _MIGRATIONS = [
]


class Config():
class _ScoreFactors():
    def __init__(self, data: dict):
        """Initialise and validate the config"""
        self._dict = data

    @property
    def repeat_role(self) -> int:
        return self._dict.get(_Key.REPEAT_ROLE, None)

    @property
    def repeat_match(self) -> int:
        return self._dict.get(_Key.REPEAT_MATCH, None)

    @property
    def extra_member(self) -> int:
        return self._dict.get(_Key.EXTRA_MEMBER, None)

    @property
    def upper_threshold(self) -> int:
        return self._dict.get(_Key.UPPER_THRESHOLD, None)


class _Config():
    def __init__(self, data: dict):
        """Initialise and validate the config"""
        _SCHEMA.validate(data)
@@ -57,6 +98,10 @@ class Config():
    def token(self) -> str:
        return self._dict["token"]

    @property
    def score_factors(self) -> _ScoreFactors:
        return _ScoreFactors(self._dict.get(_Key.SCORE_FACTORS, {}))


def _migrate(dict: dict):
    """Migrate a dict through versions"""
@@ -66,7 +111,7 @@ def _migrate(dict: dict):
    dict["version"] = _VERSION


def load_from_file(file: str = _FILE) -> Config:
def _load_from_file(file: str = _FILE) -> _Config:
    """
    Load the state from a file
    Apply any required migrations

@@ -74,4 +119,9 @@ def load_from_file(file: str = _FILE) -> Config:
    assert os.path.isfile(file)
    loaded = files.load(file)
    _migrate(loaded)
    return Config(loaded)
    return _Config(loaded)


# Core config for users to use
# Singleton as there should only be one, and it's global
Config = _load_from_file()
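The net effect of the config.py changes above is that the module builds one `Config` object at import time and everything else just reads it. A short sketch of consumer-side usage, mirroring what the matching and bot diffs below do (the two wrapper functions are illustrative only):

```python
import config  # importing the module runs _load_from_file() exactly once


def bot_token() -> str:
    # The singleton exposes the same properties the old per-call object did
    return config.Config.token


def repeat_role_factor() -> int:
    # Optional factors come back as None when absent, so callers fall back with `or`
    return config.Config.score_factors.repeat_role or 2**2
```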
@@ -3,20 +3,24 @@ import logging
from datetime import datetime, timedelta
from typing import Protocol, runtime_checkable
import state


# Number of days to step forward from the start of history for each match attempt
_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)
import config


class _ScoreFactors(int):
    """Various eligibility scoring factors for group meetups"""
    REPEAT_ROLE = 2**2
    REPEAT_MATCH = 2**3
    EXTRA_MEMBER = 2**5
    """
    Score factors used when trying to build up "best fit" groups
    Matchees are sequentially placed into the lowest scoring available group
    """

    # Scores higher than this are fully rejected
    UPPER_THRESHOLD = 2**6
    # Added for each role the matchee has that another group member has
    REPEAT_ROLE = config.Config.score_factors.repeat_role or 2**2
    # Added for each member in the group that the matchee has already matched with
    REPEAT_MATCH = config.Config.score_factors.repeat_match or 2**3
    # Added for each additional member over the set "per group" value
    EXTRA_MEMBER = config.Config.score_factors.extra_member or 2**5

    # Upper threshold, if the user scores higher than this they will not be placed in that group
    UPPER_THRESHOLD = config.Config.score_factors.upper_threshold or 2**6


logger = logging.getLogger("matching")
@@ -76,8 +80,8 @@ def get_member_group_eligibility_score(member: Member,
        return rating

    # Add score based on prior matchups of this user
    rating += sum(m.id in prior_matches for m in group) * \
        _ScoreFactors.REPEAT_MATCH
    num_prior = sum(m.id in prior_matches for m in group)
    rating += num_prior * _ScoreFactors.REPEAT_MATCH

    # Calculate the number of roles that match
    all_role_ids = set(r.id for mr in [r.roles for r in group] for r in mr)
@@ -159,7 +163,7 @@ def iterate_all_shifts(list: list):


def members_to_groups(matchees: list[Member],
                      hist: state.State = state.State(),
                      st: state.State = state.State(),
                      per_group: int = 3,
                      allow_fallback: bool = False) -> list[list[Member]]:
    """Generate the groups from the set of matchees"""

@@ -170,18 +174,16 @@ def members_to_groups(matchees: list[Member],
    if not matchees:
        return []

    # Grab the oldest timestamp
    history_start = hist.get_oldest_timestamp() or datetime.now()

    # Walk from the start of time until now using the timestep increment
    for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
    # Walk from the start of history until now trying to match up groups
    # Or if there's no history, just try with the current time
    for oldest_relevant_datetime in st.get_history_timestamps() + [datetime.now()]:

        # Attempt with each starting matchee
        for shifted_matchees in iterate_all_shifts(matchees):

            attempts += 1
            groups = attempt_create_groups(
                shifted_matchees, hist, oldest_relevant_datetime, per_group)
                shifted_matchees, st, oldest_relevant_datetime, per_group)

            # Fail the match if our groups aren't big enough
            if num_groups <= 1 or (groups and all(len(g) >= per_group for g in groups)):
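To make the loop change above concrete: rather than stepping a fixed `_ATTEMPT_TIMESTEP_INCREMENT` forward from the oldest record, the matcher now tries each recorded history timestamp as the relevance cutoff and finishes with `datetime.now()`, i.e. ignoring history entirely. A standalone sketch of that relaxation order (`cutoffs` is an illustrative helper, not project code):

```python
from datetime import datetime, timedelta


def cutoffs(history_timestamps: list[datetime]) -> list[datetime]:
    # Mirrors st.get_history_timestamps() + [datetime.now()]: oldest cutoff first, "now" last
    return sorted(history_timestamps) + [datetime.now()]


history = [datetime.now() - timedelta(days=14), datetime.now() - timedelta(days=7)]
for oldest_relevant in cutoffs(history):
    # Each pass would retry attempt_create_groups() with a more permissive cutoff
    print("trying cutoff", oldest_relevant)
```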
@@ -6,6 +6,8 @@ import pytest
import random
import matching
import state
import copy
import itertools
from datetime import datetime, timedelta
@@ -40,12 +42,16 @@ class Member():
    def roles(self) -> list[Role]:
        return self._roles

    @roles.setter
    def roles(self, roles: list[Role]):
        self._roles = roles

    @property
    def id(self) -> int:
        return self._id


def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.State, per_group: int):
def members_to_groups_validate(matchees: list[Member], tmp_state: state.State, per_group: int):
    """Inner function to validate the main output of the groups function"""
    groups = matching.members_to_groups(matchees, tmp_state, per_group)
@@ -83,7 +89,7 @@ def inner_validate_members_to_groups(matchees: list[Member], tmp_state: state.St
def test_members_to_groups_no_history(matchees, per_group):
    """Test simple group matching works"""
    tmp_state = state.State()
    inner_validate_members_to_groups(matchees, tmp_state, per_group)
    members_to_groups_validate(matchees, tmp_state, per_group)


def items_found_in_lists(list_of_lists, items):
@@ -205,8 +211,113 @@ def items_found_in_lists(list_of_lists, items):
        [
            # Nothing else
        ]
    ),
    # Another weird one pulled out of the stress test
    (
        [
            # print([(str(h["ts"]), [[f"Member({gm.id})" for gm in g] for g in h["groups"]]) for h in history_data])
            {"ts": datetime.strptime(ts, r"%Y-%m-%d %H:%M:%S.%f"), "groups": [
                [Member(m) for m in group] for group in groups]}
            for (ts, groups) in
            [
                (
                    '2024-07-07 20:25:56.313993',
                    [
                        [1, 2, 3, 4, 5],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                        [1],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
                        [1, 2, 3, 4, 5, 6, 7, 8]
                    ]
                ),
                (
                    '2024-07-13 20:25:56.313993',
                    [
                        [1, 2],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                         12, 13, 14, 15, 16, 17, 18],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                        [1]
                    ]
                ),
                (
                    '2024-06-29 20:25:56.313993',
                    [
                        [1, 2, 3, 4, 5],
                        [1, 2, 3, 4, 5, 6, 7],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                         13, 14, 15, 16, 17, 18, 19, 20],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                         11, 12, 13, 14, 15, 16, 17],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                         12, 13, 14, 15, 16, 17, 18, 19, 20]
                    ]
                ),
                (
                    '2024-06-25 20:25:56.313993',
                    [
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                         12, 13, 14, 15, 16, 17, 18],
                        [1, 2],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                         12, 13, 14, 15, 16, 17, 18, 19],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                        [1, 2]
                    ]
                ),
                (
                    '2024-07-04 20:25:56.313993',
                    [
                        [1, 2, 3, 4, 5],
                        [1, 2, 3],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                        [1, 2, 3, 4, 5, 6, 7]
                    ]
                ),
                (
                    '2024-07-16 20:25:56.313993',
                    [
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                         13, 14, 15, 16, 17, 18, 19, 20],
                        [1, 2, 3, 4, 5, 6],
                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                         11, 12, 13, 14, 15, 16, 17, 18]
                    ]
                )
            ]
        ],
        [
            # print([(m.id, [r.id for r in m.roles]) for m in matchees]) to get the below
            Member(i, [Role(r) for r in roles]) for (i, roles) in
            [
                (10, [1, 2, 3]),
                (4, [1, 2, 3]),
                (5, [1, 2]),
                (13, [1, 2]),
                (3, [1, 2, 3, 4]),
                (14, [1]),
                (6, [1, 2, 3, 4]),
                (11, [1]),
                (9, [1]),
                (1, [1, 2, 3]),
                (16, [1, 2]),
                (15, [1, 2]),
                (2, [1, 2, 3]),
                (7, [1, 2, 3]),
                (12, [1, 2]),
                (8, [1, 2, 3, 4])
            ]
        ],
        5,
        [
            # Nothing
        ]
    )
], ids=['simple_history', 'fallback', 'example_1', 'example_2'])
], ids=['simple_history', 'fallback', 'example_1', 'example_2', 'example_3'])
def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
    """Test more advanced group matching works"""
    tmp_state = state.State()
@@ -215,66 +326,65 @@ def test_members_to_groups_with_history(history_data, matchees, per_group, check
    for d in history_data:
        tmp_state.log_groups(d["groups"], d["ts"])

    groups = inner_validate_members_to_groups(matchees, tmp_state, per_group)
    groups = members_to_groups_validate(matchees, tmp_state, per_group)

    # Run the custom validate functions
    for check in checks:
        assert check(groups)


# Allows controlling of the scale of the stress test
# Try and keep it under 10s when committed, but otherwise these numbers can be fudged
# Especially to test a wide range of weird situations
_STRESS_TEST_RANGE_PER_GROUP = range(2, 6)
_STRESS_TEST_RANGE_NUM_MEMBERS = range(1, 5)
_STRESS_TEST_RANGE_NUM_HISTORIES = range(8)
def random_chunk(li, min_chunk, max_chunk, rand):
    """
    "Borrowed" from https://stackoverflow.com/questions/21439011/best-way-to-split-a-list-into-randomly-sized-chunks
    """
    it = iter(li)
    while True:
        nxt = list(itertools.islice(it, rand.randint(min_chunk, max_chunk)))
        if nxt:
            yield nxt
        else:
            break


def test_members_to_groups_stress_test():
    """stress test firing significant random data at the code"""
# Generate a large set of "interesting" tests that replay a fake history onto random people
# Increase these numbers for some extreme programming
@pytest.mark.parametrize("per_group, num_members, num_history", (
    (per_group, num_members, num_history)
    for per_group in range(2, 4)
    for num_members in range(6, 24, 3)
    for num_history in range(0, 4)))
def test_stess_random_groups(per_group, num_members, num_history):
    """Run a randomised test based on the input"""

    # Use a stable rand, feel free to adjust this if needed but this lets the test be stable
    rand = random.Random(123)
    # Seed the random based on the inputs paired with primes
    # Ensures the test has interesting fake data, but is stable
    rand = random.Random(per_group*3 + num_members*5 + num_history*7)

    # Slowly ramp up the group size
    for per_group in _STRESS_TEST_RANGE_PER_GROUP:
    # Start with a list of all possible members
    possible_members = [Member(i) for i in range(num_members*2)]
    for member in possible_members:
        # Give each member 3 random roles from 1-7
        member.roles = [Role(i) for i in rand.sample(range(1, 8), 3)]

        # Slowly ramp a randomized shuffled list of members with randomised roles
        for num_members in _STRESS_TEST_RANGE_NUM_MEMBERS:
            matchees = [Member(i, [Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))])
                        for i in range(1, rand.randint(2, num_members*10 + 1))]
            rand.shuffle(matchees)
    # Grab a subset for our members
    rand.shuffle(possible_members)
    members = copy.deepcopy(possible_members[:num_members])

            for num_history in _STRESS_TEST_RANGE_NUM_HISTORIES:
    history_data = {}
    for i in range(num_history):
        possible_members = copy.deepcopy(possible_members)
        rand.shuffle(possible_members)
        history_data[datetime.now() - timedelta(days=i)] = [
            chunk for chunk in random_chunk(possible_members, per_group, per_group+2, rand)
        ]

                # Generate some super random history
                # Start some time from now to the past
                time = datetime.now() - timedelta(days=rand.randint(0, num_history*5))
                history_data = []
                for _ in range(0, num_history):
                    run = {
                        "ts": time
                    }
                    groups = []
                    for y in range(1, num_history):
                        groups.append([Member(i)
                                       for i in range(1, max(num_members, rand.randint(2, num_members*10 + 1)))])
                    run["groups"] = groups
                    history_data.append(run)
    replay_state = state.State()

                    # Step some time backwards in time
                    time -= timedelta(days=rand.randint(1, num_history))
    # Replay the history
    for ts, groups in history_data.items():
        replay_state.log_groups(groups, ts)

                # No guarantees on history data order so make it a little harder for matchy
                rand.shuffle(history_data)

                # Replay the history
                tmp_state = state.State()
                for d in history_data:
                    tmp_state.log_groups(d["groups"], d["ts"])

                inner_validate_members_to_groups(
                    matchees, tmp_state, per_group)
    members_to_groups_validate(members, replay_state, per_group)


def test_auth_scopes():
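The rewritten stress test above swaps the old nested loops for a `pytest.mark.parametrize` grid plus a `random.Random` seeded from the parameters, so every combination gets stable but distinct fake data and any failure reproduces exactly. A stripped-down sketch of that pattern (the test body and seed constants here are illustrative):

```python
import random

import pytest


@pytest.mark.parametrize("per_group, num_members", (
    (per_group, num_members)
    for per_group in range(2, 4)
    for num_members in range(6, 12, 3)))
def test_seeded_combination(per_group, num_members):
    # Prime multipliers keep the seed distinct per combination but stable across runs
    rand = random.Random(per_group * 3 + num_members * 5)
    members = [rand.randint(1, 100) for _ in range(num_members)]
    assert len(members) == num_members
```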
@@ -12,9 +12,7 @@ import re


STATE_FILE = "state.json"
CONFIG_FILE = "config.json"

Config = config.load_from_file(CONFIG_FILE)
State = state.load_from_file(STATE_FILE)

logger = logging.getLogger("matchy")

@@ -229,4 +227,4 @@ def active_members_to_groups(channel: discord.channel, min_members: int):

if __name__ == "__main__":
    handler = logging.StreamHandler()
    bot.run(Config.token, log_handler=handler, root_logger=True)
    bot.run(config.Config.token, log_handler=handler, root_logger=True)
@@ -130,10 +130,9 @@ class State():
        dict = self._dict
        _SCHEMA.validate(dict)

    def get_oldest_timestamp(self) -> datetime:
        """Grab the oldest timestamp in history"""
        times = (ts_to_datetime(dt) for dt in self._history.keys())
        return next(times, None)
    def get_history_timestamps(self) -> list[datetime]:
        """Grab all timestamps in the history"""
        return sorted([ts_to_datetime(dt) for dt in self._history.keys()])

    def get_user_matches(self, id: int) -> list[int]:
        return self._users.get(str(id), {}).get(_Key.MATCHES, {})