From c87d7705cf651f4114eccfebd2b4587d69627a1f Mon Sep 17 00:00:00 2001
From: Marc Di Luzio <marc.diluzio@gmail.com>
Date: Sat, 10 Aug 2024 21:47:32 +0100
Subject: [PATCH] Update the matching algorithm to take into account role
 similarity

---
 README.md        |   1 -
 matching.py      | 143 ++++++++++++++++++++++++++++++-----------------
 matching_test.py |  54 ++++++++++++------
 matchy.py        |   2 +-
 4 files changed, 128 insertions(+), 72 deletions(-)

diff --git a/README.md b/README.md
index 8db309b..b560dc8 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,5 @@ User IDs can be grabbed by turning on Discord's developer mode and right clickin
 
 ## TODO
 * Write bot tests with [dpytest](https://dpytest.readthedocs.io/en/latest/tutorials/getting_started.html)
-* Add matching based on unique rolls?
 * Add scheduling functionality
 * Improve the weirdo
\ No newline at end of file
diff --git a/matching.py b/matching.py
index cc7651a..ad262e5 100644
--- a/matching.py
+++ b/matching.py
@@ -1,20 +1,37 @@
 """Utility functions for matchy"""
 import logging
+import random
 from datetime import datetime, timedelta
 from typing import Protocol, runtime_checkable
 import history
 
 
 # Number of days to step forward from the start of history for each match attempt
-_ATTEMPT_RELEVANCY_TIMESTEP = timedelta(days=7)
+_ATTEMPT_TIMESTEP_INCREMENT = timedelta(days=7)
 
 # Attempts for each of those time periods
 _ATTEMPTS_PER_TIMESTEP = 3
 
+# Various eligibility scoring factors for group meetups
+_SCORE_CURRENT_MEMBERS = 2**1
+_SCORE_REPEAT_ROLE = 2**2
+_SCORE_REPEAT_MATCH = 2**3
+_SCORE_EXTRA_MEMBERS = 2**4
+
+# Scores higher than this are fully rejected
+_SCORE_UPPER_THRESHOLD = 2**6
+
 logger = logging.getLogger("matching")
 logger.setLevel(logging.INFO)
 
 
+@runtime_checkable
+class Role(Protocol):
+    @property
+    def id(self) -> int:
+        pass
+
+
 @runtime_checkable
 class Member(Protocol):
     @property
@@ -25,6 +42,10 @@ class Member(Protocol):
     def id(self) -> int:
         pass
 
+    @property
+    def roles(self) -> list[Role]:
+        pass
+
 
 @runtime_checkable
 class Role(Protocol):
@@ -40,31 +61,45 @@ class Guild(Protocol):
         pass
 
 
-def members_to_groups_simple(matchees: list[Member], num_groups: int) -> tuple[bool, list[list[Member]]]:
+def members_to_groups_simple(matchees: list[Member], per_group: int) -> tuple[bool, list[list[Member]]]:
     """Super simple group matching, literally no logic"""
+    num_groups = max(len(matchees)//per_group, 1)
     return [matchees[i::num_groups] for i in range(num_groups)]
 
 
-def circular_iterator(lst, start_index):
-    for i in range(start_index, len(lst)):
-        yield i, lst[i]
-    for i in range(0, start_index):
-        yield i, lst[i]
+def get_member_group_eligibility_score(member: Member,
+                                       group: list[Member],
+                                       relevant_matches: list[int],
+                                       per_group: int) -> int:
+    """Rates a member against a group"""
+    rating = len(group) * _SCORE_CURRENT_MEMBERS
+
+    repeat_meetings = sum(m.id in relevant_matches for m in group)
+    rating += repeat_meetings * _SCORE_REPEAT_MATCH
+
+    repeat_roles = sum(r in member.roles for r in (m.roles for m in group))
+    rating += (repeat_roles * _SCORE_REPEAT_ROLE)
+
+    extra_members = len(group) - per_group
+    if extra_members > 0:
+        rating += extra_members * _SCORE_EXTRA_MEMBERS
+
+    return rating
 
 
 def attempt_create_groups(matchees: list[Member],
                           hist: history.History,
                           oldest_relevant_ts: datetime,
-                          num_groups: int) -> tuple[bool, list[list[Member]]]:
+                          per_group: int) -> tuple[bool, list[list[Member]]]:
     """History aware group matching"""
+    num_groups = max(len(matchees)//per_group, 1)
 
     # Set up the groups in place
     groups = list([] for _ in range(num_groups))
 
     matchees_left = matchees.copy()
 
-    # Sequentially try and fit each matchy into groups one by one
-    current_group = 0
+    # Sequentially try and fit each matchee into a group
     while matchees_left:
         # Get the next matchee to place
         matchee = matchees_left.pop()
@@ -75,67 +110,71 @@ def attempt_create_groups(matchees: list[Member],
 
         # Try every single group from the current group onwards
         # Progressing through the groups like this ensures we slowly fill them up with compatible people
-        added = False
-        for idx, group in circular_iterator(groups, current_group):
-            current_group = idx  # Track the current group
+        scores: list[tuple[int, int]] = []
+        for group in groups:
+                
+            score = get_member_group_eligibility_score(
+                matchee, group, relevant_matches, num_groups)
 
-            # Current compatibilty is simply whether or not the group has any members with previous matches in it
-            if not any(m.id in relevant_matches for m in group):
-                group.append(matchee)
-                added = True
+            # If the score isn't too high, consider this group
+            if score <= _SCORE_UPPER_THRESHOLD:
+                scores.append((group, score))
+
+            # Optimisation:
+            # A score of 0 means we've got something good enough and can skip
+            if score == 0:
                 break
 
-        # If we failed to add this matchee, bail on the group creation as it could not be done
-        if not added:
+        if scores:
+            (group, _) = sorted(scores, key=lambda pair: pair[1])[0]
+            group.append(matchee)
+        else:
+            # If we failed to add this matchee, bail on the group creation as it could not be done
             return None
 
-        # Move on to the next group
-        current_group += 1
-        if current_group >= len(groups):
-            current_group = 0
-
     return groups
 
 
+def datetime_range(start_time: datetime, increment: timedelta, end: datetime):
+    """Yields a datetime range with a given increment"""
+    current = start_time
+    while current <= end or end is None:
+        yield current
+        current += increment
+
+
 def members_to_groups(matchees: list[Member],
                       hist: history.History = history.History(),
-                      per_group: int = 3) -> list[list[Member]]:
+                      per_group: int = 3,
+                      allow_fallback: bool = False) -> list[list[Member]]:
     """Generate the groups from the set of matchees"""
-    num_groups = max(len(matchees)//per_group, 1)
-
-    # Only both with the complicated matching if we have a history
-    # TODO: When matching takes into account more than history this should change
-    if not hist.history:
-        logger.info("No history so matched groups with simple method")
-        return members_to_groups_simple(matchees, num_groups)
+    attempts = 0  # Tracking for logging purposes
+    rand = random.Random(117)  # Some stable randomness
 
     # Grab the oldest timestamp
-    oldest_relevant_datetime = hist.oldest()
+    history_start = hist.oldest() or datetime.now()
 
-    # Loop until we find a valid set of groups
-    attempts = 0
-    while True:
-        attempts += 1
+    # Walk from the start of history until now using the timestep increment
+    for oldest_relevant_datetime in datetime_range(history_start, _ATTEMPT_TIMESTEP_INCREMENT, datetime.now()):
 
-        groups = attempt_create_groups(
-            matchees, hist, oldest_relevant_datetime, num_groups)
+        # Have a few attempts before stepping forward in time
+        for _ in range(_ATTEMPTS_PER_TIMESTEP):
+                
+            rand.shuffle(matchees)  # Shuffle the matchees each attempt
 
-        # Fail the match if our groups aren't big enough
-        if groups and all(len(g) > per_group for g in groups):
-            logger.info("Matched groups after %s attempt(s)", attempts)
-            return groups
+            attempts += 1
+            groups = attempt_create_groups(
+                matchees, hist, oldest_relevant_datetime, per_group)
 
-        # In case we still don't have groups we should progress and
-        # walk the oldest relevant timestamp forward a week
-        # Stop bothering when we finally go beyond today
-        if attempts % _ATTEMPTS_PER_TIMESTEP == 0:
-            oldest_relevant_datetime += _ATTEMPT_RELEVANCY_TIMESTEP
-            if oldest_relevant_datetime > datetime.now():
-                break
+            # Accept the match if there is at most one group or every group is big enough
+            if (len(matchees)//per_group) <= 1 or (groups and all(len(g) >= per_group for g in groups)):
+                logger.info("Matched groups after %s attempt(s)", attempts)
+                return groups
 
     # If we've still failed, just use the simple method
-    logger.info("Fell back to simple groups after %s attempt(s)", attempts)
-    return members_to_groups_simple(matchees, num_groups)
+    if allow_fallback:
+        logger.info("Fell back to simple groups after %s attempt(s)", attempts)
+        return members_to_groups_simple(matchees, per_group)
 
 
 def group_to_message(group: list[Member]) -> str:
diff --git a/matching_test.py b/matching_test.py
index d6fe891..4331fe9 100644
--- a/matching_test.py
+++ b/matching_test.py
@@ -14,24 +14,35 @@ def test_protocols():
     assert isinstance(discord.Member, matching.Member)
     assert isinstance(discord.Guild, matching.Guild)
     assert isinstance(discord.Role, matching.Role)
+    assert isinstance(Member, matching.Member)
+    # assert isinstance(Role, matching.Role)
+
+
+class Role():
+    def __init__(self, id: int):
+        self._id = id
+
+    @property
+    def id(self) -> int:
+        return self._id
 
 
 class Member():
-    def __init__(self, id: int):
+    def __init__(self, id: int, roles: list[Role] = []):
         self._id = id
 
     @property
     def mention(self) -> str:
         return f"<@{self._id}>"
 
+    @property
+    def roles(self) -> list[Role]:
+        return []
+
     @property
     def id(self) -> int:
         return self._id
 
-    @id.setter
-    def id(self, value):
-        self._id = value
-
 
 def inner_validate_members_to_groups(matchees: list[Member], hist: history.History, per_group: int):
     """Inner function to validate the main output of the groups function"""
@@ -67,7 +78,7 @@ def inner_validate_members_to_groups(matchees: list[Member], hist: history.Histo
     ([Member(1)] * 12, 5),
     ([Member(1)] * 11, 2),
     ([Member(1)] * 356, 8),
-])
+], ids=['single', "larger_groups", "100_members", "5_group", "pairs", "356_big_groups"])
 def test_members_to_groups_no_history(matchees, per_group):
     """Test simple group matching works"""
     hist = history.History()
@@ -116,25 +127,33 @@ def items_found_in_lists(list_of_lists, items):
             {
                 "ts": datetime.now() - timedelta(days=1),
                 "groups": [
-                    [Member(1), Member(2), Member(3)],
-                    [Member(4), Member(5), Member(6)],
+                    [
+                        Member(1),
+                        Member(2),
+                        Member(3)
+                    ],
+                    [
+                        Member(4),
+                        Member(5),
+                        Member(6)
+                    ],
                 ]
             }
         ],
         [
-            Member(1),
-            Member(2),
-            Member(3),
-            Member(4),
-            Member(5),
-            Member(6),
+            Member(1, [Role(1), Role(2), Role(3), Role(4)]),
+            Member(2, [Role(1), Role(2), Role(3), Role(4)]),
+            Member(3, [Role(1), Role(2), Role(3), Role(4)]),
+            Member(4, [Role(1), Role(2), Role(3), Role(4)]),
+            Member(5, [Role(1), Role(2), Role(3), Role(4)]),
+            Member(6, [Role(1), Role(2), Role(3), Role(4)]),
         ],
         3,
         [
             # Nothing specific to validate
         ]
     ),
-])
+], ids=['simple_history', 'fallback'])
 def test_members_to_groups_with_history(history_data, matchees, per_group, checks):
     """Test more advanced group matching works"""
     hist = history.History()
@@ -159,10 +178,9 @@ def test_members_to_groups_stress_test():
     # Slowly ramp up the group size
     for per_group in range(2, 6):
 
-        # Slowly ramp a randomized shuffled list of members
+        # Slowly ramp a randomised, shuffled list of members with randomised roles
         for num_members in range(1, 5):
-            # up to 50 total members
-            matchees = list(Member(i)
+            matchees = list(Member(i, list(Role(i) for i in range(1, rand.randint(2, num_members*2 + 1))))
                             for i in range(1, rand.randint(2, num_members*10 + 1)))
             rand.shuffle(matchees)
 
diff --git a/matchy.py b/matchy.py
index 8a83537..e10d849 100755
--- a/matchy.py
+++ b/matchy.py
@@ -91,7 +91,7 @@ async def match(interaction: discord.Interaction, group_min: int = None, matchee
     # Create our groups!
     matchees = list(
         m for m in interaction.channel.members if matchee in m.roles)
-    groups = matching.members_to_groups(matchees, History, group_min)
+    groups = matching.members_to_groups(matchees, History, group_min, allow_fallback=True)
 
     # Post about all the groups with a button to send to the channel
     msg = '\n'.join(matching.group_to_message(g) for g in groups)