Upload elo_rank.py
elo_rank.py +133 -0
elo_rank.py
ADDED
@@ -0,0 +1,133 @@
import random
import json

class EloRank:
    def __init__(self, initial_rating=1000, k_factor=32):
        """
        Initialize the EloRank class.
        :param initial_rating: Initial ELO rating for each model.
        :param k_factor: The K-factor that determines the sensitivity of rating changes.
        """
        self.ratings = {}
        self.initial_rating = initial_rating
        self.k_factor = k_factor
        self.wins = {}

    def add_model(self, model_id):
        """
        Add a new model with the initial rating.
        :param model_id: Unique identifier for the model.
        """
        self.ratings[model_id] = self.initial_rating
        self.wins[model_id] = 0

    def record_match(self, winner, loser):
        """
        Update the ratings based on a match result.
        :param winner: Model ID of the winner.
        :param loser: Model ID of the loser.
        """
        rating_winner = self.ratings[winner]
        rating_loser = self.ratings[loser]

        expected_winner = self.expected_score(rating_winner, rating_loser)
        expected_loser = self.expected_score(rating_loser, rating_winner)

        self.ratings[winner] += self.k_factor * (1 - expected_winner)
        self.ratings[loser] += self.k_factor * (0 - expected_loser)

        # Update win count
        self.wins[winner] += 1

    def expected_score(self, rating_a, rating_b):
        """
        Calculate the expected score for a model.
        :param rating_a: Rating of model A.
        :param rating_b: Rating of model B.
        :return: Expected score.
        """
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def get_rating(self, model_id):
        """
        Get the current rating of a model.
        :param model_id: Unique identifier for the model.
        :return: Current rating of the model.
        """
        return self.ratings.get(model_id, None)

    def get_wins(self, model_id):
        """
        Get the number of wins of a model.
        :param model_id: Unique identifier for the model.
        :return: Number of wins of the model.
        """
        return self.wins.get(model_id, 0)

    def get_top_models(self, n=2):
        """
        Get the top N models by rating.
        :param n: Number of top models to retrieve.
        :return: List of model IDs of the top models.
        """
        return sorted(self.ratings, key=self.ratings.get, reverse=True)[:n]

    def sample_next_match(self):
        """
        Sample the next match based on the probability proportional to the current rating.
        This approach helps accelerate the convergence of ranking.
        :return: Tuple of two model IDs for the next match.
        """
        model_ids = list(self.ratings.keys())
        probabilities = [self.ratings[model_id] for model_id in model_ids]
        total_rating = sum(probabilities)
        probabilities = [rating / total_rating for rating in probabilities]

        # Sample two different models for the next match
        next_match = random.choices(model_ids, probabilities, k=2)
        while next_match[0] == next_match[1]:
            next_match = random.choices(model_ids, probabilities, k=2)

        return tuple(next_match)

    def process_match_records(self, file_path):
        """
        Process match records from a JSON file and update ratings and win counts accordingly.
        :param file_path: Path to the JSON file containing match records.
        """
        with open(file_path, 'r') as file:
            match_records = json.load(file)

        for record in match_records:
            winner = record['winner']
            model_1 = record['model_1']
            model_2 = record['model_2']

            # Add models if they are not already added
            if model_1 not in self.ratings:
                self.add_model(model_1)
            if model_2 not in self.ratings:
                self.add_model(model_2)

            # Record the match result
            if winner == model_1:
                self.record_match(model_1, model_2)
            elif winner == model_2:
                self.record_match(model_2, model_1)

# # Example Usage
# e = EloRank()
# e.add_model('model_A')
# e.add_model('model_B')
# e.add_model('model_C')

# e.record_match('model_A', 'model_B')
# print(e.get_rating('model_A'))  # Should be greater than the initial rating
# print(e.get_rating('model_B'))  # Should be less than the initial rating

# print(e.get_top_models(2))  # Get the top 2 models
# print(e.sample_next_match())  # Sample the next match based on ratings

# # Process match records from a JSON file
# e.process_match_records('match_records.json')
# print(e.get_wins('model_A'))  # Get the number of wins for model_A
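
A quick sanity check of the update math in record_match and expected_score: with both models at the default initial_rating of 1000, the rating gap is 0, so expected_score returns 1 / (1 + 10**0) = 0.5, and with k_factor=32 the winner gains 32 * (1 - 0.5) = 16 points while the loser drops by the same 16. A minimal sketch, assuming it runs next to elo_rank.py so the import resolves:

from elo_rank import EloRank

elo = EloRank()
elo.add_model('model_A')
elo.add_model('model_B')

# Equal ratings -> expected score 0.5 for both sides, so the update is +/-16 with K=32.
elo.record_match('model_A', 'model_B')
print(elo.get_rating('model_A'))  # 1016.0
print(elo.get_rating('model_B'))  # 984.0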
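
process_match_records expects the file to hold a JSON list of objects with the keys model_1, model_2, and winner, where winner equals one of the two model IDs; a record whose winner matches neither ID (for example a tie) is simply skipped. A hedged sketch of that format using placeholder model names, written to match_records.json and replayed through the class:

import json

from elo_rank import EloRank

# Hypothetical records illustrating the expected schema; the model names are placeholders.
records = [
    {"model_1": "model_A", "model_2": "model_B", "winner": "model_A"},
    {"model_1": "model_B", "model_2": "model_C", "winner": "model_C"},
    {"model_1": "model_A", "model_2": "model_C", "winner": "model_A"},
]

with open("match_records.json", "w") as f:
    json.dump(records, f, indent=2)

elo = EloRank()
elo.process_match_records("match_records.json")  # unseen models are added on first sight

print(elo.get_wins("model_A"))   # 2
print(elo.get_top_models(2))     # ['model_A', 'model_C']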