diff --git a/documentation/docs/Organizers/Benchmark_Creation/Leaderboard-Functionality.md b/documentation/docs/Organizers/Benchmark_Creation/Leaderboard-Functionality.md
index ded5fffa4..799e971b6 100644
--- a/documentation/docs/Organizers/Benchmark_Creation/Leaderboard-Functionality.md
+++ b/documentation/docs/Organizers/Benchmark_Creation/Leaderboard-Functionality.md
@@ -49,6 +49,7 @@ Computation options are:
 - avg
 - min
 - max
+ - avg_rank
 
 These are applied across the columns specified as `computation_indexes`.
 
diff --git a/documentation/docs/Organizers/Benchmark_Creation/Yaml-Structure.md b/documentation/docs/Organizers/Benchmark_Creation/Yaml-Structure.md
index 16bf7b0e3..12c8ffaa0 100644
--- a/documentation/docs/Organizers/Benchmark_Creation/Yaml-Structure.md
+++ b/documentation/docs/Organizers/Benchmark_Creation/Yaml-Structure.md
@@ -259,7 +259,7 @@ fact_sheet: {
   - Ascending: smaller scores are better
   - Descending: larger scores are better
 - **computation:** computation to be applied *must be accompanied by computation indexes*
-  - computation options: sum, avg, min, max
+  - computation options: sum, avg, min, max, avg_rank
 - **computation_indexes:** an array of indexes of the columns the computation should be applied to
 - **precision:** (*integer, default=2*) to round the score to *precision* number of digits
 - **hidden:** (*boolean, default=False*) to hide/unhide a column on leaderboard
diff --git a/src/apps/api/serializers/leaderboards.py b/src/apps/api/serializers/leaderboards.py
index f49e7c8df..0299923a6 100644
--- a/src/apps/api/serializers/leaderboards.py
+++ b/src/apps/api/serializers/leaderboards.py
@@ -101,18 +101,24 @@ def get_submissions(self, instance):
         # desc == -colname
         # asc == colname
         primary_col = instance.columns.get(index=instance.primary_index)
-        # Order first by primary column. Then order by other columns after for tie breakers.
-        ordering = [f'{"-" if primary_col.sorting == "desc" else ""}primary_col']
-        submissions = (
+        base_qs = (
             Submission.objects.filter(
                 leaderboard=instance,
                 is_specific_task_re_run=False
             )
             .select_related('owner')
             .prefetch_related('scores')
-            .annotate(primary_col=Sum('scores__score', filter=Q(scores__column=primary_col)))
         )
+        # AVERAGE_RANK columns have no stored scores; skip DB-level sort and re-sort in the view.
+        if primary_col.computation == Column.AVERAGE_RANK:
+            ordering = ['created_when']
+            submissions = base_qs
+        else:
+            ordering = [f'{"-" if primary_col.sorting == "desc" else ""}primary_col']
+            submissions = base_qs.annotate(primary_col=Sum('scores__score', filter=Q(scores__column=primary_col)))
         for column in instance.columns.exclude(id=primary_col.id).order_by('index'):
+            if column.computation == Column.AVERAGE_RANK:
+                continue
             col_name = f'col{column.index}'
             ordering.append(f'{"-" if column.sorting == "desc" else ""}{col_name}')
             kwargs = {
@@ -157,8 +163,7 @@ def get_submissions(self, instance):
         # desc == -colname
         # asc == colname
         primary_col = instance.leaderboard.columns.get(index=instance.leaderboard.primary_index)
-        ordering = [f'{"-" if primary_col.sorting == "desc" else ""}primary_col']
-        submissions = (
+        base_qs = (
             Submission.objects.filter(
                 phase=instance,
                 is_soft_deleted=False,
@@ -168,14 +173,22 @@ def get_submissions(self, instance):
             )
             .select_related('owner')
             .prefetch_related('scores', 'scores__column')
-            .annotate(primary_col=Sum('scores__score', filter=Q(scores__column=primary_col)))
         )
+        # AVERAGE_RANK columns have no stored scores; skip DB-level sort and re-sort in the view.
+        if primary_col.computation == Column.AVERAGE_RANK:
+            ordering = ['created_when']
+            submissions = base_qs
+        else:
+            ordering = [f'{"-" if primary_col.sorting == "desc" else ""}primary_col']
+            submissions = base_qs.annotate(primary_col=Sum('scores__score', filter=Q(scores__column=primary_col)))
         for column in (
             instance.leaderboard.columns
             .filter(hidden=False)
             .exclude(id=primary_col.id)
             .order_by('index')
         ):
+            if column.computation == Column.AVERAGE_RANK:
+                continue
             col_name = f'col{column.index}'
             ordering.append(f'{"-" if column.sorting == "desc" else ""}{col_name}')
             kwargs = {
diff --git a/src/apps/api/views/competitions.py b/src/apps/api/views/competitions.py
index 53b9d2193..f577e1178 100644
--- a/src/apps/api/views/competitions.py
+++ b/src/apps/api/views/competitions.py
@@ -31,7 +31,8 @@
 from datasets.models import Data
 from competitions.tasks import batch_send_email, manual_migration, create_competition_dump
 from competitions.utils import get_popular_competitions, get_recent_competitions
-from leaderboards.models import Leaderboard
+from leaderboards.models import Leaderboard, Column
+from leaderboards.ranking import inject_average_ranks
 from utils.data import make_url_sassy
 from api.permissions import IsOrganizerOrCollaborator
 from django.db import transaction
@@ -861,6 +862,12 @@ def get_leaderboard(self, request, pk):
         for k, v in submissions_keys.items():
             response['submissions'][v]['detailed_results'] = submission_detailed_results[k]
 
+        # Compute average rank for any AVERAGE_RANK columns and inject into response.
+        col_by_index = {col['index']: col for col in columns}
+        avg_rank_cols = [col for col in columns if col.get('computation') == Column.AVERAGE_RANK]
+        if avg_rank_cols:
+            inject_average_ranks(response['submissions'], avg_rank_cols, col_by_index, response['primary_index'])
+
         # --- pagination addition ---
         total_count = len(response['submissions'])
         paginator = DynamicChoicePagination()
diff --git a/src/apps/competitions/models.py b/src/apps/competitions/models.py
index 34e739e5d..01e71a1dd 100644
--- a/src/apps/competitions/models.py
+++ b/src/apps/competitions/models.py
@@ -12,7 +12,7 @@
 from decimal import Decimal
 
 from celery_config import app, app_for_vhost
-from leaderboards.models import SubmissionScore
+from leaderboards.models import SubmissionScore, Column
 from profiles.models import User, Organization
 from utils.data import PathWrapper
 from utils.storage import BundleStorage
@@ -694,7 +694,7 @@ def check_child_submission_statuses(self):
     def calculate_scores(self):
         # leaderboards = self.phase.competition.leaderboards.all()
         # for leaderboard in leaderboards:
-        columns = self.phase.leaderboard.columns.exclude(computation__isnull=True)
+        columns = self.phase.leaderboard.columns.exclude(computation__isnull=True).exclude(computation=Column.AVERAGE_RANK)
 
         for column in columns:
             scores = self.scores.filter(column__index__in=column.computation_indexes.split(',')).values_list('score', flat=True)
diff --git a/src/apps/leaderboards/migrations/0010_alter_column_computation.py b/src/apps/leaderboards/migrations/0010_alter_column_computation.py
new file mode 100644
index 000000000..11d39e8c3
--- /dev/null
+++ b/src/apps/leaderboards/migrations/0010_alter_column_computation.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.12 on 2026-04-23 09:35
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('leaderboards', '0009_alter_column_id_alter_leaderboard_id_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='column',
+            name='computation',
+            field=models.TextField(blank=True, choices=[('avg', 'Average'), ('sum', 'Sum'), ('min', 'Min'), ('max', 'Max'), ('avg_rank', 'Average Rank')], null=True),
+        ),
+    ]
diff --git a/src/apps/leaderboards/models.py b/src/apps/leaderboards/models.py
index 4c9eda5a2..151d6d12a 100644
--- a/src/apps/leaderboards/models.py
+++ b/src/apps/leaderboards/models.py
@@ -36,11 +36,13 @@ class Column(models.Model):
     SUM = 'sum'
     MIN = 'min'
     MAX = 'max'
+    AVERAGE_RANK = 'avg_rank'
     COMPUTATION_CHOICES = (
         (AVERAGE, 'Average'),
         (SUM, 'Sum'),
         (MIN, 'Min'),
         (MAX, 'Max'),
+        (AVERAGE_RANK, 'Average Rank'),
     )
     SORTING = (
         ('desc', 'Descending'),
diff --git a/src/apps/leaderboards/ranking.py b/src/apps/leaderboards/ranking.py
new file mode 100644
index 000000000..ab0ef8698
--- /dev/null
+++ b/src/apps/leaderboards/ranking.py
@@ -0,0 +1,108 @@
+from leaderboards.models import Column
+
+
+def fractional_rank(values):
+    """
+    Fractional (average) ranking: tied values receive the mean of the ranks they
+    would occupy, identical to scipy.stats.rankdata(method='average').
+    Rank 1 is assigned to the smallest value.
+    """
+    sorted_vals = sorted(values)
+    rank_sum = {}
+    rank_count = {}
+    for rank, val in enumerate(sorted_vals, start=1):
+        rank_sum[val] = rank_sum.get(val, 0) + rank
+        rank_count[val] = rank_count.get(val, 0) + 1
+    return [rank_sum[v] / rank_count[v] for v in values]
+
+
+def inject_average_ranks(submissions, avg_rank_cols, col_by_index, primary_index):
+    """
+    For each AVERAGE_RANK column, rank submissions on each referenced sub-column
+    using fractional (average) ranking, compute the mean rank per submission, and
+    append it as a synthetic score entry.
+    If the primary column is AVERAGE_RANK, re-sort the list in-place afterward.
+
+    Fractional ranking: tied submissions share the mean of the ranks they occupy
+    (e.g. two entries tying for positions 2 and 3 both receive rank 2.5).
+    Submissions missing a score for a sub-column are placed last (rank = n).
+    When a submission has multiple scores for the same column (multi-task), they are
+    summed before ranking, consistent with the ORM annotation in the serializer.
+    """
+    # Pre-aggregate scores per submission per column (sum across tasks).
+    submission_col_scores = []
+    for sub in submissions:
+        col_scores = {}
+        for s in sub['scores']:
+            idx = s['index']
+            try:
+                val = float(s['score'])
+            except (ValueError, TypeError):
+                val = None
+            if idx not in col_scores:
+                col_scores[idx] = val
+            elif val is not None:
+                col_scores[idx] = (col_scores[idx] or 0) + val
+        submission_col_scores.append(col_scores)
+
+    n = len(submissions)
+
+    for col in avg_rank_cols:
+        if not col.get('computation_indexes'):
+            continue
+        sub_indices = [int(i) for i in col['computation_indexes']]
+
+        per_column_ranks = []
+        for sub_idx in sub_indices:
+            sub_col = col_by_index.get(sub_idx)
+            if sub_col is None:
+                continue
+
+            valid_indices = [i for i in range(n) if submission_col_scores[i].get(sub_idx) is not None]
+            valid_scores = [submission_col_scores[i][sub_idx] for i in valid_indices]
+
+            if not valid_scores:
+                continue
+
+            # Negate descending columns so rank 1 = highest score.
+            scores_for_rank = [-s for s in valid_scores] if sub_col['sorting'] == 'desc' else valid_scores
+            fractions = fractional_rank(scores_for_rank)
+
+            ranks = {i: float(n) for i in range(n)}  # default: worst rank for unscored
+            for pos, sub_i in enumerate(valid_indices):
+                ranks[sub_i] = fractions[pos]
+            per_column_ranks.append(ranks)
+
+        if not per_column_ranks:
+            continue
+
+        is_primary = col['index'] == primary_index
+        for i, sub in enumerate(submissions):
+            sub_ranks = [r[i] for r in per_column_ranks]
+            avg_rank = sum(sub_ranks) / len(sub_ranks)
+            score_entry = {
+                'index': col['index'],
+                'column_key': col['key'],
+                'score': str(round(avg_rank, col.get('precision', 2))),
+                'is_primary': is_primary,
+            }
+            # The frontend matches scores by (task_id, column_key). Average rank is
+            # cross-task, so inject one copy per task that already has scores here.
+            task_ids = {s['task_id'] for s in sub['scores'] if s.get('task_id') is not None}
+            for task_id in task_ids:
+                sub['scores'].append({**score_entry, 'task_id': task_id})
+
+    primary_col = col_by_index.get(primary_index)
+    if primary_col and primary_col.get('computation') == Column.AVERAGE_RANK:
+        reverse = primary_col['sorting'] == 'desc'
+
+        def _sort_key(sub):
+            for s in sub['scores']:
+                if s['index'] == primary_index:
+                    try:
+                        return float(s['score'])
+                    except (ValueError, TypeError):
+                        pass
+            return float('inf') if not reverse else float('-inf')
+
+        submissions.sort(key=_sort_key, reverse=reverse)
diff --git a/src/apps/leaderboards/tests/__init__.py b/src/apps/leaderboards/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/apps/leaderboards/tests/test_ranking.py b/src/apps/leaderboards/tests/test_ranking.py
new file mode 100644
index 000000000..3056eb0af
--- /dev/null
+++ b/src/apps/leaderboards/tests/test_ranking.py
@@ -0,0 +1,259 @@
+from leaderboards.ranking import fractional_rank, inject_average_ranks
+
+
+# ---------------------------------------------------------------------------
+# fractional_rank
+# ---------------------------------------------------------------------------
+
+def test_fractional_rank_no_ties():
+    # 3 distinct values → straight 1, 2, 3
+    assert fractional_rank([3.0, 1.0, 2.0]) == [3.0, 1.0, 2.0]
+
+
+def test_fractional_rank_two_way_tie():
+    # Positions 1 and 2 are tied → both get 1.5
+    assert fractional_rank([1.0, 1.0, 2.0]) == [1.5, 1.5, 3.0]
+
+
+def test_fractional_rank_three_way_tie():
+    # Positions 1, 2, 3 are all tied → mean(1,2,3) = 2.0
+    assert fractional_rank([5.0, 5.0, 5.0]) == [2.0, 2.0, 2.0]
+
+
+def test_fractional_rank_tie_at_end():
+    # Two tied at the last positions
+    assert fractional_rank([1.0, 2.0, 2.0]) == [1.0, 2.5, 2.5]
+
+
+def test_fractional_rank_single_element():
+    assert fractional_rank([42.0]) == [1.0]
+
+
+# ---------------------------------------------------------------------------
+# Helpers for inject_average_ranks
+# ---------------------------------------------------------------------------
+
+def _make_col(index, key, sorting='desc', computation_indexes=None, precision=2, computation='avg_rank'):
+    return {
+        'id': index,
+        'index': index,
+        'key': key,
+        'title': key,
+        'sorting': sorting,
+        'computation': computation,
+        'computation_indexes': [str(i) for i in (computation_indexes or [])],
+        'precision': precision,
+        'hidden': False,
+    }
+
+
+def _make_submission(scores):
+    """
+    scores: list of (column_index, column_key, score_value, task_id)
+    """
+    return {
+        'scores': [
+            {'index': idx, 'column_key': key, 'score': str(val), 'task_id': tid,
+             'is_primary': False}
+            for idx, key, val, tid in scores
+        ]
+    }
+
+
+# ---------------------------------------------------------------------------
+# inject_average_ranks
+# ---------------------------------------------------------------------------
+
+def test_inject_average_ranks_basic_values():
+    # 3 submissions, 1 descending sub-column (higher = better = rank 1).
+    # Scores 0.9, 0.6, 0.3 → ranks 1, 2, 3 → avg_rank 1.0, 2.0, 3.0
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(2, 'avg_rank', computation_indexes=[0])
+
+    submissions = [
+        _make_submission([(0, 'col0', 0.9, 1)]),
+        _make_submission([(0, 'col0', 0.6, 1)]),
+        _make_submission([(0, 'col0', 0.3, 1)]),
+    ]
+
+    col_by_index = {0: col0, 2: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    avg_scores = [
+        next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')
+        for sub in submissions
+    ]
+    assert float(avg_scores[0]['score']) == 1.0
+    assert float(avg_scores[1]['score']) == 2.0
+    assert float(avg_scores[2]['score']) == 3.0
+
+
+def test_inject_average_ranks_with_ties():
+    # Two submissions tied on the sub-column → both get fractional rank 1.5
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(1, 'avg_rank', computation_indexes=[0])
+
+    submissions = [
+        _make_submission([(0, 'col0', 0.8, 1)]),
+        _make_submission([(0, 'col0', 0.8, 1)]),
+        _make_submission([(0, 'col0', 0.5, 1)]),
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    avg_scores = [
+        float(next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')['score'])
+        for sub in submissions
+    ]
+    assert avg_scores[0] == 1.5
+    assert avg_scores[1] == 1.5
+    assert avg_scores[2] == 3.0
+
+
+def test_inject_average_ranks_ascending_column():
+    # Ascending column: lower score = better = rank 1
+    col0 = _make_col(0, 'col0', sorting='asc')
+    avg_col = _make_col(1, 'avg_rank', computation_indexes=[0])
+
+    submissions = [
+        _make_submission([(0, 'col0', 0.1, 1)]),  # lowest → rank 1
+        _make_submission([(0, 'col0', 0.5, 1)]),  # rank 2
+        _make_submission([(0, 'col0', 0.9, 1)]),  # highest → rank 3
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    avg_scores = [
+        float(next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')['score'])
+        for sub in submissions
+    ]
+    assert avg_scores[0] == 1.0
+    assert avg_scores[1] == 2.0
+    assert avg_scores[2] == 3.0
+
+
+def test_inject_average_ranks_missing_score_gets_worst_rank():
+    # Submission that has scores for other columns but not the avg_rank sub-column
+    # gets worst rank (= n = 3). It still has a task_id from its other scores so
+    # the injected entry can be matched by the frontend.
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(1, 'avg_rank', computation_indexes=[0])
+
+    submissions = [
+        _make_submission([(0, 'col0', 0.9, 1)]),  # rank 1
+        _make_submission([(0, 'col0', 0.5, 1)]),  # rank 2
+        _make_submission([(99, 'other_col', 0.7, 1)]),  # no col0 score → rank 3
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    avg_scores = [
+        float(next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')['score'])
+        for sub in submissions
+    ]
+    assert avg_scores[0] == 1.0
+    assert avg_scores[1] == 2.0
+    assert avg_scores[2] == 3.0  # worst rank = n = 3
+
+
+def test_inject_average_ranks_two_subcolumns():
+    # Average over two sub-columns
+    col0 = _make_col(0, 'col0', sorting='desc')
+    col1 = _make_col(1, 'col1', sorting='desc')
+    avg_col = _make_col(2, 'avg_rank', computation_indexes=[0, 1])
+
+    # Sub0: col0=0.9 (rank1), col1=0.3 (rank3) → avg 2.0
+    # Sub1: col0=0.6 (rank2), col1=0.6 (rank2) → avg 2.0
+    # Sub2: col0=0.3 (rank3), col1=0.9 (rank1) → avg 2.0
+    submissions = [
+        _make_submission([(0, 'col0', 0.9, 1), (1, 'col1', 0.3, 1)]),
+        _make_submission([(0, 'col0', 0.6, 1), (1, 'col1', 0.6, 1)]),
+        _make_submission([(0, 'col0', 0.3, 1), (1, 'col1', 0.9, 1)]),
+    ]
+
+    col_by_index = {0: col0, 1: col1, 2: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    avg_scores = [
+        float(next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')['score'])
+        for sub in submissions
+    ]
+    assert avg_scores[0] == 2.0
+    assert avg_scores[1] == 2.0
+    assert avg_scores[2] == 2.0
+
+
+def test_inject_average_ranks_task_id_propagation():
+    # The injected score must carry the same task_id as existing scores so the
+    # frontend can match it via (task_id, column_key).
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(1, 'avg_rank', computation_indexes=[0])
+
+    task_id = 99
+    submissions = [
+        _make_submission([(0, 'col0', 0.9, task_id)]),
+        _make_submission([(0, 'col0', 0.5, task_id)]),
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    for sub in submissions:
+        injected = [s for s in sub['scores'] if s['column_key'] == 'avg_rank']
+        assert len(injected) == 1
+        assert injected[0]['task_id'] == task_id
+
+
+def test_inject_average_ranks_multi_task_injects_one_per_task():
+    # Multi-task submissions have scores with different task_ids.
+    # One avg_rank entry must be injected per task_id.
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(1, 'avg_rank', computation_indexes=[0])
+
+    submissions = [
+        _make_submission([(0, 'col0', 0.9, 10), (0, 'col0', 0.8, 20)]),
+        _make_submission([(0, 'col0', 0.5, 10), (0, 'col0', 0.4, 20)]),
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=0)
+
+    for sub in submissions:
+        injected = [s for s in sub['scores'] if s['column_key'] == 'avg_rank']
+        injected_task_ids = {s['task_id'] for s in injected}
+        assert injected_task_ids == {10, 20}
+
+
+def test_inject_average_ranks_sorts_by_primary_avg_rank():
+    # When the avg_rank column is the primary, submissions are re-sorted
+    # ascending (rank 1 = best position = first row).
+    col0 = _make_col(0, 'col0', sorting='desc')
+    avg_col = _make_col(1, 'avg_rank', sorting='asc', computation_indexes=[0])
+
+    # Scores: 0.3 → rank3, 0.9 → rank1, 0.6 → rank2
+    # After sort ascending by avg_rank: rank1 first, then rank2, then rank3
+    submissions = [
+        _make_submission([(0, 'col0', 0.3, 1)]),  # will become rank 3
+        _make_submission([(0, 'col0', 0.9, 1)]),  # will become rank 1
+        _make_submission([(0, 'col0', 0.6, 1)]),  # will become rank 2
+    ]
+
+    col_by_index = {0: col0, 1: avg_col}
+    inject_average_ranks(submissions, [avg_col], col_by_index, primary_index=1)
+
+    avg_scores = [
+        float(next(s for s in sub['scores'] if s['column_key'] == 'avg_rank')['score'])
+        for sub in submissions
+    ]
+    assert avg_scores == [1.0, 2.0, 3.0]
+
+
+def test_inject_average_ranks_no_avg_rank_cols_is_noop():
+    submissions = [_make_submission([(0, 'col0', 0.9, 1)])]
+    original_scores = list(submissions[0]['scores'])
+
+    inject_average_ranks(submissions, [], {}, primary_index=0)
+
+    assert submissions[0]['scores'] == original_scores
diff --git a/src/static/riot/competitions/editor/_leaderboard.tag b/src/static/riot/competitions/editor/_leaderboard.tag
index 274cb4da6..c53882c06 100644
--- a/src/static/riot/competitions/editor/_leaderboard.tag
+++ b/src/static/riot/competitions/editor/_leaderboard.tag
@@ -139,6 +139,7 @@
     Sum
     Min
     Max
+    Average Rank