import json
import os
import re
from datetime import datetime
from typing import Tuple

import pandas as pd
from bs4 import BeautifulSoup


def format_datetime(dt_str: str) -> str:
    """
    Format a datetime string for display.

    :param dt_str: String representing a datetime in ISO format
    :return: Formatted datetime string
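
    Example (illustrative input):
        >>> format_datetime("2024-05-01T12:34:56+00:00")
        '2024-05-01 12:34:56'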
    """
    return dt_str.replace("T", " ").split("+")[0]


def read_json_line_by_line(file_path):
    """
    Read a JSON file line by line, parsing each line as a separate JSON object.

    :param file_path: Path to the JSON file
    :return: List of parsed JSON objects

    This function is useful for reading large JSON files that contain one JSON object
    per line. It handles JSON parsing errors gracefully, skipping invalid lines.
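
    Illustrative line (keys mirror the performance records consumed in generate_report):
        {"model": "tiny", "device": "iPhone14,2", "os": "iOS 17.4", "speed": 21.5}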
    """
    data = []
    with open(file_path, "r") as f:
        for line in f:
            try:
                item = json.loads(line.strip())
                data.append(item)
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in {file_path}: {line}")
    return data


def calculate_change(new: float, old: float, metric_name: str) -> Tuple[float, str]:
    """Calculate percentage change and return with appropriate emoji."""
    pct_change = new - old
    if abs(pct_change) < 1:
        emoji = "↔️"
    elif pct_change > 0:
        emoji = "🟒" if "wer" not in metric_name.lower() else "❌"
    else:
        emoji = "❌" if "wer" not in metric_name.lower() else "🟒"

    return (pct_change, emoji)
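
# Illustrative behavior (assumed values): an increase counts as an improvement for most
# metrics, but the direction flips for WER, where lower is better:
#   calculate_change(105.0, 100.0, "Speed")    -> (5.0, "🟢")
#   calculate_change(6.0, 4.0, "Average WER")  -> (2.0, "❌")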


def has_changes(config, prev_dict, curr_dict):
    """Check if any metrics have changed."""
    curr = curr_dict[config]
    prev = prev_dict[config]

    metrics = ["speed", "tokens_per_second", "average_wer", "qoi"]
    for key in metrics:
        if key in curr and key in prev:
            curr_val = curr[key]
            prev_val = prev[key]
            if abs(curr_val - prev_val) >= 1:  # 1% threshold
                return True
    return False


def format_metrics_table(config, prev_dict, curr_dict):
    """Format metrics into a table string."""
    curr = curr_dict[config]
    prev = prev_dict[config]

    metrics = [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]

    table = "```\nMetric    Previous    Current    Change\n--------------------------------\n"
    for metric_name, key in metrics:
        if key in curr and key in prev:
            curr_val = curr[key]
            prev_val = prev[key]
            pct_change, _ = calculate_change(curr_val, prev_val, metric_name)
            if abs(pct_change) >= 1:  # Only show metrics with changes
                table += f"{metric_name:<9} {prev_val:<11.2f} {curr_val:<10.2f} {pct_change:.2f}\n"
    table += "```"
    return table


def extract_status_and_os(cell_value):
    """
    Extract status and OS versions from a cell, handling both HTML and plain text.
    Returns list of tuples: [(status, os_version), ...]
    """
    results = []
    cell_value = str(cell_value)

    # Cells marked "Not Supported" carry no status information
    if cell_value == "Not Supported":
        return results

    # Split the cell into parts (first element and subsequent <p> elements)
    parts = cell_value.split("<p>")

    for part in parts:
        part = part.removesuffix("</p>")  # drop the closing tag, if present
        if not part:
            continue

        # Check if part contains warning symbol
        if "⚠️" in part:
            # Parse HTML to extract OS version from anchor tag
            soup = BeautifulSoup(part, "html.parser")
            # Find text after href that contains OS version
            text = soup.get_text()
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", text)
            if os_match:
                os_version = os_match.group(0)
                results.append(("⚠️", os_version))
        else:
            # For success cases, OS version is directly in the text
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", part)
            if os_match:
                os_version = os_match.group(0)
                results.append(("βœ…", os_version))

    return results
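
# Illustrative cell value (the exact HTML shape of the support-table cells is assumed):
#   "✅ iOS 17.1<p>⚠️ <a href='#'>failed</a> on iPadOS 17.0</p>"
#   -> [("✅", "iOS 17.1"), ("⚠️", "iPadOS 17.0")]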


def escape_string(s: str) -> str:
    """Escape a string to be used as a value in JSON."""
    return (
        s.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
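
# Illustrative: escape_string('a "b"\n') == 'a \\"b\\"\\n'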


def analyze_support_changes(prev_csv, curr_csv):
    """Analyze support changes between CSV files."""
    # Read CSV files
    prev_df = pd.read_csv(prev_csv)
    prev_df.set_index(prev_df.columns[0], inplace=True)

    curr_df = pd.read_csv(curr_csv)
    curr_df.set_index(curr_df.columns[0], inplace=True)

    # Get device lists (excluding first column which is the index)
    prev_devices = sorted(prev_df.columns[1:])
    curr_devices = sorted(curr_df.columns[1:])

    # Calculate device ratio
    device_ratio = len(curr_devices) / len(prev_devices) if prev_devices else 1
    needs_alert = device_ratio < 0.9  # Alert if less than 90% of previous devices

    # Convert to dictionary for easier comparison
    prev_status = {}
    curr_status = {}

    # Process previous data
    for idx in range(len(prev_df)):
        model = prev_df.index[idx]
        for col_idx in range(1, len(prev_df.columns)):
            cell_value = prev_df.iloc[idx, col_idx]
            device = prev_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                prev_status[(model, device, os_version)] = status

    # Process current data and track new configurations
    new_configs = []
    for idx in range(len(curr_df)):
        model = curr_df.index[idx]
        for col_idx in range(1, len(curr_df.columns)):
            cell_value = curr_df.iloc[idx, col_idx]
            device = curr_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                curr_status[(model, device, os_version)] = status
                # Check if this is a new configuration
                if (model, device, os_version) not in prev_status:
                    new_configs.append((model, device, os_version))

    # Find changes
    fixed_errors = []
    new_errors = []

    # Check all configurations that exist in both datasets
    common_configs = set(prev_status.keys()) & set(curr_status.keys())
    for config in common_configs:
        model, device, os_version = config
        if prev_status[config] == "⚠️" and curr_status[config] == "✅":
            fixed_errors.append((model, device, os_version))
        elif prev_status[config] == "✅" and curr_status[config] == "⚠️":
            new_errors.append((model, device, os_version))

    return fixed_errors, new_errors, new_configs, needs_alert
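
# Illustrative return value (assumed data): a configuration that moved from ⚠️ to ✅
# lands in fixed_errors, e.g.
#   ([("tiny", "iPhone14,2", "iOS 17.4")], [], [], False)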


def generate_report():
    # Load current and previous data
    prev_perf_data = read_json_line_by_line("report_data/performance_data.json")
    curr_perf_data = read_json_line_by_line("dashboard_data/performance_data.json")

    prev_dict = {(d["model"], d["device"], d["os"]): d for d in prev_perf_data}
    curr_dict = {(d["model"], d["device"], d["os"]): d for d in curr_perf_data}
    common_configs = set(curr_dict.keys()) & set(prev_dict.keys())

    # Load version data
    with open("report_data/version.json", "r") as f:
        prev_version = json.load(f)
    with open("dashboard_data/version.json", "r") as f:
        curr_version = json.load(f)

    prev_releases = set(prev_version.get("releases", []))
    curr_releases = set(curr_version.get("releases", []))
    new_releases = curr_releases - prev_releases
    removed_releases = prev_releases - curr_releases

    # Track metrics
    total_configs = len(common_configs)
    improved_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    regressed_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    new_data_points = len(set(curr_dict.keys()) - set(prev_dict.keys()))

    # Count improvements and regressions per metric, reusing calculate_change so the
    # 1-point threshold and the WER direction (lower is better) stay consistent
    for config in common_configs:
        prev_metrics, curr_metrics = prev_dict[config], curr_dict[config]
        for key in improved_metrics:
            if key not in curr_metrics or key not in prev_metrics:
                continue
            change, emoji = calculate_change(curr_metrics[key], prev_metrics[key], key)
            if abs(change) < 1:
                continue
            if emoji == "🟢":
                improved_metrics[key] += 1
            else:
                regressed_metrics[key] += 1

    # Analyze support changes
    fixed_errors, new_errors, new_configs, needs_alert = analyze_support_changes(
        "report_data/support_data.csv", "dashboard_data/support_data.csv"
    )

    # Create Slack blocks
    current_time = datetime.now().strftime("%B %-d, %Y %H:%M:%S")
    prev_release_tag, curr_release_tag = (
        prev_version["whisperkit_version"],
        curr_version["whisperkit_version"],
    )
    slack_blocks = {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "πŸ”” WhisperKit Dataset Update Report πŸ””",
                    "emoji": True,
                },
            },
            {
                "type": "context",
                "elements": [{"text": f"*{current_time}*", "type": "mrkdwn"}],
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {"type": "mrkdwn", "text": "ℹ️ *CURRENT VERSION INFO* ℹ️"},
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Last Modified:* `{format_datetime(curr_version['last_modified'])}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Dataset SHA:* `{curr_version['sha']}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Current Releases:* {', '.join(f'`{r}`' for r in curr_version['releases'])}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Current Release Tag:* `{curr_release_tag}`",
                },
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "πŸ”„ *SUMMARY OF PERFORMANCE UPDATES* πŸ”„",
                },
            },
        ]
    }

    # Add release information
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Added Releases:* {', '.join(sorted(new_releases)) if new_releases else 'None'}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Removed Releases:* {', '.join(sorted(removed_releases)) if removed_releases else 'None'}",
                },
            },
        ]
    )
    if prev_release_tag != curr_release_tag:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *Release Tag Change:* `{prev_release_tag}` β†’ `{curr_release_tag}`",
                },
            }
        )
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *New Data Points:* `{new_data_points}` new configurations",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
        ]
    )

    # Add metrics summary
    for metric_name, key in [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β€’ *{metric_name}:* `{improved_metrics[key]}` improved, `{regressed_metrics[key]}` regressed",
                },
            }
        )

    # Add support changes section
    if fixed_errors or new_errors or new_configs:
        slack_blocks["blocks"].extend(
            [
                {"type": "divider"},
                {
                    "type": "section",
                    "text": {"type": "mrkdwn", "text": "πŸ“± *DEVICE SUPPORT CHANGES* πŸ“±"},
                },
            ]
        )

        if fixed_errors:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Successful Configurations That Override Previous Failures*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(fixed_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β€’ {model} on {device} ({os_version})",
                        },
                    }
                )

        if new_errors:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Failed Configurations That Override Previous Successes*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(new_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β€’ {model} on {device} ({os_version})",
                        },
                    }
                )

        if new_configs:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Newly Tested Configurations*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(new_configs):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β€’ {model} on {device} ({os_version})",
                        },
                    }
                )

    # Add alert if significant decrease in device count
    if needs_alert:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "⚠️ *ALERT:* Current device count is less than 90% of previous version's device count, test on more devices before updating the benchmark website!",
                },
            }
        )

    # Create performance text as a single mrkdwn string
    performance_text = ""
    if common_configs:
        performance_text = "💡 *Performance Updates* 💡\n\n"

        # Group by model for better organization
        models = sorted(set(model for model, _, _ in common_configs))

        for model in models:
            model_configs = sorted([cfg for cfg in common_configs if cfg[0] == model])

            for config in model_configs:
                device_info = f"*{model}* ({config[2]})"

                if not has_changes(config, prev_dict, curr_dict):
                    # If no changes, just add the model with a checkmark
                    performance_text += f"{device_info} βœ…\n\n"
                else:
                    # If there are changes, show the metrics
                    performance_text += f"{device_info}\n"
                    performance_text += format_metrics_table(
                        config, prev_dict, curr_dict
                    )
                    performance_text += "\n\n"

    # Write to GITHUB_OUTPUT
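    # GitHub Actions treats "name<<DELIMITER ... DELIMITER" in $GITHUB_OUTPUT as a
    # multi-line value, so the JSON payload is emitted below as a heredoc-style block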
    github_output = os.getenv("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a") as f:
            f.write("slack_message_payload<<EOF\n")
            json.dump(slack_blocks, f, indent=2)
            f.write("\nEOF\n")

        with open(github_output, "a") as f:
            escaped_text = escape_string(performance_text)
            print(f"performance_message={escaped_text}", file=f)


if __name__ == "__main__":
    generate_report()