Spaces:
Running
Running
import json | |
import os | |
import re | |
from datetime import datetime | |
from typing import Tuple | |
import pandas as pd | |
from bs4 import BeautifulSoup | |
def format_datetime(dt_str: str) -> str:
    """Render an ISO-8601 datetime string for display.

    Replaces the 'T' date/time separator with a space and drops any
    '+HH:MM' UTC-offset suffix.

    :param dt_str: String representing a datetime in ISO format
    :return: Formatted datetime string
    """
    display = dt_str.replace("T", " ")
    return display.partition("+")[0]
def read_json_line_by_line(file_path):
    """Read a JSON-Lines file: one JSON object per line.

    Lines that fail to parse are reported and skipped rather than raising,
    which makes this safe for large, partially corrupt files.

    :param file_path: Path to the JSON file
    :return: List of parsed JSON objects
    """
    parsed_objects = []
    with open(file_path, "r") as handle:
        for line in handle:
            try:
                parsed_objects.append(json.loads(line.strip()))
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in {file_path}: {line}")
    return parsed_objects
def calculate_change(new: float, old: float, metric_name: str) -> Tuple[float, str]:
    """Return the metric delta with a status emoji.

    The delta is a plain difference (``new - old``); callers treat it as a
    percentage-point change. A delta under 1 in magnitude counts as
    unchanged. For most metrics an increase is an improvement; for
    WER-style metrics (name contains "wer", case-insensitive) the polarity
    is inverted, since lower WER is better.
    """
    delta = new - old
    if abs(delta) < 1:
        return (delta, "βοΈ")
    wer_like = "wer" in metric_name.lower()
    if delta > 0:
        return (delta, "β" if wer_like else "π’")
    return (delta, "π’" if wer_like else "β")
def has_changes(config, prev_dict, curr_dict):
    """Return True when any tracked metric differs by at least 1 between runs.

    Metrics missing from either snapshot are ignored.
    """
    current = curr_dict[config]
    previous = prev_dict[config]
    tracked = ("speed", "tokens_per_second", "average_wer", "qoi")
    return any(
        abs(current[key] - previous[key]) >= 1  # 1% threshold
        for key in tracked
        if key in current and key in previous
    )
def format_metrics_table(config, prev_dict, curr_dict):
    """Render a fixed-width code block listing metrics that moved by >= 1.

    Metrics present in only one snapshot, or whose delta is below the
    1-point threshold, are omitted from the table body.
    """
    current = curr_dict[config]
    previous = prev_dict[config]
    table = "```\nMetric Previous Current Change\n--------------------------------\n"
    for label, key in (
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ):
        if key not in current or key not in previous:
            continue
        new_val = current[key]
        old_val = previous[key]
        delta, _ = calculate_change(new_val, old_val, label)
        if abs(delta) >= 1:  # only rows with a real change
            table += f"{label:<9} {old_val:<11.2f} {new_val:<10.2f} {delta:.2f}\n"
    table += "```"
    return table
def extract_status_and_os(cell_value):
    """
    Extract status and OS versions from a cell, handling both HTML and plain text.

    Cells are composed of ``<p>``-separated fragments; warning fragments carry
    HTML (an anchor tag) and are parsed with BeautifulSoup, success fragments
    hold the OS version as plain text.

    :param cell_value: Raw cell content (coerced to str); "Not Supported"
        yields no results.
    :return: List of tuples: [(status, os_version), ...]
    """
    results = []
    cell_value = str(cell_value)
    # Plain-text sentinel: no statuses to extract.
    if cell_value == "Not Supported":
        return results
    os_pattern = re.compile(r"(iOS|iPadOS|macOS)\s+[\d.]+")
    # Split the cell into parts (first element and subsequent <p> elements)
    for part in cell_value.split("<p>"):
        # BUG FIX: the original used part.strip("</p>"), which strips ANY of
        # the characters '<', '/', '>', 'p' from both ends (e.g. a leading
        # "<a ..." tag loses its '<'). We only want to drop a trailing
        # closing tag.
        if part.endswith("</p>"):
            part = part[: -len("</p>")]
        if not part:
            continue
        if "β οΈ" in part:
            # Warning fragment: parse the HTML and search the visible text.
            soup = BeautifulSoup(part, "html.parser")
            text = soup.get_text()
            os_match = os_pattern.search(text)
            if os_match:
                results.append(("β οΈ", os_match.group(0)))
        else:
            # Success fragment: OS version is directly in the text.
            os_match = os_pattern.search(part)
            if os_match:
                results.append(("β ", os_match.group(0)))
    return results
def escape_string(s: str) -> str:
    """Escape a string to be used as a value in JSON.

    Backslash must be escaped first so later substitutions are not
    double-escaped.
    """
    replacements = (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\r", "\\r"),
    )
    for old, new in replacements:
        s = s.replace(old, new)
    return s
def analyze_support_changes(prev_csv, curr_csv):
    """Compare two device-support matrix CSVs.

    Each CSV's first column holds model names (used as the index) and every
    remaining column is a device; cells contain status/OS markup parsed by
    extract_status_and_os.

    :param prev_csv: Path to the previous support matrix CSV.
    :param curr_csv: Path to the current support matrix CSV.
    :return: (fixed_errors, new_errors, new_configs, needs_alert) — the first
        three are lists of (model, device, os_version) tuples; needs_alert is
        True when the current device count dropped below 90% of the previous.
    """
    prev_df = pd.read_csv(prev_csv)
    prev_df.set_index(prev_df.columns[0], inplace=True)
    curr_df = pd.read_csv(curr_csv)
    curr_df.set_index(curr_df.columns[0], inplace=True)

    # BUG FIX: after set_index() the model column is removed from .columns,
    # so every remaining column is a device. The original sliced columns[1:]
    # and started cell loops at col_idx 1, silently dropping the first
    # device column.
    prev_devices = sorted(prev_df.columns)
    curr_devices = sorted(curr_df.columns)

    # Alert when the device pool shrank below 90% of the previous run.
    device_ratio = len(curr_devices) / len(prev_devices) if prev_devices else 1
    needs_alert = device_ratio < 0.9

    # (model, device, os_version) -> status marker, per snapshot.
    prev_status = {}
    curr_status = {}
    for idx in range(len(prev_df)):
        model = prev_df.index[idx]
        for col_idx in range(len(prev_df.columns)):
            cell_value = prev_df.iloc[idx, col_idx]
            device = prev_df.columns[col_idx]
            for status, os_version in extract_status_and_os(cell_value):
                prev_status[(model, device, os_version)] = status

    # Process current data; configurations absent from the previous snapshot
    # are collected as newly tested.
    new_configs = []
    for idx in range(len(curr_df)):
        model = curr_df.index[idx]
        for col_idx in range(len(curr_df.columns)):
            cell_value = curr_df.iloc[idx, col_idx]
            device = curr_df.columns[col_idx]
            for status, os_version in extract_status_and_os(cell_value):
                curr_status[(model, device, os_version)] = status
                if (model, device, os_version) not in prev_status:
                    new_configs.append((model, device, os_version))

    # Status flips on configurations present in both snapshots.
    fixed_errors = []
    new_errors = []
    for config in set(prev_status.keys()) & set(curr_status.keys()):
        model, device, os_version = config
        if prev_status[config] == "β οΈ" and curr_status[config] == "β ":
            fixed_errors.append((model, device, os_version))
        elif prev_status[config] == "β " and curr_status[config] == "β οΈ":
            new_errors.append((model, device, os_version))
    return fixed_errors, new_errors, new_configs, needs_alert
def generate_report():
    """Build the WhisperKit dataset update report and publish it to CI.

    Reads the previous snapshot from ``report_data/`` and the current one
    from ``dashboard_data/`` (performance JSONL, version JSON, support CSV),
    compares them, and appends two outputs to the file named by the
    ``GITHUB_OUTPUT`` environment variable: a Slack block-kit payload
    (``slack_message_payload``, heredoc-delimited) and an escaped
    per-configuration performance summary (``performance_message``).
    """

    def _section(text):
        """Build a Slack mrkdwn section block."""
        return {"type": "section", "text": {"type": "mrkdwn", "text": text}}

    # Load current and previous performance snapshots, keyed by configuration.
    prev_perf_data = read_json_line_by_line("report_data/performance_data.json")
    curr_perf_data = read_json_line_by_line("dashboard_data/performance_data.json")
    prev_dict = {(d["model"], d["device"], d["os"]): d for d in prev_perf_data}
    curr_dict = {(d["model"], d["device"], d["os"]): d for d in curr_perf_data}
    common_configs = set(curr_dict.keys()) & set(prev_dict.keys())

    # Load version metadata for both snapshots.
    with open("report_data/version.json", "r") as f:
        prev_version = json.load(f)
    with open("dashboard_data/version.json", "r") as f:
        curr_version = json.load(f)
    prev_releases = set(prev_version.get("releases", []))
    curr_releases = set(curr_version.get("releases", []))
    new_releases = curr_releases - prev_releases
    removed_releases = prev_releases - curr_releases

    # BUG FIX: the original initialized these counters but never updated
    # them, so the summary always reported 0 improved / 0 regressed. Count
    # every common configuration whose metric moved by at least 1 (same
    # threshold as has_changes); for WER lower is better, so the polarity
    # is inverted.
    metric_keys = ["speed", "tokens_per_second", "average_wer", "qoi"]
    improved_metrics = {key: 0 for key in metric_keys}
    regressed_metrics = {key: 0 for key in metric_keys}
    for config in common_configs:
        curr_metrics = curr_dict[config]
        prev_metrics = prev_dict[config]
        for key in metric_keys:
            if key not in curr_metrics or key not in prev_metrics:
                continue
            delta = curr_metrics[key] - prev_metrics[key]
            if abs(delta) < 1:
                continue
            if (delta > 0) != (key == "average_wer"):
                improved_metrics[key] += 1
            else:
                regressed_metrics[key] += 1
    new_data_points = len(set(curr_dict.keys()) - set(prev_dict.keys()))

    # Analyze device-support matrix changes.
    fixed_errors, new_errors, new_configs, needs_alert = analyze_support_changes(
        "report_data/support_data.csv", "dashboard_data/support_data.csv"
    )

    # BUG FIX: strftime("%-d") is a glibc extension and raises on Windows;
    # compose the day-of-month without a leading zero portably instead.
    now = datetime.now()
    current_time = (
        f"{now.strftime('%B')} {now.day}, {now.year} {now.strftime('%H:%M:%S')}"
    )
    prev_release_tag, curr_release_tag = (
        prev_version["whisperkit_version"],
        curr_version["whisperkit_version"],
    )

    # Assemble the Slack payload: header, timestamp, version info, summary.
    slack_blocks = {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "π WhisperKit Dataset Update Report π",
                    "emoji": True,
                },
            },
            {
                "type": "context",
                "elements": [{"text": f"*{current_time}*", "type": "mrkdwn"}],
            },
            {"type": "divider"},
            _section("βΉοΈ *CURRENT VERSION INFO* βΉοΈ"),
            _section(
                f"β’ *Last Modified:* `{format_datetime(curr_version['last_modified'])}`"
            ),
            _section(f"β’ *Dataset SHA:* `{curr_version['sha']}`"),
            _section(
                f"β’ *Current Releases:* {', '.join(f'`{r}`' for r in curr_version['releases'])}"
            ),
            _section(f"β’ *Current Release Tag:* `{curr_release_tag}`"),
            {"type": "divider"},
            _section("π *SUMMARY OF PERFORMANCE UPDATES* π"),
        ]
    }
    blocks = slack_blocks["blocks"]

    # Release information.
    blocks.append(
        _section(
            f"β’ *Added Releases:* {', '.join(sorted(new_releases)) if new_releases else 'None'}"
        )
    )
    blocks.append(
        _section(
            f"β’ *Removed Releases:* {', '.join(sorted(removed_releases)) if removed_releases else 'None'}"
        )
    )
    if prev_release_tag != curr_release_tag:
        blocks.append(
            _section(
                f"β’ *Release Tag Change:* `{prev_release_tag}` β `{curr_release_tag}`"
            )
        )

    blocks.append(_section("\n"))
    blocks.append(
        _section(f"β’ *New Data Points:* `{new_data_points}` new configurations")
    )
    blocks.append(_section("\n"))

    # Per-metric improvement/regression counts.
    for metric_name, key in [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]:
        blocks.append(
            _section(
                f"β’ *{metric_name}:* `{improved_metrics[key]}` improved, `{regressed_metrics[key]}` regressed"
            )
        )

    # Device-support changes, grouped under a shared heading.
    if fixed_errors or new_errors or new_configs:
        blocks.append({"type": "divider"})
        blocks.append(_section("π± *DEVICE SUPPORT CHANGES* π±"))
    for heading, configs in (
        ("*Successful Configurations That Override Previous Failures*", fixed_errors),
        ("*Failed Configurations That Override Previous Successes*", new_errors),
        ("*Newly Tested Configurations*", new_configs),
    ):
        if not configs:
            continue
        blocks.append(_section(heading))
        for model, device, os_version in sorted(configs):
            blocks.append(_section(f"β’ {model} on {device} ({os_version})"))

    # Alert on a significant decrease in device count.
    if needs_alert:
        blocks.append(
            _section(
                "β οΈ *ALERT:* Current device count is less than 90% of previous version's device count, test on more devices before updating the benchmark website!"
            )
        )

    # Performance text as a single mrkdwn string.
    # BUG FIX: the original left performance_text undefined when there were
    # no common configurations, raising NameError at the GITHUB_OUTPUT write
    # below.
    performance_text = ""
    if common_configs:
        performance_text = "π‘ *Performance Updates* π‘\n\n"
        # Group by model for better organization.
        for model in sorted({model for model, _, _ in common_configs}):
            for config in sorted(cfg for cfg in common_configs if cfg[0] == model):
                # NOTE(review): the label shows model + OS (config[2]) and
                # omits the device (config[1]) — preserved from the original;
                # confirm this is intended.
                device_info = f"*{model}* ({config[2]})"
                if not has_changes(config, prev_dict, curr_dict):
                    # No changes: just the model with a checkmark.
                    performance_text += f"{device_info} β \n\n"
                else:
                    performance_text += f"{device_info}\n"
                    performance_text += format_metrics_table(
                        config, prev_dict, curr_dict
                    )
                    performance_text += "\n\n"

    # Publish both outputs for the workflow (heredoc syntax for the
    # multiline JSON payload).
    github_output = os.getenv("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a") as f:
            f.write("slack_message_payload<<EOF\n")
            json.dump(slack_blocks, f, indent=2)
            f.write("\nEOF\n")
            print(f"performance_message={escape_string(performance_text)}", file=f)
# Script entry point: build the dataset update report when run directly.
if __name__ == "__main__":
    generate_report()