import gradio as gr
import pandas as pd
import json
def get_updated_df(df, df_output):
    """Attach model outputs to the leading rows of the dataset frame.

    Args:
        df: Dataset frame; must provide the ``conversation``,
            ``tools_langchain``, ``n_turns``, ``len_query`` and ``n_tools``
            columns selected below.
        df_output: Frame with ``response``, ``rationale``, ``explanation``
            and ``score`` columns.

    Returns:
        A new frame holding the first ``len(df_output)`` rows of ``df`` with
        the four output columns copied in by position, restricted to the
        display columns. ``df`` itself is not modified.
    """
    # Work on a copy so the caller's frame never receives the new columns.
    merged = df.iloc[: len(df_output)].copy()

    # Values are transferred as plain lists, i.e. by position, not by index.
    for column in ("response", "rationale", "explanation", "score"):
        merged[column] = df_output[column].tolist()

    display_columns = [
        "conversation",
        "tools_langchain",
        "n_turns",
        "len_query",
        "n_tools",
        "response",
        "rationale",
        "explanation",
        "score",
    ]
    return merged[display_columns]
def get_chat_and_score_df(model, dataset):
    """Load a dataset together with one model's outputs for it.

    Args:
        model: Name of the directory under ``output/`` holding the results.
        dataset: Parquet file stem, used for both
            ``output/{model}/{dataset}.parquet`` and
            ``datasets/{dataset}.parquet``.

    Returns:
        The frame produced by ``get_updated_df`` from the two files.
    """
    # The model output is read first, then the dataset it was produced from.
    model_outputs = pd.read_parquet(f"output/{model}/{dataset}.parquet")
    dataset_rows = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(dataset_rows, model_outputs)
def format_chat_message(role, content, is_response=False):
    """Render a single chat message as a labelled text block.

    Args:
        role: Speaker label (for example ``"user"`` or ``"Assistant"``); it
            is emitted exactly as given.
        content: Message text. Every line is stripped of surrounding
            whitespace while the line breaks themselves are kept.
        is_response: When true, ``" Response"`` is appended to the label.

    Returns:
        A string made of a newline, the label, a newline, the cleaned
        content and a final newline.
    """
    role_style = role.lower()

    # Presentation values derived from the role.
    # NOTE(review): none of these three is referenced by the text returned
    # below; confirm whether the template is meant to use them.
    if role_style == "user":
        alignment = "flex-end"
    else:
        alignment = "flex-start"
    max_width = "80%"
    if is_response:
        background_color = "var(--response-bg)"
    else:
        background_color = f"var(--message-bg-{role_style})"

    # Trim each line individually so deliberate line breaks survive.
    stripped_lines = [piece.strip() for piece in content.split("\n")]
    cleaned_content = "\n".join(stripped_lines)

    label = role + " Response" if is_response else role
    return f"\n{label}\n{cleaned_content}\n"
def format_response(response):
    """Return the response as display text, pretty-printing it when it is JSON.

    Args:
        response: The raw model response; usually a string.

    Returns:
        The response re-serialised with two-space indentation when it parses
        as JSON, otherwise ``str(response)``.
    """
    try:
        parsed = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # Not JSON text (or not a string-like value at all): show it verbatim.
        return str(response)
    return json.dumps(parsed, indent=2)
def parse_tool_schema(tool):
    """Pull the name, description and parameter summaries out of a tool schema.

    Args:
        tool: Mapping describing one tool. ``title`` and ``description`` are
            read with fallbacks; ``properties``, when present, maps each
            parameter name to a mapping with optional ``description`` and
            ``type`` entries.

    Returns:
        A ``(name, description, parameters)`` tuple where ``parameters`` maps
        each parameter name to ``"<description> (Type: <type>)"``.
    """
    properties = tool["properties"] if "properties" in tool else {}
    parameters = {
        param_name: (
            f"{spec.get('description', 'No description')} "
            f"(Type: {spec.get('type', 'unknown')})"
        )
        for param_name, spec in properties.items()
    }
    return (
        tool.get("title", "Unnamed Tool"),
        tool.get("description", "No description available"),
        parameters,
    )
def format_tool_info(tools):
    """Format a collection of tool schemas as display text.

    Args:
        tools: An iterable of tool schema mappings, or a JSON string that
            decodes to one.

    Returns:
        For every tool, its name, description and formatted parameters, each
        on its own line and wrapped in leading/trailing newlines. The
        placeholder ``"No tool information available\\n"`` is returned when
        ``tools`` is empty or is a string that is not valid JSON.
    """
    no_tools = "No tool information available\n"

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except json.JSONDecodeError:
            # Only a decoding failure means "no usable tool data"; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed here.
            return no_tools

    if not tools:
        return no_tools

    sections = []
    for tool in tools:
        name, description, parameters = parse_tool_schema(tool)
        sections.append(
            f"\n{name}\n{description}\n{format_parameters(parameters)}\n"
        )
    tool_html = "".join(sections)
    return f"\n{tool_html}\n"
def format_parameters(parameters):
    """Format a tool's parameters as display text, one entry per parameter.

    Args:
        parameters: Mapping of parameter name to its description text, or an
            empty/``None`` value when the tool takes no parameters.

    Returns:
        ``"No parameters\\n"`` when there is nothing to show; otherwise the
        concatenation of one ``"\\n<name>: <description>\\n"`` entry per
        parameter, in mapping order.
    """
    if not parameters:
        return "No parameters\n"

    # Each entry carries both the name and its description; previously the
    # loop ignored both and produced only blank lines.
    return "".join(f"\n{name}: {desc}\n" for name, desc in parameters.items())
def format_metrics(score, rationale, explanation):
    """Build the metrics panel text for one evaluated row.

    Args:
        score: Numeric score; compared against the 0.7 and 0.4 thresholds.
        rationale: Rationale text for the row.
        explanation: Explanation text placed under the "Explanation" heading.

    Returns:
        A string consisting of a newline, the ``Explanation`` heading, the
        explanation and a trailing newline.
    """
    # Colour tier for the score: >= 0.7 high, >= 0.4 medium, otherwise low.
    # NOTE(review): neither this tier nor `rationale` is referenced by the
    # text returned below; confirm whether the template should include them.
    if score >= 0.7:
        score_color = "var(--score-high)"
    elif score >= 0.4:
        score_color = "var(--score-med)"
    else:
        score_color = "var(--score-low)"

    return "\n".join(("", "Explanation", f"{explanation}", ""))
def update_chat_display(df, index):
    """Build the chat, metrics and tool panels for one row of the frame.

    Args:
        df: Frame with ``conversation``, ``response``, ``score``,
            ``rationale``, ``explanation`` and ``tools_langchain`` columns.
            ``conversation`` is decoded with ``json.loads`` and must yield
            messages that each have ``role`` and ``content`` entries.
        index: Positional row number to display.

    Returns:
        A ``(chat_html, metrics_html, tool_html)`` tuple of strings. When
        ``df`` is ``None`` or empty, or ``index`` is past the last row, the
        three "No ... available" placeholders are returned instead.
    """
    if df is None or df.empty or index >= len(df):
        return (
            "No data available\n",
            "No metrics available\n",
            "No tool information available\n",
        )

    row = df.iloc[index]
    messages = json.loads(row["conversation"])
    formatted_response = format_response(row["response"])

    # The stored conversation comes first, followed by the model's response
    # rendered as a final "Assistant" message.
    all_messages = [
        format_chat_message(msg["role"], msg["content"]) for msg in messages
    ]
    all_messages.append(
        format_chat_message("Assistant", formatted_response, is_response=True)
    )
    joined_messages = "".join(all_messages)
    chat_html = f"\n{joined_messages}\n"

    metrics_html = format_metrics(
        row["score"], row["rationale"], row["explanation"]
    )
    tool_html = format_tool_info(row["tools_langchain"])
    return chat_html, metrics_html, tool_html
def filter_and_update_display(model, dataset, min_score, max_score, current_index):
    """Load a model/dataset pair, filter it by score and render one row.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        min_score: Inclusive lower bound on the ``score`` column.
        max_score: Inclusive upper bound on the ``score`` column.
        current_index: Zero-based position of the row to show within the
            filtered rows; it is clamped into the valid range.

    Returns:
        A ``(chat_html, metrics_html, tool_html, index_display)`` tuple of
        strings. ``index_display`` is ``"<position>/<total>\\n"`` with a
        one-based position, or ``"0/0"`` when nothing matches the filter or
        an error occurred. Errors are reported in the first element rather
        than raised.
    """
    no_metrics = "No metrics available\n"
    no_tools = "No tool information available\n"

    try:
        df_chat = get_chat_and_score_df(model, dataset)
        df_chat = df_chat[
            (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
        ]

        if df_chat.empty:
            return (
                "No data available for selected filters\n",
                no_metrics,
                no_tools,
                "0/0",
            )

        # Clamp on both sides: a negative index would otherwise select a row
        # counted from the end while the counter showed a position <= 0.
        max_index = len(df_chat) - 1
        current_index = max(0, min(current_index, max_index))

        chat_html, metrics_html, tool_html = update_chat_display(
            df_chat, current_index
        )
        index_display = f"{current_index + 1}/{len(df_chat)}\n"
        return chat_html, metrics_html, tool_html, index_display
    except Exception as e:
        # Catch-all on purpose: any failure (missing file, bad data) is
        # reported in the chat panel instead of propagating to the caller.
        error_html = f"\nError: {e}\n"
        return (
            error_html,
            no_metrics,
            no_tools,
            "0/0",
        )