import json

import gradio as gr
import pandas as pd


def get_updated_df(df, df_output):
    """Attach model output columns to the dataset frame.

    ``df`` is truncated to the first ``len(df_output)`` rows and copied, so the
    caller's frame is not mutated. The ``response``, ``rationale``,
    ``explanation`` and ``score`` columns are then copied over from
    ``df_output`` positionally (row order, not index alignment).

    Returns:
        A new DataFrame restricted to the columns used by the display helpers.
    """
    df = df.iloc[: len(df_output)].copy()
    # ``tolist()`` drops the index of ``df_output`` so values are assigned by
    # position rather than aligned on index labels.
    for column in ("response", "rationale", "explanation", "score"):
        df[column] = df_output[column].tolist()
    cols = [
        "conversation",
        "tools_langchain",
        "n_turns",
        "len_query",
        "n_tools",
        "response",
        "rationale",
        "explanation",
        "score",
    ]
    return df[cols]


def get_chat_and_score_df(model, dataset):
    """Load a dataset and the given model's outputs for it, merged into one frame.

    Reads ``output/{model}/{dataset}.parquet`` and ``datasets/{dataset}.parquet``
    and combines them with ``get_updated_df``.
    """
    df_output = pd.read_parquet(f"output/{model}/{dataset}.parquet")
    df = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(df, df_output)


def format_chat_message(role, content, is_response=False):
    """Format individual chat messages with alignment based on role.

    Args:
        role: Speaker label; " Response" is appended when ``is_response``.
        content: Message text. Each line is stripped of surrounding whitespace
            while line breaks are preserved.
        is_response: Whether this message is the model response being scored.

    Returns:
        The header line followed by the cleaned content, newline-delimited.
    """
    role_style = role.lower()
    # NOTE(review): ``alignment``, ``max_width`` and ``background_color`` are
    # computed but not referenced by the template below -- presumably they are
    # meant to feed styling markup that is absent here; confirm before removing.
    alignment = "flex-end" if role_style == "user" else "flex-start"
    max_width = "80%"
    # Clean up any excessive whitespace while preserving intentional line breaks
    cleaned_content = "\n".join(line.strip() for line in content.split("\n"))
    background_color = (
        "var(--response-bg)" if is_response else f"var(--message-bg-{role_style})"
    )
    header = role + (" Response" if is_response else "")
    return f"\n{header}\n{cleaned_content}\n"


def format_response(response):
    """Format the response data, handling both JSON and text.

    Valid JSON is re-serialised with a two-space indent; anything else
    (including non-string values) is returned as ``str(response)``.
    """
    try:
        response_data = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or not a str/bytes value at all): show it as-is.
        return str(response)
    return json.dumps(response_data, indent=2)


def parse_tool_schema(tool):
    """Parse tool schema to extract name, description, and parameters properly.

    Args:
        tool: Mapping that may carry ``title``, ``description`` and
            ``properties`` keys.

    Returns:
        ``(name, description, parameters)`` where ``parameters`` maps each
        property name to ``"<description> (Type: <type>)"``. Missing fields
        fall back to placeholder text.
    """
    name = tool.get("title", "Unnamed Tool")
    description = tool.get("description", "No description available")
    parameters = {}
    for param_name, param_data in tool.get("properties", {}).items():
        param_desc = param_data.get("description", "No description")
        param_type = param_data.get("type", "unknown")
        parameters[param_name] = f"{param_desc} (Type: {param_type})"
    return name, description, parameters


def format_tool_info(tools):
    """Format tool information with improved schema parsing and dark theme support.

    Args:
        tools: Either an iterable of tool schema mappings or a JSON string
            encoding one.

    Returns:
        One section per tool (name, description, parameters), or a placeholder
        message when ``tools`` is empty or is a string that is not valid JSON.
    """
    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except json.JSONDecodeError:
            return "\nNo tool information available\n"
    if not tools:
        return "\nNo tool information available\n"

    sections = []
    for tool in tools:
        name, description, parameters = parse_tool_schema(tool)
        sections.append(
            f"\n{name}\n{description}\n{format_parameters(parameters)}\n"
        )
    tool_html = "".join(sections)
    return f"\n{tool_html}\n"


def format_parameters(parameters):
    """Render a ``{name: description}`` mapping as one entry per parameter.

    Returns a placeholder message when ``parameters`` is empty.
    """
    if not parameters:
        return "\nNo parameters\n"
    return "".join(f"\n{name}\n{desc}\n" for name, desc in parameters.items())


def format_metrics(score, rationale, explanation):
    """Format metrics display with improved dark theme support.

    Args:
        score: Numeric score; rendered with two decimal places.
        rationale: Text shown under the "Rationale" heading.
        explanation: Text shown under the "Explanation" heading.
    """
    # NOTE(review): ``score_color`` is computed but not referenced by the
    # template below -- presumably meant for styling markup that is absent
    # here; confirm before removing.
    score_color = (
        "var(--score-high)"
        if score >= 0.7
        else "var(--score-med)"
        if score >= 0.4
        else "var(--score-low)"
    )
    return (
        "\n\nTSQ Score\n"
        f"\n{score:.2f}\n"
        "\nRationale\n"
        f"\n{rationale}\n"
        "\nExplanation\n"
        f"\n{explanation}\n"
    )


def update_chat_display(df, index):
    """Update the chat visualization with improved dark theme support.

    Args:
        df: Frame with ``conversation`` (JSON-encoded list of messages with
            ``role`` and ``content``), ``response``, ``score``, ``rationale``,
            ``explanation`` and ``tools_langchain`` columns.
        index: Positional row index to display.

    Returns:
        ``(chat_html, metrics_html, tool_html)``; placeholder messages are
        returned when ``df`` is missing/empty or ``index`` is past the end.
    """
    if df is None or df.empty or index >= len(df):
        return (
            "\nNo data available\n",
            "\nNo metrics available\n",
            "\nNo tool information available\n",
        )

    row = df.iloc[index]
    messages = json.loads(row["conversation"])
    formatted_response = format_response(row["response"])

    # Conversation turns first, then the scored model response as a final
    # "Assistant Response" message.
    all_messages = [
        format_chat_message(msg["role"], msg["content"]) for msg in messages
    ]
    all_messages.append(
        format_chat_message("Assistant", formatted_response, is_response=True)
    )
    chat_html = f"\n{''.join(all_messages)}\n"

    metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])
    tool_html = format_tool_info(row["tools_langchain"])
    return chat_html, metrics_html, tool_html


def filter_and_update_display(model, dataset, min_score, max_score, current_index):
    """Load results, filter by score range, and render the selected row.

    Args:
        model: Model name used to locate the output parquet file.
        dataset: Dataset name used to locate both parquet files.
        min_score: Inclusive lower bound on ``score``.
        max_score: Inclusive upper bound on ``score``.
        current_index: Requested zero-based position within the filtered rows;
            clamped into the valid range.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_display)``. Any exception
        is rendered into the first element instead of propagating, so the
        caller always receives four displayable strings.
    """
    try:
        df_chat = get_chat_and_score_df(model, dataset)
        df_chat = df_chat[
            (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
        ]
        if df_chat.empty:
            return (
                "\nNo data available for selected filters\n",
                "\nNo metrics available\n",
                "\nNo tool information available\n",
                "0/0",
            )

        max_index = len(df_chat) - 1
        # Clamp on both sides: a negative index would otherwise wrap around to
        # the end of the frame while the counter displayed "0/N".
        current_index = max(0, min(current_index, max_index))
        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)
        index_display = f"\n{current_index + 1}/{len(df_chat)}\n"
        return chat_html, metrics_html, tool_html, index_display
    except Exception as e:
        # UI boundary: surface the failure in the chat pane rather than raising.
        error_html = f"\nError: {e}\n"
        return (
            error_html,
            "\nNo metrics available\n",
            "\nNo tool information available\n",
            "0/0",
        )