File size: 4,940 Bytes
3c48254 1352e18 de9e814 3c48254 1352e18 3c48254 1352e18 39e83f7 3c48254 39e83f7 3c48254 1352e18 ea60d34 1352e18 3af9af7 5627e88 f6555fb 15420a6 f6555fb 3af9af7 f6555fb 15420a6 f6555fb 3af9af7 5627e88 f6555fb 5627e88 f6555fb 5627e88 15420a6 5627e88 de9e814 1352e18 39e83f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
import pandas as pd
import logging
import re
from task_visualizations import TaskVisualizations
import plotly.graph_objects as go
logging.basicConfig(level=logging.INFO)
class AppConfig:
    """Relative locations of the data files the app reads at start-up."""

    # JSON Lines file with one repository representation per line.
    repo_representations_path: str = "data/repo_representations.jsonl"

    # CSV files handed to TaskVisualizations (all vs. selected repositories).
    task_counts_path: str = "data/repos_task_counts.csv"
    selected_task_counts_path: str = "data/selected_repos_task_counts.csv"

    # CSV file describing the PapersWithCode tasks.
    tasks_path: str = "data/paperswithcode_tasks.csv"
def load_repo_df(repo_representations_path):
    """Load repository representations from a JSON Lines file and clean their text.

    Every line of the file is parsed as one record. The ``text`` column is
    then cleaned:

    * each self-closing ``<img ... />`` tag is removed individually, so any
      text sitting between two images on the same line is preserved;
    * the ``│`` and ``⋮`` characters are replaced by newlines.

    Args:
        repo_representations_path: Location of the JSON Lines file; passed
            straight to ``pandas.read_json``.

    Returns:
        A new DataFrame with all original columns and the cleaned ``text``.

    Raises:
        KeyError: If the loaded data has no ``text`` column.
    """
    raw = pd.read_json(repo_representations_path, lines=True, orient="records")
    cleaned_text = (
        raw["text"]
        # ``[^>]*`` cannot run past the end of a tag, unlike a greedy ``.*``
        # which would swallow everything up to the last ``/>`` on the line.
        .str.replace(r"<img[^>]*/>", "", regex=True)
        # Literal replacements: make ``regex=False`` explicit because the
        # default of this parameter differs between pandas versions.
        .str.replace("│", "\n", regex=False)
        .str.replace("⋮", "\n", regex=False)
    )
    return raw.assign(text=cleaned_text)
_NO_DATA_MESSAGE = "No data available"


def _first_text_for(repo_data, representation):
    """Return the first ``text`` of the rows of *repo_data* with this representation."""
    matching_text = repo_data.loc[
        repo_data["representation"] == representation, "text"
    ]
    if matching_text.empty:
        return _NO_DATA_MESSAGE
    return matching_text.iloc[0]


def display_representations(repo, representation1, representation2):
    """Look up the text of two representations of one repository.

    Reads the module-level ``repos_df`` DataFrame, which must provide the
    ``repo_name``, ``representation`` and ``text`` columns.

    Args:
        repo: Value compared against the ``repo_name`` column.
        representation1: Value compared against the ``representation`` column
            for the first result.
        representation2: Same, for the second result.

    Returns:
        A ``(text1, text2)`` tuple. Each element is the ``text`` of the first
        matching row, or ``"No data available"`` when no row matches.
    """
    repo_data = repos_df[repos_df["repo_name"] == repo]
    # Lazy %-style arguments: the frame is only rendered if the record is emitted.
    logging.info("repo_data: %s", repo_data)
    return (
        _first_text_for(repo_data, representation1),
        _first_text_for(repo_data, representation2),
    )
def _default_choice(preferred, choices):
    """Return *preferred* if it is one of *choices*, else the first choice (or None)."""
    if preferred in choices:
        return preferred
    return next(iter(choices), None)


def setup_repository_representations_tab(
    repos,
    representation_types,
    default_representation1="readme",
    default_representation2="generated_readme",
):
    """Build the widgets that compare two representations of a repository.

    Must be called inside an active ``gr.Blocks`` context. Creates three
    dropdowns (repository and two representation types) and two Markdown
    panels, fills the panels for the initial selection, and refreshes both
    panels whenever any dropdown changes.

    Args:
        repos: Repository names offered in the first dropdown. May be empty,
            in which case no repository is preselected.
        representation_types: Representation names offered in the other two
            dropdowns.
        default_representation1: Preferred initial value of the first
            representation dropdown.
        default_representation2: Preferred initial value of the second
            representation dropdown.

    A preferred default that is not among ``representation_types`` falls back
    to the first available type, so the dropdown never starts on a value
    outside its own choices.
    """
    # Single source of truth for the initial selection: the same values seed
    # the dropdowns and the first rendering of the panels.
    initial_repo = next(iter(repos), None)
    initial_representation1 = _default_choice(
        default_representation1, representation_types
    )
    initial_representation2 = _default_choice(
        default_representation2, representation_types
    )

    gr.Markdown("Select a repository and two representation types to compare them.")

    with gr.Row():
        repo = gr.Dropdown(choices=repos, label="Repository", value=initial_repo)
        representation1 = gr.Dropdown(
            choices=representation_types,
            label="Representation 1",
            value=initial_representation1,
        )
        representation2 = gr.Dropdown(
            choices=representation_types,
            label="Representation 2",
            value=initial_representation2,
        )

    with gr.Row():
        with gr.Column(
            elem_id="column1",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text1 = gr.Markdown()
        with gr.Column(
            elem_id="column2",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text2 = gr.Markdown()

    def update_representations(repo, representation1, representation2):
        """Return the Markdown for both panels for the given selection."""
        text1_content, text2_content = display_representations(
            repo, representation1, representation2
        )
        return (
            f"### Representation 1: {representation1}\n\n{text1_content}",
            f"### Representation 2: {representation2}\n\n{text2_content}",
        )

    # Initial call to populate the panels with the default selection.
    text1.value, text2.value = update_representations(
        initial_repo, initial_representation1, initial_representation2
    )

    # Any of the three dropdowns changing refreshes both panels.
    for component in (repo, representation1, representation2):
        component.change(
            fn=update_representations,
            inputs=[repo, representation1, representation2],
            outputs=[text1, text2],
        )
## main

# Load the data once at start-up; ``repos_df`` is read as a module-level
# global by ``display_representations``.
repos_df = load_repo_df(AppConfig.repo_representations_path)
repos = list(repos_df["repo_name"].unique())
representation_types = list(repos_df["representation"].unique())
logging.info(f"found {len(repos)} repositories")
logging.info(f"representation types: {representation_types}")

task_visualizations = TaskVisualizations(
    AppConfig.task_counts_path,
    AppConfig.selected_task_counts_path,
    AppConfig.tasks_path,
)

# Defined at column 0 so the Markdown text carries no stray indentation.
task_counts_description = """
## PapersWithCode Tasks Visualization
PapersWithCode tasks are grouped by area.
""".strip()

with gr.Blocks() as demo:
    with gr.Tab("Explore Repository Representations"):
        setup_repository_representations_tab(repos, representation_types)

    with gr.Tab("Explore PapersWithCode Tasks"):
        gr.Markdown(task_counts_description)

        with gr.Row():
            min_task_counts_slider_all = gr.Slider(
                minimum=10,
                maximum=1000,
                value=100,
                step=10,
                label="Minimum Task Count (All Repositories)",
            )
            min_task_counts_slider_selected = gr.Slider(
                minimum=10,
                maximum=1000,
                value=100,
                step=10,
                label="Minimum Task Count (Selected Repositories)",
            )

        update_button = gr.Button("Update Plots")

        # NOTE(review): the original passed the string "Task Counts"
        # positionally to gr.Row. Row is a layout container with no
        # label/title argument, so the string is dropped here; the plots
        # carry their own labels. Confirm against the installed Gradio version.
        with gr.Row():
            all_repos_tasks_plot = gr.Plot(label="All Repositories")
            selected_repos_tasks_plot = gr.Plot(label="Selected Repositories")

        # The plots are only drawn when the button is pressed, using the
        # current values of the two sliders.
        update_button.click(
            fn=task_visualizations.get_tasks_sunbursts,
            inputs=[min_task_counts_slider_all, min_task_counts_slider_selected],
            outputs=[all_repos_tasks_plot, selected_repos_tasks_plot],
        )

demo.launch()
|