# KNGCRIMSON's picture
# readme fixed
# c973dd5
import gradio as gr
import pandas as pd
import pandas as pd
from src.utils.io_utils import PROJECT_ROOT
from run_augmenter import negative_sampler , positive_sampler
from pathlib import Path
def augment_interface(factor, type_or_difficulty, use_default, csv_file=None):
    """Negative Tool Sampler: wrapper that runs negative dataset augmentation.

    Args:
        factor: Forwarded unchanged to ``negative_sampler`` as its second
            positional argument.
        type_or_difficulty: Forwarded unchanged to ``negative_sampler`` as
            its third positional argument.
        use_default: When truthy, the default CSV under
            ``PROJECT_ROOT/data`` is used and ``csv_file`` is ignored.
        csv_file: Uploaded file, consulted only when ``use_default`` is
            falsy. Either an object exposing ``.name`` or a plain path.

    Returns:
        A 3-tuple ``(output_csv_path, preview, visibility_update)``: the path
        of the CSV written to the working directory, ``augmented_df.head()``,
        and ``gr.update(visible=True)``.

    Raises:
        gr.Error: If no usable input CSV is available, or if sampling or
            writing the result fails (the original exception is chained).
    """
    # Failures are raised as gr.Error instead of being returned: the first
    # output slot is bound to a file component, so a message string placed
    # there would be interpreted as a file path rather than shown as text.
    #
    # Input validation lives outside the try block so these gr.Error
    # instances are not caught and re-wrapped by the handler below.
    if use_default:
        input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
        if not Path(input_csv_path).exists():
            raise gr.Error("Error: Default CSV file not found!")
    elif csv_file is not None:
        # Accept both a file-like object with ``.name`` and a bare path.
        input_csv_path = getattr(csv_file, "name", csv_file)
    else:
        raise gr.Error("Error: Please select default or upload a CSV file.")

    try:
        augmented_df = negative_sampler(input_csv_path, factor, type_or_difficulty)
        output_csv_path = "augmented_dataset.csv"
        augmented_df.to_csv(output_csv_path, index=False)
        preview = augmented_df.head()
    except Exception as e:
        # UI boundary: surface any sampler/IO failure to the user.
        raise gr.Error(f"Error during processing: {e}") from e

    return output_csv_path, preview, gr.update(visible=True)
def positive_sampler_interface(use_default, csv_file=None, size=10, random=True, seed=42, full=False):
    """Positive Tool Sampler: wrapper that runs positive dataset augmentation.

    Args:
        use_default: When truthy, the default CSV under
            ``PROJECT_ROOT/data`` is used and ``csv_file`` is ignored.
        csv_file: Uploaded file, consulted only when ``use_default`` is
            falsy. Either an object exposing ``.name`` or a plain path.
        size: Forwarded to ``positive_sampler`` as ``size``.
        random: Forwarded to ``positive_sampler`` as ``random``.
        seed: Forwarded to ``positive_sampler`` as ``seed``.
        full: Forwarded to ``positive_sampler`` as ``full``.

    Returns:
        A 3-tuple ``(output_csv_path, preview, visibility_update)``: the path
        of the CSV written to the working directory, ``augmented_df.head()``,
        and ``gr.update(visible=True)``.

    Raises:
        gr.Error: If no usable input CSV is available, or if sampling or
            writing the result fails (the original exception is chained).
    """
    # Failures are raised as gr.Error instead of being returned: the first
    # output slot is bound to a file component, so a message string placed
    # there would be interpreted as a file path rather than shown as text.
    #
    # Input validation lives outside the try block so these gr.Error
    # instances are not caught and re-wrapped by the handler below.
    if use_default:
        input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
        if not Path(input_csv_path).exists():
            raise gr.Error("Error: Default CSV file not found!")
    elif csv_file is not None:
        # Accept both a file-like object with ``.name`` and a bare path.
        input_csv_path = getattr(csv_file, "name", csv_file)
    else:
        raise gr.Error("Error: Please select default or upload a CSV file.")

    try:
        # Call the positive sampler function with additional arguments
        augmented_df = positive_sampler(
            optional_path=input_csv_path,
            size=size,
            random=random,
            seed=seed,
            full=full,
        )
        output_csv_path = "positive_augmented_dataset.csv"
        augmented_df.to_csv(output_csv_path, index=False)
        preview = augmented_df.head()
    except Exception as e:
        # UI boundary: surface any sampler/IO failure to the user.
        raise gr.Error(f"Error during processing: {e}") from e

    return output_csv_path, preview, gr.update(visible=True)
def reset_output():
    """Clear the output slots and hide the preview table.

    Returns:
        A 3-tuple of ``None``, ``None`` and ``gr.update(visible=False)``,
        matching the three outputs the reset buttons are wired to.
    """
    cleared_file = None
    cleared_preview = None
    hide_preview = gr.update(visible=False)
    return cleared_file, cleared_preview, hide_preview
# Stylesheet passed to gr.Blocks. A plain string (not an f-string): nothing
# is interpolated, so braces do not need to be doubled.
_APP_CSS = """
.gradio-container {
font-family: Arial, sans-serif;
max-width: 900px;
margin: auto;
}
h1 {
text-align: center;
color: white;
font-size: 60px;
margin-bottom: 0px;
}
h2 {
text-align: center;
color: #ff0000;
font-size: 16px;
font-weight: normal;
margin-top: 0px;
}
.title {
text-align: center;
font-size: 40px;
margin-top: 30px;
margin-bottom: 20px;
}
.title .positive {
color: #ff0000;
}
.title .negative {
color: #ff0000;
}
.title .tool {
color: white;
}
.title .sampler {
color: #ff0000;
}
.description {
text-align: center;
margin-bottom: 20px;
}
#submit-button {
background-color: #ff0000;
color: white;
font-size: 16px;
border: none;
border-radius: 5px;
padding: 10px 20px;
}
#reset-button {
background-color: #d3d3d3;
color: black;
font-size: 16px;
border: none;
border-radius: 5px;
padding: 10px 20px;
}
"""

# HTML fragments rendered through gr.Markdown, hoisted out of the layout code
# so the component tree below stays readable.
_HEADER_HTML = """
<h1>ENTC</h1>
<h2>Entrepreneurship and Technology Commercialization · EPFL</h2>
"""

_POSITIVE_TITLE_HTML = """
<div class="title">
<span class="positive">Positive</span>
<span class="tool">Tool</span>
<span class="sampler">Sampler</span>
</div>
"""

_POSITIVE_DESCRIPTION_HTML = """
<p class="description">
This tool takes a list of DOIs and augments them using the OpenAlex API.
It is designed to complement the Negative Tool Sampler, enabling the creation of complete datasets.
</p>
"""

_NEGATIVE_TITLE_HTML = """
<div class="title">
<span class="negative">Negative</span>
<span class="tool">Tool</span>
<span class="sampler">Sampler</span>
</div>
"""

_NEGATIVE_DESCRIPTION_HTML = """
<p class="description">
This tool generates datasets by creating negative samples from positive matches between preprints and articles.
Customize the difficulty and the augmentation factor to meet your needs.
</p>
"""

# NOTE(review): the nesting of Group/Row containers below is reconstructed
# from statement order; confirm it matches the intended layout.
# NOTE(review): the two Submit buttons share elem_id "submit-button" and the
# two Reset buttons share "reset-button"; DOM ids are normally expected to be
# unique, so a shared CSS class may be a better fit.
with gr.Blocks(css=_APP_CSS) as app:
    # Main Title Section
    gr.Markdown(_HEADER_HTML)

    # Positive Tool Sampler Section
    gr.Markdown(_POSITIVE_TITLE_HTML)
    gr.Markdown(_POSITIVE_DESCRIPTION_HTML)
    with gr.Group():
        with gr.Row():
            pos_use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
            # Hidden initially because the default-dataset box starts checked.
            pos_csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)
        with gr.Row():
            size_input = gr.Number(label="Number of Samples", value=10, info="Specify the number of samples to generate.")
            random_input = gr.Checkbox(label="Sample Randomly", value=True, info="Whether to sample randomly.")
            seed_input = gr.Number(label="Random Seed", value=42, info="Random seed for reproducibility.")
            full_input = gr.Checkbox(label="Full Dataset Mode", value=False, info="Indicate whether to use the full dataset.")
    with gr.Group():
        pos_output_file = gr.File(label="Download Augmented Dataset")
        pos_dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
    with gr.Row():
        pos_submit_button = gr.Button("Submit 🚀", elem_id="submit-button")
        pos_reset_button = gr.Button("Reset 🔄", elem_id="reset-button")

    # Button Actions
    # The preview component is listed twice on purpose: the handlers return
    # its value and its visibility update as two separate outputs.
    pos_submit_button.click(
        positive_sampler_interface,
        inputs=[pos_use_default_checkbox, pos_csv_file_input, size_input, random_input, seed_input, full_input],
        outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview],
    )
    pos_reset_button.click(
        reset_output,
        inputs=[],
        outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview],
    )

    # Toggle File Input
    def toggle_pos_csv_input(use_default):
        """Show the upload widget only while the default dataset is not selected."""
        return gr.update(visible=not use_default)

    pos_use_default_checkbox.change(
        toggle_pos_csv_input,
        inputs=[pos_use_default_checkbox],
        outputs=[pos_csv_file_input],
    )

    # Negative Tool Sampler Section
    gr.Markdown(_NEGATIVE_TITLE_HTML)
    gr.Markdown(_NEGATIVE_DESCRIPTION_HTML)
    with gr.Group():
        with gr.Row():
            factor_input = gr.Number(
                label="Factor (int)", value=1, info="Specify the number of negative samples per positive sample."
            )
            type_dropdown = gr.Dropdown(
                ["random", "similar topics", "overlapping authors", "random authors", "fuzzed title"],
                label="Select Difficulty or Augmentation Type",
            )
        with gr.Row():
            use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
            # Hidden initially because the default-dataset box starts checked.
            csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)
    with gr.Group():
        output_file = gr.File(label="Download Augmented Dataset")
        dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
    with gr.Row():
        submit_button = gr.Button("Submit 🚀", elem_id="submit-button")
        reset_button = gr.Button("Reset 🔄", elem_id="reset-button")

    # Button Actions
    # As above, the preview is listed twice: once for its value, once for
    # its visibility update.
    submit_button.click(
        augment_interface,
        inputs=[factor_input, type_dropdown, use_default_checkbox, csv_file_input],
        outputs=[output_file, dataset_preview, dataset_preview],
    )
    reset_button.click(
        reset_output,
        inputs=[],
        outputs=[output_file, dataset_preview, dataset_preview],
    )

    # Toggle File Input
    def toggle_csv_input(use_default):
        """Show the upload widget only while the default dataset is not selected."""
        return gr.update(visible=not use_default)

    use_default_checkbox.change(
        toggle_csv_input,
        inputs=[use_default_checkbox],
        outputs=[csv_file_input],
    )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    app.launch()