# Page-header residue captured with the source: "Spaces: Sleeping"
import gradio as gr | |
import pandas as pd | |
import pandas as pd | |
from src.utils.io_utils import PROJECT_ROOT | |
from run_augmenter import negative_sampler , positive_sampler | |
from pathlib import Path | |
def augment_interface(factor, type_or_difficulty, use_default, csv_file=None):
    """Negative Tool Sampler: wrapper that runs negative dataset augmentation.

    Resolves the input CSV (the bundled default or an uploaded file), calls
    ``negative_sampler`` on it, writes the result to ``augmented_dataset.csv``
    in the current working directory and returns the values for the three
    outputs wired to the submit button.

    Args:
        factor: Number of negative samples per positive sample. Gradio's
            ``Number`` component yields a float unless a precision is
            configured, so an integral float (``1.0``) is converted to
            ``int`` before it is forwarded; any other value is passed on
            unchanged.
        type_or_difficulty: Augmentation type / difficulty, forwarded as-is
            to ``negative_sampler``.
        use_default: When truthy, read the default CSV under
            ``PROJECT_ROOT/data`` and ignore ``csv_file``.
        csv_file: Uploaded file. Either a filesystem path (``str``/``Path``)
            or an object exposing the path as ``.name``.

    Returns:
        A 3-tuple. On success: ``(output_csv_path, augmented_df.head(),
        gr.update(visible=True))``. On failure: ``(error_message, None,
        gr.update(visible=False))``.
    """
    # NOTE(review): in this file the first return slot is wired to a
    # ``gr.File`` output, so the error message is handed to a file component
    # -- confirm that this renders as intended.
    try:
        if use_default:
            input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
            if not Path(input_csv_path).exists():
                return "Error: Default CSV file not found!", None, gr.update(visible=False)
        elif csv_file is not None:
            # Uploads may arrive as a plain path or as a tempfile-like
            # wrapper; accept both instead of assuming ``.name`` exists.
            if isinstance(csv_file, (str, Path)):
                input_csv_path = str(csv_file)
            else:
                input_csv_path = csv_file.name
        else:
            return "Error: Please select default or upload a CSV file.", None, gr.update(visible=False)

        # The UI labels this value "(int)"; undo the float conversion applied
        # by the Number component when the value is a whole number.
        if isinstance(factor, float) and factor.is_integer():
            factor = int(factor)

        augmented_df = negative_sampler(input_csv_path, factor, type_or_difficulty)
        output_csv_path = "augmented_dataset.csv"
        augmented_df.to_csv(output_csv_path, index=False)
        return output_csv_path, augmented_df.head(), gr.update(visible=True)
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing
        # the event handler.
        return f"Error during processing: {str(e)}", None, gr.update(visible=False)
def positive_sampler_interface(use_default, csv_file=None, size=10, random=True, seed=42, full=False):
    """Positive Tool Sampler: wrapper that runs positive dataset augmentation.

    Resolves the input CSV (the bundled default or an uploaded file), calls
    ``positive_sampler`` with the sampling options, writes the result to
    ``positive_augmented_dataset.csv`` in the current working directory and
    returns the values for the three outputs wired to the submit button.

    Args:
        use_default: When truthy, read the default CSV under
            ``PROJECT_ROOT/data`` and ignore ``csv_file``.
        csv_file: Uploaded file. Either a filesystem path (``str``/``Path``)
            or an object exposing the path as ``.name``.
        size: Number of samples, forwarded as ``size``. An integral float
            (as produced by Gradio's ``Number`` component) is converted to
            ``int`` first.
        random: Forwarded as ``random``.
        seed: Random seed, forwarded as ``seed``; normalized like ``size``.
        full: Forwarded as ``full``.

    Returns:
        A 3-tuple. On success: ``(output_csv_path, augmented_df.head(),
        gr.update(visible=True))``. On failure: ``(error_message, None,
        gr.update(visible=False))``.
    """
    # NOTE(review): in this file the first return slot is wired to a
    # ``gr.File`` output, so the error message is handed to a file component
    # -- confirm that this renders as intended.
    try:
        if use_default:
            input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
            if not Path(input_csv_path).exists():
                return "Error: Default CSV file not found!", None, gr.update(visible=False)
        elif csv_file is not None:
            # Uploads may arrive as a plain path or as a tempfile-like
            # wrapper; accept both instead of assuming ``.name`` exists.
            if isinstance(csv_file, (str, Path)):
                input_csv_path = str(csv_file)
            else:
                input_csv_path = csv_file.name
        else:
            return "Error: Please select default or upload a CSV file.", None, gr.update(visible=False)

        # Whole-number floats coming from the Number inputs become ints;
        # anything else is forwarded untouched.
        if isinstance(size, float) and size.is_integer():
            size = int(size)
        if isinstance(seed, float) and seed.is_integer():
            seed = int(seed)

        # Call the positive sampler function with additional arguments
        augmented_df = positive_sampler(
            optional_path=input_csv_path,
            size=size,
            random=random,
            seed=seed,
            full=full,
        )
        output_csv_path = "positive_augmented_dataset.csv"
        augmented_df.to_csv(output_csv_path, index=False)
        return output_csv_path, augmented_df.head(), gr.update(visible=True)
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing
        # the event handler.
        return f"Error during processing: {str(e)}", None, gr.update(visible=False)
def reset_output():
    """Clear a sampler's result widgets.

    Returns:
        A 3-tuple matching the (file, preview, preview) outputs of the reset
        buttons: ``None`` for the first two slots and a Gradio update that
        hides the DataFrame for the third.
    """
    hide_preview = gr.update(visible=False)
    return (None, None, hide_preview)
# UI definition for both samplers.
#
# The contents of the multi-line string literals (CSS and Markdown/HTML) are
# kept flush-left exactly as in the source so the text handed to Gradio is
# unchanged. The CSS is a plain string: nothing is interpolated into it, so
# an f-string with doubled braces is unnecessary.
#
# NOTE(review): the nesting of the Group/Row containers below was
# reconstructed from statement order -- confirm it against the intended layout.
# NOTE(review): both sections reuse the elem_ids "submit-button" and
# "reset-button"; element ids are normally expected to be unique on a page.
with gr.Blocks(css="""
.gradio-container {
font-family: Arial, sans-serif;
max-width: 900px;
margin: auto;
}
h1 {
text-align: center;
color: white;
font-size: 60px;
margin-bottom: 0px;
}
h2 {
text-align: center;
color: #ff0000;
font-size: 16px;
font-weight: normal;
margin-top: 0px;
}
.title {
text-align: center;
font-size: 40px;
margin-top: 30px;
margin-bottom: 20px;
}
.title .positive {
color: #ff0000;
}
.title .negative {
color: #ff0000;
}
.title .tool {
color: white;
}
.title .sampler {
color: #ff0000;
}
.description {
text-align: center;
margin-bottom: 20px;
}
#submit-button {
background-color: #ff0000;
color: white;
font-size: 16px;
border: none;
border-radius: 5px;
padding: 10px 20px;
}
#reset-button {
background-color: #d3d3d3;
color: black;
font-size: 16px;
border: none;
border-radius: 5px;
padding: 10px 20px;
}
""") as app:
    # Main Title Section
    # The subtitle separator was mis-decoded ("Β·"); restored to a middle dot.
    gr.Markdown("""
<h1>ENTC</h1>
<h2>Entrepreneurship and Technology Commercialization · EPFL</h2>
""")

    # Positive Tool Sampler Section
    gr.Markdown("""
<div class="title">
<span class="positive">Positive</span>
<span class="tool">Tool</span>
<span class="sampler">Sampler</span>
</div>
""")
    gr.Markdown("""
<p class="description">
This tool takes a list of DOIs and augments them using the OpenAlex API.
It is designed to complement the Negative Tool Sampler, enabling the creation of complete datasets.
</p>
""")
    with gr.Group():
        with gr.Row():
            pos_use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
            # Hidden until the "use default" box is unticked (see toggle below).
            pos_csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)
        with gr.Row():
            # precision=0 makes the Number components deliver ints rather
            # than floats for these whole-number settings.
            size_input = gr.Number(
                label="Number of Samples",
                value=10,
                precision=0,
                info="Specify the number of samples to generate.",
            )
            random_input = gr.Checkbox(label="Sample Randomly", value=True, info="Whether to sample randomly.")
            seed_input = gr.Number(
                label="Random Seed",
                value=42,
                precision=0,
                info="Random seed for reproducibility.",
            )
            full_input = gr.Checkbox(
                label="Full Dataset Mode",
                value=False,
                info="Indicate whether to use the full dataset.",
            )
    with gr.Group():
        pos_output_file = gr.File(label="Download Augmented Dataset")
        pos_dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
    with gr.Row():
        # NOTE(review): the "π" in the button labels looks like a mis-decoded
        # emoji -- confirm the intended character.
        pos_submit_button = gr.Button("Submit π", elem_id="submit-button")
        pos_reset_button = gr.Button("Reset π", elem_id="reset-button")

    # Button Actions
    # The preview component is listed twice on purpose: the handlers return
    # its value and its visibility update as separate items.
    pos_submit_button.click(
        positive_sampler_interface,
        inputs=[pos_use_default_checkbox, pos_csv_file_input, size_input, random_input, seed_input, full_input],
        outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview],
    )
    pos_reset_button.click(
        reset_output,
        inputs=[],
        outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview],
    )

    # Toggle File Input
    def toggle_pos_csv_input(use_default):
        """Show the positive-sampler upload widget only when the default dataset is not used."""
        return gr.update(visible=not use_default)

    pos_use_default_checkbox.change(
        toggle_pos_csv_input,
        inputs=[pos_use_default_checkbox],
        outputs=[pos_csv_file_input],
    )

    # Negative Tool Sampler Section
    gr.Markdown("""
<div class="title">
<span class="negative">Negative</span>
<span class="tool">Tool</span>
<span class="sampler">Sampler</span>
</div>
""")
    gr.Markdown("""
<p class="description">
This tool generates datasets by creating negative samples from positive matches between preprints and articles.
Customize the difficulty and the augmentation factor to meet your needs.
</p>
""")
    with gr.Group():
        with gr.Row():
            # Labelled "(int)": precision=0 makes the component deliver an int.
            factor_input = gr.Number(
                label="Factor (int)",
                value=1,
                precision=0,
                info="Specify the number of negative samples per positive sample.",
            )
            type_dropdown = gr.Dropdown(
                ["random", "similar topics", "overlapping authors", "random authors", "fuzzed title"],
                label="Select Difficulty or Augmentation Type",
            )
        with gr.Row():
            use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
            # Hidden until the "use default" box is unticked (see toggle below).
            csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)
    with gr.Group():
        output_file = gr.File(label="Download Augmented Dataset")
        dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
    with gr.Row():
        submit_button = gr.Button("Submit π", elem_id="submit-button")
        reset_button = gr.Button("Reset π", elem_id="reset-button")

    # Button Actions
    submit_button.click(
        augment_interface,
        inputs=[factor_input, type_dropdown, use_default_checkbox, csv_file_input],
        outputs=[output_file, dataset_preview, dataset_preview],
    )
    reset_button.click(
        reset_output,
        inputs=[],
        outputs=[output_file, dataset_preview, dataset_preview],
    )

    # Toggle File Input
    def toggle_csv_input(use_default):
        """Show the negative-sampler upload widget only when the default dataset is not used."""
        return gr.update(visible=not use_default)

    use_default_checkbox.change(
        toggle_csv_input,
        inputs=[use_default_checkbox],
        outputs=[csv_file_input],
    )
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.launch()