Spaces:

KNGCRIMSON
/

MatchPrePrintArticles

Sleeping

App Files Files Community

MatchPrePrintArticles / app.py

KNGCRIMSON

readme fixed

c973dd5 3 months ago

raw

history blame contribute delete

8.68 kB

	import gradio as gr
	import pandas as pd
	import pandas as pd
	from src.utils.io_utils import PROJECT_ROOT
	from run_augmenter import negative_sampler , positive_sampler
	from pathlib import Path

	def augment_interface(factor, type_or_difficulty, use_default, csv_file=None):
	    """Negative Tool Sampler: Wrapper to handle negative dataset augmentation.

	    Picks the input CSV (the bundled default dataset or an uploaded file),
	    runs ``negative_sampler`` on it and writes the result to
	    ``augmented_dataset.csv`` in the current working directory.

	    Args:
	        factor: Forwarded unchanged as the second positional argument of
	            ``negative_sampler``.
	        type_or_difficulty: Forwarded unchanged as the third positional
	            argument of ``negative_sampler``.
	        use_default: When truthy, read the default dataset under
	            ``PROJECT_ROOT/data`` and ignore ``csv_file``.
	        csv_file: Uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string. Only consulted when
	            ``use_default`` is falsy.

	    Returns:
	        A 3-tuple ``(output_csv_path, preview, visibility_update)``: the path
	        of the written CSV, ``augmented_df.head()`` and
	        ``gr.update(visible=True)``.

	    Raises:
	        gr.Error: If no usable input CSV is available, or if sampling or
	            writing the output fails. The message is raised rather than
	            returned because the first return slot is wired to a file
	            output, where a message string would be treated as a file path.
	    """
	    # Resolve the input path first; these are user-input problems, so they
	    # are reported with their own messages outside the processing try-block.
	    if use_default:
	        input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
	        if not Path(input_csv_path).exists():
	            raise gr.Error("Error: Default CSV file not found!")
	    elif csv_file is not None:
	        # Fall back to the value itself when it has no ``.name`` attribute,
	        # so a plain path string is accepted as well.
	        input_csv_path = getattr(csv_file, "name", csv_file)
	    else:
	        raise gr.Error("Error: Please select default or upload a CSV file.")

	    try:
	        augmented_df = negative_sampler(input_csv_path, factor, type_or_difficulty)
	        output_csv_path = "augmented_dataset.csv"
	        augmented_df.to_csv(output_csv_path, index=False)
	    except Exception as e:
	        # Boundary handler: surface any failure to the UI with its message,
	        # keeping the original exception chained for the server log.
	        raise gr.Error(f"Error during processing: {str(e)}") from e

	    return output_csv_path, augmented_df.head(), gr.update(visible=True)


	def positive_sampler_interface(use_default, csv_file=None, size=10, random=True, seed=42, full=False):
	    """Positive Tool Sampler: Wrapper to handle positive dataset augmentation with additional arguments.

	    Picks the input CSV (the bundled default dataset or an uploaded file),
	    runs ``positive_sampler`` on it and writes the result to
	    ``positive_augmented_dataset.csv`` in the current working directory.

	    Args:
	        use_default: When truthy, read the default dataset under
	            ``PROJECT_ROOT/data`` and ignore ``csv_file``.
	        csv_file: Uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string. Only consulted when
	            ``use_default`` is falsy.
	        size: Forwarded unchanged as ``positive_sampler(size=...)``.
	        random: Forwarded unchanged as ``positive_sampler(random=...)``.
	        seed: Forwarded unchanged as ``positive_sampler(seed=...)``.
	        full: Forwarded unchanged as ``positive_sampler(full=...)``.

	    Returns:
	        A 3-tuple ``(output_csv_path, preview, visibility_update)``: the path
	        of the written CSV, ``augmented_df.head()`` and
	        ``gr.update(visible=True)``.

	    Raises:
	        gr.Error: If no usable input CSV is available, or if sampling or
	            writing the output fails. The message is raised rather than
	            returned because the first return slot is wired to a file
	            output, where a message string would be treated as a file path.
	    """
	    # Resolve the input path first; these are user-input problems, so they
	    # are reported with their own messages outside the processing try-block.
	    if use_default:
	        input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv"
	        if not Path(input_csv_path).exists():
	            raise gr.Error("Error: Default CSV file not found!")
	    elif csv_file is not None:
	        # Fall back to the value itself when it has no ``.name`` attribute,
	        # so a plain path string is accepted as well.
	        input_csv_path = getattr(csv_file, "name", csv_file)
	    else:
	        raise gr.Error("Error: Please select default or upload a CSV file.")

	    try:
	        # Call the positive sampler function with additional arguments
	        augmented_df = positive_sampler(
	            optional_path=input_csv_path,
	            size=size,
	            random=random,
	            seed=seed,
	            full=full,
	        )
	        output_csv_path = "positive_augmented_dataset.csv"
	        augmented_df.to_csv(output_csv_path, index=False)
	    except Exception as e:
	        # Boundary handler: surface any failure to the UI with its message,
	        # keeping the original exception chained for the server log.
	        raise gr.Error(f"Error during processing: {str(e)}") from e

	    return output_csv_path, augmented_df.head(), gr.update(visible=True)


	def reset_output():
	    """Return the values that reset the output fields.

	    Returns:
	        A 3-tuple: ``None`` for the first two outputs and a
	        ``gr.update(visible=False)`` that hides the DataFrame.
	    """
	    cleared_file = None
	    cleared_preview = None
	    hide_preview = gr.update(visible=False)
	    return cleared_file, cleared_preview, hide_preview

	# Build the Gradio Blocks UI: custom CSS, a page header, then a
	# "Positive Tool Sampler" section followed by a "Negative Tool Sampler"
	# section, each with its own inputs, outputs and Submit/Reset buttons.
	# NOTE: the css argument is an f-string without placeholders, which is why
	# every CSS brace is doubled below to produce a literal brace.
	with gr.Blocks(css=f"""
	.gradio-container {{
	font-family: Arial, sans-serif;
	max-width: 900px;
	margin: auto;
	}}
	h1 {{
	text-align: center;
	color: white;
	font-size: 60px;
	margin-bottom: 0px;
	}}
	h2 {{
	text-align: center;
	color: #ff0000;
	font-size: 16px;
	font-weight: normal;
	margin-top: 0px;
	}}
	.title {{
	text-align: center;
	font-size: 40px;
	margin-top: 30px;
	margin-bottom: 20px;
	}}
	.title .positive {{
	color: #ff0000;
	}}
	.title .negative {{
	color: #ff0000;
	}}
	.title .tool {{
	color: white;
	}}
	.title .sampler {{
	color: #ff0000;
	}}
	.description {{
	text-align: center;
	margin-bottom: 20px;
	}}
	#submit-button {{
	background-color: #ff0000;
	color: white;
	font-size: 16px;
	border: none;
	border-radius: 5px;
	padding: 10px 20px;
	}}
	#reset-button {{
	background-color: #d3d3d3;
	color: black;
	font-size: 16px;
	border: none;
	border-radius: 5px;
	padding: 10px 20px;
	}}
	""") as app:
	# Main Title Section
	gr.Markdown("""
	<h1>ENTC</h1>
	<h2>Entrepreneurship and Technology Commercialization · EPFL</h2>
	""")

	# Positive Tool Sampler Section
	gr.Markdown("""
	<div class="title">
	<span class="positive">Positive</span>
	<span class="tool">Tool</span>
	<span class="sampler">Sampler</span>
	</div>
	""")

	gr.Markdown("""
	<p class="description">
	This tool takes a list of DOIs and augments them using the OpenAlex API.
	It is designed to complement the Negative Tool Sampler, enabling the creation of complete datasets.
	</p>
	""")

	# Inputs: dataset source plus the sampling options.
	with gr.Group():
	with gr.Row():
	# The checkbox starts checked and the upload widget starts hidden;
	# toggle_pos_csv_input below keeps the two in sync.
	pos_use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
	pos_csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)

	with gr.Row():
	# Initial values mirror the defaults in positive_sampler_interface's
	# signature (size=10, random=True, seed=42, full=False).
	size_input = gr.Number(label="Number of Samples", value=10, info="Specify the number of samples to generate.")
	random_input = gr.Checkbox(label="Sample Randomly", value=True, info="Whether to sample randomly.")
	seed_input = gr.Number(label="Random Seed", value=42, info="Random seed for reproducibility.")
	full_input = gr.Checkbox(label="Full Dataset Mode", value=False, info="Indicate whether to use the full dataset.")

	# Outputs: a downloadable file and a preview table that starts hidden.
	with gr.Group():
	pos_output_file = gr.File(label="Download Augmented Dataset")
	pos_dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
	with gr.Row():
	pos_submit_button = gr.Button("Submit 🚀", elem_id="submit-button")
	pos_reset_button = gr.Button("Reset 🔄", elem_id="reset-button")

	# Button Actions
	# inputs are passed positionally, so their order must match the handler's
	# parameters (use_default, csv_file, size, random, seed, full).
	# The preview component is listed twice in outputs on purpose: the
	# handler's second return value carries the table data and the third is a
	# gr.update that only changes its visibility.
	pos_submit_button.click(
	positive_sampler_interface,
	inputs=[pos_use_default_checkbox, pos_csv_file_input, size_input, random_input, seed_input, full_input],
	outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview]
	)

	# Reset returns (None, None, hide-update) for the same three output slots.
	pos_reset_button.click(
	reset_output,
	inputs=[],
	outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview]
	)

	# Toggle File Input
	# Shows the upload widget only while "Use Default Dataset" is unchecked.
	def toggle_pos_csv_input(use_default):
	return gr.update(visible=not use_default)

	pos_use_default_checkbox.change(
	toggle_pos_csv_input,
	inputs=[pos_use_default_checkbox],
	outputs=[pos_csv_file_input]
	)

	# Negative Tool Sampler Section
	gr.Markdown("""
	<div class="title">
	<span class="negative">Negative</span>
	<span class="tool">Tool</span>
	<span class="sampler">Sampler</span>
	</div>
	""")

	gr.Markdown("""
	<p class="description">
	This tool generates datasets by creating negative samples from positive matches between preprints and articles.
	Customize the difficulty and the augmentation factor to meet your needs.
	</p>
	""")

	# Inputs: augmentation factor, augmentation type and dataset source.
	with gr.Group():
	with gr.Row():
	# NOTE(review): the label says int, but no precision is configured on
	# this Number; confirm what type negative_sampler actually receives.
	factor_input = gr.Number(
	label="Factor (int)", value=1, info="Specify the number of negative samples per positive sample."
	)
	# NOTE(review): no value= is given, so the initial selection is whatever
	# Gradio defaults to; confirm augment_interface never receives None.
	type_dropdown = gr.Dropdown(
	["random", "similar topics", "overlapping authors", "random authors", "fuzzed title"],
	label="Select Difficulty or Augmentation Type"
	)
	with gr.Row():
	# Same pairing as the positive section: checkbox checked, upload hidden.
	use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True)
	csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False)

	# Outputs: a downloadable file and a preview table that starts hidden.
	with gr.Group():
	output_file = gr.File(label="Download Augmented Dataset")
	dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False)
	with gr.Row():
	# NOTE(review): these reuse the elem_id values of the positive section's
	# buttons, so the #submit-button / #reset-button CSS rules style both
	# pairs; HTML ids are normally expected to be unique.
	submit_button = gr.Button("Submit 🚀", elem_id="submit-button")
	reset_button = gr.Button("Reset 🔄", elem_id="reset-button")

	# Button Actions
	# inputs are passed positionally, matching augment_interface's parameters
	# (factor, type_or_difficulty, use_default, csv_file). The preview is
	# listed twice in outputs: once for its data, once for its visibility.
	submit_button.click(
	augment_interface,
	inputs=[factor_input, type_dropdown, use_default_checkbox, csv_file_input],
	outputs=[output_file, dataset_preview, dataset_preview]
	)

	# Reset returns (None, None, hide-update) for the same three output slots.
	reset_button.click(
	reset_output,
	inputs=[],
	outputs=[output_file, dataset_preview, dataset_preview]
	)

	# Toggle File Input
	# Shows the upload widget only while "Use Default Dataset" is unchecked.
	def toggle_csv_input(use_default):
	return gr.update(visible=not use_default)

	use_default_checkbox.change(
	toggle_csv_input,
	inputs=[use_default_checkbox],
	outputs=[csv_file_input]
	)

	# Launch the app
	# Only when this file is run as a script, not when it is imported.
	if __name__ == "__main__":
	app.launch()