"""Gradio app for searching the awesome-japanese-nlp-resources catalogue."""

import json
from datetime import datetime

import gradio as gr
import pandas as pd

# Timestamp layout used by the "first_commit" / "latest_commit" JSON fields.
COMMIT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Column order of the search table.
TABLE_COLUMNS = [
    "project_name",
    "downloads",
    "stars",
    "description",
    "first_commit",
    "latest_commit",
    "source",
    "languages",
    "type",
]

# Checkbox label -> value stored in the "type" column.
TYPE_LABELS = {"Dataset": "dataset", "Model": "model"}

PROGRAMMING_LANGUAGES = [
    "Python",
    "Jupyter Notebook",
    "Java",
    "C++",
    "JavaScript",
    "TypeScript",
    "C#",
    "Rust",
    "Go",
    "C",
    "Kotlin",
    "Ruby",
    "Perl",
]

HEADER_MARKDOWN = """\
# Awesome Japanese NLP resources search 🔎
You can search for open-source software from [1250+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
"""


def read_json(file_name):
    """Load and return the parsed content of the JSON file *file_name*."""
    # Explicit UTF-8: the catalogue contains Japanese text, and the platform
    # default encoding is not UTF-8 everywhere.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)


def truncate_text(text, max_length=40):
    """Return *text* cut to at most *max_length* characters.

    When the text is too long, the last kept character is replaced by an
    ellipsis ("…") so the result is exactly *max_length* characters long.
    """
    if len(text) <= max_length:
        return text
    return text[: max_length - 1] + "…"


def _parse_commit_date(value):
    """Convert a commit timestamp string to a ``date``; pass falsy values through."""
    if not value:
        return value
    return datetime.strptime(value, COMMIT_DATE_FORMAT).date()


def _build_table(entries):
    """Build the search table (one row per catalogue entry) as a DataFrame."""
    table = {column: [] for column in TABLE_COLUMNS}
    for entry in entries:
        # The name is rendered as a markdown link to the project URL.
        link = f"[{truncate_text(entry['project_name'])}]({entry['url']})"
        table["project_name"].append(link)
        # Optional fields: missing keys become None.
        table["downloads"].append(entry.get("downloads"))
        table["stars"].append(entry.get("stargazers_count"))
        # Stored lower-cased so it can be matched against lower-cased queries;
        # a null description is treated as empty instead of crashing.
        table["description"].append((entry["description"] or "").lower())
        table["first_commit"].append(_parse_commit_date(entry["first_commit"]))
        table["latest_commit"].append(
            _parse_commit_date(entry["latest_commit"])
        )
        table["source"].append(entry["source"])
        table["languages"].append(entry["languages"])
        table["type"].append(entry["model_or_dataset"])
    return pd.DataFrame(table)


def _contains_language(language_list, filter_lang):
    """Return True if *filter_lang* is in *language_list* (False if not a container)."""
    try:
        return filter_lang in language_list
    except TypeError:
        # e.g. the entry has no language information (None).
        return False


json_file = "awesome-japanese-nlp-resources-search.json"
json_data = read_json(json_file)
data = _build_table(json_data)


def show_search_results(
    language_filter, queries, source_checkbox, show_checkbox
):
    """Filter the module-level ``data`` table according to the UI controls.

    Args:
        language_filter: Programming language to require, or a falsy value
            for no language filtering.
        queries: Whitespace-separated search words. A row is kept only if
            every word occurs (case-insensitively, as a literal substring)
            in its description or in its project-name link.
        source_checkbox: Selected labels among "GitHub", "Hugging Face",
            "Dataset" and "Model". Rows from an unselected source are
            removed, together with that source's metric column ("stars"
            for GitHub, "downloads" for Hugging Face). If "Dataset" and/or
            "Model" are selected, only rows of the selected type(s) remain.
        show_checkbox: Names of the columns to display.

    Returns:
        A DataFrame with the matching rows and the visible columns, in the
        column order of ``data``.
    """
    terms = (queries or "").lower().split()
    source_checkbox = source_checkbox or []
    show_checkbox = show_checkbox or []

    df_search = data

    if language_filter:
        matches = df_search["languages"].apply(
            _contains_language, filter_lang=language_filter
        )
        df_search = df_search[matches.astype(bool)]

    # Columns hidden as a consequence of the source selection. They are
    # collected here and removed once at the end, so a column can never be
    # dropped twice (which used to raise KeyError).
    hidden_columns = set()
    if "GitHub" not in source_checkbox:
        df_search = df_search[df_search["source"] != "GitHub"]
        hidden_columns.add("stars")
    if "Hugging Face" not in source_checkbox:
        df_search = df_search[df_search["source"] != "Hugging Face"]
        hidden_columns.add("downloads")

    # Selecting both "Dataset" and "Model" keeps rows of either type rather
    # than applying two mutually exclusive filters.
    selected_types = [
        value
        for label, value in TYPE_LABELS.items()
        if label in source_checkbox
    ]
    if selected_types:
        df_search = df_search[df_search["type"].isin(selected_types)]

    # Text search runs on the already-filtered rows (so the mask index always
    # matches) and before any column is removed.
    if terms and not df_search.empty:
        descriptions = df_search["description"]
        # Queries are lower-cased, so compare against lower-cased names.
        names = df_search["project_name"].str.lower()
        keep = pd.Series(True, index=df_search.index)
        for term in terms:
            # regex=False: user input such as "c++" is plain text, not a
            # regular expression.
            keep &= descriptions.str.contains(
                term, regex=False, na=False
            ) | names.str.contains(term, regex=False, na=False)
        df_search = df_search[keep]

    visible_columns = [
        column
        for column in df_search.columns
        if column in show_checkbox and column not in hidden_columns
    ]
    return df_search[visible_columns]


with gr.Blocks() as demo:
    gr.Markdown(HEADER_MARKDOWN)

    query = gr.Textbox(label="Search words", placeholder="llm")

    # Filter controls share one row; the results table sits below them.
    with gr.Row():
        language_selector = gr.Dropdown(
            label="Programming Language",
            choices=PROGRAMMING_LANGUAGES,
        )
        source_checkbox = gr.CheckboxGroup(
            ["GitHub", "Hugging Face", "Dataset", "Model"],
            value=["GitHub", "Hugging Face"],
            label="Source",
        )
        show_checkbox = gr.CheckboxGroup(
            [
                "project_name",
                "downloads",
                "stars",
                "description",
                "first_commit",
                "latest_commit",
                "source",
                "type",
                "languages",
            ],
            value=[
                "project_name",
                "downloads",
                "stars",
                "description",
            ],
            label="Display columns in a table",
        )

    df = gr.DataFrame(
        value=data,
        type="pandas",
        datatype="markdown",
        height=600,
    )

    # Any change to any control re-runs the search with the full set of
    # control values.
    search_inputs = [
        language_selector,
        query,
        source_checkbox,
        show_checkbox,
    ]
    for component in search_inputs:
        component.change(
            fn=show_search_results,
            inputs=search_inputs,
            outputs=df,
        )

demo.launch()