update app.py
Browse files- app.py +51 -6
- awesome-japanese-nlp-resources-search.json +0 -0
app.py
CHANGED
@@ -38,6 +38,7 @@ def convert_to_dataframe():
|
|
38 |
"latest_commit",
|
39 |
"languages",
|
40 |
"model_or_dataset",
|
|
|
41 |
]
|
42 |
]
|
43 |
df = df.sort_values(by="score", ascending=False)
|
@@ -57,6 +58,7 @@ def convert_to_dataframe():
|
|
57 |
stopwords = dataset["nagisa_stopwords"]["words"]
|
58 |
|
59 |
def tokenize_description(description):
|
|
|
60 |
tokens = nagisa.filter(description, filter_postags=["助詞", "助動詞"])
|
61 |
words = tokens.words
|
62 |
words = [word for word in words if len(word.strip()) > 0]
|
@@ -92,9 +94,10 @@ def main():
|
|
92 |
|
93 |
query = st.text_input(label="Search keyword")
|
94 |
|
95 |
-
source_type = ["GitHub", "Hugging Face"]
|
|
|
96 |
selected_source_type = st.selectbox(
|
97 |
-
"Choose a source type:
|
98 |
)
|
99 |
|
100 |
# Filtering GitHub or Hugging Face
|
@@ -141,6 +144,16 @@ def main():
|
|
141 |
value=(min_downloads, max_downloads),
|
142 |
)
|
143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
min_activity_period = int(df["activity_period"].min())
|
145 |
max_activity_period = int(df["activity_period"].max())
|
146 |
|
@@ -177,10 +190,23 @@ def main():
|
|
177 |
& (df["stargazers_count"] <= stars_range[1])
|
178 |
]
|
179 |
else:
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
df = df[
|
186 |
(df["activity_period"] >= activity_period_range[0])
|
@@ -275,6 +301,25 @@ def main():
|
|
275 |
|
276 |
st.markdown("### Language Usage Table")
|
277 |
st.dataframe(language_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
with col2:
|
280 |
if selected_source_type == "GitHub":
|
|
|
38 |
"latest_commit",
|
39 |
"languages",
|
40 |
"model_or_dataset",
|
41 |
+
"model_size",
|
42 |
]
|
43 |
]
|
44 |
df = df.sort_values(by="score", ascending=False)
|
|
|
58 |
stopwords = dataset["nagisa_stopwords"]["words"]
|
59 |
|
60 |
def tokenize_description(description):
|
61 |
+
description = description.lower()
|
62 |
tokens = nagisa.filter(description, filter_postags=["助詞", "助動詞"])
|
63 |
words = tokens.words
|
64 |
words = [word for word in words if len(word.strip()) > 0]
|
|
|
94 |
|
95 |
query = st.text_input(label="Search keyword")
|
96 |
|
97 |
+
# source_type = ["GitHub", "Hugging Face"]
|
98 |
+
source_type = ["Hugging Face", "GitHub"]
|
99 |
selected_source_type = st.selectbox(
|
100 |
+
"Choose a source type: Hugging Face or GitHub", source_type
|
101 |
)
|
102 |
|
103 |
# Filtering GitHub or Hugging Face
|
|
|
144 |
value=(min_downloads, max_downloads),
|
145 |
)
|
146 |
|
147 |
+
min_model_size = int(df["model_size"].min())
|
148 |
+
max_model_size = int(df["model_size"].max())
|
149 |
+
|
150 |
+
model_size_range = st.slider(
|
151 |
+
"Choose the range for the model size (billion)",
|
152 |
+
min_value=min_model_size,
|
153 |
+
max_value=max_model_size,
|
154 |
+
value=(min_model_size, max_model_size),
|
155 |
+
)
|
156 |
+
|
157 |
min_activity_period = int(df["activity_period"].min())
|
158 |
max_activity_period = int(df["activity_period"].max())
|
159 |
|
|
|
190 |
& (df["stargazers_count"] <= stars_range[1])
|
191 |
]
|
192 |
else:
|
193 |
+
if (
|
194 |
+
downloads_range[0] > min_downloads
|
195 |
+
or downloads_range[1] < max_downloads
|
196 |
+
):
|
197 |
+
df = df[
|
198 |
+
(df["downloads"] >= downloads_range[0])
|
199 |
+
& (df["downloads"] <= downloads_range[1])
|
200 |
+
]
|
201 |
+
|
202 |
+
if (
|
203 |
+
model_size_range[0] > min_model_size
|
204 |
+
or model_size_range[1] < max_model_size
|
205 |
+
):
|
206 |
+
df = df[
|
207 |
+
(df["model_size"] >= model_size_range[0])
|
208 |
+
& (df["model_size"] <= model_size_range[1])
|
209 |
+
]
|
210 |
|
211 |
df = df[
|
212 |
(df["activity_period"] >= activity_period_range[0])
|
|
|
301 |
|
302 |
st.markdown("### Language Usage Table")
|
303 |
st.dataframe(language_df)
|
304 |
+
else:
|
305 |
+
st.markdown("### Model size vs downloads")
|
306 |
+
chart = (
|
307 |
+
alt.Chart(df)
|
308 |
+
.mark_circle(size=60)
|
309 |
+
.encode(
|
310 |
+
x="model_size",
|
311 |
+
y="downloads",
|
312 |
+
tooltip=["project_name", "model_size", "downloads"],
|
313 |
+
)
|
314 |
+
.properties(
|
315 |
+
title=(
|
316 |
+
"Relationship between model size (Billion) and"
|
317 |
+
" downloads"
|
318 |
+
),
|
319 |
+
)
|
320 |
+
.interactive()
|
321 |
+
)
|
322 |
+
st.altair_chart(chart, use_container_width=True)
|
323 |
|
324 |
with col2:
|
325 |
if selected_source_type == "GitHub":
|
awesome-japanese-nlp-resources-search.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|