taishi-i committed on
Commit
62ea5f3
•
1 Parent(s): b9ed42e

update app.py

Browse files
app.py CHANGED
@@ -38,6 +38,7 @@ def convert_to_dataframe():
38
  "latest_commit",
39
  "languages",
40
  "model_or_dataset",
 
41
  ]
42
  ]
43
  df = df.sort_values(by="score", ascending=False)
@@ -57,6 +58,7 @@ def convert_to_dataframe():
57
  stopwords = dataset["nagisa_stopwords"]["words"]
58
 
59
  def tokenize_description(description):
 
60
  tokens = nagisa.filter(description, filter_postags=["助詞", "助動詞"])
61
  words = tokens.words
62
  words = [word for word in words if len(word.strip()) > 0]
@@ -92,9 +94,10 @@ def main():
92
 
93
  query = st.text_input(label="Search keyword")
94
 
95
- source_type = ["GitHub", "Hugging Face"]
 
96
  selected_source_type = st.selectbox(
97
- "Choose a source type: GitHub or Hugging Face", source_type
98
  )
99
 
100
  # Filtering GitHub or Hugging Face
@@ -141,6 +144,16 @@ def main():
141
  value=(min_downloads, max_downloads),
142
  )
143
 
 
 
 
 
 
 
 
 
 
 
144
  min_activity_period = int(df["activity_period"].min())
145
  max_activity_period = int(df["activity_period"].max())
146
 
@@ -177,10 +190,23 @@ def main():
177
  & (df["stargazers_count"] <= stars_range[1])
178
  ]
179
  else:
180
- df = df[
181
- (df["downloads"] >= downloads_range[0])
182
- & (df["downloads"] <= downloads_range[1])
183
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  df = df[
186
  (df["activity_period"] >= activity_period_range[0])
@@ -275,6 +301,25 @@ def main():
275
 
276
  st.markdown("### Language Usage Table")
277
  st.dataframe(language_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
  with col2:
280
  if selected_source_type == "GitHub":
 
38
  "latest_commit",
39
  "languages",
40
  "model_or_dataset",
41
+ "model_size",
42
  ]
43
  ]
44
  df = df.sort_values(by="score", ascending=False)
 
58
  stopwords = dataset["nagisa_stopwords"]["words"]
59
 
60
  def tokenize_description(description):
61
+ description = description.lower()
62
  tokens = nagisa.filter(description, filter_postags=["助詞", "助動詞"])
63
  words = tokens.words
64
  words = [word for word in words if len(word.strip()) > 0]
 
94
 
95
  query = st.text_input(label="Search keyword")
96
 
97
+ # source_type = ["GitHub", "Hugging Face"]
98
+ source_type = ["Hugging Face", "GitHub"]
99
  selected_source_type = st.selectbox(
100
+ "Choose a source type: Hugging Face or GitHub", source_type
101
  )
102
 
103
  # Filtering GitHub or Hugging Face
 
144
  value=(min_downloads, max_downloads),
145
  )
146
 
147
+ min_model_size = int(df["model_size"].min())
148
+ max_model_size = int(df["model_size"].max())
149
+
150
+ model_size_range = st.slider(
151
+ "Choose the range for the model size (billion)",
152
+ min_value=min_model_size,
153
+ max_value=max_model_size,
154
+ value=(min_model_size, max_model_size),
155
+ )
156
+
157
  min_activity_period = int(df["activity_period"].min())
158
  max_activity_period = int(df["activity_period"].max())
159
 
 
190
  & (df["stargazers_count"] <= stars_range[1])
191
  ]
192
  else:
193
+ if (
194
+ downloads_range[0] > min_downloads
195
+ or downloads_range[1] < max_downloads
196
+ ):
197
+ df = df[
198
+ (df["downloads"] >= downloads_range[0])
199
+ & (df["downloads"] <= downloads_range[1])
200
+ ]
201
+
202
+ if (
203
+ model_size_range[0] > min_model_size
204
+ or model_size_range[1] < max_model_size
205
+ ):
206
+ df = df[
207
+ (df["model_size"] >= model_size_range[0])
208
+ & (df["model_size"] <= model_size_range[1])
209
+ ]
210
 
211
  df = df[
212
  (df["activity_period"] >= activity_period_range[0])
 
301
 
302
  st.markdown("### Language Usage Table")
303
  st.dataframe(language_df)
304
+ else:
305
+ st.markdown("### Model size vs downloads")
306
+ chart = (
307
+ alt.Chart(df)
308
+ .mark_circle(size=60)
309
+ .encode(
310
+ x="model_size",
311
+ y="downloads",
312
+ tooltip=["project_name", "model_size", "downloads"],
313
+ )
314
+ .properties(
315
+ title=(
316
+ "Relationship between model size (Billion) and"
317
+ " downloads"
318
+ ),
319
+ )
320
+ .interactive()
321
+ )
322
+ st.altair_chart(chart, use_container_width=True)
323
 
324
  with col2:
325
  if selected_source_type == "GitHub":
awesome-japanese-nlp-resources-search.json CHANGED
The diff for this file is too large to render. See raw diff