nielsr HF staff committed on
Commit
9325c4d
·
1 Parent(s): 70d5ad7

More improvements

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -2,10 +2,10 @@ from datetime import datetime
2
 
3
  import streamlit as st
4
  import pandas as pd
 
5
  import matplotlib.pyplot as plt
6
 
7
  # from load_dataframe import get_data
8
- from urllib.parse import quote
9
 
10
 
11
  def aggregated_data(df, aggregation_level="week"):
@@ -25,10 +25,17 @@ def aggregated_data(df, aggregation_level="week"):
25
 
26
  # Calculate the growth rate
27
  growth_rate = percentage_papers_with_artifacts.pct_change() * 100
 
 
 
 
 
28
 
29
- # Display the latest growth rate as a big number
30
- latest_growth_rate = growth_rate.iloc[-1] if not growth_rate.empty else 0
31
- st.metric(label=f"{aggregation_level.capitalize()}ly Growth Rate", value=f"{latest_growth_rate:.2f}%")
 
 
32
 
33
  # Create the plot
34
  plt.figure(figsize=(12, 6))
@@ -53,8 +60,9 @@ def display_data(df):
53
  percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
54
  percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
55
 
56
- # add reached out column
57
  df['reached_out'] = [False for _ in range(df.shape[0])]
 
58
 
59
  st.markdown(f"""
60
  ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
@@ -67,7 +75,7 @@ def display_data(df):
67
  st.write("Papers with at least one artifact")
68
  st.data_editor(df[df['has_artifact']],
69
  hide_index=True,
70
- column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
71
  column_config={"github": st.column_config.LinkColumn(),
72
  "paper_page": st.column_config.LinkColumn(),
73
  "paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
@@ -77,7 +85,7 @@ def display_data(df):
77
  st.write("Papers without artifacts")
78
  st.data_editor(df[~df['has_artifact']],
79
  hide_index=True,
80
- column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
81
  column_config={"github": st.column_config.LinkColumn(),
82
  "paper_page": st.column_config.LinkColumn()},
83
  width=2000,
@@ -86,7 +94,7 @@ def display_data(df):
86
  st.write("Papers with a HF mention in README but no artifacts")
87
  st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
88
  hide_index=True,
89
- column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
90
  column_config={"github": st.column_config.LinkColumn(),
91
  "paper_page": st.column_config.LinkColumn()},
92
  width=2000,
@@ -109,9 +117,6 @@ def main():
109
  df.index = pd.to_datetime(df.index)
110
  df = df.sort_index()
111
 
112
- # hack: include title in URL column
113
- df['updated_url'] = df.apply(lambda row: f'{row["paper_page"]}/title/{quote(row["title"])}', axis=1)
114
-
115
  if selection == "Daily/weekly/monthly data":
116
  # Button to select day, month or week
117
  # Add streamlit selectbox.
@@ -123,8 +128,6 @@ def main():
123
  # convert to the day of a Pandas Timestamp
124
  day = pd.Timestamp(day)
125
 
126
- print("Day:", day)
127
-
128
  df = df[df.index.date == day.date()]
129
 
130
  st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
@@ -173,9 +176,6 @@ def main():
173
  else:
174
  st.write("Error: selection not recognized")
175
 
176
- # Display data based on aggregation level
177
-
178
-
179
 
180
  if __name__ == "__main__":
181
  main()
 
2
 
3
  import streamlit as st
4
  import pandas as pd
5
+ import numpy as np
6
  import matplotlib.pyplot as plt
7
 
8
  # from load_dataframe import get_data
 
9
 
10
 
11
  def aggregated_data(df, aggregation_level="week"):
 
25
 
26
  # Calculate the growth rate
27
  growth_rate = percentage_papers_with_artifacts.pct_change() * 100
28
+
29
+ print("Type of growth rate:", growth_rate)
30
+ print("Growth rate:", type(growth_rate))
31
+
32
+ # growth_rate = growth_rate.dropna()
33
 
34
+ print("Growht rate after removing nan:", growth_rate)
35
+
36
+ # Display the average growth rate as a big number
37
+ average_growth_rate = growth_rate.mean()
38
+ st.metric(label=f"{aggregation_level.capitalize()}ly average Growth Rate", value=f"{average_growth_rate:.2f}%")
39
 
40
  # Create the plot
41
  plt.figure(figsize=(12, 6))
 
60
  percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
61
  percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
62
 
63
+ # add reached out and reached out link columns
64
  df['reached_out'] = [False for _ in range(df.shape[0])]
65
+ df["reached_out_link"] = ["" for _ in range(df.shape[0])]
66
 
67
  st.markdown(f"""
68
  ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
 
75
  st.write("Papers with at least one artifact")
76
  st.data_editor(df[df['has_artifact']],
77
  hide_index=True,
78
+ column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
79
  column_config={"github": st.column_config.LinkColumn(),
80
  "paper_page": st.column_config.LinkColumn(),
81
  "paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
 
85
  st.write("Papers without artifacts")
86
  st.data_editor(df[~df['has_artifact']],
87
  hide_index=True,
88
+ column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
89
  column_config={"github": st.column_config.LinkColumn(),
90
  "paper_page": st.column_config.LinkColumn()},
91
  width=2000,
 
94
  st.write("Papers with a HF mention in README but no artifacts")
95
  st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
96
  hide_index=True,
97
+ column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
98
  column_config={"github": st.column_config.LinkColumn(),
99
  "paper_page": st.column_config.LinkColumn()},
100
  width=2000,
 
117
  df.index = pd.to_datetime(df.index)
118
  df = df.sort_index()
119
 
 
 
 
120
  if selection == "Daily/weekly/monthly data":
121
  # Button to select day, month or week
122
  # Add streamlit selectbox.
 
128
  # convert to the day of a Pandas Timestamp
129
  day = pd.Timestamp(day)
130
 
 
 
131
  df = df[df.index.date == day.date()]
132
 
133
  st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
 
176
  else:
177
  st.write("Error: selection not recognized")
178
 
 
 
 
179
 
180
  if __name__ == "__main__":
181
  main()