Spaces:
Sleeping
Sleeping
More improvements
Browse files
app.py
CHANGED
@@ -2,10 +2,10 @@ from datetime import datetime
|
|
2 |
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
|
|
5 |
import matplotlib.pyplot as plt
|
6 |
|
7 |
# from load_dataframe import get_data
|
8 |
-
from urllib.parse import quote
|
9 |
|
10 |
|
11 |
def aggregated_data(df, aggregation_level="week"):
|
@@ -25,10 +25,17 @@ def aggregated_data(df, aggregation_level="week"):
|
|
25 |
|
26 |
# Calculate the growth rate
|
27 |
growth_rate = percentage_papers_with_artifacts.pct_change() * 100
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
32 |
|
33 |
# Create the plot
|
34 |
plt.figure(figsize=(12, 6))
|
@@ -53,8 +60,9 @@ def display_data(df):
|
|
53 |
percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
|
54 |
percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
|
55 |
|
56 |
-
# add reached out
|
57 |
df['reached_out'] = [False for _ in range(df.shape[0])]
|
|
|
58 |
|
59 |
st.markdown(f"""
|
60 |
## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
|
@@ -67,7 +75,7 @@ def display_data(df):
|
|
67 |
st.write("Papers with at least one artifact")
|
68 |
st.data_editor(df[df['has_artifact']],
|
69 |
hide_index=True,
|
70 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
71 |
column_config={"github": st.column_config.LinkColumn(),
|
72 |
"paper_page": st.column_config.LinkColumn(),
|
73 |
"paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
|
@@ -77,7 +85,7 @@ def display_data(df):
|
|
77 |
st.write("Papers without artifacts")
|
78 |
st.data_editor(df[~df['has_artifact']],
|
79 |
hide_index=True,
|
80 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
81 |
column_config={"github": st.column_config.LinkColumn(),
|
82 |
"paper_page": st.column_config.LinkColumn()},
|
83 |
width=2000,
|
@@ -86,7 +94,7 @@ def display_data(df):
|
|
86 |
st.write("Papers with a HF mention in README but no artifacts")
|
87 |
st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
|
88 |
hide_index=True,
|
89 |
-
column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
90 |
column_config={"github": st.column_config.LinkColumn(),
|
91 |
"paper_page": st.column_config.LinkColumn()},
|
92 |
width=2000,
|
@@ -109,9 +117,6 @@ def main():
|
|
109 |
df.index = pd.to_datetime(df.index)
|
110 |
df = df.sort_index()
|
111 |
|
112 |
-
# hack: include title in URL column
|
113 |
-
df['updated_url'] = df.apply(lambda row: f'{row["paper_page"]}/title/{quote(row["title"])}', axis=1)
|
114 |
-
|
115 |
if selection == "Daily/weekly/monthly data":
|
116 |
# Button to select day, month or week
|
117 |
# Add streamlit selectbox.
|
@@ -123,8 +128,6 @@ def main():
|
|
123 |
# convert to the day of a Pandas Timestamp
|
124 |
day = pd.Timestamp(day)
|
125 |
|
126 |
-
print("Day:", day)
|
127 |
-
|
128 |
df = df[df.index.date == day.date()]
|
129 |
|
130 |
st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
|
@@ -173,9 +176,6 @@ def main():
|
|
173 |
else:
|
174 |
st.write("Error: selection not recognized")
|
175 |
|
176 |
-
# Display data based on aggregation level
|
177 |
-
|
178 |
-
|
179 |
|
180 |
if __name__ == "__main__":
|
181 |
main()
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
import matplotlib.pyplot as plt
|
7 |
|
8 |
# from load_dataframe import get_data
|
|
|
9 |
|
10 |
|
11 |
def aggregated_data(df, aggregation_level="week"):
|
|
|
25 |
|
26 |
# Calculate the growth rate
|
27 |
growth_rate = percentage_papers_with_artifacts.pct_change() * 100
|
28 |
+
|
29 |
+
print("Type of growth rate:", growth_rate)
|
30 |
+
print("Growth rate:", type(growth_rate))
|
31 |
+
|
32 |
+
# growth_rate = growth_rate.dropna()
|
33 |
|
34 |
+
print("Growht rate after removing nan:", growth_rate)
|
35 |
+
|
36 |
+
# Display the average growth rate as a big number
|
37 |
+
average_growth_rate = growth_rate.mean()
|
38 |
+
st.metric(label=f"{aggregation_level.capitalize()}ly average Growth Rate", value=f"{average_growth_rate:.2f}%")
|
39 |
|
40 |
# Create the plot
|
41 |
plt.figure(figsize=(12, 6))
|
|
|
60 |
percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
|
61 |
percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
|
62 |
|
63 |
+
# add reached out and reached out link columns
|
64 |
df['reached_out'] = [False for _ in range(df.shape[0])]
|
65 |
+
df["reached_out_link"] = ["" for _ in range(df.shape[0])]
|
66 |
|
67 |
st.markdown(f"""
|
68 |
## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
|
|
|
75 |
st.write("Papers with at least one artifact")
|
76 |
st.data_editor(df[df['has_artifact']],
|
77 |
hide_index=True,
|
78 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
79 |
column_config={"github": st.column_config.LinkColumn(),
|
80 |
"paper_page": st.column_config.LinkColumn(),
|
81 |
"paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
|
|
|
85 |
st.write("Papers without artifacts")
|
86 |
st.data_editor(df[~df['has_artifact']],
|
87 |
hide_index=True,
|
88 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
89 |
column_config={"github": st.column_config.LinkColumn(),
|
90 |
"paper_page": st.column_config.LinkColumn()},
|
91 |
width=2000,
|
|
|
94 |
st.write("Papers with a HF mention in README but no artifacts")
|
95 |
st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
|
96 |
hide_index=True,
|
97 |
+
column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
|
98 |
column_config={"github": st.column_config.LinkColumn(),
|
99 |
"paper_page": st.column_config.LinkColumn()},
|
100 |
width=2000,
|
|
|
117 |
df.index = pd.to_datetime(df.index)
|
118 |
df = df.sort_index()
|
119 |
|
|
|
|
|
|
|
120 |
if selection == "Daily/weekly/monthly data":
|
121 |
# Button to select day, month or week
|
122 |
# Add streamlit selectbox.
|
|
|
128 |
# convert to the day of a Pandas Timestamp
|
129 |
day = pd.Timestamp(day)
|
130 |
|
|
|
|
|
131 |
df = df[df.index.date == day.date()]
|
132 |
|
133 |
st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
|
|
|
176 |
else:
|
177 |
st.write("Error: selection not recognized")
|
178 |
|
|
|
|
|
|
|
179 |
|
180 |
if __name__ == "__main__":
|
181 |
main()
|