Spaces:

nielsr
/

community-science-progress

Sleeping

App Files Files Community

nielsr HF staff committed on Jul 22, 2024

Commit

9325c4d

1 Parent(s): 70d5ad7

More improvements

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -2,10 +2,10 @@ from datetime import datetime
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 # from load_dataframe import get_data
-from urllib.parse import quote
 def aggregated_data(df, aggregation_level="week"):
@@ -25,10 +25,17 @@ def aggregated_data(df, aggregation_level="week"):
     # Calculate the growth rate
     growth_rate = percentage_papers_with_artifacts.pct_change() * 100
-    # Display the latest growth rate as a big number
-    latest_growth_rate = growth_rate.iloc[-1] if not growth_rate.empty else 0
-    st.metric(label=f"{aggregation_level.capitalize()}ly Growth Rate", value=f"{latest_growth_rate:.2f}%")
     # Create the plot
     plt.figure(figsize=(12, 6))
@@ -53,8 +60,9 @@ def display_data(df):
     percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
     percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
-    # add reached out column
     df['reached_out'] = [False for _ in range(df.shape[0])]
     st.markdown(f"""
     ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
@@ -67,7 +75,7 @@ def display_data(df):
     st.write("Papers with at least one artifact")
     st.data_editor(df[df['has_artifact']],
                 hide_index=True,
-                column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn(),
                                 "paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
@@ -77,7 +85,7 @@ def display_data(df):
     st.write("Papers without artifacts")
     st.data_editor(df[~df['has_artifact']],
                 hide_index=True,
-                column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn()},
                 width=2000,
@@ -86,7 +94,7 @@ def display_data(df):
     st.write("Papers with a HF mention in README but no artifacts")
     st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
                 hide_index=True,
-                column_order=("reached_out", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn()},
                 width=2000,
@@ -109,9 +117,6 @@ def main():
     df.index = pd.to_datetime(df.index)
     df = df.sort_index()
-    # hack: include title in URL column
-    df['updated_url'] = df.apply(lambda row: f'{row["paper_page"]}/title/{quote(row["title"])}', axis=1)
     if selection == "Daily/weekly/monthly data":
         # Button to select day, month or week
         # Add streamlit selectbox.
@@ -123,8 +128,6 @@ def main():
             # convert to the day of a Pandas Timestamp
             day = pd.Timestamp(day)
-            print("Day:", day)
             df = df[df.index.date == day.date()]
             st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
@@ -173,9 +176,6 @@ def main():
     else:
         st.write("Error: selection not recognized")
-    # Display data based on aggregation level
 if __name__ == "__main__":
     main()

 import streamlit as st
 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 # from load_dataframe import get_data
 def aggregated_data(df, aggregation_level="week"):
     # Calculate the growth rate
     growth_rate = percentage_papers_with_artifacts.pct_change() * 100
+    print("Type of growth rate:", growth_rate)
+    print("Growth rate:", type(growth_rate))
+    # growth_rate = growth_rate.dropna()
+    print("Growht rate after removing nan:", growth_rate)
+    # Display the average growth rate as a big number
+    average_growth_rate = growth_rate.mean()
+    st.metric(label=f"{aggregation_level.capitalize()}ly average Growth Rate", value=f"{average_growth_rate:.2f}%")
     # Create the plot
     plt.figure(figsize=(12, 6))
     percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
     percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
+    # add reached out and reached out link columns
     df['reached_out'] = [False for _ in range(df.shape[0])]
+    df["reached_out_link"] = ["" for _ in range(df.shape[0])]
     st.markdown(f"""
     ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
     st.write("Papers with at least one artifact")
     st.data_editor(df[df['has_artifact']],
                 hide_index=True,
+                column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn(),
                                 "paper_page_with_title": st.column_config.LinkColumn(display_text=r'\|(.*)')},
     st.write("Papers without artifacts")
     st.data_editor(df[~df['has_artifact']],
                 hide_index=True,
+                column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn()},
                 width=2000,
     st.write("Papers with a HF mention in README but no artifacts")
     st.data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])],
                 hide_index=True,
+                column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
                 column_config={"github": st.column_config.LinkColumn(),
                                 "paper_page": st.column_config.LinkColumn()},
                 width=2000,
     df.index = pd.to_datetime(df.index)
     df = df.sort_index()
     if selection == "Daily/weekly/monthly data":
         # Button to select day, month or week
         # Add streamlit selectbox.
             # convert to the day of a Pandas Timestamp
             day = pd.Timestamp(day)
             df = df[df.index.date == day.date()]
             st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
     else:
         st.write("Error: selection not recognized")
 if __name__ == "__main__":
     main()