dejanseo committed
Commit 40d7e95 (verified)
1 parent: 47c6dd1

Update app.py

Files changed (1): app.py (+32, -15)
app.py CHANGED
@@ -1,6 +1,3 @@
-# Install required libraries (in Streamlit you would install them via requirements.txt or manually in the terminal)
-# !pip install requests trafilatura sentence-transformers numpy torch tqdm scikit-learn pandas advertools streamlit
-
 import streamlit as st
 import requests
 import trafilatura
@@ -11,37 +8,44 @@ import advertools as adv
 from sklearn.cluster import KMeans
 from collections import Counter
 
+# Initialize session state variables
+if 'urls' not in st.session_state:
+    st.session_state.urls = []
+if 'results' not in st.session_state:
+    st.session_state.results = None
+if 'processing_complete' not in st.session_state:
+    st.session_state.processing_complete = False
+
 # Title of the app
 st.title("Site Focus Calculator")
 st.write("A tool for calculating the site focus score of a website or a series of URLs.")
 
-
 # Load the model
 model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
-#model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
-
-#mxbai-embed-xsmall-v1
-
-
 
 # Input fields for sitemap or list of URLs (separated by newlines)
-sitemap_url = st.text_input("Enter your XML sitemap URL (optional)", "")
-url_list_input = st.text_area("Enter a list of URLs (separated by newlines, optional)", "")
+sitemap_url = st.text_input("Enter your XML sitemap URL (optional)", st.session_state.get('sitemap_url', ""))
+url_list_input = st.text_area("Enter a list of URLs (separated by newlines, optional)", st.session_state.get('url_list_input', ""))
+
+# Store inputs in session state
+if sitemap_url:
+    st.session_state.sitemap_url = sitemap_url
+if url_list_input:
+    st.session_state.url_list_input = url_list_input
 
 # Add a "Run" button to trigger the URL processing
 if st.button("Run Analysis"):
-    # Process either sitemap or URL list
+    st.session_state.processing_complete = False
     urls = []
     if sitemap_url:
         st.write("Fetching URLs from the sitemap...")
-        # Read sitemap and extract URLs using advertools
         sitemap_df = adv.sitemap_to_df(sitemap_url)
         urls = sitemap_df['loc'].tolist()
-        #urls = urls[:50]  # Limit to first 50 URLs for testing purposes
+        st.session_state.urls = urls  # Store URLs in session state
         st.write(f"Processing {len(urls)} URLs from sitemap.")
     elif url_list_input:
-        # Parse URL list from input (newlines separated)
         urls = [url.strip() for url in url_list_input.split('\n') if url.strip()]
+        st.session_state.urls = urls  # Store URLs in session state
         st.write(f"Processing {len(urls)} URLs from the input list.")
     else:
         st.warning("Please provide either a sitemap URL or a list of URLs.")
@@ -140,6 +144,10 @@ if st.button("Run Analysis"):
         'ClusterScore': page_cluster_scores
     })
 
+    # Store results in session state
+    st.session_state.results = df
+    st.session_state.processing_complete = True
+
     # Display the DataFrame
     st.write("URL Analysis Results")
     st.dataframe(df)
@@ -160,4 +168,13 @@ if st.button("Run Analysis"):
         error_df = pd.DataFrame(error_urls, columns=["URL", "Error"])
         st.dataframe(error_df)
 else:
+    # Display results if processing is complete
+    if st.session_state.processing_complete and st.session_state.results is not None:
+        st.write("URL Analysis Results")
+        st.dataframe(st.session_state.results)
+
+        # Option to download the results as CSV
+        csv = st.session_state.results.to_csv(index=False)
+        st.download_button(label="Download data as CSV", data=csv, file_name='url_analysis_results.csv', mime='text/csv')
+
     st.info("Click 'Run Analysis' to start the process.")
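
The added lines rely on Streamlit's st.session_state: the script re-executes from top to bottom on every widget interaction, so values not written to session state are lost between reruns. A minimal, self-contained sketch of the same persist-and-download pattern follows; the widget labels, the length calculation, and the file name are placeholders for illustration, not taken from app.py.

import streamlit as st
import pandas as pd

# Persist results across reruns; Streamlit re-runs the whole script on each interaction.
if 'results' not in st.session_state:
    st.session_state.results = None

text = st.text_area("Input (one item per line)", "")

if st.button("Run"):
    items = [line.strip() for line in text.split('\n') if line.strip()]
    # Store the computed DataFrame so it is still available on the next rerun.
    st.session_state.results = pd.DataFrame({'item': items, 'length': [len(i) for i in items]})

if st.session_state.results is not None:
    st.dataframe(st.session_state.results)
    st.download_button(
        label="Download data as CSV",
        data=st.session_state.results.to_csv(index=False),
        file_name='results.csv',
        mime='text/csv',
    )

Keeping the DataFrame in session state also means clicking the download button, which itself triggers a rerun, does not discard the results or force the analysis to run again.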