Spaces:

mtyrrell
/

maf_prefilter_app

Sleeping

App Files Files Community

mtyrrell committed on Apr 20, 2024

Commit

cb542f5

1 Parent(s): 60ede2d

excel template

Browse files

Files changed (4) hide show

.DS_Store +0 -0
app.py +66 -10
processed_applications.csv +0 -0
processed_data.csv +0 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app.py CHANGED Viewed

@@ -8,11 +8,36 @@ import os
 import torch
 import re
 import time
-from huggingface_hub import login, HfApi
 hf_token = os.environ["HF_TOKEN"]
 login(token=hf_token, add_to_git_credential=True)
 # Function to clean text
 def clean_text(input_text):
     cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
@@ -76,12 +101,12 @@ if 'data_processed' not in st.session_state:
 def process_data(uploaded_file):
     df = pd.read_excel(uploaded_file)
     # Column renaming and initial processing
-    df.rename(columns={'Signature (intern)': 'id', 'Scope of the project': 'scope_txt',
-                       'Business case / model related to the proposed technologies / practices': 'tech_txt',
-                       'Financial support mechanism(s)': 'fin_txt',
-                       'Barrier analysis': 'bar_txt',
-                       'Technical assistance (TA) measures': 'ta_txt',
-                       'Mitigation potential in tCO2e': 'ghg_txt'}, inplace=True)
     df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
     df.fillna('', inplace=True)
     df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
@@ -152,6 +177,30 @@ def process_data(uploaded_file):
 # Streamlit app
 st.title('MAF Application Pre-Filtering Tool')
 with st.expander("ℹ️ - About this app", expanded=False):
     st.write(
         """
@@ -164,7 +213,7 @@ with st.expander("ℹ️ - About this app", expanded=False):
         """)
     st.image('pipeline.png')
-uploaded_file = st.file_uploader("Choose a file")
 if uploaded_file is not None:
@@ -173,13 +222,20 @@ if uploaded_file is not None:
         st.session_state['data_processed'] = True
     df = st.session_state['df']
-    output_file = 'processed_data.csv'
     df.to_csv(output_file, index=False)
     st.download_button(
         label="Download data as CSV",
         data=open(output_file, 'rb'),
-        file_name='processed_results.csv',
         mime='text/csv',
     )

 import torch
 import re
 import time
+from huggingface_hub import login
+from openpyxl import Workbook
+from openpyxl.styles import Font, NamedStyle, PatternFill
+from openpyxl.styles.differential import DifferentialStyle
+from io import BytesIO
+from datetime import datetime
 hf_token = os.environ["HF_TOKEN"]
 login(token=hf_token, add_to_git_credential=True)
+def create_excel():
+    # Create a workbook and select the active worksheet
+    wb = Workbook()
+    sheet = wb.active
+    sheet.title = "template"
+    columns = ['id','scope','technology','financial','barrier']
+    sheet.append(columns)  # Appending columns to the first row
+    # formatting
+    for c in sheet['A1:E4'][0]:
+        c.fill = PatternFill('solid', fgColor = 'bad8e1')
+        c.font = Font(bold=True)
+    # Save to a BytesIO object
+    output = BytesIO()
+    wb.save(output)
+    return output.getvalue()
 # Function to clean text
 def clean_text(input_text):
     cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
 def process_data(uploaded_file):
     df = pd.read_excel(uploaded_file)
     # Column renaming and initial processing
+    df.rename(columns={
+        'id': 'id',
+        'scope': 'scope_txt',
+        'technology': 'tech_txt',
+        'financial': 'fin_txt',
+        'barrier': 'bar_txt'}, inplace=True)
     df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
     df.fillna('', inplace=True)
     df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
 # Streamlit app
 st.title('MAF Application Pre-Filtering Tool')
+# Sidebar (filters)
+with st.sidebar:
+    with st.expander("ℹ️ - Instructions", expanded=False):
+        st.markdown(
+            """
+            1. **Download the Excel Template file (below).**
+            2. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
+            3. **Upload the template file in the area to the right (or click browse files).**
+            The tool will immediately start processing the uploaded application data. This can take considerable time
+            depending on the number of applications and the length of text in each. For example, a file with 500 applications
+            could be expected to take approximately 20 minutes.
+            *Note - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
+            """
+        )
+    # Excel file download
+    st.download_button(
+        label="Download Excel Template",
+        data=create_excel(),
+        file_name="MAF_upload_template.xlsx",
+        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    )
 with st.expander("ℹ️ - About this app", expanded=False):
     st.write(
         """
         """)
     st.image('pipeline.png')
+uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")
 if uploaded_file is not None:
         st.session_state['data_processed'] = True
     df = st.session_state['df']
+    # Get the current date as 'DD-MM-YYYY' for use in the download file name
+    date = datetime.now().strftime('%d-%m-%Y')
+    # Format the date as 'DD-MM-YYYY'
+    # formatted_date = today_date.strftime('%d-%m-%Y')
+    output_file = 'processed_applications.csv'
+    output_filename = 'processed_applications_'+date+'.csv'
     df.to_csv(output_file, index=False)
     st.download_button(
         label="Download data as CSV",
         data=open(output_file, 'rb'),
+        file_name=output_filename,
         mime='text/csv',
     )

processed_applications.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

processed_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff