mtyrrell committed on
Commit
cb542f5
·
1 Parent(s): 60ede2d

excel template

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. app.py +66 -10
  3. processed_applications.csv +0 -0
  4. processed_data.csv +0 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -8,11 +8,36 @@ import os
8
  import torch
9
  import re
10
  import time
11
- from huggingface_hub import login, HfApi
 
 
 
 
 
 
12
 
13
  hf_token = os.environ["HF_TOKEN"]
14
  login(token=hf_token, add_to_git_credential=True)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Function to clean text
17
  def clean_text(input_text):
18
  cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
@@ -76,12 +101,12 @@ if 'data_processed' not in st.session_state:
76
  def process_data(uploaded_file):
77
  df = pd.read_excel(uploaded_file)
78
  # Column renaming and initial processing
79
- df.rename(columns={'Signature (intern)': 'id', 'Scope of the project': 'scope_txt',
80
- 'Business case / model related to the proposed technologies / practices': 'tech_txt',
81
- 'Financial support mechanism(s)': 'fin_txt',
82
- 'Barrier analysis': 'bar_txt',
83
- 'Technical assistance (TA) measures': 'ta_txt',
84
- 'Mitigation potential in tCO2e': 'ghg_txt'}, inplace=True)
85
  df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
86
  df.fillna('', inplace=True)
87
  df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
@@ -152,6 +177,30 @@ def process_data(uploaded_file):
152
  # Streamlit app
153
  st.title('MAF Application Pre-Filtering Tool')
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  with st.expander("ℹ️ - About this app", expanded=False):
156
  st.write(
157
  """
@@ -164,7 +213,7 @@ with st.expander("ℹ️ - About this app", expanded=False):
164
  """)
165
  st.image('pipeline.png')
166
 
167
- uploaded_file = st.file_uploader("Choose a file")
168
 
169
 
170
  if uploaded_file is not None:
@@ -173,13 +222,20 @@ if uploaded_file is not None:
173
  st.session_state['data_processed'] = True
174
 
175
  df = st.session_state['df']
 
 
 
 
 
 
176
 
177
- output_file = 'processed_data.csv'
 
178
  df.to_csv(output_file, index=False)
179
  st.download_button(
180
  label="Download data as CSV",
181
  data=open(output_file, 'rb'),
182
- file_name='processed_results.csv',
183
  mime='text/csv',
184
  )
185
 
 
8
  import torch
9
  import re
10
  import time
11
+ from huggingface_hub import login
12
+ from openpyxl import Workbook
13
+ from openpyxl.styles import Font, NamedStyle, PatternFill
14
+ from openpyxl.styles.differential import DifferentialStyle
15
+ from io import BytesIO
16
+ from datetime import datetime
17
+
18
 
19
  hf_token = os.environ["HF_TOKEN"]
20
  login(token=hf_token, add_to_git_credential=True)
21
 
22
+
23
+ def create_excel():
24
+ # Create a workbook and select the active worksheet
25
+ wb = Workbook()
26
+ sheet = wb.active
27
+ sheet.title = "template"
28
+ columns = ['id','scope','technology','financial','barrier']
29
+ sheet.append(columns) # Appending columns to the first row
30
+
31
+ # formatting
32
+ for c in sheet['A1:E4'][0]:
33
+ c.fill = PatternFill('solid', fgColor = 'bad8e1')
34
+ c.font = Font(bold=True)
35
+
36
+ # Save to a BytesIO object
37
+ output = BytesIO()
38
+ wb.save(output)
39
+ return output.getvalue()
40
+
41
  # Function to clean text
42
  def clean_text(input_text):
43
  cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
 
101
  def process_data(uploaded_file):
102
  df = pd.read_excel(uploaded_file)
103
  # Column renaming and initial processing
104
+ df.rename(columns={
105
+ 'id': 'id',
106
+ 'scope': 'scope_txt',
107
+ 'technology': 'tech_txt',
108
+ 'financial': 'fin_txt',
109
+ 'barrier': 'bar_txt'}, inplace=True)
110
  df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
111
  df.fillna('', inplace=True)
112
  df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
 
177
  # Streamlit app
178
  st.title('MAF Application Pre-Filtering Tool')
179
 
180
+ # Sidebar (filters)
181
+ with st.sidebar:
182
+ with st.expander("ℹ️ - Instructions", expanded=False):
183
+ st.markdown(
184
+ """
185
+ 1. **Download the Excel Template file (below).**
186
+ 2. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
187
+ 3. **Upload the template file in the area to the right (or click browse files).**
188
+
189
+ The tool will immediately start processing the uploaded application data. This can take considerable time
190
+ depending on the number of applications and the length of text in each. For example, a file with 500 applications
191
+ could be expected to take approximately 20 minutes.
192
+
193
+ *Note - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
194
+ """
195
+ )
196
+ # Excel file download
197
+ st.download_button(
198
+ label="Download Excel Template",
199
+ data=create_excel(),
200
+ file_name="MAF_upload_template.xlsx",
201
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
202
+ )
203
+
204
  with st.expander("ℹ️ - About this app", expanded=False):
205
  st.write(
206
  """
 
213
  """)
214
  st.image('pipeline.png')
215
 
216
+ uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")
217
 
218
 
219
  if uploaded_file is not None:
 
222
  st.session_state['data_processed'] = True
223
 
224
  df = st.session_state['df']
225
+
226
+ # Get the current date
227
+ date = datetime.now().strftime('%d-%m-%Y')
228
+
229
+ # Format the date as 'DD-MM-YYYY'
230
+ # formatted_date = today_date.strftime('%d-%m-%Y')
231
 
232
+ output_file = 'processed_applications.csv'
233
+ output_filename = 'processed_applications_'+date+'.csv'
234
  df.to_csv(output_file, index=False)
235
  st.download_button(
236
  label="Download data as CSV",
237
  data=open(output_file, 'rb'),
238
+ file_name=output_filename,
239
  mime='text/csv',
240
  )
241
 
processed_applications.csv ADDED
The diff for this file is too large to render. See raw diff
 
processed_data.csv ADDED
The diff for this file is too large to render. See raw diff