Spaces:
Sleeping
Sleeping
excel template
Browse files- .DS_Store +0 -0
- app.py +66 -10
- processed_applications.csv +0 -0
- processed_data.csv +0 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
app.py
CHANGED
@@ -8,11 +8,36 @@ import os
|
|
8 |
import torch
|
9 |
import re
|
10 |
import time
|
11 |
-
from huggingface_hub import login
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
hf_token = os.environ["HF_TOKEN"]
|
14 |
login(token=hf_token, add_to_git_credential=True)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Function to clean text
|
17 |
def clean_text(input_text):
|
18 |
cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
|
@@ -76,12 +101,12 @@ if 'data_processed' not in st.session_state:
|
|
76 |
def process_data(uploaded_file):
|
77 |
df = pd.read_excel(uploaded_file)
|
78 |
# Column renaming and initial processing
|
79 |
-
df.rename(columns={
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
|
86 |
df.fillna('', inplace=True)
|
87 |
df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
|
@@ -152,6 +177,30 @@ def process_data(uploaded_file):
|
|
152 |
# Streamlit app
|
153 |
st.title('MAF Application Pre-Filtering Tool')
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
with st.expander("ℹ️ - About this app", expanded=False):
|
156 |
st.write(
|
157 |
"""
|
@@ -164,7 +213,7 @@ with st.expander("ℹ️ - About this app", expanded=False):
|
|
164 |
""")
|
165 |
st.image('pipeline.png')
|
166 |
|
167 |
-
uploaded_file = st.file_uploader("
|
168 |
|
169 |
|
170 |
if uploaded_file is not None:
|
@@ -173,13 +222,20 @@ if uploaded_file is not None:
|
|
173 |
st.session_state['data_processed'] = True
|
174 |
|
175 |
df = st.session_state['df']
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
-
output_file = '
|
|
|
178 |
df.to_csv(output_file, index=False)
|
179 |
st.download_button(
|
180 |
label="Download data as CSV",
|
181 |
data=open(output_file, 'rb'),
|
182 |
-
file_name=
|
183 |
mime='text/csv',
|
184 |
)
|
185 |
|
|
|
8 |
import torch
|
9 |
import re
|
10 |
import time
|
11 |
+
from huggingface_hub import login
|
12 |
+
from openpyxl import Workbook
|
13 |
+
from openpyxl.styles import Font, NamedStyle, PatternFill
|
14 |
+
from openpyxl.styles.differential import DifferentialStyle
|
15 |
+
from io import BytesIO
|
16 |
+
from datetime import datetime
|
17 |
+
|
18 |
|
19 |
hf_token = os.environ["HF_TOKEN"]
|
20 |
login(token=hf_token, add_to_git_credential=True)
|
21 |
|
22 |
+
|
23 |
+
def create_excel():
    """Build the blank upload template and return it as XLSX bytes.

    The workbook contains a single sheet named "template" whose first row
    holds the column headers 'id', 'scope', 'technology', 'financial' and
    'barrier'. Every header cell gets a solid 'bad8e1' fill and a bold font.

    Returns:
        bytes: The serialized workbook contents.
    """
    wb = Workbook()
    sheet = wb.active
    sheet.title = "template"
    columns = ['id', 'scope', 'technology', 'financial', 'barrier']
    sheet.append(columns)  # the sheet is empty, so the headers land in row 1

    # Style the header row by row index rather than a hard-coded cell range:
    # this follows the number of headers actually appended and does not
    # create empty cells in the rows below the header.
    for cell in sheet[1]:
        cell.fill = PatternFill('solid', fgColor='bad8e1')
        cell.font = Font(bold=True)

    # Serialize in memory so no file is written to disk.
    output = BytesIO()
    wb.save(output)
    return output.getvalue()
|
40 |
+
|
41 |
# Function to clean text
|
42 |
def clean_text(input_text):
|
43 |
cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,:;!?()\-\n]", "", input_text)
|
|
|
101 |
def process_data(uploaded_file):
|
102 |
df = pd.read_excel(uploaded_file)
|
103 |
# Column renaming and initial processing
|
104 |
+
df.rename(columns={
|
105 |
+
'id': 'id',
|
106 |
+
'scope': 'scope_txt',
|
107 |
+
'technology': 'tech_txt',
|
108 |
+
'financial': 'fin_txt',
|
109 |
+
'barrier': 'bar_txt'}, inplace=True)
|
110 |
df = df.filter(['id', 'scope_txt', 'tech_txt', 'fin_txt', 'bar_txt'])
|
111 |
df.fillna('', inplace=True)
|
112 |
df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']] = df[['scope_txt', 'tech_txt', 'fin_txt', 'bar_txt']].applymap(clean_text)
|
|
|
177 |
# Streamlit app
|
178 |
st.title('MAF Application Pre-Filtering Tool')
|
179 |
|
180 |
+
# Sidebar (filters)
|
181 |
+
with st.sidebar:
|
182 |
+
with st.expander("ℹ️ - Instructions", expanded=False):
|
183 |
+
st.markdown(
|
184 |
+
"""
|
185 |
+
1. **Download the Excel Template file (below).**
|
186 |
+
2. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
|
187 |
+
3. **Upload the template file in the area to the right (or click browse files).**
|
188 |
+
|
189 |
+
The tool will immediately start processing the uploaded application data. This can take considerable time
|
190 |
+
depending on the number of applications and the length of text in each. For example, a file with 500 applications
|
191 |
+
could be expected to take approximately 20 minutes.
|
192 |
+
|
193 |
+
*Note - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
|
194 |
+
"""
|
195 |
+
)
|
196 |
+
# Excel file download
|
197 |
+
st.download_button(
|
198 |
+
label="Download Excel Template",
|
199 |
+
data=create_excel(),
|
200 |
+
file_name="MAF_upload_template.xlsx",
|
201 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
202 |
+
)
|
203 |
+
|
204 |
with st.expander("ℹ️ - About this app", expanded=False):
|
205 |
st.write(
|
206 |
"""
|
|
|
213 |
""")
|
214 |
st.image('pipeline.png')
|
215 |
|
216 |
+
uploaded_file = st.file_uploader("Select a file containing MAF application pre-filtering data (see instructions in the sidebar)")
|
217 |
|
218 |
|
219 |
if uploaded_file is not None:
|
|
|
222 |
st.session_state['data_processed'] = True
|
223 |
|
224 |
df = st.session_state['df']
|
225 |
+
|
226 |
+
# Get the current date
|
227 |
+
date = datetime.now().strftime('%d-%m-%Y')
|
228 |
+
|
229 |
+
# Format the date as 'DD-MM-YYYY'
|
230 |
+
# formatted_date = today_date.strftime('%d-%m-%Y')
|
231 |
|
232 |
+
output_file = 'processed_applications.csv'
|
233 |
+
output_filename = 'processed_applications_'+date+'.csv'
|
234 |
df.to_csv(output_file, index=False)
|
235 |
st.download_button(
|
236 |
label="Download data as CSV",
|
237 |
data=open(output_file, 'rb'),
|
238 |
+
file_name=output_filename,
|
239 |
mime='text/csv',
|
240 |
)
|
241 |
|
processed_applications.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
processed_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|