Spaces:

sean-excel
/

excel-processing-ai

Sleeping

App Files Files Community

umairahmad89 committed on Sep 6

Commit

f0a94b0

•

1 Parent(s): 191079a

Add lookup to previous quarter sheet and handle no previous quarter order

Browse files

Files changed (1) hide show

app.py +51 -1

app.py CHANGED Viewed

@@ -6,6 +6,9 @@ from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 import tempfile
 import os
 # Load the sentence transformer model
 model = SentenceTransformer('BAAI/bge-small-en-v1.5')
@@ -52,7 +55,53 @@ def filter_excel2(excel_path, min_row, max_row, sheetname):
         return data
     except Exception as e:
         raise gr.Error(f"Error processing Excel 2: {str(e)}")
 def get_embeddings(texts):
     """Encode ``texts`` with the module-level sentence-transformer ``model``.

     Returns whatever ``model.encode`` produces for the given input.
     """
     encoded = model.encode(texts)
     return encoded
@@ -109,7 +158,8 @@ def update_excel(excel_path, processed_data, sheetname):
 def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
     try:
         gr.Info("Starting processing...")
         # Process Excel 1
         gr.Info("Processing Excel 1...")
         csv1_data = filter_excel1(excel1, min_row1, max_row1)

 import numpy as np
 import tempfile
 import os
+import pandas as pd
+import re
 # Load the sentence transformer model
 model = SentenceTransformer('BAAI/bge-small-en-v1.5')
         return data
     except Exception as e:
         raise gr.Error(f"Error processing Excel 2: {str(e)}")
def _previous_quarter_names(current_sheet_name):
    """Parse a '<Q>Q<YYYY>' sheet name into (quarter, prev_quarter, prev_sheet_name)."""
    # Only quarters 1-4 are meaningful; a bare ``\d`` would accept e.g. '0Q2024'
    # and silently compute a previous quarter of -1.
    match = re.match(r'([1-4])Q(\d{4})', current_sheet_name)
    if not match:
        raise ValueError("Invalid sheet name format")
    quarter, year = map(int, match.groups())
    if quarter == 1:
        # Q1 rolls back to Q4 of the preceding year.
        prev_quarter, prev_year = 4, year - 1
    else:
        prev_quarter, prev_year = quarter - 1, year
    return quarter, prev_quarter, f"{prev_quarter}Q{prev_year}"


def sheet_lookup(current_sheet_name, excel_file_path):
    """Fill the current quarter's CRI mapping column from the previous quarter sheet.

    The sheet named ``current_sheet_name`` (format ``<Q>Q<YYYY>``, e.g. ``2Q2024``)
    is read from ``excel_file_path``. If the previous quarter's sheet exists, both
    sheets are de-duplicated on 'Monitoring Tool Instance ID-AU' (first occurrence
    kept) and the column '<Q>q CRI Profile Mapping' of the current sheet is filled
    with the previous sheet's '<Q-1>q CRI Profile Mapping' value for the same ID,
    or '#N/A' when no match is found. If the previous sheet is absent, the column
    is filled entirely with '#N/A'. The current sheet is then written back to the
    same workbook, replacing the existing sheet.

    Args:
        current_sheet_name: Name of the sheet to update, e.g. ``"1Q2024"``.
        excel_file_path: Path to the workbook; it is modified in place.

    Raises:
        ValueError: If ``current_sheet_name`` does not start with ``<1-4>Q<YYYY>``.
        KeyError: If the lookup column or the previous-quarter value column is
            missing from a sheet that needs it.
    """
    # Validate the name before touching the file so a bad name fails fast.
    quarter, prev_quarter, prev_sheet_name = _previous_quarter_names(current_sheet_name)
    result_col = f"{quarter}q CRI Profile Mapping"

    # Close the read handle before the workbook is re-opened for writing below;
    # leaving it open leaks the handle and can block the write on some platforms.
    with pd.ExcelFile(excel_file_path) as xl:
        current_df = xl.parse(current_sheet_name)
        prev_df = xl.parse(prev_sheet_name) if prev_sheet_name in xl.sheet_names else None

    if prev_df is not None:
        lookup_col = 'Monitoring Tool Instance ID-AU'
        value_col = f"{prev_quarter}q CRI Profile Mapping"

        if lookup_col not in current_df.columns:
            raise KeyError(f"Column '{lookup_col}' not found in sheet {current_sheet_name}")
        missing_prev = [c for c in (lookup_col, value_col) if c not in prev_df.columns]
        if missing_prev:
            raise KeyError(f"Column(s) {missing_prev} not found in sheet {prev_sheet_name}")

        # Keep only the first row per ID in both sheets.
        current_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)
        prev_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)

        # Blank IDs must not act as lookup keys, otherwise a blank ID in the
        # current sheet could pick up the value of a blank ID in the previous one.
        keyed_prev = prev_df.dropna(subset=[lookup_col])
        lookup_dict = dict(zip(keyed_prev[lookup_col], keyed_prev[value_col]))

        current_df[result_col] = current_df[lookup_col].map(lookup_dict).fillna('#N/A')
    else:
        # No previous quarter to look up from: mark every row as unmatched.
        current_df[result_col] = '#N/A'
        print(f"Warning: Previous sheet {prev_sheet_name} not found. Filling {result_col} with '#N/A'")

    # Save the results back to the Excel file, replacing the existing sheet.
    with pd.ExcelWriter(excel_file_path, mode='a', if_sheet_exists='replace') as writer:
        current_df.to_excel(writer, sheet_name=current_sheet_name, index=False)
    print(f"Processing complete for sheet {current_sheet_name}")
 def get_embeddings(texts):
     """Encode ``texts`` with the module-level sentence-transformer ``model``.

     Returns whatever ``model.encode`` produces for the given input.
     """
     encoded = model.encode(texts)
     return encoded
 def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
     try:
         gr.Info("Starting processing...")
+        gr.Info("Doing lookup...")
+        sheet_lookup(sheetname, excel2)
         # Process Excel 1
         gr.Info("Processing Excel 1...")
         csv1_data = filter_excel1(excel1, min_row1, max_row1)