import pandas as pd def extract_route_emails_and_timestamps(xlsx_file, output_csv_file): # Load the Excel file sheets = pd.read_excel(xlsx_file, sheet_name=None) # Prepare a list to hold the data data = [] # Iterate over each sheet for sheet_name, sheet_data in sheets.items(): # Extract the route number from the sheet name route_no = sheet_name # Remove spaces from column names sheet_data.columns = sheet_data.columns.str.replace(' ', '') # Check if required columns exist in the sheet if 'EmailAddress' in sheet_data.columns and 'Timestamp' in sheet_data.columns: # Append the route number, email IDs, and timestamps to the data list for _, row in sheet_data.iterrows(): data.append({'routeNo': route_no, 'Email Address': row['EmailAddress'], 'Timestamp': row['Timestamp']}) # Convert the list of data to a DataFrame result_df = pd.DataFrame(data) # Save the DataFrame to a CSV file result_df.to_csv(output_csv_file, index=False) print(f"CSV file '{output_csv_file}' created successfully.") if __name__ == "_main_": xlsx_file = "/Users/sarathrajan/Documents/Projects/haversineRestAPI/utilities/datasetCreate/syntheticAlteredAttendance.xlsx" output_csv_file = "attendance.csv" extract_route_emails_and_timestamps(xlsx_file, output_csv_file)