Sarathrsk03 committed on
Commit
d8de599
·
verified ·
1 Parent(s): c97b330

Upload 6 files

Browse files
processAttendance/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from processAttendance.findMisMatchedStudents import *
2
+ from processAttendance.convertXlsxToCsv import *
processAttendance/attendance.csv ADDED
The diff for this file is too large to render. See raw diff
 
processAttendance/busAssignedList.csv ADDED
The diff for this file is too large to render. See raw diff
 
processAttendance/convertXlsxToCsv.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def extract_route_emails_and_timestamps(xlsx_file, output_csv_file):
    """Flatten a per-route attendance workbook into a single CSV file.

    Every sheet of the workbook is treated as one route, and the sheet name
    is used as the route number.  Spaces are removed from the header names
    before they are inspected; sheets that then lack an ``EmailAddress`` or a
    ``Timestamp`` column are skipped.

    Args:
        xlsx_file: Excel workbook to read (anything ``pd.read_excel`` accepts).
        output_csv_file: Destination of the CSV file.  It is always written
            with the header ``routeNo,Email Address,Timestamp``, even when no
            sheet contributed any rows, so the result stays readable with
            ``pd.read_csv``.

    Returns:
        None.  The result is written to ``output_csv_file`` and a
        confirmation message is printed.
    """
    output_columns = ['routeNo', 'Email Address', 'Timestamp']

    # sheet_name=None loads every sheet as a {sheet name: DataFrame} mapping.
    sheets = pd.read_excel(xlsx_file, sheet_name=None)

    frames = []
    for sheet_name, sheet_data in sheets.items():
        # Header cells are not guaranteed to be text (numbers, dates), so
        # stringify each label instead of relying on the .str accessor.
        sheet_data.columns = [
            str(label).replace(' ', '') for label in sheet_data.columns
        ]

        if 'EmailAddress' not in sheet_data.columns or 'Timestamp' not in sheet_data.columns:
            continue
        if sheet_data.empty:
            # Nothing to copy; skipping also keeps empty frames from
            # influencing the dtypes chosen by pd.concat below.
            continue

        # Copy whole columns at once rather than row by row: iterrows() is
        # slow and may upcast integer values to float within a row.
        frames.append(pd.DataFrame({
            'routeNo': sheet_name,
            'Email Address': sheet_data['EmailAddress'],
            'Timestamp': sheet_data['Timestamp'],
        }))

    if frames:
        result_df = pd.concat(frames, ignore_index=True)
    else:
        # Keep the header so downstream readers do not hit an empty file.
        result_df = pd.DataFrame(columns=output_columns)

    result_df.to_csv(output_csv_file, index=False)

    print(f"CSV file '{output_csv_file}' created successfully.")
31
+
32
# The guard must compare against "__main__" (double underscores); the previous
# "_main_" never matched, so running this file as a script did nothing.
if __name__ == "__main__":
    # NOTE(review): machine-specific absolute path -- adjust before running
    # on another machine.
    xlsx_file = "/Users/sarathrajan/Documents/Projects/haversineRestAPI/utilities/datasetCreate/syntheticAlteredAttendance.xlsx"
    output_csv_file = "attendance.csv"
    extract_route_emails_and_timestamps(xlsx_file, output_csv_file)
processAttendance/findMisMatchedStudents.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def find_mismatched_students(assigned_csv, boarded_csv, output_csv):
    """Report students whose boarded bus route differs from the assigned one.

    Both inputs are joined on ``Email Address`` (whitespace removed,
    lower-cased).  Every row of the boarded list is kept; a row is reported
    when its boarded route differs from the assigned route, or when either
    route is missing (for example, the e-mail is not in the assigned list).
    Route numbers are compared as text, ignoring whitespace and letter case.

    Args:
        assigned_csv: CSV with at least the columns ``registerNo``, ``name``,
            ``Email Address`` and ``routeNo``.
        boarded_csv: CSV with at least the columns ``Email Address`` and
            ``routeNo``.
        output_csv: Destination CSV.  Written with the columns ``registerNo``,
            ``name``, ``Email Address``, ``routeNo_assigned`` and
            ``routeNo_boarded``; missing values are left as empty cells.

    Returns:
        None.  The result is written to ``output_csv`` and a confirmation
        message is printed.
    """
    def _normalise(series):
        # Strip all whitespace and lower-case; missing values stay missing.
        return series.str.replace(r'\s+', '', regex=True).str.lower()

    # Read every column as text.  With inferred dtypes an all-numeric route
    # column becomes float as soon as the left merge introduces NaN, so "1"
    # would be compared against "1.0" and every student would be reported.
    assigned_df = pd.read_csv(assigned_csv, dtype=str)
    boarded_df = pd.read_csv(boarded_csv, dtype=str)

    for frame in (assigned_df, boarded_df):
        frame['Email Address'] = _normalise(frame['Email Address'])
        frame['routeNo'] = _normalise(frame['routeNo'])

    # Left merge: keep every boarded row, attach the assignment if there is one.
    merged_df = pd.merge(
        boarded_df,
        assigned_df,
        on='Email Address',
        how='left',
        suffixes=('_boarded', '_assigned'),
    )

    boarded_route = merged_df['routeNo_boarded']
    assigned_route = merged_df['routeNo_assigned']

    # The missing-value checks are explicit so the result does not depend on
    # how "!=" treats NaN/NA for the column dtype in use.
    is_mismatch = (
        assigned_route.isna()
        | boarded_route.isna()
        | (boarded_route != assigned_route)
    )

    mismatched_students_df = merged_df.loc[
        is_mismatch,
        ['registerNo', 'name', 'Email Address', 'routeNo_assigned', 'routeNo_boarded'],
    ]

    mismatched_students_df.to_csv(output_csv, index=False)

    print(f"Mismatched students have been saved to '{output_csv}'.")
32
+
33
+
34
if __name__ == "__main__":
    # Example run: compare the assigned-bus list against the boarded
    # (attendance) list and write the differences to a new CSV.
    # Replace the three paths below with your own files as needed.
    find_mismatched_students(
        assigned_csv='busAssignedList.csv',
        boarded_csv='attendance.csv',
        output_csv='mismatched_students.csv',
    )
processAttendance/mismatchedStudents.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ registerNo,name,Email Address,routeNo_assigned,routeNo_boarded
2
+ ,,[email protected],nan,1
3
+ 23BCE1030,Alan Price,[email protected],1,3
4
+ 21BMV1032,Alan Ramirez,[email protected],1,3
5
+ 23BEE1001,Alan Rivera,[email protected],1,3
6
+ 24PHD1037,Albert Smith,[email protected],1a,20a
7
+ 22BBH1027,Albert Taylor,[email protected],1a,20a
8
+ 24BCE1009,Albert Turner,[email protected],1a,20a
9
+ 24BLA1046,Albert Wright,[email protected],1a,20a
10
+ 24BLB1001,Alexander Alvarez,[email protected],1a,20a
11
+ 21PHD1031,Alexander Brooks,[email protected],1a,20a
12
+ 21MIS1028,Deborah Reyes,[email protected],27a,34
13
+ 24BEC1026,Deborah Scott,[email protected],27a,34
14
+ 23BME1003,Deborah Thomas,[email protected],27a,34
15
+ 23BMV1028,Deborah Watson,[email protected],27a,34
16
+ 24BMV1023,Deborah Williams,[email protected],27a,34