Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files
processAttendance/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from processAttendance.findMisMatchedStudents import *
|
2 |
+
from processAttendance.convertXlsxToCsv import *
|
processAttendance/attendance.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
processAttendance/busAssignedList.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
processAttendance/convertXlsxToCsv.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def extract_route_emails_and_timestamps(xlsx_file, output_csv_file):
    """Flatten every sheet of an Excel workbook into a single attendance CSV.

    Each sheet is treated as one route: the sheet name becomes the
    ``routeNo`` value of every row taken from that sheet. Spaces are removed
    from the sheet's column names before looking for the ``EmailAddress`` and
    ``Timestamp`` columns; sheets lacking either column are skipped.

    Args:
        xlsx_file: Path (or file-like object) of the workbook to read.
        output_csv_file: Destination path of the CSV to write.

    The CSV always has the header ``routeNo,Email Address,Timestamp``, even
    when no sheet contributed any rows, so that downstream readers can rely
    on the columns being present.
    """
    output_columns = ['routeNo', 'Email Address', 'Timestamp']

    # sheet_name=None loads all sheets as a {sheet name: DataFrame} mapping.
    sheets = pd.read_excel(xlsx_file, sheet_name=None)

    per_route_frames = []
    for sheet_name, sheet_data in sheets.items():
        # Headers are stringified first: a numeric or blank header would
        # otherwise break (or turn into NaN under) a string-only replace.
        cleaned = sheet_data.rename(columns=lambda col: str(col).replace(' ', ''))

        if 'EmailAddress' not in cleaned.columns or 'Timestamp' not in cleaned.columns:
            continue

        # Take the two columns of interest in one slice instead of row by row.
        route_rows = cleaned.loc[:, ['EmailAddress', 'Timestamp']].rename(
            columns={'EmailAddress': 'Email Address'}
        )
        route_rows.insert(0, 'routeNo', sheet_name)
        per_route_frames.append(route_rows)

    if per_route_frames:
        result_df = pd.concat(per_route_frames, ignore_index=True)
    else:
        # Without explicit columns an empty frame is written with no header.
        result_df = pd.DataFrame(columns=output_columns)

    result_df.to_csv(output_csv_file, index=False)

    print(f"CSV file '{output_csv_file}' created successfully.")
|
31 |
+
|
32 |
+
# Script entry point. The guard must compare against "__main__" (double
# underscores on both sides); with "_main_" the body below never executes.
if __name__ == "__main__":
    import sys

    # Optional overrides: argv[1] = input workbook, argv[2] = output CSV.
    # The defaults are the original hard-coded locations.
    xlsx_file = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/Users/sarathrajan/Documents/Projects/haversineRestAPI/utilities/datasetCreate/syntheticAlteredAttendance.xlsx"
    )
    output_csv_file = sys.argv[2] if len(sys.argv) > 2 else "attendance.csv"
    extract_route_emails_and_timestamps(xlsx_file, output_csv_file)
|
processAttendance/findMisMatchedStudents.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def find_mismatched_students(assigned_csv, boarded_csv, output_csv):
    """Write the students whose boarded route differs from their assigned one.

    Both input CSVs must contain ``Email Address`` and ``routeNo`` columns;
    the assigned list must also contain ``registerNo`` and ``name``. Emails
    and route labels are compared after removing all whitespace and
    lower-casing.

    Args:
        assigned_csv: Path of the CSV listing each student's assigned route.
        boarded_csv: Path of the CSV listing the route each student boarded.
        output_csv: Path of the CSV to write.

    The output has the columns ``registerNo, name, Email Address,
    routeNo_assigned, routeNo_boarded`` and one row per boarding record that
    either has no assignment (those fields are left empty) or whose assigned
    route differs from the boarded route.
    """
    # Read everything as text. Letting pandas infer numbers makes an
    # integer-only route column turn into floats once the left merge
    # introduces missing values, so "1" would be compared against "1.0".
    assigned_df = pd.read_csv(assigned_csv, dtype=str)
    boarded_df = pd.read_csv(boarded_csv, dtype=str)

    # Normalise the join key and the route labels before merging. The .str
    # methods leave missing values as NaN rather than the string 'nan'.
    for frame in (assigned_df, boarded_df):
        for column in ('Email Address', 'routeNo'):
            frame[column] = frame[column].str.replace(r'\s+', '', regex=True).str.lower()

    # Left merge keeps every boarding record, matched or not.
    merged_df = pd.merge(
        boarded_df,
        assigned_df,
        on='Email Address',
        how='left',
        suffixes=('_boarded', '_assigned'),
    )

    # A record is a mismatch when no assignment exists or the routes differ.
    has_no_assignment = merged_df['routeNo_assigned'].isna()
    routes_differ = merged_df['routeNo_boarded'] != merged_df['routeNo_assigned']
    mismatched_students_df = merged_df[has_no_assignment | routes_differ]

    mismatched_students_df = mismatched_students_df[
        ['registerNo', 'name', 'Email Address', 'routeNo_assigned', 'routeNo_boarded']
    ]

    mismatched_students_df.to_csv(output_csv, index=False)

    print(f"Mismatched students have been saved to '{output_csv}'.")
|
32 |
+
|
33 |
+
|
34 |
+
if __name__ == "__main__":
    # Example run against files in the current working directory; swap in
    # the paths of your own assigned list, boarded list and output file.
    assigned_csv, boarded_csv, output_csv = (
        'busAssignedList.csv',
        'attendance.csv',
        'mismatched_students.csv',
    )

    find_mismatched_students(assigned_csv, boarded_csv, output_csv)
|
processAttendance/mismatchedStudents.csv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
registerNo,name,Email Address,routeNo_assigned,routeNo_boarded
|
2 |
+
,,[email protected],nan,1
|
3 |
+
23BCE1030,Alan Price,[email protected],1,3
|
4 |
+
21BMV1032,Alan Ramirez,[email protected],1,3
|
5 |
+
23BEE1001,Alan Rivera,[email protected],1,3
|
6 |
+
24PHD1037,Albert Smith,[email protected],1a,20a
|
7 |
+
22BBH1027,Albert Taylor,[email protected],1a,20a
|
8 |
+
24BCE1009,Albert Turner,[email protected],1a,20a
|
9 |
+
24BLA1046,Albert Wright,[email protected],1a,20a
|
10 |
+
24BLB1001,Alexander Alvarez,[email protected],1a,20a
|
11 |
+
21PHD1031,Alexander Brooks,[email protected],1a,20a
|
12 |
+
21MIS1028,Deborah Reyes,[email protected],27a,34
|
13 |
+
24BEC1026,Deborah Scott,[email protected],27a,34
|
14 |
+
23BME1003,Deborah Thomas,[email protected],27a,34
|
15 |
+
23BMV1028,Deborah Watson,[email protected],27a,34
|
16 |
+
24BMV1023,Deborah Williams,[email protected],27a,34
|