busAttendanceSystem / processAttendance /findMisMatchedStudents.py
Sarathrsk03's picture
Upload 6 files
d8de599 verified
raw
history blame
2.06 kB
import pandas as pd
def find_mismatched_students(assigned_csv, boarded_csv, output_csv):
# Load the CSV files
assigned_df = pd.read_csv(assigned_csv)
boarded_df = pd.read_csv(boarded_csv)
# Clean up the 'Email Address' column (remove whitespaces and convert to lowercase)
assigned_df['Email Address'] = assigned_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower()
boarded_df['Email Address'] = boarded_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower()
# Merge the dataframes on 'Email Address' to compare assigned and boarded buses
merged_df = pd.merge(boarded_df, assigned_df, on='Email Address', how='left', suffixes=('_boarded', '_assigned'))
# Convert route numbers to strings and clean up the data
merged_df['routeNo_boarded'] = merged_df['routeNo_boarded'].astype(str).str.replace(r'\s+', '', regex=True).str.lower()
merged_df['routeNo_assigned'] = merged_df['routeNo_assigned'].astype(str).str.replace(r'\s+', '', regex=True).str.lower()
# Identify mismatched students (where boarded routeNo is not equal to assigned routeNo or assigned is NaN)
mismatched_students_df = merged_df[
(merged_df['routeNo_boarded'] != merged_df['routeNo_assigned']) |
merged_df['routeNo_assigned'].isna()
]
# Select relevant columns to include in the output
mismatched_students_df = mismatched_students_df[['registerNo', 'name', 'Email Address', 'routeNo_assigned', 'routeNo_boarded']]
# Save the results to a new CSV file
mismatched_students_df.to_csv(output_csv, index=False)
print(f"Mismatched students have been saved to '{output_csv}'.")
if __name__ == "__main__":
# Example usage:
assigned_csv = 'busAssignedList.csv' # Replace with the path to your assigned CSV file
boarded_csv = 'attendance.csv' # Replace with the path to your boarded CSV file
output_csv = 'mismatched_students.csv' # Replace with your desired output file name
find_mismatched_students(assigned_csv, boarded_csv, output_csv)