Spaces:
Sleeping
Sleeping
import pandas as pd | |
def find_mismatched_students(assigned_csv, boarded_csv, output_csv): | |
# Load the CSV files | |
assigned_df = pd.read_csv(assigned_csv) | |
boarded_df = pd.read_csv(boarded_csv) | |
# Clean up the 'Email Address' column (remove whitespaces and convert to lowercase) | |
assigned_df['Email Address'] = assigned_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower() | |
boarded_df['Email Address'] = boarded_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower() | |
# Merge the dataframes on 'Email Address' to compare assigned and boarded buses | |
merged_df = pd.merge(boarded_df, assigned_df, on='Email Address', how='left', suffixes=('_boarded', '_assigned')) | |
# Convert route numbers to strings and clean up the data | |
merged_df['routeNo_boarded'] = merged_df['routeNo_boarded'].astype(str).str.replace(r'\s+', '', regex=True).str.lower() | |
merged_df['routeNo_assigned'] = merged_df['routeNo_assigned'].astype(str).str.replace(r'\s+', '', regex=True).str.lower() | |
# Identify mismatched students (where boarded routeNo is not equal to assigned routeNo or assigned is NaN) | |
mismatched_students_df = merged_df[ | |
(merged_df['routeNo_boarded'] != merged_df['routeNo_assigned']) | | |
merged_df['routeNo_assigned'].isna() | |
] | |
# Select relevant columns to include in the output | |
mismatched_students_df = mismatched_students_df[['registerNo', 'name', 'Email Address', 'routeNo_assigned', 'routeNo_boarded']] | |
# Save the results to a new CSV file | |
mismatched_students_df.to_csv(output_csv, index=False) | |
print(f"Mismatched students have been saved to '{output_csv}'.") | |
if __name__ == "__main__": | |
# Example usage: | |
assigned_csv = 'busAssignedList.csv' # Replace with the path to your assigned CSV file | |
boarded_csv = 'attendance.csv' # Replace with the path to your boarded CSV file | |
output_csv = 'mismatched_students.csv' # Replace with your desired output file name | |
find_mismatched_students(assigned_csv, boarded_csv, output_csv) | |