Spaces:

Sarathrsk03
/

busAttendanceSystem

Sleeping

App Files Files Community

busAttendanceSystem / processAttendance /findMisMatchedStudents.py

Sarathrsk03

Upload 6 files

d8de599 verified 6 months ago

raw

history blame

2.06 kB

	import pandas as pd

	def find_mismatched_students(assigned_csv, boarded_csv, output_csv):
	# Load the CSV files
	assigned_df = pd.read_csv(assigned_csv)
	boarded_df = pd.read_csv(boarded_csv)

	# Clean up the 'Email Address' column (remove whitespaces and convert to lowercase)
	assigned_df['Email Address'] = assigned_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower()
	boarded_df['Email Address'] = boarded_df['Email Address'].str.replace(r'\s+', '', regex=True).str.lower()

	# Merge the dataframes on 'Email Address' to compare assigned and boarded buses
	merged_df = pd.merge(boarded_df, assigned_df, on='Email Address', how='left', suffixes=('_boarded', '_assigned'))

	# Convert route numbers to strings and clean up the data
	merged_df['routeNo_boarded'] = merged_df['routeNo_boarded'].astype(str).str.replace(r'\s+', '', regex=True).str.lower()
	merged_df['routeNo_assigned'] = merged_df['routeNo_assigned'].astype(str).str.replace(r'\s+', '', regex=True).str.lower()

	# Identify mismatched students (where boarded routeNo is not equal to assigned routeNo or assigned is NaN)
	mismatched_students_df = merged_df[
	(merged_df['routeNo_boarded'] != merged_df['routeNo_assigned']) \|
	merged_df['routeNo_assigned'].isna()
	]

	# Select relevant columns to include in the output
	mismatched_students_df = mismatched_students_df[['registerNo', 'name', 'Email Address', 'routeNo_assigned', 'routeNo_boarded']]

	# Save the results to a new CSV file
	mismatched_students_df.to_csv(output_csv, index=False)

	print(f"Mismatched students have been saved to '{output_csv}'.")


	if __name__ == "__main__":
	# Example usage:
	assigned_csv = 'busAssignedList.csv' # Replace with the path to your assigned CSV file
	boarded_csv = 'attendance.csv' # Replace with the path to your boarded CSV file
	output_csv = 'mismatched_students.csv' # Replace with your desired output file name

	find_mismatched_students(assigned_csv, boarded_csv, output_csv)