MMS_1_10

Sleeping

App Files Files Community

MMS_1_10 / csv_processor.py

bomolopuu

added downloader and bam as default

48dfeff 4 months ago

raw

history blame

2.15 kB

	import hashlib
	import csv
	import os


	def hash_md5(filepath):
	    """Return the MD5 hex digest of the file at *filepath*.

	    The file is read in binary mode in fixed-size pieces (128 times the
	    digest's block size) so that it never has to be held in memory whole.

	    Note for callers: this function does not raise on failure. If opening
	    or reading the file raises any ``Exception``, the exception's message
	    (``str(exc)``) is returned in place of a digest, so the return value
	    is a string either way.
	    """
	    digest = hashlib.md5()
	    read_size = 128 * digest.block_size
	    try:
	        with open(filepath, "rb") as stream:
	            while True:
	                piece = stream.read(read_size)
	                if piece == b'':
	                    # An empty read marks end of file.
	                    break
	                digest.update(piece)
	        return digest.hexdigest()
	    except Exception as err:
	        # Best-effort contract: report the failure as text, never raise.
	        return str(err)
	# Module-level CSV file name. Not referenced by the functions visible in this
	# file; presumably callers pass it as `csv_file_path` -- TODO confirm.
	CSV_FILE_PATH = "audio_plus_hash_uniq_07102024.csv"

	def update_csv_with_files(csv_file_path, audio_old_path, audio_16000_path, new_transcription):
	    """Hash two audio files and record the result in the CSV.

	    Computes ``hash_md5`` of *audio_old_path* and of *audio_16000_path*
	    (in that order) and forwards both to ``update_csv``: the first hash is
	    the search key, the second is stored as ``hash_16000``, and
	    *audio_old_path* is the path used if a new row has to be appended.

	    NOTE(review): ``hash_md5`` returns an error message instead of raising
	    when a file cannot be read, so an unreadable file results in that
	    message being passed on as if it were a hash.
	    """
	    update_csv(
	        csv_file_path,
	        search_hash=hash_md5(audio_old_path),
	        hash_16000=hash_md5(audio_16000_path),
	        new_path=audio_old_path,
	        new_transcription=new_transcription,
	    )

	def _file_lacks_trailing_newline(csv_file_path):
	    """Return True if the file is non-empty and does not end with a line break."""
	    with open(csv_file_path, 'rb') as raw:
	        raw.seek(0, os.SEEK_END)
	        if raw.tell() == 0:
	            return False
	        raw.seek(-1, os.SEEK_END)
	        return raw.read(1) not in (b'\n', b'\r')


	def update_csv(csv_file_path, search_hash, hash_16000, new_path, new_transcription):
	    """Update the CSV row matching *search_hash*, or append a new row.

	    The first row whose ``hash`` or ``hash_16000`` column equals
	    *search_hash* gets its ``hash_16000`` and ``transcription`` columns
	    replaced, and the whole file is rewritten in place. If no row matches,
	    a new row (``hash``, ``hash_16000``, ``filepath``, ``transcription``)
	    is appended instead.

	    Args:
	        csv_file_path: Path of an existing UTF-8 CSV file with a header row.
	        search_hash: Hash to look for; also stored as ``hash`` on append.
	        hash_16000: Value written to the ``hash_16000`` column.
	        new_path: Value written to ``filepath`` when a row is appended.
	        new_transcription: Value written to the ``transcription`` column.

	    Raises:
	        FileNotFoundError: If *csv_file_path* does not exist.
	        KeyError: If the header lacks a ``hash`` or ``hash_16000`` column.
	        ValueError: If the header lacks a column that has to be written.
	    """
	    with open(csv_file_path, mode='r+', newline='', encoding='utf-8') as file:
	        reader = csv.DictReader(file)
	        fieldnames = reader.fieldnames
	        rows = list(reader)

	        for row in rows:
	            if row['hash'] == search_hash or row['hash_16000'] == search_hash:
	                row['hash_16000'] = hash_16000
	                row['transcription'] = new_transcription

	                # Rewrite the whole file from the start with the updated row.
	                file.seek(0)
	                writer = csv.DictWriter(file, fieldnames=fieldnames)
	                writer.writeheader()
	                writer.writerows(rows)
	                # The new content may be shorter than the old one; without
	                # this, leftover bytes of the old content would remain at
	                # the end of the file and corrupt it.
	                file.truncate()
	                return

	    # No match: append a new row (the read/write handle is closed by now).
	    new_row = {
	        'hash': search_hash,
	        'hash_16000': hash_16000,
	        'filepath': new_path,
	        'transcription': new_transcription,
	    }
	    # An empty file has no header; fall back to the columns of the new row.
	    header_missing = fieldnames is None
	    if header_missing:
	        fieldnames = list(new_row)
	    needs_line_break = _file_lacks_trailing_newline(csv_file_path)

	    with open(csv_file_path, mode='a', newline='', encoding='utf-8') as append_file:
	        writer = csv.DictWriter(append_file, fieldnames=fieldnames)
	        if needs_line_break:
	            # Keep the new row from being glued onto the last record;
	            # '\r\n' is the csv module's default line terminator.
	            append_file.write('\r\n')
	        if header_missing:
	            writer.writeheader()
	        writer.writerow(new_row)