Spaces:
Runtime error
Runtime error
""" | |
Loads Maharashtra assembly 2019 dataset | |
""" | |
import json | |
import sqlite3 | |
import pandas as pd | |
import csv | |
def load_data_from_csv(name, end=58925): | |
data = [] | |
keys = None | |
with open(name, "r", encoding="utf-8") as f: | |
csv_data = csv.reader(f) | |
for i, line in enumerate(csv_data): | |
found = False | |
if i == 0: | |
keys = line | |
continue | |
for field in line: | |
if field.strip() == "TURNOUT": | |
found = True | |
break | |
if found: | |
# print("TURNOUT found, skipping") | |
continue | |
item = {} | |
# print(line) | |
for key, val in zip(keys, line): | |
item[key] = val | |
data.append(item) | |
return data | |
def clean_dataframe(df): | |
# Strip leading and trailing spaces from column names (without changing them) | |
df.columns = df.columns.str.strip() | |
# Strip spaces and convert text columns to lowercase | |
for col in df.select_dtypes(include='object').columns: | |
df[col] = df[col].str.strip() | |
# Fill null values with 0 | |
df.fillna(0, inplace=True) | |
return df | |
def load_data_from_csv_to_db(name, conn): | |
# read the dataset from csv file and create a pandas dataframe | |
df = pd.read_csv(open(name, "r", encoding="utf-8")) | |
# clean the dataframe | |
df = clean_dataframe(df) | |
df.columns = [ | |
'state', 'constituency_number', 'constituency', 'candidate_name', 'sex', 'age', | |
'category', 'party_name', 'party_symbol', 'evm_votes', 'postal_votes', 'total_votes', | |
'vote_percentage', 'total_electors' | |
] | |
# save the dataframe as a database table, name of table is: elections_2019 | |
result = df.to_sql("maha_2019", conn, if_exists='replace', index=False) | |
return result | |
def query_sql(conn, query): | |
cursor = conn.cursor() | |
cursor.execute(query) | |
result = cursor.fetchall() | |
field_names = [r[0] for r in cursor.description] | |
print(field_names) | |
return result | |
if __name__ == '__main__': | |
# create a connection to sql db called elections.db | |
conn = sqlite3.connect('../data/elections.db') | |
filename = r"../data/maha_results_2019.csv" | |
data = load_data_from_csv(filename, end=5) | |
# print(data) | |
res = load_data_from_csv_to_db(filename, conn) | |
# print(res) | |
query = "SELECT * FROM maha_2019 LIMIT 5;" | |
results = query_sql(conn, query) | |
print(results) | |
# keys = data.keys() | |
# for i, item in enumerate(data): | |
# print(data[item]) | |
# jdata = json.loads(data.to_json()) | |
# print(jdata) | |