"""Build an instruction-formatted Q&A CSV from the MedQuAD dataset.

The script logs in to the Hugging Face Hub, loads the ``train`` split of
``tatvamasi/medquad-std``, joins each row's ``Question`` and ``Answer``
columns into a single ``[INST] ... [/INST] ...`` string, writes those strings
to a one-column CSV (column name ``Text``) and finally reads the CSV back as
a sanity check.
"""

from datasets import load_dataset, load_from_disk
import pandas as pd
from huggingface_hub import interpreter_login

# Hub id of the dataset to convert.
DATASET_NAME = "tatvamasi/medquad-std"
# The CSV is written and then re-read; keep the path in one place so the two
# calls cannot drift apart.
OUTPUT_CSV = "formatted_qna_set.csv"


def format_row(row: pd.Series) -> str:
    """Return one dataset row rendered in the instruction format.

    Args:
        row: A row (or any mapping) exposing ``'Question'`` and ``'Answer'``.

    Returns:
        ``"[INST] <question> [/INST] <answer> "`` -- the trailing space is
        part of the emitted format.
    """
    question = row['Question']
    answer = row['Answer']
    return f"[INST] {question} [/INST] {answer} "


# Authenticate against the Hugging Face Hub before requesting the dataset.
interpreter_login()

# Alternative for a copy already saved locally:
#   dataset = load_from_disk("Path to folder")
dataset = load_dataset(DATASET_NAME, split="train")
print(dataset)

# Convert to a DataFrame and show the first few rows.
df = pd.DataFrame(dataset)
print(df.head(2))

# Merge the question and answer of every row into the instruction format.
df['Formatted'] = df.apply(format_row, axis=1)
print(df['Formatted'])

# Keep only the formatted text, exposed under the column name 'Text'.
new_df = df.rename(columns={'Formatted': 'Text'})
new_df = new_df[['Text']]
print(new_df.head(3))

# Persist without the index so the CSV holds the single 'Text' column.
new_df.to_csv(OUTPUT_CSV, index=False)

# Read the file back to confirm it round-trips.
df_from_disk = pd.read_csv(OUTPUT_CSV)
print(df_from_disk.head(2))