# Spaces: Running
import pandas as pd | |
from datasets import Dataset | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
def fine_tune_model(uploaded_file):
    """Preview an uploaded CSV and run the demo fine-tuning setup on request.

    The CSV is read into a DataFrame, its first rows are displayed, and a
    model selector plus a "Fine-tune Model" button are rendered. When the
    button is pressed, the selected checkpoint and its tokenizer are loaded
    and the ``text`` column is tokenized. No training loop is run yet; the
    function only reports that fine-tuning has (nominally) started.

    Args:
        uploaded_file: CSV source passed straight to ``pd.read_csv``
            (presumably a Streamlit upload object -- confirm with the caller).

    Returns:
        None. Every outcome is reported through Streamlit widgets.
    """
    # A bad upload (empty file, malformed rows, wrong encoding) is a user
    # error, so report it in the UI instead of letting it propagate.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the uploaded CSV: {e}")
        return

    st.subheader("Dataset Preview")
    st.write(df.head())

    # Convert CSV to Hugging Face dataset format
    dataset = Dataset.from_pandas(df)

    model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

    if not st.button("Fine-tune Model"):
        return

    if not model_name:
        st.warning("Please select a model for fine-tuning.")
        return

    # The tokenization step reads the "text" column; check for it before
    # loading the model so the user gets an actionable message rather than
    # a bare KeyError after a slow model load.
    if "text" not in df.columns:
        st.error("Error during fine-tuning: the uploaded CSV must contain a 'text' column.")
        return

    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        def preprocess_function(examples):
            # Called with a batch of rows because of batched=True below.
            return tokenizer(examples['text'], truncation=True, padding=True)

        tokenized_datasets = dataset.map(preprocess_function, batched=True)

        # Fine-tuning logic (example). NOTE: model, tokenized_datasets and
        # train_args are not consumed yet; they are placeholders for the
        # training step that is still to be written.
        train_args = {
            "output_dir": "./results",
            "num_train_epochs": 3,
            "per_device_train_batch_size": 16,
            "logging_dir": "./logs",
        }
        st.success("Fine-tuning started (demo)!")  # Fine-tuning process goes here
    except Exception as e:
        # UI boundary: surface any load/tokenization failure to the user.
        st.error(f"Error during fine-tuning: {e}")