louiecerv committed on
Commit
fa189e6
·
1 Parent(s): db45b4c

sync with remote

Browse files
Files changed (5) hide show
  1. app.py +28 -0
  2. dataset.py +15 -0
  3. requirements.txt +5 -0
  4. train_model.py +46 -0
  5. upload_tokenizer.py +7 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end for the ``louiecerv/sentiment_analysis_model`` classifier.

The page shows a text area for a movie review and, when "Analyze" is
pressed, displays the predicted sentiment together with its confidence.
"""

import streamlit as st
from transformers import pipeline
import torch

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "louiecerv/sentiment_analysis_model"

# Display names for the generic ids a model config emits when it carries no
# id2label mapping. The training data uses 0 for negative and 1 for positive
# reviews. Any other label string is shown unchanged.
LABEL_NAMES = {"LABEL_0": "Negative", "LABEL_1": "Positive"}


@st.cache_resource
def load_classifier():
    """Build the text-classification pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids reloading the model each time.

    Returns:
        The ``transformers`` pipeline bound to ``model_name``.
    """
    return pipeline(
        "text-classification",
        model=model_name,
        tokenizer=model_name,
        # The pipeline API takes a CUDA device index, or -1 for the CPU.
        device=0 if device == "cuda" else -1,
    )


classifier = load_classifier()

print(f"Using device: {device}")

# Streamlit UI
st.title("Sentiment Analysis App")
st.write("Enter a movie review and get its sentiment.")

user_input = st.text_area("Enter review:")

if st.button("Analyze"):
    # Treat whitespace-only input the same as an empty box.
    review = user_input.strip()
    if review:
        # truncation=True keeps very long reviews within the model's
        # maximum sequence length instead of raising an error.
        prediction = classifier(review, truncation=True)
        label = prediction[0]['label']
        confidence = prediction[0]['score']

        st.write(f"### Sentiment: {LABEL_NAMES.get(label, label)}")
        st.write(f"Confidence: {confidence:.2f}")
    else:
        st.warning("Please enter a review.")
dataset.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Create the tiny movie-review sentiment dataset and publish it to the Hub."""

from datasets import Dataset, DatasetDict
from huggingface_hub import HfApi

# (review text, label) pairs; label 1 marks a positive review, 0 a negative one.
_REVIEWS = (
    ("I loved this movie! It was fantastic!", 1),
    ("Terrible film. Would not recommend.", 0),
    ("Amazing cinematography, but the plot was weak.", 1),
    ("I fell asleep halfway through. Very boring.", 0),
)

# One record per review, in the column layout the dataset is built from.
data = [{"text": review, "label": sentiment} for review, sentiment in _REVIEWS]

dataset = Dataset.from_list(data)

# Upload the dataset to the Hugging Face Hub repository.
dataset.push_to_hub("louiecerv/sentiment_analysis")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ datasets
3
+ torch
4
+ streamlit
5
+ huggingface_hub
train_model.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune DistilBERT on the ``louiecerv/sentiment_analysis`` dataset.

The script tokenizes the dataset's ``train`` split, trains a two-class
sequence classifier for three epochs and pushes the result to the
``louiecerv/sentiment_analysis_model`` repository on the Hugging Face Hub.
"""

from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import AutoTokenizer
import torch

# Class names stored in the model config so that inference returns readable
# labels instead of the generic "LABEL_0"/"LABEL_1". The dataset uses 0 for
# negative and 1 for positive reviews.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: index for index, name in id2label.items()}

# Load the dataset
dataset = load_dataset("louiecerv/sentiment_analysis")

# Load tokenizer
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch of examples.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; its
            ``"text"`` entry holds the review strings.

    Returns:
        The tokenizer output for the batch, padded to a fixed maximum
        length and truncated where the text is longer.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Prepare dataset for training
train_dataset = tokenized_datasets["train"]

# Load model
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="no",  # no evaluation split is used
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    save_strategy="epoch",
    push_to_hub=True,
    hub_model_id="louiecerv/sentiment_analysis_model",
)

# Trainer
# NOTE: the tokenizer is not handed to the Trainer here; upload_tokenizer.py
# pushes it to the model repository separately.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Train and save model
trainer.train()
trainer.push_to_hub()
upload_tokenizer.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
"""Publish the base DistilBERT tokenizer alongside the fine-tuned model."""

from transformers import AutoTokenizer

# Checkpoint whose tokenizer is fetched and re-uploaded.
model_checkpoint = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Upload the tokenizer files to the model repository.
tokenizer.push_to_hub("louiecerv/sentiment_analysis_model")