Spaces:

abreel
/

thesis-deployment

Runtime error

App Files Files Community

Avril Lalaine committed on Nov 23, 2024

Commit

0ad9aa8

1 Parent(s): 28bff37

Add Flask app with Dockerfile

Browse files

Files changed (5) hide show

Dockerfile +11 -0
app.py +133 -0
model.py +87 -0
requirements.txt +12 -0
templates/index.html +155 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+# Container image for the Flask fake-news detection app.
+FROM python:3.9-slim
+WORKDIR /app
+# Copy and install dependencies before the sources so this layer stays cached
+# when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 8080
+# Serve on all interfaces and on the exposed port; the development default
+# (127.0.0.1:5000) is unreachable from outside the container.
+CMD ["flask", "--app", "app", "run", "--host", "0.0.0.0", "--port", "8080"]

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+from pathlib import Path
+from flask import Flask, render_template, request, jsonify
+from transformers import  BertTokenizer, BertForSequenceClassification, AutoTokenizer, AutoModelForSequenceClassification
+import torch
+app = Flask(__name__)
+# Configuration  # Directory containing model files
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+MAX_LENGTH = 512
+BERT_TOKENIZER = 'bert-base-uncased'
+ROBERTA_TOKENIZER = 'jcblaise/roberta-tagalog-base'
+ELECTRA_TOKENIZER = 'google/electra-base-discriminator'
+LABELS = ["fake", "real"]
+class Classifier:
+    def __init__(self, model_path, device, tokenizer_name):
+        self.device = device
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            model_path,
+            local_files_only=True,
+            device_map=device
+        )
+        self.model.eval()
+    def predict(self, text):
+        """Make prediction for a single text"""
+        # Tokenize
+        inputs = self.tokenizer(
+            text,
+            truncation=True,
+            max_length=MAX_LENGTH,
+            padding=True,
+            return_tensors="pt"
+        ).to(self.device)
+        # Get prediction
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            predicted_class = torch.argmax(probabilities, dim=-1).item()
+            confidence_scores = probabilities[0].tolist()
+        # Format results
+        result = {
+            'predicted_class': LABELS[predicted_class],
+            'confidence_scores': {
+                label: score
+                for label, score in zip(LABELS, confidence_scores)
+            }
+        }
+        return result
+@app.route('/')
+def home():
+    return render_template('index.html')
+@app.route('/detect', methods=['POST'])
+def detect():
+    try:
+        data = request.get_json()
+        news_text = data.get('text')
+        model_chosen = data.get('model')
+        print(model_chosen)
+        if not news_text:
+            return jsonify({
+                'status': 'error',
+                'message': 'No text provided'
+            }), 400
+        switch={
+            'nonaug-bert':'bert-nonaug',
+            'aug-bert':'bert-aug',
+            'nonaug-tagbert':'tagbert-nonaug',
+            'aug-tagbert':'tagbert-aug',
+            'nonaug-electra':'electra-nonaug',
+            'aug-electra':'electra-aug'
+        }
+        model_p = switch.get(model_chosen)
+        print("model",model_p)
+        MODEL_PATH = Path("D:\\Aplil\\skibidi-thesis\\webapp", model_p)
+        print(MODEL_PATH)
+        tokenizer = model_chosen.split("-")[1]
+        tokenizer_chosen = {
+            'bert':BERT_TOKENIZER,
+            'tagbert':ROBERTA_TOKENIZER,
+            'electra':ELECTRA_TOKENIZER
+        }
+        print(tokenizer)
+        classifier = Classifier(MODEL_PATH,DEVICE,tokenizer_chosen.get(tokenizer))
+        result = classifier.predict(news_text)
+        print(result['confidence_scores'])
+        if result['predicted_class'] == "fake":
+            out = "News Needs Further Validation"
+        else:
+            out = "News is Real"
+        return jsonify({
+            'status': 'success',
+            'prediction': out,
+            'confidence':result['confidence_scores']
+        })
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': str(e)
+        }), 400
+if __name__ == '__main__':
+    app.run(debug=True)

model.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import pandas as pd
+import torch
+from torch.utils.data import Dataset
+import numpy as np
+from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
+from transformers import BertTokenizer, BertForSequenceClassification, Trainer,TrainingArguments
+# no augment dataset
+# df = df = pd.read_csv(r".\train_set.csv")
+# with augment training dataset
+df = pd.read_csv(r".\cleaned_combined_aug_set.csv")
+# df.info()
+value_counts = df['label'].value_counts()
+print(value_counts)
+test_df = pd.read_csv(r".\test_set.csv")
+# test_df.info()
+test_df['label'].value_counts()
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+model = BertForSequenceClassification.from_pretrained('bert-base-uncased',num_labels=2)
+model = model.to('cuda')
+# independent var
+X = list(df['article'])
+X_test = list(test_df['article'])
+#dependent
+y= list(df['label'])
+y_test = list(test_df['label'])
+max_length = 512
+train_encodings = tokenizer(X, truncation=True, padding='max_length', max_length=max_length, return_tensors='pt')
+test_encodings = tokenizer(X_test, truncation=True, padding='max_length', max_length=max_length, return_tensors='pt')
+class CustomDataset(Dataset):
+    def __init__(self, encodings, labels):
+        self.encodings = encodings
+        self.labels = labels
+    def __getitem__(self, idx):
+        item = {key: val[idx] for key, val in self.encodings.items()}
+        item['labels'] = torch.tensor(self.labels[idx])
+        return item
+    def __len__(self):
+        return len(self.labels)
+torch_train_dataset = CustomDataset(train_encodings,y)
+torch_test_dataset = CustomDataset(test_encodings,y_test)
+training_args = TrainingArguments(
+    output_dir='./results/fake-news-bert-aug',
+    evaluation_strategy='epoch',
+    learning_rate=2e-5,
+    per_device_train_batch_size=16,
+    per_device_eval_batch_size=16,
+    num_train_epochs=3
+)
+def compute_metrics(p):
+    print(type(p))
+    pred, labels = p
+    pred = np.argmax(pred,axis=1)
+    accuracy = accuracy_score(y_true=labels,y_pred=pred)
+    recall = recall_score(y_true=labels,y_pred=pred)
+    precision = precision_score(y_true=labels,y_pred=pred)
+    f1 = f1_score(y_true=labels,y_pred=pred)
+    return {"accuracy":accuracy,"precision":precision,"recall":recall,"f1":f1}
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=torch_train_dataset,
+    eval_dataset=torch_test_dataset,
+    compute_metrics=compute_metrics
+)
+trainer.train()
+def predict(text):
+    return trainer.predict(text)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+# The "+cu118" torch build is not published on PyPI; it is only available
+# from the PyTorch wheel index, so pip must be told to search it as well.
+--extra-index-url https://download.pytorch.org/whl/cu118
+blinker==1.8.2
+click==8.1.7
+colorama==0.4.6
+Flask==3.0.3
+importlib_metadata==8.5.0
+itsdangerous==2.2.0
+Jinja2==3.1.4
+MarkupSafe==2.1.5
+Werkzeug==3.0.4
+zipp==3.20.2
+transformers==4.33.3
+torch==2.4.1+cu118

templates/index.html ADDED Viewed

	@@ -0,0 +1,155 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Fake News Detection using AugTagalog-BERT</title>
+  <link rel="icon" type="image/png" href="{{ url_for('static', filename='bert.png') }}" />
+  <script src="https://cdn.tailwindcss.com"></script>
+  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css" />
+</head>
+<body class="min-h-screen flex flex-col justify-between font-sans bg-gray-100">
+  <header class="bg-gray-800 w-full py-2 flex items-center justify-start">
+    <div class="flex items-center space-x-4 ml-4">
+      <img src="{{ url_for('static', filename='bert.png') }}" alt="BERT Logo" class="w-8 h-8" />
+      <h1 class="text-white text-md font-bold">Fake News Detection using AugTagalog-BERT</h1>
+    </div>
+  </header>
+  <div class="flex-grow flex items-center justify-center px-10 py-12">
+    <div class="grid grid-cols-1 md:grid-cols-2 w-full gap-12 max-w-6xl">
+      <div class="flex flex-col p-12 space-y-8 bg-white rounded-lg shadow-lg">
+        <h2 class="text-2xl font-semibold text-gray-800">Tagalog Fake News Classifier</h2>
+        <div>
+          <label for="models" class="block text-lg font-medium text-gray-600 mb-2">Choose a model:</label>
+          <div class="relative">
+            <select id="models" name="models" class="w-full bg-white text-gray-900 text-lg rounded-md border border-gray-300 focus:border-gray-500 focus:ring focus:ring-gray-200 py-3 pl-4 pr-10 appearance-none transition duration-200">
+              <option value="nonaug-bert">Non-Augmented BERT Model</option>
+              <option value="aug-bert">Augmented BERT Model</option>
+              <option value="nonaug-tagbert">Non-Augmented Tagalog-RoBERTa Model</option>
+              <option value="aug-tagbert" selected>Augmented Tagalog-RoBERTa Model</option>
+              <option value="nonaug-electra">Non-Augmented ELECTRA</option>
+              <option value="aug-electra">Augmented ELECTRA</option>
+            </select>
+            <div class="absolute inset-y-0 right-0 flex items-center pr-4 pointer-events-none">
+              <i class="fas fa-chevron-down text-gray-500"></i>
+            </div>
+          </div>
+        </div>
+        <div class="relative w-full">
+          <label for="newsInput" class="block text-lg font-medium text-gray-600 mb-2">Input News:</label>
+          <textarea
+            id="newsInput"
+            class="h-40 w-full border-2 border-gray-300 rounded-lg pl-4 pr-4 py-3 focus:outline-none focus:ring-2 focus:ring-gray-500 focus:border-gray-500 transition duration-200"
+            placeholder="Paste your text here..."
+            rows="6"
+          ></textarea>
+        </div>
+        <div class="flex justify-center">
+          <button
+            id="detectBtn"
+            class="bg-gray-800 text-white font-semibold py-3 px-8 rounded-lg hover:bg-gray-600 transition duration-300"
+          >
+            Detect
+          </button>
+        </div>
+      </div>
+      <div class="flex flex-col justify-center p-12 bg-white rounded-lg shadow-lg">
+        <div id="resultContainer" class="opacity-0 transition-opacity duration-500 h-full flex flex-col justify-center">
+          <div class="p-8 bg-gradient-to-b from-blue-50 to-white rounded-lg shadow-md">
+            <h2 class="text-3xl font-semibold mb-6 text-center text-gray-700">Result</h2>
+            <p id="result" class="text-center text-lg font-semibold p-4 rounded-lg border text-gray-800"></p>
+            <div class="mt-8">
+              <h3 class="text-lg font-bold text-center text-gray-700 mb-4">Confidence Levels</h3>
+              <div class="grid grid-cols-2 gap-4">
+                <div class="p-4 bg-red-100 rounded-lg shadow-sm text-center">
+                  <h4 class="font-semibold text-red-600">Fake</h4>
+                  <p id="fake" class="text-lg font-bold text-red-700">0%</p>
+                </div>
+                <div class="p-4 bg-green-100 rounded-lg shadow-sm text-center">
+                  <h4 class="font-semibold text-green-600">Real</h4>
+                  <p id="real" class="text-lg font-bold text-green-700">0%</p>
+                </div>
+              </div>
+            </div>
+          </div>
+        </div>
+        <div id="loadingSpinner" class="hidden flex justify-center items-center h-full">
+          <div class="flex flex-col items-center">
+            <div class="animate-spin rounded-full h-12 w-12 border-b-4 border-gray-600"></div>
+            <p class="mt-4 text-gray-600 font-semibold">Detecting...</p>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
+  <footer class="text-center py-4 bg-gray-800 w-full shadow-inner">
+    <p class="text-white text-sm">
+      © 2024 | <span class="font-semibold">J. Embolode, A. Kuan, A. Linaza</span>
+    </p>
+  </footer>
+  <script>
+    document.getElementById("detectBtn").addEventListener("click", function () {
+      const newsInput = document.getElementById("newsInput").value;
+      const model = document.getElementById("models").value;
+      const loadingSpinner = document.getElementById("loadingSpinner");
+      const resultContainer = document.getElementById("resultContainer");
+      const resultText = document.getElementById("result");
+      const confidenceFake = document.getElementById("fake");
+      const confidenceReal = document.getElementById("real");
+      if (newsInput.trim() === "") {
+        alert("Please enter text.");
+        return;
+      }
+      loadingSpinner.classList.remove("hidden");
+      resultContainer.style.opacity = 0;
+      fetch("/detect", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({ text: newsInput, model: model }),
+      })
+        .then((response) => response.json())
+        .then((data) => {
+          loadingSpinner.classList.add("hidden");
+          resultContainer.style.opacity = 1;
+          if (data.status === "error") {
+            resultText.textContent = data.message;
+            resultText.classList.add("text-red-500");
+            resultText.classList.remove("text-green-500");
+          } else {
+            resultText.innerHTML = data.prediction;
+            if (data.prediction === "News Needs Further Validation") {
+              resultText.classList.add("text-red-500");
+              resultText.classList.remove("text-green-500");
+            } else {
+              resultText.classList.add("text-green-500");
+              resultText.classList.remove("text-red-500");
+            }
+            confidenceFake.textContent = (data.confidence.fake * 100).toFixed(2) + "%";
+            confidenceReal.textContent = (data.confidence.real * 100).toFixed(2) + "%";
+          }
+        })
+        .catch((error) => {
+          loadingSpinner.classList.add("hidden");
+          console.error("Error:", error);
+        });
+    });
+  </script>
+</body>
+</html>