text-ypesk

Sleeping

App Files Files Community

ypesk committed 24 days ago

Commit

5414c47

verified ·

1 Parent(s): 0ae53cb

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +62 -3

tasks/text.py CHANGED Viewed

@@ -9,7 +9,7 @@ from .utils.emissions import tracker, clean_emissions_data, get_space_info
 router = APIRouter()
-DESCRIPTION = "Random Baseline"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
@@ -55,10 +55,69 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
     # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

 router = APIRouter()
+DESCRIPTION = "First Baseline"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+    class CovidTwitterBertClassifier(nn.Module):
+        def __init__(self, n_classes):
+            super().__init__()
+            self.n_classes = n_classes
+            self.bert = BertForPreTraining.from_pretrained('digitalepidemiologylab/covid-twitter-bert-v2')
+            self.bert.cls.seq_relationship = nn.Linear(1024, n_classes)
+            self.sigmoid = nn.Sigmoid()
+        def forward(self, input_ids, token_type_ids, input_mask):
+            outputs = self.bert(input_ids = input_ids, token_type_ids = token_type_ids, attention_mask = input_mask)
+            logits = outputs[1]
+            return logits
+    model = CovidTwitterBertClassifier(8)
+    model.to(device)
+    model.load_state_dict(torch.load('model.pth'))
+    model.eval()
+    tokenizer = AutoTokenizer.from_pretrained('digitalepidemiologylab/covid-twitter-bert')
+    test_texts = [t['quote'] for t in data_test]
+    MAX_LEN = 128 #1024 # < m some tweets will be truncated
+    tokenized_test = tokenizer(test_texts, max_length=MAX_LEN, padding='max_length', truncation=True)
+    test_input_ids, test_token_type_ids, test_attention_mask = tokenized_test['input_ids'], tokenized_test['token_type_ids'], tokenized_test['attention_mask']
+    test_token_type_ids = torch.tensor(test_token_type_ids)
+    test_input_ids = torch.tensor(test_input_ids)
+    test_attention_mask = torch.tensor(test_attention_mask)
+    batch_size = 8 #
+    test_data = TensorDataset(test_input_ids, test_attention_mask, test_token_type_ids)
+    test_sampler = SequentialSampler(test_data)
+    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)
+    predictions = []
+    for step, batch in enumerate(test_dataloader):
+        # Add batch to GPU
+        batch = tuple(t.to(device) for t in batch)
+        b_input_ids, b_input_mask, b_token_type_ids = batch
+        with torch.no_grad():
+            logits = model(b_input_ids, b_token_type_ids, b_input_mask)
+        logits = logits.detach().cpu().numpy()
+        predictions.extend(logits.argmax(1))
+        for l in ground_truth:
+            labels_sep.append(l)
+    true_labels = test_dataset["label"]
     # Make random predictions (placeholder for actual model inference)
+    #true_labels = test_dataset["label"]
+    #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE