Create README.md
README.md (ADDED)
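The snippet below loads the model, runs a batch of example sentences through it, and maps each token's prediction to a bias label (`O`, `B-BIAS`, `I-BIAS`).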
```python
# Load model directly from the Hugging Face Hub
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Roberta-NER")
model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Roberta-NER")

# Example batch of sentences
sentences = [
    "The corrupt politician embezzled funds.",
    "Immigrants are causing a surge in crime.",
    "The movie star is an idiot for their political views.",
    "Only a fool would believe in climate change.",
    "The new policy will destroy the economy."
]

# Tokenize the batch
encoding = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)

# Get model predictions (no gradients needed for inference)
with torch.no_grad():
    outputs = model(**encoding)

# Apply softmax to the output logits to get per-token class probabilities
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Get the highest-probability label for each token
predicted_labels = torch.argmax(predictions, dim=-1)

# Define a mapping for the labels (BIO scheme)
label_mapping = {
    0: "O",       # No bias
    1: "B-BIAS",  # Beginning of a biased sequence
    2: "I-BIAS"   # Inside a biased sequence
}

# Convert predicted label ids to their corresponding label names using the mapping
labels = [[label_mapping[label_id.item()] for label_id in sentence_labels]
          for sentence_labels in predicted_labels]

# Align labels with the tokens in each sentence
aligned_labels = []
for i, sentence_labels in enumerate(labels):
    # Get the tokens of the i-th sentence
    tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'][i])
    # Keep only labels for tokens that are not special tokens (e.g. <s>, </s>, <pad>)
    sentence_labels = [label for token, label in zip(tokens, sentence_labels)
                       if token not in tokenizer.all_special_tokens]
    aligned_labels.append(sentence_labels)

# Print the aligned labels for each sentence
for sentence, sentence_labels in zip(sentences, aligned_labels):
    print(f"Sentence: {sentence}\nLabels: {sentence_labels}\n")
```
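Note that the labels above are per subword token: RoBERTa's byte-level BPE tokenizer can split a single word into several tokens, each with its own label. Below is a minimal sketch of aggregating subword predictions back to whole words; it assumes a fast tokenizer (which `AutoTokenizer` returns by default for this architecture) so that `encoding.word_ids()` is available, and it simply takes the label of each word's first subword.

```python
# Aggregate subword-level predictions back to whole words.
# Assumes a fast tokenizer, so encoding.word_ids() is available.
for i, sentence in enumerate(sentences):
    # Map each token position to its source word index (None for special tokens)
    word_ids = encoding.word_ids(batch_index=i)
    word_labels = {}
    for token_index, word_id in enumerate(word_ids):
        if word_id is not None and word_id not in word_labels:
            # Take the label of the first subword of each word
            word_labels[word_id] = label_mapping[predicted_labels[i][token_index].item()]
    print(f"Sentence: {sentence}\nWord labels: {list(word_labels.values())}\n")
```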