Update README.md
Browse files
README.md
CHANGED
@@ -44,8 +44,47 @@ To obtain the probabilities for each label (i.e., marketing mix variable), you n
|
|
44 |
|
45 |
IMPORTANT: At the time of writing this description, Hugging Face's pipeline did not support multi-label classifiers.
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
### Citation
|
48 |
-
|
49 |
```
|
50 |
Ringel, Daniel, Creating Synthetic Experts with Generative Artificial Intelligence (July 15, 2023). Available at SSRN: https://ssrn.com/abstract=4542949
|
51 |
```
|
|
|
44 |
|
45 |
IMPORTANT: At the time of writing this description, Hugging Face's pipeline did not support multi-label classifiers.
|
46 |
|
47 |
+
### Quickstart
|
48 |
+
|
49 |
+
```python
|
50 |
+
# Imports
|
51 |
+
import pandas as pd, numpy as np, warnings, torch, re
|
52 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
53 |
+
from bs4 import BeautifulSoup
|
54 |
+
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
|
55 |
+
|
56 |
+
# Helper Functions
|
57 |
+
def clean_and_parse_tweet(tweet):
|
58 |
+
tweet = re.sub(r"https?://\S+|www\.\S+", " URL ", tweet)
|
59 |
+
parsed = BeautifulSoup(tweet, "html.parser").get_text() if "filename" not in str(BeautifulSoup(tweet, "html.parser")) else None
|
60 |
+
return re.sub(r" +", " ", re.sub(r'^[.:]+', '', re.sub(r"\\n+|\n+", " ", parsed or tweet)).strip()) if parsed else None
|
61 |
+
|
62 |
+
def predict_tweet(tweet, model, tokenizer, device, threshold=0.5):
|
63 |
+
inputs = tokenizer(tweet, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
|
64 |
+
probs = torch.sigmoid(model(**inputs).logits).detach().cpu().numpy()[0]
|
65 |
+
return probs, [id2label[i] for i, p in enumerate(probs) if id2label[i] in {'Product', 'Place', 'Price', 'Promotion'} and p >= threshold]
|
66 |
+
|
67 |
+
# Setup
|
68 |
+
device = "mps" if torch.backends.mps.is_built() and torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
|
69 |
+
synxp = "dmr76/mmx_classifier_microblog_ENv02"
|
70 |
+
model = AutoModelForSequenceClassification.from_pretrained(synxp).to(device)
|
71 |
+
tokenizer = AutoTokenizer.from_pretrained(synxp)
|
72 |
+
id2label = model.config.id2label
|
73 |
+
|
74 |
+
# ---->>> Define your Tweet <<<----
|
75 |
+
tweet = "Best cushioning ever!!! 🤗🤗🤗 my zoom vomeros are the bomb🏃🏽♀️💨!!! \n @nike #run #training https://randomurl.ai"
|
76 |
+
|
77 |
+
# Clean and Predict
|
78 |
+
cleaned_tweet = clean_and_parse_tweet(tweet)
|
79 |
+
probs, labels = predict_tweet(cleaned_tweet, model, tokenizer, device)
|
80 |
+
|
81 |
+
# Print Labels and Probabilities
|
82 |
+
print("Please don't forget to cite the paper: https://ssrn.com/abstract=4542949")
|
83 |
+
print(labels, probs)
|
84 |
+
```
|
85 |
+
|
86 |
### Citation
|
87 |
+
Please cite the following reference if you use synthetic experts in your work:
|
88 |
```
|
89 |
Ringel, Daniel, Creating Synthetic Experts with Generative Artificial Intelligence (July 15, 2023). Available at SSRN: https://ssrn.com/abstract=4542949
|
90 |
```
|