# Source path: stock/analysis/sentiment.py
# Last commit: "Release 0.002" (2293f58), shown with feliponi's avatar
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.stats import zscore
class SentimentAnalyzer:
    """Score news items as negative / neutral / positive with pretrained models.

    Two sequence-classification checkpoints and their tokenizers are loaded in
    ``__init__`` and exposed through ``self.models`` / ``self.tokenizers``.
    ``analyze`` only uses the ``'financial_sentiment'`` pair.
    """

    # Checkpoint identifier for each key of ``self.models`` / ``self.tokenizers``.
    MODEL_NAMES = {
        'finbert': "yiyanghkust/finbert-tone",
        'financial_sentiment': "ahmedrachid/FinancialBERT-Sentiment-Analysis",
    }
    # Key of the model/tokenizer pair that ``analyze`` runs.
    ACTIVE_MODEL = 'financial_sentiment'
    # Output keys, in the order they are read from the score vector.
    # NOTE(review): assumes the active model's classes are ordered
    # negative, neutral, positive -- confirm against model.config.id2label.
    LABELS = ('negative', 'neutral', 'positive')
    # An item is an outlier when any class score lies this many standard
    # deviations (or more) away from the mean over all items.
    OUTLIER_Z_THRESHOLD = 2.0

    def __init__(self):
        """Load every checkpoint in ``MODEL_NAMES`` plus its tokenizer."""
        self.models = {
            key: AutoModelForSequenceClassification.from_pretrained(name)
            for key, name in self.MODEL_NAMES.items()
        }
        self.tokenizers = {
            key: AutoTokenizer.from_pretrained(name)
            for key, name in self.MODEL_NAMES.items()
        }
        self.max_length = 512  # Model limit

    def chunk_text(self, text, tokenizer):
        """Split ``text`` into lists of token ids that each fit the model.

        The ids carry no special tokens, and each chunk leaves room for the
        ones the tokenizer adds when the chunk is encoded for the model, so a
        full chunk is not truncated afterwards.

        Args:
            text: Text to tokenize.
            tokenizer: Tokenizer providing ``encode`` and
                ``num_special_tokens_to_add``.

        Returns:
            A list of token-id lists; empty when ``text`` yields no tokens.
        """
        token_ids = tokenizer.encode(
            text, truncation=False, add_special_tokens=False
        )
        reserved = tokenizer.num_special_tokens_to_add(pair=False)
        # Never let the step reach zero, which would make range() raise.
        step = max(1, self.max_length - reserved)
        return [
            token_ids[start:start + step]
            for start in range(0, len(token_ids), step)
        ]

    def preprocess_text(self, item):
        """Join an item's title and content into one string.

        Args:
            item: Mapping that may hold ``'title'`` and ``'content'``.

        Returns:
            ``"<title> <content>"`` stripped of outer whitespace, or ``None``
            when both parts are missing or blank.
        """
        parts = []
        for field in ('title', 'content'):
            value = item.get(field)
            # A missing or None field must not become the literal text "None".
            cleaned = '' if value is None else str(value).strip()
            if cleaned:
                parts.append(cleaned)
        return ' '.join(parts) or None

    def analyze(self, news):
        """Return the average sentiment over a collection of news items.

        Items that are not dicts or that have no text are skipped. Each
        remaining item is scored as the mean of its chunk probabilities, then
        items are averaged after dropping outliers.

        Args:
            news: Iterable of dicts with optional ``'title'`` / ``'content'``.

        Returns:
            Dict with float values under ``'negative'``, ``'neutral'`` and
            ``'positive'``. When nothing can be scored, every value is 0.33.
        """
        if not news:
            return self._no_signal()

        tokenizer = self.tokenizers[self.ACTIVE_MODEL]
        model = self.models[self.ACTIVE_MODEL]

        item_scores = []
        for item in news:
            if not isinstance(item, dict):
                continue
            text = self.preprocess_text(item)
            if not text:
                continue
            score = self._score_text(text, tokenizer, model)
            if score is not None:
                item_scores.append(score)

        if not item_scores:
            return self._no_signal()

        averaged = self._mean_without_outliers(item_scores)
        return {
            label: float(value) for label, value in zip(self.LABELS, averaged)
        }

    @classmethod
    def _no_signal(cls):
        """Return a fresh dict with the placeholder score 0.33 per label."""
        return {label: 0.33 for label in cls.LABELS}

    def _score_text(self, text, tokenizer, model):
        """Return the mean class probabilities over the chunks of ``text``.

        Returns ``None`` when the text produces no chunks.
        """
        chunk_scores = []
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for chunk in self.chunk_text(text, tokenizer):
                chunk_str = tokenizer.decode(chunk, skip_special_tokens=True)
                # truncation stays on as a safety net in case re-tokenizing
                # the decoded text yields more tokens than the chunk held.
                inputs = tokenizer(
                    chunk_str,
                    return_tensors="pt",
                    truncation=True,
                    max_length=self.max_length,
                )
                logits = model(**inputs).logits
                probabilities = torch.nn.functional.softmax(logits, dim=-1)
                chunk_scores.append(probabilities.detach().cpu().numpy()[0])
        if not chunk_scores:
            return None
        return np.mean(chunk_scores, axis=0)

    def _mean_without_outliers(self, item_scores):
        """Average per-item scores, ignoring items that are outliers.

        Z-scores are taken per class across items. With only a handful of
        items no z-score can reach the threshold, so nothing is dropped.
        """
        stacked = np.asarray(item_scores, dtype=float)
        center = stacked.mean(axis=0)
        spread = stacked.std(axis=0)
        # A class on which all items agree has no spread; divide by 1 there so
        # its z-scores come out as ~0 rather than NaN.
        safe_spread = np.where(spread > 1e-12, spread, 1.0)
        z_scores = np.abs(stacked - center) / safe_spread
        keep = (z_scores < self.OUTLIER_Z_THRESHOLD).all(axis=1)
        if keep.any():
            return stacked[keep].mean(axis=0)
        return center