Change the startup command to Uvicorn and adjust the worker count in the Gunicorn config; add a lazy-loading mechanism to optimize model and tokenizer loading and ensure thread safety.
Files changed:
- Dockerfile +1 -1
- gunicorn.conf.py +1 -1
- preprocess.py +29 -4
Dockerfile
CHANGED
@@ -45,4 +45,4 @@ RUN --mount=type=secret,id=HF_Token,mode=0444,required=true \
 
 
 # Change the startup command; use the config file
-CMD ["
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "3"]
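The new CMD serves app:app with three Uvicorn workers on port 7860. For reference, a minimal sketch of the kind of ASGI entry point that command expects; the real app.py is not part of this commit, and FastAPI is an assumption here:

# app.py -- hypothetical sketch of the "app:app" entry point; the actual
# application is not shown in this commit (FastAPI is assumed).
from fastapi import FastAPI

app = FastAPI()

@app.get("/health")
def health():
    # Minimal liveness route so `uvicorn app:app` has something to serve
    return {"status": "ok"}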
gunicorn.conf.py
CHANGED
@@ -5,7 +5,7 @@ bind = "0.0.0.0:7860"
 
 # Adjust the worker-count strategy
 # For CPU-bound apps, CPU cores + 1 is recommended
-workers =
+workers = 3
 
 # Threads per worker process
 # Set to 2 to increase concurrency
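The comment recommends CPU cores + 1 for CPU-bound work, while the commit pins the value to 3 to match the Uvicorn CMD. A hypothetical alternative that derives the count dynamically, per that guidance:

# Hypothetical: compute the worker count from the CPU count, as the
# "CPU cores + 1" comment suggests, instead of hardcoding 3.
import multiprocessing

workers = multiprocessing.cpu_count() + 1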
preprocess.py
CHANGED
@@ -51,14 +51,35 @@ print("Is NPL GPU used Preprocessing.py:", spacy.prefer_gpu())
 
 
 # Use the appropriate models and tokenizers
-tokenizer_one = AutoTokenizer.from_pretrained("ProsusAI/finbert")
-sa_model_one = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+# tokenizer_one = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+# sa_model_one = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
 
 
-tokenizer_two = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
-sa_model_two = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
+# tokenizer_two = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
+# sa_model_two = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
 
+import multiprocessing
 
+# Add a process lock
+_tokenizer_lock = multiprocessing.Lock()
+_models = {}
+
+def get_tokenizer_and_model(model_type="one"):
+    """Lazily load the tokenizer and model."""
+    global _models
+
+    if model_type not in _models:
+        with _tokenizer_lock:
+            if model_type not in _models:  # double-checked locking
+                if model_type == "one":
+                    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+                    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+                else:
+                    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
+                    model = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
+                _models[model_type] = (tokenizer, model)
+
+    return _models[model_type]
 
 index_us_stock_index_INX = ak.index_us_stock_sina(symbol=".INX")
 index_us_stock_index_DJI = ak.index_us_stock_sina(symbol=".DJI")
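A short usage sketch of the new loader (the sample text and softmax step are illustrative, not from the commit). One caveat worth noting: with three separate Uvicorn worker processes, each process holds its own _models cache, so the lock only serializes concurrent loads within a single process.

# Illustrative usage: the first call per process loads the weights; later
# calls return the cached (tokenizer, model) pair from _models.
import torch

tokenizer, model = get_tokenizer_and_model("one")
inputs = tokenizer("Markets rallied after the earnings report.",
                   return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
probs = torch.softmax(logits, dim=-1)  # sentiment class probabilities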
@@ -273,6 +294,10 @@ def get_sentiment_score(text):
     try:
         import torch
 
+        # Lazy-load the tokenizers and models
+        tokenizer_one, sa_model_one = get_tokenizer_and_model("one")
+        tokenizer_two, sa_model_two = get_tokenizer_and_model("two")
+
         # Split the long text into segments
         segments_one = process_long_text(text, tokenizer_one)
         segments_two = process_long_text(text, tokenizer_two)
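process_long_text is called here but not touched by this commit. A hypothetical sketch of what such a helper typically does, assuming it chunks text to fit BERT's 512-token window; the real implementation lives elsewhere in preprocess.py:

# Hypothetical sketch only; NOT the repo's actual process_long_text.
def process_long_text(text, tokenizer, max_tokens=510):
    # Encode without special tokens, leaving room for [CLS] and [SEP]
    ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [ids[i:i + max_tokens] for i in range(0, len(ids), max_tokens)]
    # Decode each chunk back into a text segment for scoring
    return [tokenizer.decode(chunk) for chunk in chunks]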