parkerjj committed
Commit 7c28043 · 1 Parent(s): 78a2dc6

Change the startup command to Uvicorn and adjust the worker count in the Gunicorn config; add lazy loading to optimize model and tokenizer initialization and ensure thread safety

Files changed (3)
  1. Dockerfile +1 -1
  2. gunicorn.conf.py +1 -1
  3. preprocess.py +29 -4
Dockerfile CHANGED
@@ -45,4 +45,4 @@ RUN --mount=type=secret,id=HF_Token,mode=0444,required=true \
 
 
 # Change the startup command; use the config file
-CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "3"]
gunicorn.conf.py CHANGED
@@ -5,7 +5,7 @@ bind = "0.0.0.0:7860"
 
 # Adjust the worker-count strategy
 # For CPU-bound apps, the usual recommendation is CPU cores + 1
-workers = multiprocessing.cpu_count() + 1
+workers = 3
 
 # Number of threads per worker process
 # Set to 2 to increase concurrency
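With the Dockerfile now starting Uvicorn directly, this file is no longer consumed at container startup; the change simply keeps its workers value in sync with the --workers 3 flag. For reference, a sketch of the file after this change, reconstructed from the hunk context (the threads setting and the multiprocessing import are assumptions inferred from the visible comments, not verbatim from the repository):

import multiprocessing  # assumed leftover from the old cpu_count() formula

bind = "0.0.0.0:7860"

# Adjust the worker-count strategy
# For CPU-bound apps, the usual recommendation is CPU cores + 1
workers = 3  # fixed count; was multiprocessing.cpu_count() + 1

# Number of threads per worker process
# Set to 2 to increase concurrency
threads = 2  # assumed from the comment above; not shown in the hunk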
preprocess.py CHANGED
@@ -51,14 +51,35 @@ print("Is NPL GPU used Preprocessing.py:", spacy.prefer_gpu())
 
 
 # Use the appropriate model and tokenizer
-tokenizer_one = AutoTokenizer.from_pretrained("ProsusAI/finbert")
-sa_model_one = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+# tokenizer_one = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+# sa_model_one = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
 
 
-tokenizer_two = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
-sa_model_two = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
+# tokenizer_two = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
+# sa_model_two = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
 
+import multiprocessing
 
+# Add a process lock
+_tokenizer_lock = multiprocessing.Lock()
+_models = {}
+
+def get_tokenizer_and_model(model_type="one"):
+    """Lazily load the tokenizer and model."""
+    global _models
+
+    if model_type not in _models:
+        with _tokenizer_lock:
+            if model_type not in _models:  # double-checked locking
+                if model_type == "one":
+                    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+                    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+                else:
+                    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
+                    model = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
+                _models[model_type] = (tokenizer, model)
+
+    return _models[model_type]
 
 index_us_stock_index_INX = ak.index_us_stock_sina(symbol=".INX")
 index_us_stock_index_DJI = ak.index_us_stock_sina(symbol=".DJI")
@@ -273,6 +294,10 @@ def get_sentiment_score(text):
     try:
         import torch
 
+        # Lazily fetch the tokenizer and model
+        tokenizer_one, sa_model_one = get_tokenizer_and_model("one")
+        tokenizer_two, sa_model_two = get_tokenizer_and_model("two")
+
         # Split long text into segments
         segments_one = process_long_text(text, tokenizer_one)
         segments_two = process_long_text(text, tokenizer_two)
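One detail worth flagging: get_tokenizer_and_model guards loading with multiprocessing.Lock(), but under uvicorn --workers 3 each worker is a separate process with its own _models dict, so the lock only ever serializes threads inside a single worker, and each process still loads its own copy on first use. For that per-process case, threading.Lock is the conventional tool. A minimal, self-contained sketch of the same double-checked-locking pattern (load_expensive_resource and get_resource are hypothetical stand-ins, not names from the repository):

import threading

_cache = {}
_cache_lock = threading.Lock()

def load_expensive_resource(key):
    # Hypothetical stand-in for the slow from_pretrained() calls.
    return f"resource-{key}"

def get_resource(key):
    """Lazily create and cache a resource, safe under concurrent threads."""
    if key not in _cache:              # first check without the lock (fast path)
        with _cache_lock:
            if key not in _cache:      # second check under the lock
                _cache[key] = load_expensive_resource(key)
    return _cache[key]

print(get_resource("one"))  # triggers the load
print(get_resource("one"))  # served from the cache

The first unlocked check keeps the hot path cheap once the cache is warm; the second check under the lock prevents two threads that both missed from loading the same resource twice.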