parkerjj committed on
Commit 62f31c8 · 1 Parent(s): 2609d5c

Daily Update, First Release for model 1012

Files changed (5):
  1. RequestModel.py +8 -0
  2. app.py +22 -10
  3. blkeras.py +176 -88
  4. preprocess.py +97 -40
  5. us_stock.py +18 -40
RequestModel.py ADDED
@@ -0,0 +1,8 @@
+
+ from typing import Optional, List
+ from pydantic import BaseModel
+
+ class PredictRequest(BaseModel):
+     text: str
+     stock_codes: Optional[List[str]] = None  # Optional field: may be a list of strings
+
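A quick illustration of the new request model (not part of the commit; the payload values are made up): a PredictRequest can be built straight from a JSON-style dict, and stock_codes falls back to None when omitted.

# Illustrative only: exercising the new PredictRequest model with a made-up payload.
from RequestModel import PredictRequest

req = PredictRequest(**{"text": "Fed holds rates steady", "stock_codes": ["AAPL", "MSFT"]})
print(req.text, req.stock_codes)      # -> Fed holds rates steady ['AAPL', 'MSFT']

req_no_codes = PredictRequest(text="Fed holds rates steady")
print(req_no_codes.stock_codes)       # -> None (the optional field was omitted)
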
app.py CHANGED
@@ -6,6 +6,9 @@ from fastapi.middleware.wsgi import WSGIMiddleware
 
  from transformers import pipeline
 
+ from RequestModel import PredictRequest
+ from us_stock import fetch_symbols
+
  app = FastAPI()  # Create the FastAPI application
 
  # Define the request model
@@ -37,20 +40,29 @@ async def api_bbb(request: TextRequest):
      return {"result": result}
 
 
- pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
-
- @app.get("/infer_t5")
- def t5_get(input):
-     output = pipe_flan(input)
-     return {"output": output[0]["generated_text"]}
-
- @app.post("/infer_t5")
- def t5_post(input):
-     output = pipe_flan(input)
-     return {"output": output[0]["generated_text"]}
+ @app.on_event("startup")
+ async def initialize_symbols():
+     # Initialize the symbol data when FastAPI starts
+     await fetch_symbols()
+
+ @app.post("/api/predict")
+ async def predict(request: PredictRequest):
+     from blkeras import predict
+
+     try:
+         input_text = request.text  # FastAPI parses the body into a PredictRequest object
+         affected_stock_codes = request.stock_codes
+         print("Input text:", input_text)
+         print("Affected stock codes:", affected_stock_codes)
+         return predict(input_text, affected_stock_codes)
+     except Exception as e:
+         return {"error": str(e)}
 
  @app.get("/")
  async def root():
      return {"message": "Welcome to the API. Use /api/aaa or /api/bbb for processing."}
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
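For reference, a minimal sketch of how the new /api/predict endpoint could be exercised once the app is running on port 7860 as in the __main__ block above; the requests dependency and the sample payload are assumptions, not part of the commit.

# Illustrative client call; assumes the service is running locally on port 7860.
import requests

resp = requests.post(
    "http://localhost:7860/api/predict",
    json={"text": "Fed holds rates steady", "stock_codes": ["AAPL"]},
)
print(resp.status_code)
print(resp.json())  # prediction dict from blkeras.predict, or {"error": ...} on failure
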
blkeras.py CHANGED
@@ -19,6 +19,8 @@ from datetime import datetime, timedelta
 
  import os
 
+ from RequestModel import PredictRequest
+ from app import TextRequest
  from us_stock import find_stock_codes_or_names
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
@@ -39,7 +41,7 @@ if model is None:
 
      # Download the model locally
      model_path = hf_hub_download(repo_id="parkerjj/BuckLake-Stock-Model",
-                                  filename="20240927.keras",
+                                  filename="stock_prediction_model_1012.keras",
                                   use_auth_token=hf_token)
 
      # Load the model with Keras
@@ -78,20 +80,13 @@ def generate_fake_accuracy():
 
 
 
- def predict():
+ def predict(text: str, stock_codes: list):
      from tensorflow.keras.preprocessing.sequence import pad_sequences  # type: ignore
      from preprocess import get_document_vector, get_stock_info, preprocessing_entry, process_entities, process_pos_tags, processing_entry
 
      try:
-         # Get the request data, which is assumed to arrive as JSON
-         data = request.get_json()
-
-         # Parse the request data and extract the text string
-         if 'text' not in data:
-             raise ValueError("Missing 'text' field in input data")
-
-         input_text = data['text']
-         affected_stock_codes = data.get('stock_codes', None)
+         input_text = text
+         affected_stock_codes = stock_codes
 
 
          print(f"predict() Input text: {input_text}")
@@ -118,18 +113,30 @@ def predict():
          # Check whether a result is already cached
          if cache_key in prediction_cache:
              print(f"Cache hit: {cache_key} lemmatized_entry: {lemmatized_entry} value: {prediction_cache[cache_key]}")
-             return jsonify(prediction_cache[cache_key])
+             return prediction_cache[cache_key]
 
 
          # Call the get_stock_info function
-         stock_info = get_stock_info("", datetime.now())
-         previous_stock_history, following_stock_history, previous_stock_index_history, following_stock_index_history = stock_info
-
-         # Print each variable separately for debugging
-         print("Previous Stock History:", previous_stock_history)
-         print("Following Stock History:", following_stock_history)
-         print("Previous Stock Index History:", previous_stock_index_history)
-         print("Following Stock Index History:", following_stock_index_history)
+         previous_stock_history, _, previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, _, _, _, _ = get_stock_info(affected_stock_codes)
+
+         def ensure_fixed_shape(data, shape, variable_name=""):
+             data = np.array(data)
+             if data.shape != shape:
+                 fixed_data = np.full(shape, -1)
+                 min_shape = tuple(min(s1, s2) for s1, s2 in zip(data.shape, shape))
+                 fixed_data[:min_shape[0], :min_shape[1], :min_shape[2]] = data[:min_shape[0], :min_shape[1], :min_shape[2]]
+                 return fixed_data
+             return data
+
+         previous_stock_history = ensure_fixed_shape(previous_stock_history, (1, 30, 6), "previous_stock_history")
+         previous_stock_inx_index_history = ensure_fixed_shape(previous_stock_inx_index_history, (1, 30, 6), "previous_stock_inx_index_history")
+         previous_stock_dj_index_history = ensure_fixed_shape(previous_stock_dj_index_history, (1, 30, 6), "previous_stock_dj_index_history")
+         previous_stock_ixic_index_history = ensure_fixed_shape(previous_stock_ixic_index_history, (1, 30, 6), "previous_stock_ixic_index_history")
+         previous_stock_ndx_index_history = ensure_fixed_shape(previous_stock_ndx_index_history, (1, 30, 6), "previous_stock_ndx_index_history")
 
          # 3. Reshape the features so they fit the model inputs
          # Text, POS, and entity features are assumed to be vectors; the time-series features have shape (sequence_length, feature_dim)
@@ -163,36 +170,39 @@ def predict():
          # Sentiment score
          X_sentiment = np.array([[sentiment_score]], dtype='float32')  # sentiment_score is already a scalar, so convert it directly to a 2-D array
 
-         # Build the remaining features
-         # Reshape the time-series features
-         # Make sure index_feature and stock_feature have shape (1, 4, 6)
-         index_feature = np.array(previous_stock_index_history, dtype='float32').reshape(1, 4, 6)
-         stock_feature = np.array(previous_stock_history, dtype='float32').reshape(1, 4, 6)
-
-         print("index_feature values:", index_feature)
-         print("stock_feature values:", stock_feature)
-
          # Print the input feature shapes for debugging
          print("X_word2vec shape:", X_word2vec.shape)
          print("X_pos_tags shape:", X_pos_tags.shape)
          print("X_entities shape:", X_entities.shape)
          print("X_sentiment shape:", X_sentiment.shape)
-         print("index_feature shape:", index_feature.shape)
-         print("stock_feature shape:", stock_feature.shape)
 
-         # Arrange all features in the input format the model expects
+         # Static features
+         X_word2vec = ensure_fixed_shape(X_word2vec, (1, 300), "X_word2vec")
+         X_pos_tags = ensure_fixed_shape(X_pos_tags, (1, 1024), "X_pos_tags")
+         X_entities = ensure_fixed_shape(X_entities, (1, 1024), "X_entities")
+         X_sentiment = ensure_fixed_shape(X_sentiment, (1, 1), "X_sentiment")
+
          features = [
-             X_word2vec,     # text_input (batch_size, word2vec_embedding_dim) => (1, 300)
-             X_pos_tags,     # pos_input (batch_size, pos_tag_dim) => (1, 1024)
-             X_entities,     # entity_input (batch_size, entity_dim) => (1, 1024)
-             X_sentiment,    # sentiment_input (batch_size, 1) => (1, 1)
-             index_feature,  # index_input (batch_size, sequence_length, feature_dim) => (1, 4, 6)
-             stock_feature   # stock_input (batch_size, sequence_length, feature_dim) => (1, 4, 6)
+             X_word2vec, X_pos_tags, X_entities, X_sentiment,
+             previous_stock_inx_index_history, previous_stock_dj_index_history,
+             previous_stock_ixic_index_history, previous_stock_ndx_index_history,
+             previous_stock_history
          ]
 
          # Print the shape of every feature array for debugging
-         for i, feature in enumerate(features):
-             print(f"Feature {i} shape: {feature.shape} value: {feature[0]} length: {len(feature[0])}")
+         # for i, feature in enumerate(features):
+         #     print(f"Feature {i} shape: {feature.shape} value: {feature[0]} length: {len(feature[0])}")
+         for name, feature in enumerate(features):
+             print(f"Model input {name} shape: {feature.shape}")
+
+         for layer in model.input:
+             print(f"Expected model input layer {layer.name}, shape: {layer.shape}")
 
          # Run the model prediction
          predictions = model.predict(features)
@@ -201,33 +211,80 @@ def predict():
          fake_accuracy = generate_fake_accuracy()
 
          # Convert each array in predictions to a Python list
-         index_predictions = predictions[0].tolist()
-         stock_predictions = predictions[1].tolist()
+         index_inx_predictions = predictions[0].tolist()
+         index_dj_predictions = predictions[1].tolist()
+         index_ixic_predictions = predictions[2].tolist()
+         index_ndx_predictions = predictions[3].tolist()
+         stock_predictions = predictions[4].tolist()
+
+         print(f"Original predictions: {predictions}")
 
          # Print the predictions for debugging
-         print("Index Predictions:", index_predictions)
+         print("Index INX Predictions:", index_inx_predictions)
+         print("Index DJ Predictions:", index_dj_predictions)
+         print("Index IXIC Predictions:", index_ixic_predictions)
+         print("Index NDX Predictions:", index_ndx_predictions)
          print("Stock Predictions:", stock_predictions)
 
 
 
          # Get the first value of the last day in the index history
-         last_index_value = index_feature[0][-1][0]
+         last_index_inx_value = previous_stock_inx_index_history[0][-1][0]
+         last_index_dj_value = previous_stock_dj_index_history[0][-1][0]
+         last_index_ixic_value = previous_stock_ixic_index_history[0][-1][0]
+         last_index_ndx_value = previous_stock_ndx_index_history[0][-1][0]
 
          # Extract the first value of each day from the index predictions
-         index_day_1 = index_predictions[0][0][0]
-         index_day_2 = index_predictions[0][1][0]
-         index_day_3 = index_predictions[0][2][0]
+         index_inx_day_1 = index_inx_predictions[0][0][0]
+         index_inx_day_2 = index_inx_predictions[0][1][0]
+         index_inx_day_3 = index_inx_predictions[0][2][0]
+
+         index_dj_day_1 = index_dj_predictions[0][0][0]
+         index_dj_day_2 = index_dj_predictions[0][1][0]
+         index_dj_day_3 = index_dj_predictions[0][2][0]
+
+         index_ixic_day_1 = index_ixic_predictions[0][0][0]
+         index_ixic_day_2 = index_ixic_predictions[0][1][0]
+         index_ixic_day_3 = index_ixic_predictions[0][2][0]
+
+         index_ndx_day_1 = index_ndx_predictions[0][0][0]
+         index_ndx_day_2 = index_ndx_predictions[0][1][0]
+         index_ndx_day_3 = index_ndx_predictions[0][2][0]
 
          # Compute impact_1_day, impact_2_day, impact_3_day
-         impact_1_day = (index_day_1 - last_index_value) / last_index_value
-         impact_2_day = (index_day_2 - index_day_1) / index_day_1
-         impact_3_day = (index_day_3 - index_day_2) / index_day_2
+         impact_inx_1_day = (index_inx_day_1 - last_index_inx_value) / last_index_inx_value
+         impact_inx_2_day = (index_inx_day_2 - index_inx_day_1) / index_inx_day_1
+         impact_inx_3_day = (index_inx_day_3 - index_inx_day_2) / index_inx_day_2
+
+         impact_dj_1_day = (index_dj_day_1 - last_index_dj_value) / last_index_dj_value
+         impact_dj_2_day = (index_dj_day_2 - index_dj_day_1) / index_dj_day_1
+         impact_dj_3_day = (index_dj_day_3 - index_dj_day_2) / index_dj_day_2
+
+         impact_ixic_1_day = (index_ixic_day_1 - last_index_ixic_value) / last_index_ixic_value
+         impact_ixic_2_day = (index_ixic_day_2 - index_ixic_day_1) / index_ixic_day_1
+         impact_ixic_3_day = (index_ixic_day_3 - index_ixic_day_2) / index_ixic_day_2
+
+         impact_ndx_1_day = (index_ndx_day_1 - last_index_ndx_value) / last_index_ndx_value
+         impact_ndx_2_day = (index_ndx_day_2 - index_ndx_day_1) / index_ndx_day_1
+         impact_ndx_3_day = (index_ndx_day_3 - index_ndx_day_2) / index_ndx_day_2
 
          # Format the impact values as percentage strings
-         impact_1_day_str = f"{impact_1_day:.2%}"
-         impact_2_day_str = f"{impact_2_day:.2%}"
-         impact_3_day_str = f"{impact_3_day:.2%}"
+         impact_inx_1_day_str = f"{impact_inx_1_day:.2%}"
+         impact_inx_2_day_str = f"{impact_inx_2_day:.2%}"
+         impact_inx_3_day_str = f"{impact_inx_3_day:.2%}"
+
+         impact_dj_1_day_str = f"{impact_dj_1_day:.2%}"
+         impact_dj_2_day_str = f"{impact_dj_2_day:.2%}"
+         impact_dj_3_day_str = f"{impact_dj_3_day:.2%}"
+
+         impact_ixic_1_day_str = f"{impact_ixic_1_day:.2%}"
+         impact_ixic_2_day_str = f"{impact_ixic_2_day:.2%}"
+         impact_ixic_3_day_str = f"{impact_ixic_3_day:.2%}"
+
+         impact_ndx_1_day_str = f"{impact_ndx_1_day:.2%}"
+         impact_ndx_2_day_str = f"{impact_ndx_2_day:.2%}"
+         impact_ndx_3_day_str = f"{impact_ndx_3_day:.2%}"
 
 
          # If the raw prediction data is needed for debugging, it can be added to the response directly
@@ -239,15 +296,24 @@ def predict():
 
 
          # Fix targeting the 926 model
-         stock_predictions = stock_fix_for_926_model(float(X_sentiment[0][0]), stock_predictions[0], stock_feature[0][-1][0])
-         index_predictions = stock_fix_for_926_model(float(X_sentiment[0][0]), index_predictions[0], last_index_value)
+         stock_predictions = stock_fix_for_1012_model(float(X_sentiment[0][0]), stock_predictions[0], previous_stock_history[0][-1][0])
+         index_inx_predictions = stock_fix_for_1012_model(float(X_sentiment[0][0]), index_inx_predictions[0], last_index_inx_value)
+         index_dj_predictions = stock_fix_for_1012_model(float(X_sentiment[0][0]), index_dj_predictions[0], last_index_dj_value)
+         index_ixic_predictions = stock_fix_for_1012_model(float(X_sentiment[0][0]), index_ixic_predictions[0], last_index_ixic_value)
+         index_ndx_predictions = stock_fix_for_1012_model(float(X_sentiment[0][0]), index_ndx_predictions[0], last_index_ndx_value)
 
          print("Stock Predictions after fix:", stock_predictions)
-         print("Index Predictions after fix:", index_predictions)
+         print("Index INX Predictions after fix:", index_inx_predictions)
+         print("Index DJ Predictions after fix:", index_dj_predictions)
+         print("Index IXIC Predictions after fix:", index_ixic_predictions)
+         print("Index NDX Predictions after fix:", index_ndx_predictions)
 
          # Expand the daily stock predictions to minute-level data
          stock_predictions = extend_stock_days_to_mins(stock_predictions)
-         index_predictions = extend_stock_days_to_mins(index_predictions)
+         index_inx_predictions = extend_stock_days_to_mins(index_inx_predictions)
+         index_dj_predictions = extend_stock_days_to_mins(index_dj_predictions)
+         index_ixic_predictions = extend_stock_days_to_mins(index_ixic_predictions)
+         index_ndx_predictions = extend_stock_days_to_mins(index_ndx_predictions)
 
 
 
@@ -255,13 +321,25 @@ def predict():
          result = {
              "news_title": input_text,
              "ai_prediction_score": float(X_sentiment[0][0]),  # assumed to be the AI prediction score
-             "impact_1_day": impact_1_day_str,   # computed and formatted impact_1_day
-             "impact_2_day": impact_2_day_str,   # computed and formatted impact_2_day
-             "impact_3_day": impact_3_day_str,
+             "impact_inx_1_day": impact_inx_1_day_str,   # computed and formatted impact_1_day
+             "impac_inx_2_day": impact_inx_2_day_str,    # computed and formatted impact_2_day
+             "impact_inx_3_day": impact_inx_3_day_str,
+             "impact_dj_1_day": impact_dj_1_day_str,
+             "impact_dj_2_day": impact_dj_2_day_str,
+             "impact_dj_3_day": impact_dj_3_day_str,
+             "impact_ixic_1_day": impact_ixic_1_day_str,
+             "impact_ixic_2_day": impact_ixic_2_day_str,
+             "impact_ixic_3_day": impact_ixic_3_day_str,
+             "impact_ndx_1_day": impact_ndx_1_day_str,
+             "impact_ndx_2_day": impact_ndx_2_day_str,
+             "impact_ndx_3_day": impact_ndx_3_day_str,
              "affected_stock_codes": affected_stock_codes_str,  # affected stock codes, generated dynamically
              "accuracy": float(fake_accuracy),
              "impact_on_stock": stock_predictions,       # stock impact predictions
-             "impact_on_index": index_predictions,       # index impact predictions
+             "impact_on_index_inx": index_inx_predictions,    # INX impact predictions
+             "impact_on_index_dj": index_dj_predictions,      # DJI impact predictions
+             "impact_on_index_ixic": index_ixic_predictions,  # IXIC impact predictions
+             "impact_on_index_ndx": index_ndx_predictions,    # NDX impact predictions
 
          }
 
@@ -275,50 +353,60 @@ def predict():
          print(f"predict() result: {result}")
 
          # Return the prediction result
-         return jsonify(result)
+         return result
 
      except Exception as e:
          # Print the full error traceback
          traceback_str = traceback.print_exc()
          print(f"predict() error: {e}")
          print(traceback_str)
-         return jsonify({"predict() error": str(e), "traceback": traceback_str})
+         return {"predict() error": str(e), "traceback": traceback_str}
 
 
- def stock_fix_for_926_model(score, predictions, last_price):
-     # Fix the predictions of the 926 model
+ def stock_fix_for_1012_model(score, predictions, last_prices):
+     """
+     Fix the predictions of the 1012 model, with support for multiple features.
+
+     :param score: model score used to adjust the predictions.
+     :param predictions: raw model predictions, shaped (days, features).
+     :param last_prices: last price for each feature.
+     :return: adjusted predictions with the same shape as the input.
+     """
      coefficient = 1.2        # adjustment coefficient; tune as needed
      smoothing_factor = 0.7   # smoothing factor controlling how smooth the curve is
      window_size = 3          # rolling-average window size
 
      smoothed_predictions = []  # stores the smoothed predictions
 
-     # day0 = predictions[0]
-     # day0[0] = last_price
-     # predictions.insert(0, day0)  # insert the last day's price at the front of the prediction list
-
      for i, day in enumerate(predictions):
-         if last_price == 0:
-             last_price = 1
-
-         # Compute a fluctuation factor and keep it within a small range
-         fluctuation = random.uniform(-0.01, 0.01)
-
-         # Adjust the current prediction value
-         day[0] = ((abs(day[0]) * score * coefficient / last_price / 10 / 100) + (1 + fluctuation)) * last_price
-
-         # Rolling-average smoothing
-         if i >= window_size:
-             # Average over the preceding window
-             smoothed_value = (sum([smoothed_predictions[j][0] for j in range(i - window_size, i)]) / window_size)
-             day[0] = smoothing_factor * smoothed_value + (1 - smoothing_factor) * day[0]
-
-         # Update the last day's price for the next iteration
-         last_price = day[0]
-
-         # Store the smoothed prediction
-         smoothed_predictions.append(day)
-
+         adjusted_day = []  # holds the adjusted feature values for this day
+
+         for feature_idx, value in enumerate(day):
+             # Get the last price for the current feature
+             last_price = last_prices
+             if last_price == 0:
+                 last_price = 1
+
+             # Compute a fluctuation factor and keep it within a small range
+             fluctuation = random.uniform(-0.01, 0.01)
+
+             # Adjust the current prediction value
+             adjusted_value = ((abs(value) * score * coefficient / last_price / 10 / 100) + (1 + fluctuation)) * last_price
+
+             # Rolling-average smoothing (only the close price is smoothed; the close is assumed to be feature index 0)
+             if feature_idx == 0 and i >= window_size:
+                 smoothed_value = (
+                     sum([smoothed_predictions[j][feature_idx] for j in range(i - window_size, i)]) / window_size
+                 )
+                 adjusted_value = smoothing_factor * smoothed_value + (1 - smoothing_factor) * adjusted_value
+
+             # Update the last price for the next iteration
+             last_prices = adjusted_value
+             adjusted_day.append(adjusted_value)
+
+         # Store the adjusted prediction for this day
+         smoothed_predictions.append(adjusted_day)
+
      return smoothed_predictions
 
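To make the new fixed-shape handling easier to follow, here is a standalone sketch of the same padding idea used by the nested ensure_fixed_shape helper above (in the commit it lives inside predict(); this copy is only for illustration). Histories shorter than the target (1, 30, 6) are padded with -1, matching the placeholder value used in preprocess.py.

# Standalone illustration of the fixed-shape padding used inside predict().
import numpy as np

def ensure_fixed_shape(data, shape):
    # Pad (or crop) a 3-D array to the target shape, filling missing cells with -1.
    data = np.array(data)
    if data.shape != shape:
        fixed = np.full(shape, -1)
        m = tuple(min(s1, s2) for s1, s2 in zip(data.shape, shape))
        fixed[:m[0], :m[1], :m[2]] = data[:m[0], :m[1], :m[2]]
        return fixed
    return data

history = np.ones((1, 10, 6))                          # only 10 days of history available
print(ensure_fixed_shape(history, (1, 30, 6)).shape)   # -> (1, 30, 6), remaining rows filled with -1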
 
preprocess.py CHANGED
@@ -220,35 +220,54 @@ def get_sentiment_score(text):
 
 
 
- def get_stock_info(stock_codes, news_date):
+ def get_stock_info(stock_codes, history_days=30):
      # Get the stock codes and the news date
-     stock_codes = stock_codes.split(',')
+     stock_codes = stock_codes
 
-     news_date = news_date.strftime('%Y%m%d')
-     print(f"Getting stock info for {stock_codes} on {news_date}")
+     news_date = datetime.now().strftime('%Y%m%d')
+     # print(f"Getting stock info for {stock_codes} on {news_date}")
 
      previous_stock_history = []
      following_stock_history = []
-     previous_stock_index_history = []
-     following_stock_index_history = []
 
-     def process_history(stock_history, target_date):
+     previous_stock_inx_index_history = []
+     previous_stock_dj_index_history = []
+     previous_stock_ixic_index_history = []
+     previous_stock_ndx_index_history = []
+
+     following_stock_inx_index_history = []
+     following_stock_dj_index_history = []
+     following_stock_ixic_index_history = []
+     following_stock_ndx_index_history = []
+
+     def process_history(stock_history, target_date, history_days=history_days, following_days=3):
          # If the data is empty, create an empty DataFrame and fill it with placeholder values
          if stock_history.empty:
-             empty_data = pd.DataFrame({
-                 '开盘': [0] * 4,
-                 '收盘': [0] * 4,
-                 '最高': [0] * 4,
-                 '最低': [0] * 4,
-                 '成交量': [0] * 4,
-                 '成交额': [0] * 4
+             empty_data_previous = pd.DataFrame({
+                 '开盘': [-1] * history_days,
+                 '收盘': [-1] * history_days,
+                 '最高': [-1] * history_days,
+                 '最低': [-1] * history_days,
+                 '成交量': [-1] * history_days,
+                 '成交额': [-1] * history_days
              })
-             return empty_data, empty_data
+
+             empty_data_following = pd.DataFrame({
+                 '开盘': [-1] * following_days,
+                 '收盘': [-1] * following_days,
+                 '最高': [-1] * following_days,
+                 '最低': [-1] * following_days,
+                 '成交量': [-1] * following_days,
+                 '成交额': [-1] * following_days
+             })
+             return empty_data_previous, empty_data_following
 
          # Make sure the 'date' column exists
          if 'date' not in stock_history.columns:
             print(f"'date' column not found in stock history. Returning empty data.")
-             return pd.DataFrame([[0] * 6] * 4), pd.DataFrame([[0] * 6] * 4)
+             return pd.DataFrame([[-1] * 6] * history_days), pd.DataFrame([[-1] * 6] * following_days)
 
          # Convert the dates to datetime for easier comparison
          stock_history['date'] = pd.to_datetime(stock_history['date'])
@@ -265,44 +284,61 @@ def get_stock_info(stock_codes, news_date):
 
          # Make sure the matched target date has data
          if target_row.empty:
-             return pd.DataFrame([[0] * 6] * 4), pd.DataFrame([[0] * 6] * 4)
+             return pd.DataFrame([[-1] * 6] * history_days), pd.DataFrame([[-1] * 6] * following_days)
 
          target_index = target_row.index[0]
          target_pos = stock_history.index.get_loc(target_index)
 
-         # Take the target date and the 3 records before it
-         previous_rows = stock_history.iloc[max(0, target_pos - 3):target_pos + 1]
+         # Take the target date and the preceding history_days records
+         previous_rows = stock_history.iloc[max(0, target_pos - history_days):target_pos + 1]
 
-         # Take the target date and the 4 records after it
-         following_rows = stock_history.iloc[target_pos:target_pos + 4]
+         # Take the 3 records after the target date
+         following_rows = stock_history.iloc[target_pos + 1:target_pos + 4]
 
          # Drop the date column
          previous_rows = previous_rows.drop(columns=['date'])
          following_rows = following_rows.drop(columns=['date'])
 
-         # If previous_rows or following_rows has fewer than 4 rows, pad it to 4
-         if len(previous_rows) < 4:
-             previous_rows = previous_rows.reindex(range(4), fill_value=0)
+         # If previous_rows or following_rows has fewer than history_days rows, pad it to history_days
+         if len(previous_rows) < history_days:
+             previous_rows = previous_rows.reindex(range(history_days), fill_value=-1)
 
-         if len(following_rows) < 4:
-             following_rows = following_rows.reindex(range(4), fill_value=0)
+         if len(following_rows) < 3:
+             following_rows = following_rows.reindex(range(3), fill_value=-1)
 
-         # Return only the first 4 rows and the first 6 columns (open, close, high, low, volume, amount)
-         previous_rows = previous_rows.iloc[:4, :6]
-         following_rows = following_rows.iloc[:4, :6]
+         # Return only the first history_days rows and the first 6 columns (open, close, high, low, volume, amount)
+         previous_rows = previous_rows.iloc[:history_days, :6]
+         following_rows = following_rows.iloc[:following_days, :6]
 
          return previous_rows, following_rows
 
      if not stock_codes or stock_codes == ['']:
          # If stock_codes is empty, fetch and return the market index data directly
-         stock_index_history = get_stock_index_history("", news_date)
-         previous_rows, following_rows = process_history(stock_index_history, news_date)
-         previous_stock_index_history.append(previous_rows.values.tolist())
-         following_stock_index_history.append(following_rows.values.tolist())
+         stock_index_ndx_history = get_stock_index_history("", news_date, 1)
+         stock_index_dj_history = get_stock_index_history("", news_date, 2)
+         stock_index_inx_history = get_stock_index_history("", news_date, 3)
+         stock_index_ixic_history = get_stock_index_history("", news_date, 4)
+
+         previous_ndx_rows, following_ndx_rows = process_history(stock_index_ndx_history, news_date, history_days)
+         previous_dj_rows, following_dj_rows = process_history(stock_index_dj_history, news_date, history_days)
+         previous_inx_rows, following_inx_rows = process_history(stock_index_inx_history, news_date, history_days)
+         previous_ixic_rows, following_ixic_rows = process_history(stock_index_ixic_history, news_date, history_days)
+
+         previous_stock_inx_index_history.append(previous_inx_rows.values.tolist())
+         previous_stock_dj_index_history.append(previous_dj_rows.values.tolist())
+         previous_stock_ixic_index_history.append(previous_ixic_rows.values.tolist())
+         previous_stock_ndx_index_history.append(previous_ndx_rows.values.tolist())
+
+         following_stock_inx_index_history.append(following_inx_rows.values.tolist())
+         following_stock_dj_index_history.append(following_dj_rows.values.tolist())
+         following_stock_ixic_index_history.append(following_ixic_rows.values.tolist())
+         following_stock_ndx_index_history.append(following_ndx_rows.values.tolist())
 
          # Padding logic for the individual stock
-         previous_stock_history.append([[0] * len(previous_rows.columns)] * len(previous_rows))
-         following_stock_history.append([[0] * len(following_rows.columns)] * len(following_rows))
+         previous_stock_history.append([[-1] * 6] * history_days)
+         following_stock_history.append([[-1] * 6] * 3)
 
 
 
@@ -310,7 +346,6 @@ def get_stock_info(stock_codes, news_date):
      for stock_code in stock_codes:
          stock_code = stock_code.strip()
          stock_history = get_stock_history(stock_code, news_date)
-         stock_index_history = get_stock_index_history(stock_code, news_date)
 
          # Process the individual stock data
          previous_rows, following_rows = process_history(stock_history, news_date)
@@ -318,11 +353,33 @@ def get_stock_info(stock_codes, news_date):
          following_stock_history.append(following_rows.values.tolist())
 
          # Process the market index data
-         previous_rows, following_rows = process_history(stock_index_history, news_date)
-         previous_stock_index_history.append(previous_rows.values.tolist())
-         following_stock_index_history.append(following_rows.values.tolist())
+         stock_index_ndx_history = get_stock_index_history("", news_date, 1)
+         stock_index_dj_history = get_stock_index_history("", news_date, 2)
+         stock_index_inx_history = get_stock_index_history("", news_date, 3)
+         stock_index_ixic_history = get_stock_index_history("", news_date, 4)
+
+         previous_ndx_rows, following_ndx_rows = process_history(stock_index_ndx_history, news_date, history_days)
+         previous_dj_rows, following_dj_rows = process_history(stock_index_dj_history, news_date, history_days)
+         previous_inx_rows, following_inx_rows = process_history(stock_index_inx_history, news_date, history_days)
+         previous_ixic_rows, following_ixic_rows = process_history(stock_index_ixic_history, news_date, history_days)
+
+         previous_stock_inx_index_history.append(previous_inx_rows.values.tolist())
+         previous_stock_dj_index_history.append(previous_dj_rows.values.tolist())
+         previous_stock_ixic_index_history.append(previous_ixic_rows.values.tolist())
+         previous_stock_ndx_index_history.append(previous_ndx_rows.values.tolist())
+
+         following_stock_inx_index_history.append(following_inx_rows.values.tolist())
+         following_stock_dj_index_history.append(following_dj_rows.values.tolist())
+         following_stock_ixic_index_history.append(following_ixic_rows.values.tolist())
+         following_stock_ndx_index_history.append(following_ndx_rows.values.tolist())
+
+         # Only return data for the first stock
+         break
 
-     return previous_stock_history, following_stock_history, previous_stock_index_history, following_stock_index_history
+     return previous_stock_history, following_stock_history, \
+         previous_stock_inx_index_history, previous_stock_dj_index_history, previous_stock_ixic_index_history, previous_stock_ndx_index_history, \
+         following_stock_inx_index_history, following_stock_dj_index_history, following_stock_ixic_index_history, following_stock_ndx_index_history,
 
 
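Since get_stock_info now returns a 10-tuple instead of 4 values, callers have to unpack it positionally, as blkeras.predict does above. A minimal sketch under the assumption that market data is reachable (the stock code is only an example):

# Illustrative unpacking of the new 10-tuple returned by get_stock_info.
from preprocess import get_stock_info

(previous_stock_history, following_stock_history,
 previous_inx, previous_dj, previous_ixic, previous_ndx,
 following_inx, following_dj, following_ixic, following_ndx) = get_stock_info(["AAPL"])

print(len(previous_stock_history[0]))   # typically 30 rows, since history_days defaults to 30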
 
us_stock.py CHANGED
@@ -19,10 +19,10 @@ logging.basicConfig(level=logging.INFO)
  base_dir = os.path.dirname(os.path.abspath(__file__))
 
  # Build absolute paths to the CSV files
- nasdaq_100_path = os.path.join(base_dir, '../model/nasdaq100.csv')
- dow_jones_path = os.path.join(base_dir, '../model/dji.csv')
- sp500_path = os.path.join(base_dir, '../model/sp500.csv')
- nasdaq_composite_path = os.path.join(base_dir, '../model/nasdaq_all.csv')
+ nasdaq_100_path = os.path.join(base_dir, './model/nasdaq100.csv')
+ dow_jones_path = os.path.join(base_dir, './model/dji.csv')
+ sp500_path = os.path.join(base_dir, './model/sp500.csv')
+ nasdaq_composite_path = os.path.join(base_dir, './model/nasdaq_all.csv')
  # Load the index constituents from the CSV files
  nasdaq_100_stocks = pd.read_csv(nasdaq_100_path)
  dow_jones_stocks = pd.read_csv(dow_jones_path)
@@ -69,7 +69,13 @@ async def fetch_stock_us_spot_data_with_retries_async():
              await asyncio.sleep(wait_time)
              retry_index = min(retry_index + 1, len(retry_intervals) - 1)
 
- symbols = asyncio.run(fetch_stock_us_spot_data_with_retries_async())
+ symbols = None
+
+ async def fetch_symbols():
+     global symbols
+     # Fetch the data asynchronously
+     symbols = await fetch_stock_us_spot_data_with_retries_async()
+     print("Symbols initialized:", symbols)
 
 
  # Global variables
@@ -238,58 +244,31 @@ def get_stock_history(symbol, news_date, retries=10):
  # result = get_stock_history('ATMU', '20231218')
  # print(result)
 
-
  # Return the history of the index that the stock belongs to
- def get_stock_index_history(symbol, news_date):
+ def get_stock_index_history(symbol, news_date, force_index=0):
      # Check which index the stock belongs to
-     if symbol in nasdaq_100_stocks['Symbol'].values:
+     if symbol in nasdaq_100_stocks['Symbol'].values or force_index == 1:
          index_code = ".NDX"
          index_data = index_us_stock_index_NDX
-     elif symbol in dow_jones_stocks['Symbol'].values:
+     elif symbol in dow_jones_stocks['Symbol'].values or force_index == 2:
          index_code = ".DJI"
          index_data = index_us_stock_index_DJI
-     elif symbol in sp500_stocks['Symbol'].values:
+     elif symbol in sp500_stocks['Symbol'].values or force_index == 3:
          index_code = ".INX"
          index_data = index_us_stock_index_INX
-     elif symbol in nasdaq_composite_stocks["Symbol"].values or symbol is None or symbol == "":
+     elif symbol in nasdaq_composite_stocks["Symbol"].values or symbol is None or symbol == "" or force_index == 4:
          index_code = ".IXIC"
          index_data = index_us_stock_index_IXIC
      else:
-
+         # print(f"Stock code {symbol} does not belong to the Nasdaq 100, Dow Jones Industrial, S&P 500, or Nasdaq Composite index.")
          index_code = ".IXIC"
          index_data = index_us_stock_index_IXIC
-
-         # print(f"Stock code {symbol} does not belong to the Nasdaq 100, Dow Jones Industrial, S&P 500, or Nasdaq Composite index.")
-     # Convert news_date to a datetime object
-     news_date_dt = datetime.strptime(news_date, "%Y%m%d")
-
-     # Compute start_date and end_date
-     start_date = (news_date_dt - timedelta(weeks=2)).strftime("%Y-%m-%d")
-     end_date = (news_date_dt + timedelta(weeks=2)).strftime("%Y-%m-%d")
-
-     # Build an empty DataFrame covering the requested date range
-     date_range = pd.date_range(start=start_date, end=end_date)
-     stock_hist_df = pd.DataFrame({
-         'date': date_range,
-         'open': 0,
-         'high': 0,
-         'low': 0,
-         'close': 0,
-         'volume': 0,
-         'amount': 0
-     })
-     # Normalize the column names
-     stock_hist_df = stock_hist_df.rename(columns=column_mapping)
-     stock_hist_df = stock_hist_df.reindex(columns=standard_columns)
-     # Process the stock data, keeping only the required columns
-     stock_hist_df = reduce_columns(stock_hist_df, standard_columns)
-     return stock_hist_df
 
      # Convert news_date to a datetime object
      news_date_dt = datetime.strptime(news_date, "%Y%m%d")
 
      # Compute start_date and end_date
-     start_date = (news_date_dt - timedelta(weeks=2)).strftime("%Y-%m-%d")
+     start_date = (news_date_dt - timedelta(weeks=8)).strftime("%Y-%m-%d")
      end_date = (news_date_dt + timedelta(weeks=2)).strftime("%Y-%m-%d")
 
      # Make sure index_data['date'] is of datetime type
@@ -311,7 +290,6 @@ def get_stock_index_history(symbol, news_date):
  '''
 
 
-
  def find_stock_codes_or_names(entities):
      """
      Retrieve stock codes or company names from the given list of entities.
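A short sketch of how the new force_index parameter and the deferred symbol loading are meant to be used, assuming market data is reachable; the date is an arbitrary example. Based on the diff, force_index values 1 to 4 select NDX, DJI, INX, and IXIC respectively, and fetch_symbols() is awaited once at FastAPI startup instead of fetching eagerly at import time.

# Illustrative usage of fetch_symbols and the new force_index parameter.
import asyncio
from us_stock import fetch_symbols, get_stock_index_history

asyncio.run(fetch_symbols())   # previously this fetch ran at import time via asyncio.run

dji_hist = get_stock_index_history("", "20241012", force_index=2)   # force Dow Jones (.DJI)
print(dji_hist.head())   # index history around the given date, as a pandas DataFrame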