Spaces:
Running
Running
File size: 16,075 Bytes
b0f930e 81fa929 b0f930e 1d8c17c eb44f1d 2547af9 eb44f1d 2547af9 eb44f1d b0f930e eb44f1d b0f930e eb44f1d b0f930e 2547af9 eb44f1d b0f930e 2547af9 eb44f1d 2547af9 81fa929 2547af9 eb44f1d 2547af9 eb44f1d b0f930e 2547af9 b0f930e 2547af9 f4ca46a b0f930e 2547af9 e13201e 2547af9 eb44f1d b0f930e 2547af9 37386aa b0f930e 81fa929 37386aa 2547af9 37386aa fd3aa06 37386aa d4c67b3 1a956e7 37386aa 2547af9 ea29c72 2547af9 d5e61b7 7ffffdd d5e61b7 2547af9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 |
import streamlit as st
from datetime import datetime, timedelta
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import shap
import matplotlib.pyplot as plt
class DataFetcher:
    """Download recent price history for a single ticker via yfinance.

    The request parameters are kept as plain public attributes (``ticker``,
    ``nb_days``, ``interval``); ``data`` stays ``None`` until
    :meth:`fetch_data` has been called.
    """

    def __init__(self, ticker, nb_days, interval="1h"):
        """Store the request parameters; nothing is downloaded yet.

        Args:
            ticker: Symbol handed to ``yf.download`` (e.g. ``"BTC-USD"``).
            nb_days: Number of days to look back from today's midnight.
            interval: Bar size handed to ``yf.download``. Defaults to
                ``"1h"``, the value that used to be hard-coded.
        """
        self.ticker = ticker
        self.nb_days = nb_days
        self.interval = interval
        self.data = None

    def fetch_data(self):
        """Download the data, cache it on ``self.data`` and return it.

        The requested window starts ``nb_days`` days before today's local
        midnight and ends at tomorrow's local midnight.

        Returns:
            Whatever ``yf.download`` returns for the request.
        """
        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        start_date = midnight - timedelta(days=self.nb_days)
        # The end bound is pushed to the next midnight, presumably so that
        # today's bars fall inside the window -- confirm against yfinance.
        end_date = midnight + timedelta(days=1)
        self.data = yf.download(
            self.ticker,
            start=start_date,
            end=end_date,
            interval=self.interval,
        )
        return self.data
class FinancialDataProcessor:
    """Derive return, scenario and probability columns from price data.

    The processor works on its own copy of the frame it is given, so the
    caller's DataFrame is never modified.
    """

    def __init__(self, data):
        """Keep a private copy of ``data`` (a pandas DataFrame of prices)."""
        self.data = data.copy()

    def _flatten_columns(self):
        """Flatten MultiIndex columns into single-level ``"a_b"`` names."""
        if isinstance(self.data.columns, pd.MultiIndex):
            self.data.columns = [
                f"{col[0]}_{col[1]}" if col[1] else col[0]
                for col in self.data.columns
            ]

    def _find_close_column(self):
        """Return the label of the closing-price column, or ``None``.

        An exact ``'Close'`` wins, then a flattened ``'Close_<x>'`` /
        ``'<x>_Close'`` name, and only then any other label containing
        ``'Close'``. Without this ordering an ``'Adj Close'`` column that
        happens to come first would be picked and renamed to ``'Close'``,
        leaving two columns with that name.
        """
        candidates = [col for col in self.data.columns if 'Close' in str(col)]
        for col in candidates:
            if str(col) == 'Close':
                return col
        for col in candidates:
            name = str(col)
            if name.startswith('Close_') or name.endswith('_Close'):
                return col
        return candidates[0] if candidates else None

    def calculate_returns(self, periods_per_year=252):
        """Add log-return, scenario and adjusted/annualized return columns.

        Rows with a non-positive close, and rows whose log return is
        missing or infinite, are dropped.

        Args:
            periods_per_year: Factor used to scale ``AdjustedLogReturn``
                into ``AnnualizedReturn``. Defaults to 252, the value that
                used to be hard-coded; pass a different factor when the
                bars are not daily.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If no column name contains ``'Close'``.
        """
        self._flatten_columns()
        close_column = self._find_close_column()
        if close_column is None:
            raise ValueError("The 'Close' column is missing in the dataset.")
        self.data.rename(columns={close_column: 'Close'}, inplace=True)

        # log() is only defined for strictly positive prices.
        self.data = self.data[self.data['Close'] > 0].copy()
        self.data['LogReturn'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data.replace([np.inf, -np.inf], np.nan, inplace=True)
        # The first row has no predecessor, so its return is NaN.
        self.data.dropna(subset=['LogReturn'], inplace=True)

        # A zero return counts as 'Sell' because the test is strictly > 0.
        self.data['Scenario'] = np.where(self.data['LogReturn'] > 0, 'Buy', 'Sell')
        # Flip the sign on 'Sell' rows so the adjusted return is non-negative.
        self.data['AdjustedLogReturn'] = np.where(
            self.data['Scenario'] == 'Sell',
            -self.data['LogReturn'],
            self.data['LogReturn'],
        )
        self.data['AnnualizedReturn'] = self.data['AdjustedLogReturn'] * periods_per_year
        return self

    def calculate_probabilities(self):
        """Add ``Buy%`` and ``Sell%`` as ``(1 +/- tanh(LogReturn)) / 2``.

        The two columns always sum to 1.

        Returns:
            The processed DataFrame (``self.data``).
        """
        squashed = np.tanh(self.data['LogReturn'])
        self.data['Buy%'] = (1 + squashed) / 2
        self.data['Sell%'] = (1 - squashed) / 2
        return self.data

    def apply_pca_calculations(self, pca_result):
        """Derive PCA-based signal columns and merge them into the data.

        Only rows with a strictly positive ``PC1`` are kept; the log return
        is taken between consecutive *kept* rows. The result is joined to
        ``self.data`` on the index, so rows absent from either side are
        dropped.

        Args:
            pca_result: DataFrame with a ``PC1`` column, indexed like the
                processed data. It is not modified.

        Returns:
            The merged DataFrame (also stored on ``self.data``).
        """
        pca_result = pca_result[pca_result['PC1'] > 0].copy()
        pca_result['PCA_LogReturn'] = np.log(pca_result['PC1'] / pca_result['PC1'].shift(1))
        pca_result.replace([np.inf, -np.inf], np.nan, inplace=True)
        pca_result.dropna(subset=['PCA_LogReturn'], inplace=True)

        pca_result['PCA_Scenario'] = np.where(pca_result['PCA_LogReturn'] > 0, 'Buy', 'Sell')
        squashed = np.tanh(pca_result['PCA_LogReturn'])
        pca_result['PCA_Buy%'] = (1 + squashed) / 2
        pca_result['PCA_Sell%'] = (1 - squashed) / 2

        self.data = self.data.merge(pca_result, left_index=True, right_index=True)
        return self.data
class PCATransformer:
    """Standardize the numeric columns of a frame and project them with PCA."""

    def __init__(self, n_components=1):
        """Create the scaler and the PCA estimator.

        Args:
            n_components: Forwarded unchanged to ``PCA(n_components=...)``.
        """
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data):
        """Fit scaler and PCA on ``data`` and return the projected frame.

        Non-numeric columns are ignored. The output columns are named
        ``PC1``, ``PC2``, ... and the output keeps the index of ``data``.

        Args:
            data: DataFrame containing at least one numeric column.

        Returns:
            DataFrame with one ``PC<k>`` column per component.
        """
        numeric_data = data.select_dtypes(include=[np.number])
        scaled_data = self.scaler.fit_transform(numeric_data)
        components = self.pca.fit_transform(scaled_data)
        # Name the columns from the actual output width rather than from
        # self.n_components, which is not necessarily an integer.
        column_names = [f'PC{k}' for k in range(1, components.shape[1] + 1)]
        return pd.DataFrame(components, columns=column_names, index=data.index)
class StrategyBuilder:
    """Train a classifier on the processed data and tune signal thresholds."""

    def __init__(self, data):
        """Keep a private copy of the processed DataFrame."""
        self.data = data.copy()

    def train_model(self, target_column):
        """Fit a random forest that predicts ``target_column``.

        Every numeric column is used as a feature. The target itself is
        removed from the features if it happens to be numeric, so the model
        is never given its own label as an input.

        NOTE(review): columns derived directly from the target (for
        example a probability computed from the same return as the
        scenario label) are still used as features -- confirm that this
        is intended.

        Args:
            target_column: Name of the label column in ``self.data``.

        Returns:
            Tuple ``(model, X, y_encoded)``: the fitted classifier, the
            feature frame and the label-encoded target.
        """
        X = self.data.select_dtypes(include=[np.number])
        if target_column in X.columns:
            X = X.drop(columns=[target_column])
        y = self.data[target_column]
        y_encoded = LabelEncoder().fit_transform(y)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X, y_encoded)
        return model, X, y_encoded

    def compute_shapley_values(self, model, X):
        """Return ``shap.TreeExplainer(model).shap_values(X)``."""
        explainer = shap.TreeExplainer(model)
        return explainer.shap_values(X)

    def analyze_feature_importance(self, shap_values, feature_names):
        """Rank features by mean absolute SHAP value.

        Args:
            shap_values: Either a 2-D array ``(samples, features)``, a list
                of such arrays (one per class) or a 3-D array
                ``(samples, features, classes)``. For the per-class forms
                the class at index 1 is used, falling back to index 0 when
                only one class is present.
            feature_names: Sequence of names, one per feature.

        Returns:
            DataFrame with columns ``Feature`` and ``Mean_Abs_SHAP``,
            sorted by importance in descending order.

        Raises:
            ValueError: If the number of features and names differ.
        """
        if isinstance(shap_values, list):
            # min() keeps index 1 when it exists and avoids an IndexError
            # when the model was trained on a single class.
            shap_values = shap_values[min(1, len(shap_values) - 1)]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, min(1, shap_values.shape[2] - 1)]

        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
        if len(mean_abs_shap) != len(feature_names):
            raise ValueError("Mismatch between SHAP values and feature names.")

        feature_importance = pd.DataFrame({
            'Feature': feature_names,
            'Mean_Abs_SHAP': mean_abs_shap,
        }).sort_values(by='Mean_Abs_SHAP', ascending=False)
        return feature_importance

    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
        """Raise a threshold by 10% per matching top-3 feature.

        Among the three most important features, each name containing
        ``'Buy%'`` multiplies ``buy_threshold`` by 1.1; otherwise each name
        containing ``'Sell%'`` multiplies ``sell_threshold`` by 1.1.

        Args:
            feature_importance: Frame as returned by
                :meth:`analyze_feature_importance`, already sorted.
            buy_threshold: Starting buy threshold.
            sell_threshold: Starting sell threshold.

        Returns:
            Tuple ``(buy_threshold, sell_threshold)`` after adjustment.
        """
        top_features = feature_importance.head(3)['Feature'].tolist()
        for feature in top_features:
            # 'Buy%' also matches 'PCA_Buy%', so one test covers both.
            if 'Buy%' in feature:
                buy_threshold *= 1.1
            elif 'Sell%' in feature:
                sell_threshold *= 1.1
        return buy_threshold, sell_threshold
class Backtester:
    """Replay the Buy/Sell signals over historical rows and track value."""

    def __init__(self, data):
        """Keep a private copy of the processed DataFrame.

        The frame must provide ``Close``, ``Scenario``, ``Buy%``, ``Sell%``,
        ``PCA_Scenario``, ``PCA_Buy%`` and ``PCA_Sell%``.
        """
        self.data = data.copy()

    @staticmethod
    def _signal(row, buy_threshold, sell_threshold):
        """Return ``'Buy'``, ``'Sell'`` or ``None`` for one row.

        A buy signal takes precedence when both sides would trigger.
        """
        wants_buy = (
            (row['PCA_Scenario'] == 'Buy' and row['PCA_Buy%'] > buy_threshold)
            or (row['Scenario'] == 'Buy' and row['Buy%'] > buy_threshold)
        )
        if wants_buy:
            return 'Buy'
        wants_sell = (
            (row['PCA_Scenario'] == 'Sell' and row['PCA_Sell%'] > sell_threshold)
            or (row['Scenario'] == 'Sell' and row['Sell%'] > sell_threshold)
        )
        if wants_sell:
            return 'Sell'
        return None

    def backtest(self, buy_threshold=0.5, sell_threshold=0.5, initial_capital=10000):
        """Run the strategy and return the portfolio path.

        The first row is skipped, so the returned list has
        ``len(data) - 1`` entries. On every remaining row the open position
        is first marked to market with the price change since the previous
        row (``close / previous_close`` for a long position, the inverse
        for a short one); only then is the row's signal used to open or
        flip the position at that row's close. Applying the step-to-step
        ratio matters: multiplying by ``close / entry_price`` on every row
        would compound the same gain again and again.

        Args:
            buy_threshold: A buy probability must exceed this to trigger.
            sell_threshold: A sell probability must exceed this to trigger.
            initial_capital: Starting portfolio value (default 10000).

        Returns:
            Tuple ``(portfolio_values, position, entry_price)`` where
            ``position`` is ``'Buy'``, ``'Sell'`` or ``None`` and
            ``entry_price`` is the close at which it was opened.
        """
        portfolio_value = initial_capital
        position = None
        entry_price = None
        previous_close = None
        portfolio_values = []

        for _, row in self.data.iloc[1:].iterrows():
            close = row['Close']

            # Mark the position held since the previous row to market.
            # previous_close is always set once a position exists.
            if position == 'Buy':
                portfolio_value *= close / previous_close
            elif position == 'Sell':
                portfolio_value *= previous_close / close

            signal = self._signal(row, buy_threshold, sell_threshold)
            if signal is not None and signal != position:
                position = signal
                entry_price = close

            previous_close = close
            portfolio_values.append(portfolio_value)

        return portfolio_values, position, entry_price
def run_analysis(ticker="BTC-USD", nb_days=50):
    """Run the full pipeline and render the result in Streamlit.

    Steps: download prices, derive returns and probabilities, project with
    PCA, train the classifier, rank features with SHAP, refine the
    thresholds, backtest, then show the final position and the latest
    PCA-based probabilities.

    Any failure is reported through ``st.error`` instead of propagating,
    because this function is the top-level boundary of the page.

    Args:
        ticker: Symbol to analyse. Defaults to ``"BTC-USD"``.
        nb_days: Look-back window in days. Defaults to 50.

    Returns:
        ``True`` when the results were rendered, ``False`` otherwise.
    """
    try:
        fetcher = DataFetcher(ticker=ticker, nb_days=nb_days)
        data = fetcher.fetch_data()
        # Fail early with a clear message rather than a confusing
        # downstream error about missing columns.
        if data is None or data.empty:
            st.error(f"An error occurred: no price data was returned for {ticker}.")
            return False

        processor = FinancialDataProcessor(data)
        processed_data = processor.calculate_returns().calculate_probabilities()

        pca_transformer = PCATransformer(n_components=1)
        pca_result = pca_transformer.fit_transform(processed_data)
        processed_data = processor.apply_pca_calculations(pca_result)
        # Filtering and the index join can leave nothing to train on.
        if processed_data.empty:
            st.error("An error occurred: no rows left after processing the price data.")
            return False

        strategy_builder = StrategyBuilder(processed_data)
        model, X, _ = strategy_builder.train_model(target_column='PCA_Scenario')
        shap_values = strategy_builder.compute_shapley_values(model, X)
        feature_importance = strategy_builder.analyze_feature_importance(shap_values, X.columns)
        buy_threshold, sell_threshold = strategy_builder.refine_thresholds(feature_importance)

        backtester = Backtester(processed_data)
        _, final_position, entry_price = backtester.backtest(buy_threshold, sell_threshold)
        last_row = processed_data.iloc[-1]

        # Display results
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Current Position")
            st.metric("Position", final_position or "No position")
            if final_position:
                st.metric("Entry Price", f"${entry_price:.2f}")
        with col2:
            st.subheader("Decision Metrics")
            # The displayed probabilities are the PCA-based ones.
            st.metric("Buy%", f"{last_row['PCA_Buy%']:.4f}")
            st.metric("Sell%", f"{last_row['PCA_Sell%']:.4f}")
        return True
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        return False
# Page setup: the browser tab title and the on-page heading share one string.
_PAGE_TITLE = "BTC-USD Trading Bot"
st.set_page_config(layout="wide", page_title=_PAGE_TITLE)
st.title(_PAGE_TITLE)

# The analysis is executed on every page load; its boolean result is not used.
run_analysis()
# Educational Slides Section
# Static documentation rendered with Streamlit: a header, an embedded
# preview iframe, two links and thirteen collapsed expanders ("slides").
# Nothing in this section reads or modifies the analysis results.
st.header("Educational Slides")
st.markdown('🎧Listen to the Audio🎧')
# The <iframe> is raw HTML, which is why unsafe_allow_html=True is passed.
st.markdown(
    """
<iframe src="https://drive.google.com/file/d/1oZdaiyrNYJ4v3LZ3lEurMSG3n784wiOw/preview" width="640" height="480"></iframe>
""", unsafe_allow_html=True
)
st.markdown('Read the [White Paper](https://huggingface.co./spaces/earnliners/dow-usd/resolve/main/QPTFen.pdf).')
st.markdown('Powered by [Forecast Trade Group](https://huggingface.co./TradeAdmin).')
# Slide 1: what the system is.
with st.expander("Slide 1: Introduction", expanded=False):
    st.subheader("Real-Time Crypto Trading Bot Using Machine Learning and PCA")
    st.markdown("""
**Subtitle:** Leveraging Financial Data Analysis for Optimal Trading Decisions
**Overview:**
This system integrates real-time financial data, machine learning, and principal component analysis (PCA)
to automate trading decisions in cryptocurrency markets.
""")
# Slide 2: stated goals.
with st.expander("Slide 2: Objective", expanded=False):
    st.subheader("Main Goals")
    st.markdown("""
- Develop an automated trading system that optimizes buy and sell decisions based on historical financial data
- Use machine learning models, specifically Random Forest, to predict market movements
- Implement Principal Component Analysis (PCA) for dimensionality reduction and feature extraction
- Backtest the system and evaluate portfolio performance
""")
# Slide 3: glossary of the techniques used.
with st.expander("Slide 3: Key Concepts", expanded=False):
    st.subheader("Core Technologies and Methods")
    st.markdown("""
- **Logarithmic Return:** A continuous-time return calculation used to model price changes in markets
- **Principal Component Analysis (PCA):** A dimensionality reduction technique to extract meaningful features
- **Machine Learning:** Using Random Forest to classify "Buy" and "Sell" scenarios
- **SHAP Values:** A method to interpret model outputs through feature contribution analysis
""")
# Slide 4: two-column layout, data source on the left, preprocessing on the right.
# The "Hourly" / "Last 50 days" text mirrors the interval="1h" and nb_days=50
# values used when the data is fetched; keep them in sync.
with st.expander("Slide 4: Data Collection and Preprocessing", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Data Fetching")
        st.markdown("""
- **Source:** Yahoo Finance (yfinance)
- **Data:** Historical cryptocurrency data
- **Interval:** Hourly data points
- **Period:** Last 50 days
""")
    with col2:
        st.markdown("### Preprocessing")
        st.markdown("""
- Calculate logarithmic returns
- Classify scenarios (Buy/Sell)
- Adjust returns for sell scenarios
- Handle missing values and outliers
""")
# Slide 5: the PCA steps, with formulas shown in fenced code blocks.
with st.expander("Slide 5: Principal Component Analysis (PCA)", expanded=False):
    st.subheader("PCA Process")
    st.markdown("""
1. **Data Standardization:**
```
X' = (X - μ) / σ
```
2. **Covariance Matrix:**
```
Cov(X) = 1/(n-1) Σ(Xi - μ)(Xi - μ)ᵀ
```
3. **Eigenvalue Decomposition:**
- Find principal components
- Sort by variance explained
4. **Dimensionality Reduction:**
- Transform data to lower dimensions
- Preserve important features
""")
# Slide 6: two-column layout describing the model and its training.
# NOTE(review): StrategyBuilder.train_model in this file fits a single
# RandomForestClassifier; no cross-validation or hyperparameter tuning is
# visible there, so the "Training Process" bullets describe more than the
# code shows -- confirm or reword.
with st.expander("Slide 6: Machine Learning Strategy", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Model Architecture")
        st.markdown("""
- Random Forest Classifier
- Feature selection from PCA
- Binary classification (Buy/Sell)
""")
    with col2:
        st.markdown("### Training Process")
        st.markdown("""
- Cross-validation
- Hyperparameter tuning
- Performance metrics
- Model evaluation
""")
# Slide 7: SHAP formula and what it is used for.
with st.expander("Slide 7: SHAP Analysis", expanded=False):
    st.subheader("Shapley Additive Explanations")
    st.markdown("""
**SHAP Value Formula:**
```
φᵢ(f) = 1/|N|! Σ [f(S ∪ {i}) - f(S)]
```
**Key Components:**
- Feature importance ranking
- Individual prediction explanations
- Global model interpretation
""")
# Slide 8: how thresholds are adjusted from feature importance.
with st.expander("Slide 8: Strategy Refinement", expanded=False):
    st.subheader("Dynamic Strategy Adjustment")
    st.markdown("""
1. **Threshold Refinement:**
- Use SHAP values to identify key features
- Adjust thresholds based on importance
2. **Signal Processing:**
- Buy signal strengthening
- Sell signal validation
- Risk management integration
""")
# Slide 9: backtesting formula, trading rules and metrics.
with st.expander("Slide 9: Backtesting Framework", expanded=False):
    st.markdown("""
### Portfolio Value Calculation
```
Portfolio Valueₜ₊₁ = Portfolio Valueₜ × (Pₜ₊₁/Pₜ)
```
### Trading Logic
- Buy when probability > threshold
- Sell when probability > threshold
- Position management
### Performance Metrics
- Total Return
- Risk Metrics
- Sharpe Ratio
""")
# Slide 10: dashboard feature list.
with st.expander("Slide 10: Real-Time Interface", expanded=False):
    st.subheader("Streamlit Dashboard Features")
    st.markdown("""
- Live trading signals
- Portfolio performance tracking
- Position monitoring
- Automatic updates
- Historical performance
""")
# Slide 11: recap of the components.
with st.expander("Slide 11: Summary", expanded=False):
    st.markdown("""
### Key System Components
- Automated trading system
- ML & PCA integration
- SHAP-based interpretation
- Real-time analytics
- Performance tracking
""")
# Slide 12: roadmap.
with st.expander("Slide 12: Future Improvements", expanded=False):
    st.markdown("""
### Planned Enhancements
1. Additional data sources integration
2. Advanced optimization techniques
3. Live trading deployment
4. Enhanced risk management
5. Portfolio diversification
""")
# Slide 13: closing slide with discussion topics.
with st.expander("Slide 13: Q&A", expanded=False):
    st.markdown("""
### Questions & Discussion
Thank you for exploring our trading system! For questions or suggestions:
- System architecture
- Implementation details
- Performance metrics
- Future developments
""")
# Call-to-action: a horizontal rule, a heading and a link to Publish0x.
st.markdown("---")
st.markdown("### Enjoying the Content?")
st.markdown("""
If you find our work useful and interesting, please consider supporting us for **free** on Publish0x!
It's quick, easy, and no cost to you. Just follow this link to show your support:
[**Like and Tip Us for Free on Publish0x!**](https://www.publish0x.com/start/crypto-trading-mathematical-modeling-and-strategic-optimizat-xkelryw)
""")

# Footer: donation details and community links.
st.markdown("---")
# The emphasis marker is closed here; an opening "*" without a matching
# closing one is shown as a literal asterisk instead of italic text.
st.markdown("*Support Development & Info :*")
st.markdown("Send your email in comment to your btc donation at ")
st.markdown("1P9R71C6JYJxrPVEzMz4K3hoHYGRW39A9A")
st.markdown('Join our [Community on Huggingface](https://huggingface.co./TradeAdmin) for more.')
st.markdown('All our bots are [tested on HTX](https://www.htx.com/invite/en-us/1f?invite_code=awhd9223).')
# End of the page content.