Spaces:
Running
Running
import streamlit as st | |
from datetime import datetime, timedelta | |
import yfinance as yf | |
import numpy as np | |
import pandas as pd | |
from sklearn.decomposition import PCA | |
from sklearn.preprocessing import StandardScaler, LabelEncoder | |
from sklearn.ensemble import RandomForestClassifier | |
import shap | |
import matplotlib.pyplot as plt | |
class DataFetcher:
    """Download hourly price history for a single ticker through yfinance."""

    def __init__(self, ticker, nb_days):
        # ``data`` stays None until fetch_data() has been called.
        self.data = None
        self.nb_days = nb_days
        self.ticker = ticker

    def fetch_data(self):
        """Download, cache and return the hourly bars for the configured window.

        The window starts ``nb_days`` days before today's midnight (as given
        by ``datetime.now()``) and ends at the following midnight. Whatever
        ``yf.download`` returns is stored on ``self.data`` and returned.
        """
        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        window_start = midnight - timedelta(days=self.nb_days)
        window_end = midnight + timedelta(days=1)
        self.data = yf.download(
            self.ticker,
            start=window_start,
            end=window_end,
            interval="1h",
        )
        return self.data
class FinancialDataProcessor:
    """Derive returns, Buy/Sell scenarios and tanh-based scores from price data.

    The processor works on its own copy of the frame handed to the
    constructor, so the caller's frame is left untouched.
    """

    def __init__(self, data):
        self.data = data.copy()

    def _flatten_columns(self):
        """Collapse MultiIndex columns into flat ``"<level0>_<level1>"`` names.

        When the second level is empty only the first level is kept.
        """
        if isinstance(self.data.columns, pd.MultiIndex):
            self.data.columns = [
                f"{col[0]}_{col[1]}" if col[1] else col[0]
                for col in self.data.columns
            ]

    def _find_close_column(self):
        """Return the label of the column that holds closing prices.

        Candidates are tried from most to least specific so that an
        "Adj Close" column is never preferred over the real "Close" one:
        exact ``'Close'``, then flattened names such as ``'Close_<ticker>'``
        or ``'<ticker>_Close'``, and finally any label containing ``'Close'``.

        Raises:
            ValueError: if no string column label contains ``'Close'``.
        """
        labels = [col for col in self.data.columns if isinstance(col, str)]
        preference = (
            lambda label: label == 'Close',
            lambda label: label.startswith('Close_') or label.endswith('_Close'),
            lambda label: 'Close' in label,
        )
        for accepts in preference:
            for label in labels:
                if accepts(label):
                    return label
        raise ValueError("The 'Close' column is missing in the dataset.")

    def calculate_returns(self, periods_per_year=252):
        """Add log-return, scenario and scaled-return columns to ``self.data``.

        Columns written: ``LogReturn``, ``Scenario`` ('Buy' when the log
        return is positive, otherwise 'Sell'), ``AdjustedLogReturn`` (the log
        return with its sign flipped on 'Sell' rows) and ``AnnualizedReturn``.
        Rows with a non-positive close and rows without a valid log return
        (including the first row) are dropped.

        Args:
            periods_per_year: Factor applied to ``AdjustedLogReturn`` to build
                ``AnnualizedReturn``. The default of 252 keeps the historical
                behaviour; pass the number of bars per year that matches the
                data frequency for a true annualisation.

        Returns:
            This processor, so calls can be chained.

        Raises:
            ValueError: if no closing-price column can be found.
        """
        self._flatten_columns()
        self.data.rename(columns={self._find_close_column(): 'Close'}, inplace=True)

        # The logarithm is undefined for non-positive prices.
        self.data = self.data[self.data['Close'] > 0].copy()
        self.data['LogReturn'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data.replace([np.inf, -np.inf], np.nan, inplace=True)
        self.data.dropna(subset=['LogReturn'], inplace=True)

        self.data['Scenario'] = np.where(self.data['LogReturn'] > 0, 'Buy', 'Sell')
        self.data['AdjustedLogReturn'] = np.where(
            self.data['Scenario'] == 'Sell', -self.data['LogReturn'], self.data['LogReturn']
        )
        self.data['AnnualizedReturn'] = self.data['AdjustedLogReturn'] * periods_per_year
        return self

    def calculate_probabilities(self):
        """Add ``Buy%`` and ``Sell%`` columns and return the data frame.

        ``Buy%`` is ``(1 + tanh(LogReturn)) / 2`` and ``Sell%`` is its
        complement, so the two always sum to one. Requires the ``LogReturn``
        column written by :meth:`calculate_returns`.
        """
        squashed = np.tanh(self.data['LogReturn'])
        self.data['Buy%'] = (1 + squashed) / 2
        self.data['Sell%'] = (1 - squashed) / 2
        return self.data

    def apply_pca_calculations(self, pca_result):
        """Derive PCA-based signals and merge them into ``self.data``.

        Only rows with a positive ``PC1`` are kept; the PCA log return is then
        taken between consecutive *retained* rows. The result is joined onto
        ``self.data`` by index with ``DataFrame.merge``'s default join, so
        rows missing on either side are dropped.

        Args:
            pca_result: Frame with a ``PC1`` column, indexed like ``self.data``.

        Returns:
            The merged data frame (also stored on ``self.data``).
        """
        pca_result = pca_result[pca_result['PC1'] > 0].copy()
        pca_result['PCA_LogReturn'] = np.log(pca_result['PC1'] / pca_result['PC1'].shift(1))
        pca_result.replace([np.inf, -np.inf], np.nan, inplace=True)
        pca_result.dropna(subset=['PCA_LogReturn'], inplace=True)

        pca_result['PCA_Scenario'] = np.where(pca_result['PCA_LogReturn'] > 0, 'Buy', 'Sell')
        squashed = np.tanh(pca_result['PCA_LogReturn'])
        pca_result['PCA_Buy%'] = (1 + squashed) / 2
        pca_result['PCA_Sell%'] = (1 - squashed) / 2

        self.data = self.data.merge(pca_result, left_index=True, right_index=True)
        return self.data
class PCATransformer:
    """Standardise numeric columns and project them onto principal components."""

    def __init__(self, n_components=1):
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data):
        """Fit the scaler and PCA on ``data`` and return the component scores.

        Only numeric columns take part; other columns are ignored.

        Args:
            data: Input data frame.

        Returns:
            A frame with columns ``PC1``, ``PC2``, ... sharing ``data``'s index.
        """
        numeric_data = data.select_dtypes(include=[np.number])
        scaled_data = self.scaler.fit_transform(numeric_data)
        components = self.pca.fit_transform(scaled_data)
        # Label by the width actually produced rather than by n_components,
        # which is not necessarily an integer count.
        labels = [f'PC{i + 1}' for i in range(components.shape[1])]
        return pd.DataFrame(components, columns=labels, index=data.index)
class StrategyBuilder:
    """Train a classifier on the processed data and tune thresholds from SHAP."""

    def __init__(self, data):
        self.data = data.copy()

    def train_model(self, target_column):
        """Fit a random forest that predicts ``target_column``.

        Every numeric column of ``self.data`` is used as a feature and the
        target labels are integer-encoded with ``LabelEncoder``.

        Returns:
            Tuple ``(model, X, y_encoded)``.
        """
        # NOTE(review): the feature set is *all* numeric columns, which may
        # include columns the target was derived from (e.g. a log-return
        # column next to its scenario label) -- confirm this is intended.
        X = self.data.select_dtypes(include=[np.number])
        y = self.data[target_column]
        y_encoded = LabelEncoder().fit_transform(y)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X, y_encoded)
        return model, X, y_encoded

    def compute_shapley_values(self, model, X):
        """Return ``shap.TreeExplainer(model).shap_values(X)``."""
        explainer = shap.TreeExplainer(model)
        return explainer.shap_values(X)

    def analyze_feature_importance(self, shap_values, feature_names):
        """Rank features by mean absolute SHAP value, largest first.

        Args:
            shap_values: A 2-D array (rows x features), a list of such arrays
                (presumably one per class), or a 3-D array whose last axis is
                treated as the class axis.
            feature_names: One name per feature column.

        Returns:
            Data frame with columns ``Feature`` and ``Mean_Abs_SHAP``.

        Raises:
            ValueError: if no SHAP values are given or their width differs
                from ``len(feature_names)``.
        """
        # Class index 1 is used when available; with a single class
        # (e.g. a target that only ever took one label) fall back to index 0
        # instead of failing with an IndexError.
        if isinstance(shap_values, list):
            if not shap_values:
                raise ValueError("No SHAP values were provided.")
            shap_values = shap_values[min(1, len(shap_values) - 1)]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, min(1, shap_values.shape[2] - 1)]

        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
        if len(mean_abs_shap) != len(feature_names):
            raise ValueError("Mismatch between SHAP values and feature names.")

        feature_importance = pd.DataFrame({
            'Feature': list(feature_names),
            'Mean_Abs_SHAP': mean_abs_shap,
        }).sort_values(by='Mean_Abs_SHAP', ascending=False)
        return feature_importance

    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
        """Raise thresholds by 10% per Buy%/Sell% feature among the top three.

        A top-three feature whose name contains ``'Buy%'`` (this covers
        ``'PCA_Buy%'`` too) multiplies the buy threshold by 1.1; otherwise one
        containing ``'Sell%'`` multiplies the sell threshold by 1.1.

        Returns:
            Tuple ``(buy_threshold, sell_threshold)``.
        """
        for feature in feature_importance.head(3)['Feature'].tolist():
            if 'Buy%' in feature:
                buy_threshold *= 1.1
            elif 'Sell%' in feature:
                sell_threshold *= 1.1
        return buy_threshold, sell_threshold
class Backtester:
    """Replay the Buy/Sell signals over historical rows and track equity."""

    def __init__(self, data):
        self.data = data.copy()

    def backtest(self, buy_threshold=0.5, sell_threshold=0.5, initial_capital=10000):
        """Simulate the strategy and return the equity path and final state.

        A row is a buy signal when either the PCA pair or the plain pair says
        'Buy' with a score above ``buy_threshold``; otherwise it is a sell
        signal when either pair says 'Sell' with a score above
        ``sell_threshold``. Signals are evaluated from the second row on.

        On every bar the position held coming into the bar is marked to
        market with that bar's price ratio (``close / previous_close`` for
        'Buy', the inverse for 'Sell') *before* the bar's signal may switch
        the position. Compounding per-bar ratios yields exactly the gain
        since entry, whereas multiplying by ``close / entry_price`` on every
        bar would count the same gain repeatedly.

        Args:
            buy_threshold: Minimum ``Buy%`` / ``PCA_Buy%`` for a buy signal.
            sell_threshold: Minimum ``Sell%`` / ``PCA_Sell%`` for a sell signal.
            initial_capital: Starting portfolio value.

        Returns:
            Tuple ``(portfolio_values, position, entry_price)`` where
            ``portfolio_values`` has one entry per evaluated row, ``position``
            is 'Buy', 'Sell' or None, and ``entry_price`` is the close at
            which the final position was opened (None without a position).
        """
        frame = self.data
        # Nothing to evaluate without at least two rows.
        if len(frame) < 2:
            return [], None, None

        closes = frame['Close'].tolist()
        buy_signals = (
            ((frame['PCA_Scenario'] == 'Buy') & (frame['PCA_Buy%'] > buy_threshold))
            | ((frame['Scenario'] == 'Buy') & (frame['Buy%'] > buy_threshold))
        ).tolist()
        sell_signals = (
            ((frame['PCA_Scenario'] == 'Sell') & (frame['PCA_Sell%'] > sell_threshold))
            | ((frame['Scenario'] == 'Sell') & (frame['Sell%'] > sell_threshold))
        ).tolist()

        portfolio_value = initial_capital
        position = None
        entry_price = None
        portfolio_values = []

        for i in range(1, len(closes)):
            price = closes[i]
            previous_price = closes[i - 1]

            # Mark the position carried into this bar to market.
            if position == 'Buy':
                portfolio_value *= price / previous_price
            elif position == 'Sell':
                portfolio_value *= previous_price / price

            # Then act on this bar's signal; a buy signal takes precedence.
            if buy_signals[i]:
                if position != 'Buy':
                    position = 'Buy'
                    entry_price = price
            elif sell_signals[i]:
                if position != 'Sell':
                    position = 'Sell'
                    entry_price = price

            portfolio_values.append(portfolio_value)

        return portfolio_values, position, entry_price
def run_analysis():
    """Run the fetch -> process -> model -> backtest pipeline and render it.

    Shows the final position (with its entry price when one is open) and the
    latest PCA-based Buy%/Sell% values in two Streamlit columns.

    Returns:
        True when the pipeline and rendering completed, False when any
        exception occurred (the message is shown with ``st.error``).
    """
    try:
        raw_prices = DataFetcher(ticker="BTC-USD", nb_days=50).fetch_data()

        # Returns and tanh scores first, then the PCA-derived signals on top.
        processor = FinancialDataProcessor(raw_prices)
        enriched = processor.calculate_returns().calculate_probabilities()
        components = PCATransformer(n_components=1).fit_transform(enriched)
        enriched = processor.apply_pca_calculations(components)

        # Model + SHAP ranking drive the threshold adjustment.
        builder = StrategyBuilder(enriched)
        model, features, _ = builder.train_model(target_column='PCA_Scenario')
        shap_values = builder.compute_shapley_values(model, features)
        importance = builder.analyze_feature_importance(shap_values, features.columns)
        buy_threshold, sell_threshold = builder.refine_thresholds(importance)

        # Only the final position and its entry price are displayed.
        _, final_position, entry_price = Backtester(enriched).backtest(
            buy_threshold, sell_threshold
        )
        latest = enriched.iloc[-1]

        position_col, metrics_col = st.columns(2)
        with position_col:
            st.subheader("Current Position")
            st.metric("Position", final_position or "No position")
            if final_position:
                st.metric("Entry Price", f"${entry_price:.2f}")
        with metrics_col:
            st.subheader("Decision Metrics")
            st.metric("Buy%", f"{latest['PCA_Buy%']:.4f}")
            st.metric("Sell%", f"{latest['PCA_Sell%']:.4f}")
    except Exception as e:
        # Top-level UI boundary: report the failure on the page.
        st.error(f"An error occurred: {str(e)}")
        return False
    return True
# Page setup: set_page_config is issued before any other Streamlit call,
# then the analysis is rendered on every page load.
APP_TITLE = "BTC-USD Trading Bot"
st.set_page_config(layout="wide", page_title=APP_TITLE)
st.title(APP_TITLE)
run_analysis()
# Educational section header: audio embed followed by reference links.
AUDIO_EMBED_HTML = """
<iframe src="https://drive.google.com/file/d/1oZdaiyrNYJ4v3LZ3lEurMSG3n784wiOw/preview" width="640" height="480"></iframe>
"""
REFERENCE_LINKS = (
    'Read the [White Paper](https://huggingface.co./spaces/earnliners/dow-usd/resolve/main/QPTFen.pdf).',
    'Powered by [Forecast Trade Group](https://huggingface.co./TradeAdmin).',
)

st.header("Educational Slides")
st.markdown('🎧Listen to the Audio🎧')
# Raw HTML is needed for the iframe, hence unsafe_allow_html.
st.markdown(AUDIO_EMBED_HTML, unsafe_allow_html=True)
for link_line in REFERENCE_LINKS:
    st.markdown(link_line)
def _text_slide(title, body, subheader=None):
    """Render a collapsed expander with an optional subheader and one markdown body."""
    with st.expander(title, expanded=False):
        if subheader is not None:
            st.subheader(subheader)
        st.markdown(body)


def _two_column_slide(title, left, right):
    """Render a collapsed expander with two columns of (heading, body) markdown."""
    with st.expander(title, expanded=False):
        left_col, right_col = st.columns(2)
        for column, (heading, body) in zip((left_col, right_col), (left, right)):
            with column:
                st.markdown(heading)
                st.markdown(body)


# The slides are rendered in presentation order.
_text_slide(
    "Slide 1: Introduction",
    """
**Subtitle:** Leveraging Financial Data Analysis for Optimal Trading Decisions
**Overview:**
This system integrates real-time financial data, machine learning, and principal component analysis (PCA)
to automate trading decisions in cryptocurrency markets.
""",
    subheader="Real-Time Crypto Trading Bot Using Machine Learning and PCA",
)

_text_slide(
    "Slide 2: Objective",
    """
- Develop an automated trading system that optimizes buy and sell decisions based on historical financial data
- Use machine learning models, specifically Random Forest, to predict market movements
- Implement Principal Component Analysis (PCA) for dimensionality reduction and feature extraction
- Backtest the system and evaluate portfolio performance
""",
    subheader="Main Goals",
)

_text_slide(
    "Slide 3: Key Concepts",
    """
- **Logarithmic Return:** A continuous-time return calculation used to model price changes in markets
- **Principal Component Analysis (PCA):** A dimensionality reduction technique to extract meaningful features
- **Machine Learning:** Using Random Forest to classify "Buy" and "Sell" scenarios
- **SHAP Values:** A method to interpret model outputs through feature contribution analysis
""",
    subheader="Core Technologies and Methods",
)

_two_column_slide(
    "Slide 4: Data Collection and Preprocessing",
    (
        "### Data Fetching",
        """
- **Source:** Yahoo Finance (yfinance)
- **Data:** Historical cryptocurrency data
- **Interval:** Hourly data points
- **Period:** Last 50 days
""",
    ),
    (
        "### Preprocessing",
        """
- Calculate logarithmic returns
- Classify scenarios (Buy/Sell)
- Adjust returns for sell scenarios
- Handle missing values and outliers
""",
    ),
)

_text_slide(
    "Slide 5: Principal Component Analysis (PCA)",
    """
1. **Data Standardization:**
```
X' = (X - μ) / σ
```
2. **Covariance Matrix:**
```
Cov(X) = 1/(n-1) Σ(Xi - μ)(Xi - μ)ᵀ
```
3. **Eigenvalue Decomposition:**
- Find principal components
- Sort by variance explained
4. **Dimensionality Reduction:**
- Transform data to lower dimensions
- Preserve important features
""",
    subheader="PCA Process",
)

_two_column_slide(
    "Slide 6: Machine Learning Strategy",
    (
        "### Model Architecture",
        """
- Random Forest Classifier
- Feature selection from PCA
- Binary classification (Buy/Sell)
""",
    ),
    (
        "### Training Process",
        """
- Cross-validation
- Hyperparameter tuning
- Performance metrics
- Model evaluation
""",
    ),
)

_text_slide(
    "Slide 7: SHAP Analysis",
    """
**SHAP Value Formula:**
```
φᵢ(f) = 1/|N|! Σ [f(S ∪ {i}) - f(S)]
```
**Key Components:**
- Feature importance ranking
- Individual prediction explanations
- Global model interpretation
""",
    subheader="Shapley Additive Explanations",
)

_text_slide(
    "Slide 8: Strategy Refinement",
    """
1. **Threshold Refinement:**
- Use SHAP values to identify key features
- Adjust thresholds based on importance
2. **Signal Processing:**
- Buy signal strengthening
- Sell signal validation
- Risk management integration
""",
    subheader="Dynamic Strategy Adjustment",
)

_text_slide(
    "Slide 9: Backtesting Framework",
    """
### Portfolio Value Calculation
```
Portfolio Valueₜ₊₁ = Portfolio Valueₜ × (Pₜ₊₁/Pₜ)
```
### Trading Logic
- Buy when probability > threshold
- Sell when probability > threshold
- Position management
### Performance Metrics
- Total Return
- Risk Metrics
- Sharpe Ratio
""",
)

_text_slide(
    "Slide 10: Real-Time Interface",
    """
- Live trading signals
- Portfolio performance tracking
- Position monitoring
- Automatic updates
- Historical performance
""",
    subheader="Streamlit Dashboard Features",
)

_text_slide(
    "Slide 11: Summary",
    """
### Key System Components
- Automated trading system
- ML & PCA integration
- SHAP-based interpretation
- Real-time analytics
- Performance tracking
""",
)

_text_slide(
    "Slide 12: Future Improvements",
    """
### Planned Enhancements
1. Additional data sources integration
2. Advanced optimization techniques
3. Live trading deployment
4. Enhanced risk management
5. Portfolio diversification
""",
)

_text_slide(
    "Slide 13: Q&A",
    """
### Questions & Discussion
Thank you for exploring our trading system! For questions or suggestions:
- System architecture
- Implementation details
- Performance metrics
- Future developments
""",
)
# Call-to-action block shown after the slides.
st.markdown("---")
st.markdown("### Enjoying the Content?")
st.markdown("""
If you find our work useful and interesting, please consider supporting us for **free** on Publish0x!
It's quick, easy, and no cost to you. Just follow this link to show your support:
[**Like and Tip Us for Free on Publish0x!**](https://www.publish0x.com/start/crypto-trading-mathematical-modeling-and-strategic-optimizat-xkelryw)
""")

# Footer
st.markdown("---")
# The emphasis marker is closed so the line renders in italics instead of
# showing a stray literal asterisk.
st.markdown("*Support Development & Info:*")
st.markdown("Send your email in comment to your btc donation at ")
st.markdown("1P9R71C6JYJxrPVEzMz4K3hoHYGRW39A9A")
st.markdown('Join our [Community on Huggingface](https://huggingface.co./TradeAdmin) for more.')
st.markdown('All our bots are [tested on HTX](https://www.htx.com/invite/en-us/1f?invite_code=awhd9223).')
# Educational Slides Section dona |