btc-usd / app.py
earnliners's picture
Update app.py
fd3aa06 verified
raw
history blame
16.1 kB
import streamlit as st
from datetime import datetime, timedelta
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import shap
import matplotlib.pyplot as plt
class DataFetcher:
    """Downloads hourly price history for a single ticker through yfinance."""

    def __init__(self, ticker, nb_days):
        # Nothing is downloaded until fetch_data() is called.
        self.data = None
        self.ticker = ticker
        self.nb_days = nb_days

    def fetch_data(self):
        """Download hourly bars, store them on ``self.data`` and return them.

        The requested window starts ``nb_days`` days before today's midnight
        (as given by the naive ``datetime.now()``) and ends one day after it.
        """
        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        window = {
            "start": midnight - timedelta(days=self.nb_days),
            "end": midnight + timedelta(days=1),
        }
        self.data = yf.download(self.ticker, interval="1h", **window)
        return self.data
class FinancialDataProcessor:
    """Derives return, scenario and probability columns from price data.

    The processor works on a private copy of the frame handed to the
    constructor; every step stores its result in ``self.data``.
    """

    def __init__(self, data):
        self.data = data.copy()

    def _flatten_columns(self):
        """Collapse MultiIndex columns into ``"level0_level1"`` strings.

        A column whose second level is empty keeps its first-level name.
        """
        if isinstance(self.data.columns, pd.MultiIndex):
            self.data.columns = [
                f"{col[0]}_{col[1]}" if col[1] else col[0]
                for col in self.data.columns
            ]

    def _find_close_column(self):
        """Return the name of the closing-price column, or ``None``.

        Preference order: an exact ``'Close'``, then a name starting with
        ``'Close'`` (e.g. ``'Close_BTC-USD'`` after flattening), then any name
        containing ``'Close'``. A plain substring test alone would pick
        ``'Adj Close'`` when it precedes ``'Close'``, and renaming it would
        leave two ``'Close'`` columns in the frame.
        """
        names = [col for col in self.data.columns if isinstance(col, str)]
        if 'Close' in names:
            return 'Close'
        for name in names:
            if name.startswith('Close'):
                return name
        for name in names:
            if 'Close' in name:
                return name
        return None

    @staticmethod
    def _tanh_probabilities(log_returns):
        """Map log returns to a ``(buy, sell)`` pair of values in (0, 1).

        ``buy = (1 + tanh(r)) / 2`` and ``sell = (1 - tanh(r)) / 2``, so the
        two always sum to 1.
        """
        squashed = np.tanh(log_returns)
        return (1 + squashed) / 2, (1 - squashed) / 2

    def calculate_returns(self, periods_per_year=252):
        """Add ``LogReturn``, ``Scenario``, ``AdjustedLogReturn`` and ``AnnualizedReturn``.

        Args:
            periods_per_year: Factor applied to ``AdjustedLogReturn`` to build
                ``AnnualizedReturn``. Defaults to 252, the value that was
                previously hard-coded.
                NOTE(review): the fetcher in this file requests hourly bars,
                so 252 may not be the intended factor - confirm.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If no closing-price column can be found.
        """
        self._flatten_columns()
        close_column = self._find_close_column()
        if close_column is None:
            raise ValueError("The 'Close' column is missing in the dataset.")
        if close_column != 'Close':
            self.data = self.data.rename(columns={close_column: 'Close'})

        # Non-positive prices would make the logarithm undefined.
        frame = self.data[self.data['Close'] > 0].copy()
        frame['LogReturn'] = np.log(frame['Close'] / frame['Close'].shift(1))
        frame = frame.replace([np.inf, -np.inf], np.nan)
        # The first row has no predecessor, so its return is NaN and is dropped.
        frame = frame.dropna(subset=['LogReturn'])

        frame['Scenario'] = np.where(frame['LogReturn'] > 0, 'Buy', 'Sell')
        # Sell rows have their sign flipped, so the adjusted return is never negative.
        frame['AdjustedLogReturn'] = np.where(
            frame['Scenario'] == 'Sell', -frame['LogReturn'], frame['LogReturn']
        )
        frame['AnnualizedReturn'] = frame['AdjustedLogReturn'] * periods_per_year
        self.data = frame
        return self

    def calculate_probabilities(self):
        """Add ``Buy%`` and ``Sell%`` from ``LogReturn`` and return the frame.

        Unlike ``calculate_returns`` this returns ``self.data``, not ``self``.
        """
        self.data['Buy%'], self.data['Sell%'] = self._tanh_probabilities(self.data['LogReturn'])
        return self.data

    def apply_pca_calculations(self, pca_result):
        """Derive PCA-based signals and inner-join them onto ``self.data`` by index.

        Args:
            pca_result: Frame with a ``PC1`` column, indexed like ``self.data``.

        Returns:
            The merged frame (also stored on ``self.data``) with the added
            ``PC1``, ``PCA_LogReturn``, ``PCA_Scenario``, ``PCA_Buy%`` and
            ``PCA_Sell%`` columns.
        """
        # Only positive PC1 values are kept, so the log-ratio below compares
        # each surviving row with the previous *surviving* row, which is not
        # necessarily the previous timestamp.
        pca_frame = pca_result[pca_result['PC1'] > 0].copy()
        pca_frame['PCA_LogReturn'] = np.log(pca_frame['PC1'] / pca_frame['PC1'].shift(1))
        pca_frame = pca_frame.replace([np.inf, -np.inf], np.nan)
        pca_frame = pca_frame.dropna(subset=['PCA_LogReturn'])
        pca_frame['PCA_Scenario'] = np.where(pca_frame['PCA_LogReturn'] > 0, 'Buy', 'Sell')
        pca_frame['PCA_Buy%'], pca_frame['PCA_Sell%'] = self._tanh_probabilities(
            pca_frame['PCA_LogReturn']
        )
        # Default merge is an inner join: rows without a PCA signal are dropped.
        self.data = self.data.merge(pca_frame, left_index=True, right_index=True)
        return self.data
class PCATransformer:
    """Standardizes the numeric columns of a frame and projects them with PCA."""

    def __init__(self, n_components=1):
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data):
        """Fit the scaler and PCA on ``data`` and return the projected components.

        Args:
            data: DataFrame; only its numeric columns are used.

        Returns:
            DataFrame with columns ``PC1``, ``PC2``, ... sharing ``data``'s index.
        """
        numeric_data = data.select_dtypes(include=[np.number])
        scaled_data = self.scaler.fit_transform(numeric_data)
        components = self.pca.fit_transform(scaled_data)
        # Name the columns from the actual output width instead of
        # range(self.n_components): the constructor forwards n_components to
        # PCA unchanged, so it is not guaranteed to be an int.
        column_names = [f'PC{i + 1}' for i in range(components.shape[1])]
        return pd.DataFrame(components, columns=column_names, index=data.index)
class StrategyBuilder:
    """Trains a classifier on the processed data and derives thresholds from SHAP values."""

    def __init__(self, data):
        self.data = data.copy()

    def train_model(self, target_column):
        """Fit a random forest that predicts ``target_column`` from the numeric columns.

        Args:
            target_column: Name of the label column in ``self.data``.

        Returns:
            Tuple ``(model, X, y_encoded)``: the fitted classifier, the feature
            frame it was trained on and the label-encoded target.
        """
        y = self.data[target_column]
        # A numeric target would otherwise be selected as a feature of itself.
        X = self.data.select_dtypes(include=[np.number]).drop(
            columns=[target_column], errors='ignore'
        )
        # NOTE(review): columns from which the target was derived (for the
        # caller in this file, 'PCA_LogReturn' determines 'PCA_Scenario')
        # remain in X - confirm this is intended.
        y_encoded = LabelEncoder().fit_transform(y)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X, y_encoded)
        return model, X, y_encoded

    def compute_shapley_values(self, model, X):
        """Return the SHAP values of ``model`` for the rows of ``X``."""
        explainer = shap.TreeExplainer(model)
        return explainer.shap_values(X)

    def analyze_feature_importance(self, shap_values, feature_names):
        """Rank features by mean absolute SHAP value.

        Args:
            shap_values: Either a list of per-class 2-D arrays, a 3-D array
                whose last axis indexes the class, or a single 2-D array.
            feature_names: One name per feature column.

        Returns:
            DataFrame with ``Feature`` and ``Mean_Abs_SHAP`` columns, sorted
            from most to least important.

        Raises:
            ValueError: If the number of SHAP columns differs from the number
                of feature names.
        """
        # Use the second class when there is one; fall back to the only
        # available slice when the output has a single class instead of
        # failing with an IndexError.
        if isinstance(shap_values, list):
            shap_values = shap_values[min(1, len(shap_values) - 1)]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, min(1, shap_values.shape[2] - 1)]

        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
        feature_names = list(feature_names)
        if len(mean_abs_shap) != len(feature_names):
            raise ValueError("Mismatch between SHAP values and feature names.")

        feature_importance = pd.DataFrame({
            'Feature': feature_names,
            'Mean_Abs_SHAP': mean_abs_shap,
        })
        return feature_importance.sort_values(by='Mean_Abs_SHAP', ascending=False)

    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
        """Raise the thresholds by 10% for each matching feature among the top three.

        A top-three feature whose name contains ``'Buy%'`` (which includes
        ``'PCA_Buy%'``) scales the buy threshold; otherwise one containing
        ``'Sell%'`` scales the sell threshold.

        Returns:
            Tuple ``(buy_threshold, sell_threshold)``.
        """
        for feature in feature_importance.head(3)['Feature']:
            if 'Buy%' in feature:
                buy_threshold *= 1.1
            elif 'Sell%' in feature:
                sell_threshold *= 1.1
        return buy_threshold, sell_threshold
class Backtester:
    """Replays the Buy/Sell signals over historical rows and tracks portfolio value."""

    def __init__(self, data):
        self.data = data.copy()

    def backtest(self, buy_threshold=0.5, sell_threshold=0.5, initial_capital=10000):
        """Simulate the strategy row by row.

        For every row after the first, the open position is first marked to
        market with the bar-over-bar price ratio
        (``value *= close_t / close_{t-1}`` for ``'Buy'``, the inverse for
        ``'Sell'``), then the row's signals may open or flip the position at
        that row's close. The previous implementation multiplied by
        ``close_t / entry_price`` on every bar, which re-applied the whole
        gain since entry each time and inflated the result.

        Args:
            buy_threshold: Minimum ``Buy%`` / ``PCA_Buy%`` for a buy signal.
            sell_threshold: Minimum ``Sell%`` / ``PCA_Sell%`` for a sell signal.
            initial_capital: Starting portfolio value.

        Returns:
            Tuple ``(portfolio_values, position, entry_price)``: the value
            after each processed row, the final position (``'Buy'``,
            ``'Sell'`` or ``None``) and the close at which it was entered.
        """
        portfolio_value = initial_capital
        position = None
        entry_price = None
        portfolio_values = []

        # Row 0 only provides the reference close for row 1; signals are
        # evaluated from row 1 onwards, matching the original loop bounds.
        for i in range(1, len(self.data)):
            row = self.data.iloc[i]
            close = row['Close']
            previous_close = self.data.iloc[i - 1]['Close']

            # Mark the position held coming into this bar to market.
            if position == 'Buy':
                portfolio_value *= close / previous_close
            elif position == 'Sell':
                portfolio_value *= previous_close / close

            buy_signal = (
                (row['PCA_Scenario'] == 'Buy' and row['PCA_Buy%'] > buy_threshold)
                or (row['Scenario'] == 'Buy' and row['Buy%'] > buy_threshold)
            )
            sell_signal = (
                (row['PCA_Scenario'] == 'Sell' and row['PCA_Sell%'] > sell_threshold)
                or (row['Scenario'] == 'Sell' and row['Sell%'] > sell_threshold)
            )

            # A buy signal takes precedence when both fire on the same row.
            if buy_signal:
                if position != 'Buy':
                    position = 'Buy'
                    entry_price = close
            elif sell_signal:
                if position != 'Sell':
                    position = 'Sell'
                    entry_price = close

            portfolio_values.append(portfolio_value)

        return portfolio_values, position, entry_price
def run_analysis():
    """Fetch data, build signals, backtest and render the result in Streamlit.

    Returns:
        True when the whole pipeline ran and the metrics were rendered;
        False when any step raised, in which case the error is shown with
        ``st.error``.
    """
    try:
        raw_prices = DataFetcher(ticker="BTC-USD", nb_days=50).fetch_data()

        # Returns/probabilities first, then the PCA-derived signal columns.
        processor = FinancialDataProcessor(raw_prices)
        enriched = processor.calculate_returns().calculate_probabilities()
        principal = PCATransformer(n_components=1).fit_transform(enriched)
        enriched = processor.apply_pca_calculations(principal)

        # Model + SHAP ranking drive the threshold adjustment.
        builder = StrategyBuilder(enriched)
        model, features, _ = builder.train_model(target_column='PCA_Scenario')
        shap_output = builder.compute_shapley_values(model, features)
        importance = builder.analyze_feature_importance(shap_output, features.columns)
        buy_threshold, sell_threshold = builder.refine_thresholds(importance)

        simulation = Backtester(enriched).backtest(buy_threshold, sell_threshold)
        _, final_position, entry_price = simulation
        latest = enriched.iloc[-1]

        # Display results
        position_col, metrics_col = st.columns(2)
        with position_col:
            st.subheader("Current Position")
            st.metric("Position", final_position or "No position")
            if final_position:
                st.metric("Entry Price", f"${entry_price:.2f}")
        with metrics_col:
            st.subheader("Decision Metrics")
            st.metric("Buy%", f"{latest['PCA_Buy%']:.4f}")
            st.metric("Sell%", f"{latest['PCA_Sell%']:.4f}")
    except Exception as e:
        # Top-level UI boundary: surface the failure on the page instead of crashing.
        st.error(f"An error occurred: {str(e)}")
        return False
    else:
        return True
# Page configuration: this is the first Streamlit call executed at module level.
st.set_page_config(page_title="BTC-USD Trading Bot", layout="wide")
st.title("BTC-USD Trading Bot")
# Run analysis automatically on page load.
# The boolean returned by run_analysis() is ignored; failures are reported
# on the page by run_analysis() itself via st.error.
run_analysis()
# Educational Slides Section: heading, embedded audio player and reference links.
st.header("Educational Slides")
st.markdown('🎧Listen to the Audio🎧')
# unsafe_allow_html=True is passed so the <iframe> markup below is not escaped
# (per the Streamlit st.markdown API).
st.markdown(
"""
<iframe src="https://drive.google.com/file/d/1oZdaiyrNYJ4v3LZ3lEurMSG3n784wiOw/preview" width="640" height="480"></iframe>
""", unsafe_allow_html=True
)
# NOTE(review): both links below use the host "huggingface.co." (with a
# trailing dot) - confirm this is intentional.
st.markdown('Read the [White Paper](https://huggingface.co./spaces/earnliners/dow-usd/resolve/main/QPTFen.pdf).')
st.markdown('Powered by [Forecast Trade Group](https://huggingface.co./TradeAdmin).')
# Slides 1-4: static educational text, each rendered inside an expander that
# starts collapsed (expanded=False). Nothing here reads the analysis results.
with st.expander("Slide 1: Introduction", expanded=False):
    st.subheader("Real-Time Crypto Trading Bot Using Machine Learning and PCA")
    st.markdown("""
**Subtitle:** Leveraging Financial Data Analysis for Optimal Trading Decisions
**Overview:**
This system integrates real-time financial data, machine learning, and principal component analysis (PCA)
to automate trading decisions in cryptocurrency markets.
""")
with st.expander("Slide 2: Objective", expanded=False):
    st.subheader("Main Goals")
    st.markdown("""
- Develop an automated trading system that optimizes buy and sell decisions based on historical financial data
- Use machine learning models, specifically Random Forest, to predict market movements
- Implement Principal Component Analysis (PCA) for dimensionality reduction and feature extraction
- Backtest the system and evaluate portfolio performance
""")
with st.expander("Slide 3: Key Concepts", expanded=False):
    st.subheader("Core Technologies and Methods")
    st.markdown("""
- **Logarithmic Return:** A continuous-time return calculation used to model price changes in markets
- **Principal Component Analysis (PCA):** A dimensionality reduction technique to extract meaningful features
- **Machine Learning:** Using Random Forest to classify "Buy" and "Sell" scenarios
- **SHAP Values:** A method to interpret model outputs through feature contribution analysis
""")
# Slide 4 uses a two-column layout: data fetching on the left, preprocessing on the right.
with st.expander("Slide 4: Data Collection and Preprocessing", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Data Fetching")
        st.markdown("""
- **Source:** Yahoo Finance (yfinance)
- **Data:** Historical cryptocurrency data
- **Interval:** Hourly data points
- **Period:** Last 50 days
""")
    with col2:
        st.markdown("### Preprocessing")
        st.markdown("""
- Calculate logarithmic returns
- Classify scenarios (Buy/Sell)
- Adjust returns for sell scenarios
- Handle missing values and outliers
""")
# Slides 5-8: static explanations of PCA, the model, SHAP and threshold
# refinement, each inside an expander that starts collapsed.
with st.expander("Slide 5: Principal Component Analysis (PCA)", expanded=False):
    st.subheader("PCA Process")
    st.markdown("""
1. **Data Standardization:**
```
X' = (X - μ) / σ
```
2. **Covariance Matrix:**
```
Cov(X) = 1/(n-1) Σ(Xi - μ)(Xi - μ)ᵀ
```
3. **Eigenvalue Decomposition:**
- Find principal components
- Sort by variance explained
4. **Dimensionality Reduction:**
- Transform data to lower dimensions
- Preserve important features
""")
# NOTE(review): the "Training Process" bullets mention cross-validation and
# hyperparameter tuning, while StrategyBuilder.train_model in this file fits a
# single RandomForestClassifier with fixed settings - confirm the slide text.
with st.expander("Slide 6: Machine Learning Strategy", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Model Architecture")
        st.markdown("""
- Random Forest Classifier
- Feature selection from PCA
- Binary classification (Buy/Sell)
""")
    with col2:
        st.markdown("### Training Process")
        st.markdown("""
- Cross-validation
- Hyperparameter tuning
- Performance metrics
- Model evaluation
""")
with st.expander("Slide 7: SHAP Analysis", expanded=False):
    st.subheader("Shapley Additive Explanations")
    # Plain (non-f) string: the braces in the formula are literal text.
    st.markdown("""
**SHAP Value Formula:**
```
φᵢ(f) = 1/|N|! Σ [f(S ∪ {i}) - f(S)]
```
**Key Components:**
- Feature importance ranking
- Individual prediction explanations
- Global model interpretation
""")
with st.expander("Slide 8: Strategy Refinement", expanded=False):
    st.subheader("Dynamic Strategy Adjustment")
    st.markdown("""
1. **Threshold Refinement:**
- Use SHAP values to identify key features
- Adjust thresholds based on importance
2. **Signal Processing:**
- Buy signal strengthening
- Sell signal validation
- Risk management integration
""")
# Slides 9-13: static text on backtesting, the dashboard, a summary, planned
# work and Q&A, each inside an expander that starts collapsed.
with st.expander("Slide 9: Backtesting Framework", expanded=False):
    st.markdown("""
### Portfolio Value Calculation
```
Portfolio Valueₜ₊₁ = Portfolio Valueₜ × (Pₜ₊₁/Pₜ)
```
### Trading Logic
- Buy when probability > threshold
- Sell when probability > threshold
- Position management
### Performance Metrics
- Total Return
- Risk Metrics
- Sharpe Ratio
""")
# NOTE(review): run_analysis() in this file renders only the current position,
# entry price and Buy%/Sell% metrics - confirm the feature list below.
with st.expander("Slide 10: Real-Time Interface", expanded=False):
    st.subheader("Streamlit Dashboard Features")
    st.markdown("""
- Live trading signals
- Portfolio performance tracking
- Position monitoring
- Automatic updates
- Historical performance
""")
with st.expander("Slide 11: Summary", expanded=False):
    st.markdown("""
### Key System Components
- Automated trading system
- ML & PCA integration
- SHAP-based interpretation
- Real-time analytics
- Performance tracking
""")
with st.expander("Slide 12: Future Improvements", expanded=False):
    st.markdown("""
### Planned Enhancements
1. Additional data sources integration
2. Advanced optimization techniques
3. Live trading deployment
4. Enhanced risk management
5. Portfolio diversification
""")
with st.expander("Slide 13: Q&A", expanded=False):
    st.markdown("""
### Questions & Discussion
Thank you for exploring our trading system! For questions or suggestions:
- System architecture
- Implementation details
- Performance metrics
- Future developments
""")
# Support section: horizontal rule, heading and a call-to-action link.
st.markdown("---")
st.markdown("### Enjoying the Content?")
st.markdown("""
If you find our work useful and interesting, please consider supporting us for **free** on Publish0x!
It's quick, easy, and no cost to you. Just follow this link to show your support:
[**Like and Tip Us for Free on Publish0x!**](https://www.publish0x.com/start/crypto-trading-mathematical-modeling-and-strategic-optimizat-xkelryw)
""")
# Footer
st.markdown("---")
# NOTE(review): the string below opens with '*' but never closes it, so the
# asterisk has no closing partner for Markdown emphasis - confirm intended.
st.markdown("*Support Development & Info : ")
st.markdown("Send your email in comment to your btc donation at ")
st.markdown("1P9R71C6JYJxrPVEzMz4K3hoHYGRW39A9A")
# NOTE(review): the host is written "huggingface.co." (trailing dot) - confirm.
st.markdown('Join our [Community on Huggingface](https://huggingface.co./TradeAdmin) for more.')
st.markdown('All our bots are [tested on HTX](https://www.htx.com/invite/en-us/1f?invite_code=awhd9223).')
# Educational Slides Section dona