# Hugging Face Space: earnliners/btc-usd (status: Running)
# File size: 16,075 Bytes

import streamlit as st
from datetime import datetime, timedelta
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import shap
import matplotlib.pyplot as plt

class DataFetcher:
    """Downloads hourly price history for one ticker through yfinance.

    Attributes are plain values set by the constructor:
    ``ticker`` is the symbol handed to ``yf.download``, ``nb_days`` is how
    many days before today's midnight the window starts, and ``data`` holds
    the most recent download (``None`` until ``fetch_data`` has run).
    """

    def __init__(self, ticker, nb_days):
        self.ticker, self.nb_days = ticker, nb_days
        self.data = None

    def fetch_data(self):
        """Download the price window, store it on ``self.data`` and return it.

        The window is anchored on today's local midnight: it starts
        ``nb_days`` days before that and ends one day after it, so bars from
        the current day fall inside the requested range.
        """
        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        window_start = midnight - timedelta(days=self.nb_days)
        window_end = midnight + timedelta(days=1)
        self.data = yf.download(
            self.ticker,
            start=window_start,
            end=window_end,
            interval="1h",
        )
        return self.data

class FinancialDataProcessor:
    """Derives return, scenario and probability columns from price data.

    The processor works on its own copy of the frame it is given; every
    public method reads and updates ``self.data``.
    """

    def __init__(self, data):
        self.data = data.copy()

    def _flatten_columns(self):
        """Collapse MultiIndex columns into single ``level0_level1`` labels.

        A column whose second level is empty keeps only its first level.
        Frames with ordinary columns are left untouched.
        """
        if isinstance(self.data.columns, pd.MultiIndex):
            self.data.columns = [
                f"{col[0]}_{col[1]}" if col[1] else col[0]
                for col in self.data.columns
            ]

    def _find_close_column(self):
        """Return the label to treat as the closing price, or ``None``.

        Preference order: a column named exactly ``'Close'``, then a label
        that starts with ``'Close'`` (e.g. ``'Close_BTC-USD'`` after
        flattening), then any label merely containing ``'Close'``. Picking the
        first substring match instead would let ``'Adj Close'`` win over
        ``'Close'`` and, once renamed, leave two columns called ``'Close'``.
        """
        labels = [col for col in self.data.columns if isinstance(col, str)]
        if 'Close' in labels:
            return 'Close'
        for label in labels:
            if label.startswith('Close'):
                return label
        for label in labels:
            if 'Close' in label:
                return label
        return None

    def calculate_returns(self):
        """Add log-return, scenario and adjusted-return columns.

        Rows with a non-positive close are removed first, then the first row
        (which has no predecessor) and any row with an infinite or missing
        log return are dropped.

        Returns:
            The processor itself, so calls can be chained.

        Raises:
            ValueError: if no column label contains ``'Close'``.
        """
        self._flatten_columns()

        close_label = self._find_close_column()
        if close_label is None:
            raise ValueError("The 'Close' column is missing in the dataset.")

        if close_label != 'Close':
            self.data.rename(columns={close_label: 'Close'}, inplace=True)
        # log() is undefined for non-positive prices
        self.data = self.data[self.data['Close'] > 0].copy()

        self.data['LogReturn'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data.replace([np.inf, -np.inf], np.nan, inplace=True)
        self.data.dropna(subset=['LogReturn'], inplace=True)

        # A return of exactly zero is classified as 'Sell'
        self.data['Scenario'] = np.where(self.data['LogReturn'] > 0, 'Buy', 'Sell')
        # Flip the sign on 'Sell' rows so the adjusted return is never negative
        self.data['AdjustedLogReturn'] = np.where(
            self.data['Scenario'] == 'Sell', -self.data['LogReturn'], self.data['LogReturn']
        )
        # NOTE(review): 252 is kept as-is; DataFetcher in this file requests
        # hourly bars, so confirm this is the intended scaling factor.
        self.data['AnnualizedReturn'] = self.data['AdjustedLogReturn'] * 252

        return self

    def calculate_probabilities(self):
        """Add ``Buy%`` and ``Sell%`` columns and return the frame.

        Both are derived from ``tanh(LogReturn)`` and always sum to 1.
        Requires ``calculate_returns`` to have created ``LogReturn``.
        """
        squashed = np.tanh(self.data['LogReturn'])
        self.data['Buy%'] = (1 + squashed) / 2
        self.data['Sell%'] = (1 - squashed) / 2
        return self.data

    def apply_pca_calculations(self, pca_result):
        """Derive PCA-based columns and join them onto ``self.data``.

        Args:
            pca_result: frame with a ``PC1`` column, indexed like
                ``self.data``. The caller's frame is not modified.

        Returns:
            ``self.data`` after an index-aligned merge. Only index values
            present on both sides remain (``merge`` defaults to an inner
            join).
        """
        # Only positive PC1 values are kept so the log of the ratio is defined;
        # shift(1) therefore refers to the previous *retained* row.
        pca_result = pca_result[pca_result['PC1'] > 0].copy()

        pca_result['PCA_LogReturn'] = np.log(pca_result['PC1'] / pca_result['PC1'].shift(1))
        pca_result.replace([np.inf, -np.inf], np.nan, inplace=True)
        pca_result.dropna(subset=['PCA_LogReturn'], inplace=True)

        pca_squashed = np.tanh(pca_result['PCA_LogReturn'])
        pca_result['PCA_Scenario'] = np.where(pca_result['PCA_LogReturn'] > 0, 'Buy', 'Sell')
        pca_result['PCA_Buy%'] = (1 + pca_squashed) / 2
        pca_result['PCA_Sell%'] = (1 - pca_squashed) / 2

        self.data = self.data.merge(pca_result, left_index=True, right_index=True)
        return self.data

class PCATransformer:
    """Standardizes the numeric columns of a frame and projects them with PCA.

    The scaler and the PCA estimator are created once in the constructor and
    re-fitted on every ``fit_transform`` call.
    """

    def __init__(self, n_components=1):
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data):
        """Fit on ``data`` and return its principal-component scores.

        Non-numeric columns are ignored. The result is a DataFrame with
        columns ``PC1`` .. ``PC<n_components>`` that shares ``data``'s index.
        """
        numeric_only = data.select_dtypes(include=[np.number])
        standardized = self.scaler.fit_transform(numeric_only)
        scores = self.pca.fit_transform(standardized)
        component_labels = [f'PC{rank}' for rank in range(1, self.n_components + 1)]
        return pd.DataFrame(scores, columns=component_labels, index=data.index)

class StrategyBuilder:
    """Trains a classifier on the processed data and turns SHAP importances
    into buy/sell thresholds.

    The builder keeps its own copy of the frame it is given.
    """

    def __init__(self, data):
        self.data = data.copy()

    def train_model(self, target_column):
        """Fit a random forest that predicts ``target_column``.

        Every numeric column of ``self.data`` is used as a feature, except
        the target itself should it be numeric.

        Returns:
            ``(model, X, y_encoded)``: the fitted classifier, the feature
            frame it was trained on, and the label-encoded target.
        """
        X = self.data.select_dtypes(include=[np.number])
        # A numeric target must not be fed to the model as one of its inputs
        X = X.drop(columns=[target_column], errors='ignore')
        y = self.data[target_column]
        y_encoded = LabelEncoder().fit_transform(y)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X, y_encoded)
        return model, X, y_encoded

    def compute_shapley_values(self, model, X):
        """Return ``shap.TreeExplainer(model).shap_values(X)`` unchanged."""
        explainer = shap.TreeExplainer(model)
        return explainer.shap_values(X)

    def analyze_feature_importance(self, shap_values, feature_names):
        """Rank features by mean absolute SHAP value.

        Args:
            shap_values: a list of per-class 2-D arrays, a 3-D array whose
                last axis indexes the class, or a single 2-D array.
            feature_names: one name per feature column.

        Returns:
            DataFrame with columns ``Feature`` and ``Mean_Abs_SHAP`` sorted
            from most to least important.

        Raises:
            ValueError: if ``shap_values`` is an empty list or the number of
                feature columns differs from ``len(feature_names)``.
        """
        # Class index 1 is used whenever it exists; if only one class is
        # present, fall back to index 0 instead of failing with IndexError.
        if isinstance(shap_values, list):
            if not shap_values:
                raise ValueError("No SHAP values were provided.")
            shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]

        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            class_index = 1 if shap_values.shape[2] > 1 else 0
            shap_values = shap_values[:, :, class_index]

        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)

        if len(mean_abs_shap) != len(feature_names):
            raise ValueError("Mismatch between SHAP values and feature names.")

        feature_importance = pd.DataFrame({
            'Feature': list(feature_names),
            'Mean_Abs_SHAP': mean_abs_shap,
        }).sort_values(by='Mean_Abs_SHAP', ascending=False)

        return feature_importance

    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
        """Raise a threshold by 10% per matching feature among the top three.

        A top-3 feature whose name contains ``'Buy%'`` (which includes
        ``'PCA_Buy%'``) scales ``buy_threshold`` by 1.1; otherwise one whose
        name contains ``'Sell%'`` scales ``sell_threshold`` by 1.1.

        Returns:
            ``(buy_threshold, sell_threshold)`` after adjustment.
        """
        for feature in feature_importance.head(3)['Feature'].tolist():
            name = str(feature)
            if 'Buy%' in name:
                buy_threshold *= 1.1
            elif 'Sell%' in name:
                sell_threshold *= 1.1
        return buy_threshold, sell_threshold

class Backtester:
    """Replays the buy/sell signals over the processed data.

    The backtester keeps its own copy of the frame it is given. The frame
    must provide the columns ``Close``, ``Scenario``, ``Buy%``, ``Sell%``,
    ``PCA_Scenario``, ``PCA_Buy%`` and ``PCA_Sell%``.
    """

    def __init__(self, data):
        self.data = data.copy()

    def backtest(self, buy_threshold=0.5, sell_threshold=0.5):
        """Simulate the strategy starting from a portfolio value of 10000.

        The first row is skipped. For every later row the position carried
        into the bar is marked to market with that bar's price move
        (``close / previous_close`` when long, the inverse when short), and
        only then is the bar's signal applied. Multiplying by
        ``close / entry_price`` on every bar instead would re-apply the whole
        gain since entry each time and over-compound the result.

        A buy signal takes precedence over a sell signal on the same bar.
        Either the PCA-based or the plain scenario can trigger a signal, as
        long as its probability exceeds the matching threshold.

        Args:
            buy_threshold: minimum ``Buy%`` / ``PCA_Buy%`` for a buy signal.
            sell_threshold: minimum ``Sell%`` / ``PCA_Sell%`` for a sell signal.

        Returns:
            ``(portfolio_values, position, entry_price)``: the portfolio value
            after each processed bar, the final position (``'Buy'``,
            ``'Sell'`` or ``None``) and the close at which it was opened
            (``None`` if no position was ever taken).
        """
        portfolio_value = 10000
        position = None
        entry_price = None
        previous_close = None  # only read once a position exists
        portfolio_values = []

        for _, row in self.data.iloc[1:].iterrows():
            close = row['Close']

            # Per-bar return of the position held coming into this bar
            if position == 'Buy':
                portfolio_value *= close / previous_close
            elif position == 'Sell':
                portfolio_value *= previous_close / close

            buy_signal = (
                (row['PCA_Scenario'] == 'Buy' and row['PCA_Buy%'] > buy_threshold)
                or (row['Scenario'] == 'Buy' and row['Buy%'] > buy_threshold)
            )
            sell_signal = (
                (row['PCA_Scenario'] == 'Sell' and row['PCA_Sell%'] > sell_threshold)
                or (row['Scenario'] == 'Sell' and row['Sell%'] > sell_threshold)
            )

            if buy_signal:
                if position != 'Buy':
                    position = 'Buy'
                    entry_price = close
            elif sell_signal:
                if position != 'Sell':
                    position = 'Sell'
                    entry_price = close

            portfolio_values.append(portfolio_value)
            previous_close = close

        return portfolio_values, position, entry_price

def run_analysis():
    """Run the full pipeline once and render the outcome with Streamlit.

    Steps: download 50 days of BTC-USD data, derive returns and
    probabilities, add the PCA-based columns, train the classifier on
    ``PCA_Scenario``, turn SHAP importances into thresholds, backtest, and
    show the final position next to the latest PCA buy/sell percentages.

    Returns:
        ``True`` when everything ran; ``False`` after any exception, which is
        reported on the page through ``st.error`` instead of being raised.
    """
    try:
        raw_prices = DataFetcher(ticker="BTC-USD", nb_days=50).fetch_data()

        processor = FinancialDataProcessor(raw_prices)
        processor.calculate_returns()
        enriched = processor.calculate_probabilities()

        components = PCATransformer(n_components=1).fit_transform(enriched)
        enriched = processor.apply_pca_calculations(components)

        builder = StrategyBuilder(enriched)
        model, features, _ = builder.train_model(target_column='PCA_Scenario')
        shap_values = builder.compute_shapley_values(model, features)
        importance = builder.analyze_feature_importance(shap_values, features.columns)
        buy_threshold, sell_threshold = builder.refine_thresholds(importance)

        # The simulated portfolio curve is computed but not shown on the page
        _, final_position, entry_price = Backtester(enriched).backtest(
            buy_threshold, sell_threshold
        )

        latest = enriched.iloc[-1]

        # Display results
        position_col, metrics_col = st.columns(2)

        with position_col:
            st.subheader("Current Position")
            st.metric("Position", final_position if final_position else "No position")
            if final_position:
                st.metric("Entry Price", f"${entry_price:.2f}")

        with metrics_col:
            st.subheader("Decision Metrics")
            st.metric("Buy%", f"{latest['PCA_Buy%']:.4f}")
            st.metric("Sell%", f"{latest['PCA_Sell%']:.4f}")

    except Exception as exc:
        st.error(f"An error occurred: {str(exc)}")
        return False

    return True

# Page configuration: browser-tab title and wide layout. Nothing above this
# point in the file calls into Streamlit at module level, so this is the first
# Streamlit command executed.
st.set_page_config(page_title="BTC-USD Trading Bot", layout="wide")
st.title("BTC-USD Trading Bot")


# Run analysis automatically on page load. run_analysis() reports any
# exception through st.error and returns False instead of raising, so the
# static content below is rendered either way; its return value is not used.
run_analysis()


# Educational Slides Section: static header, embedded audio and reference
# links. Nothing here depends on the analysis results.
st.header("Educational Slides")

st.markdown('🎧Listen to the Audio🎧')
# Raw HTML iframe pointing at a Google Drive preview. unsafe_allow_html=True is
# passed so the markup is rendered rather than shown as text; the string is a
# fixed literal with no user input interpolated into it.
st.markdown(
    """
    <iframe src="https://drive.google.com/file/d/1oZdaiyrNYJ4v3LZ3lEurMSG3n784wiOw/preview" width="640" height="480"></iframe>
    """, unsafe_allow_html=True
)

# NOTE(review): the Hugging Face links below use the host "huggingface.co."
# (with a trailing dot) — confirm this is intentional.
st.markdown('Read the [White Paper](https://huggingface.co./spaces/earnliners/dow-usd/resolve/main/QPTFen.pdf).')


st.markdown('Powered by [Forecast Trade Group](https://huggingface.co./TradeAdmin).')


# Slides 1-4: each slide is a collapsed-by-default expander holding static
# markdown; none of it is computed from the downloaded data.
with st.expander("Slide 1: Introduction", expanded=False):
    st.subheader("Real-Time Crypto Trading Bot Using Machine Learning and PCA")
    st.markdown("""
    **Subtitle:** Leveraging Financial Data Analysis for Optimal Trading Decisions
    
    **Overview:**  
    This system integrates real-time financial data, machine learning, and principal component analysis (PCA) 
    to automate trading decisions in cryptocurrency markets.
    """)

with st.expander("Slide 2: Objective", expanded=False):
    st.subheader("Main Goals")
    st.markdown("""
    - Develop an automated trading system that optimizes buy and sell decisions based on historical financial data
    - Use machine learning models, specifically Random Forest, to predict market movements
    - Implement Principal Component Analysis (PCA) for dimensionality reduction and feature extraction
    - Backtest the system and evaluate portfolio performance
    """)

with st.expander("Slide 3: Key Concepts", expanded=False):
    st.subheader("Core Technologies and Methods")
    st.markdown("""
    - **Logarithmic Return:** A continuous-time return calculation used to model price changes in markets
    - **Principal Component Analysis (PCA):** A dimensionality reduction technique to extract meaningful features
    - **Machine Learning:** Using Random Forest to classify "Buy" and "Sell" scenarios
    - **SHAP Values:** A method to interpret model outputs through feature contribution analysis
    """)

# Slide 4 is laid out in two columns: data fetching on the left,
# preprocessing on the right. col1/col2 are module-level names reused by
# later slides.
with st.expander("Slide 4: Data Collection and Preprocessing", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Data Fetching")
        st.markdown("""
        - **Source:** Yahoo Finance (yfinance)
        - **Data:** Historical cryptocurrency data
        - **Interval:** Hourly data points
        - **Period:** Last 50 days
        """)
    with col2:
        st.markdown("### Preprocessing")
        st.markdown("""
        - Calculate logarithmic returns
        - Classify scenarios (Buy/Sell)
        - Adjust returns for sell scenarios
        - Handle missing values and outliers
        """)

# Slides 5-8: static explanatory markdown (PCA, model, SHAP, refinement),
# each in an expander that starts collapsed.
with st.expander("Slide 5: Principal Component Analysis (PCA)", expanded=False):
    st.subheader("PCA Process")
    st.markdown("""
    1. **Data Standardization:**
       ```
       X' = (X - μ) / σ
       ```
    
    2. **Covariance Matrix:**
       ```
       Cov(X) = 1/(n-1) Σ(Xi - μ)(Xi - μ)ᵀ
       ```
    
    3. **Eigenvalue Decomposition:**
       - Find principal components
       - Sort by variance explained
    
    4. **Dimensionality Reduction:**
       - Transform data to lower dimensions
       - Preserve important features
    """)

# Slide 6 uses a two-column layout: model architecture on the left, training
# process on the right.
with st.expander("Slide 6: Machine Learning Strategy", expanded=False):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### Model Architecture")
        st.markdown("""
        - Random Forest Classifier
        - Feature selection from PCA
        - Binary classification (Buy/Sell)
        """)
    with col2:
        st.markdown("### Training Process")
        st.markdown("""
        - Cross-validation
        - Hyperparameter tuning
        - Performance metrics
        - Model evaluation
        """)

with st.expander("Slide 7: SHAP Analysis", expanded=False):
    st.subheader("Shapley Additive Explanations")
    st.markdown("""
    **SHAP Value Formula:**
    ```
    φᵢ(f) = 1/|N|! Σ [f(S ∪ {i}) - f(S)]
    ```
    
    **Key Components:**
    - Feature importance ranking
    - Individual prediction explanations
    - Global model interpretation
    """)

with st.expander("Slide 8: Strategy Refinement", expanded=False):
    st.subheader("Dynamic Strategy Adjustment")
    st.markdown("""
    1. **Threshold Refinement:**
       - Use SHAP values to identify key features
       - Adjust thresholds based on importance
    
    2. **Signal Processing:**
       - Buy signal strengthening
       - Sell signal validation
       - Risk management integration
    """)

# Slides 9-13: static markdown covering backtesting, the dashboard, a summary,
# planned work and Q&A. Each expander starts collapsed.
with st.expander("Slide 9: Backtesting Framework", expanded=False):
    st.markdown("""
    ### Portfolio Value Calculation
    ```
    Portfolio Valueₜ₊₁ = Portfolio Valueₜ × (Pₜ₊₁/Pₜ)
    ```
    
    ### Trading Logic
    - Buy when probability > threshold
    - Sell when probability > threshold
    - Position management
    
    ### Performance Metrics
    - Total Return
    - Risk Metrics
    - Sharpe Ratio
    """)

with st.expander("Slide 10: Real-Time Interface", expanded=False):
    st.subheader("Streamlit Dashboard Features")
    st.markdown("""
    - Live trading signals
    - Portfolio performance tracking
    - Position monitoring
    - Automatic updates
    - Historical performance
    """)

with st.expander("Slide 11: Summary", expanded=False):
    st.markdown("""
    ### Key System Components
    - Automated trading system
    - ML & PCA integration
    - SHAP-based interpretation
    - Real-time analytics
    - Performance tracking
    """)

with st.expander("Slide 12: Future Improvements", expanded=False):
    st.markdown("""
    ### Planned Enhancements
    1. Additional data sources integration
    2. Advanced optimization techniques
    3. Live trading deployment
    4. Enhanced risk management
    5. Portfolio diversification
    """)

with st.expander("Slide 13: Q&A", expanded=False):
    st.markdown("""
    ### Questions & Discussion
    
    Thank you for exploring our trading system! For questions or suggestions:
    - System architecture
    - Implementation details
    - Performance metrics
    - Future developments
    """)
# Support call-out, separated from the slides by a horizontal rule.
st.markdown("---")
st.markdown("### Enjoying the Content?")
st.markdown("""
If you find our work useful and interesting, please consider supporting us for **free** on Publish0x!  
It's quick, easy, and no cost to you. Just follow this link to show your support:
[**Like and Tip Us for Free on Publish0x!**](https://www.publish0x.com/start/crypto-trading-mathematical-modeling-and-strategic-optimizat-xkelryw)
""")

# Footer: donation address and community / exchange links, one markdown call
# per line of text.
st.markdown("---")
# NOTE(review): the leading '*' in the next string has no closing '*' —
# confirm whether italic text was intended.
st.markdown("*Support Development & Info : ")
st.markdown("Send your email in comment to your btc donation at ")
st.markdown("1P9R71C6JYJxrPVEzMz4K3hoHYGRW39A9A")
st.markdown('Join our [Community on Huggingface](https://huggingface.co./TradeAdmin) for more.')
st.markdown('All our bots are [tested on HTX](https://www.htx.com/invite/en-us/1f?invite_code=awhd9223).')

# Educational Slides Section dona