Spaces:

earnliners
/

btc-usd

Running

App Files Files Community

earnliners committed 17 days ago

Commit

eb44f1d

verified ·

1 Parent(s): d858238

Update app.py

Browse files

Files changed (1) hide show

app.py +228 -1

app.py CHANGED Viewed

@@ -9,7 +9,234 @@ from sklearn.ensemble import RandomForestClassifier
 import shap
 import matplotlib.pyplot as plt
-[Previous code remains unchanged until the end of run_analysis() function]
 # Run analysis automatically on page load
 run_analysis()

 import shap
 import matplotlib.pyplot as plt
class DataFetcher:
    """Fetches historical hourly price data for one ticker using yfinance.

    Attributes:
        ticker: Symbol handed to ``yf.download``.
        nb_days: Number of days of history to request, counted back from
            today's midnight.
        data: Result of the most recent ``fetch_data`` call, or ``None`` if
            nothing has been fetched yet.
    """

    def __init__(self, ticker, nb_days):
        self.ticker = ticker
        self.nb_days = nb_days
        self.data = None

    def fetch_data(self):
        """Download hourly bars covering the last ``nb_days`` days.

        The window starts ``nb_days`` before today's midnight (naive local
        time) and ends at tomorrow's midnight, so bars from the current day
        are part of the request.

        Returns:
            The DataFrame returned by ``yf.download``; it is also stored on
            ``self.data``.

        Raises:
            ValueError: If ``nb_days`` is negative, or if the download comes
                back with no rows.
        """
        # A negative look-back would put the start after the end of the window.
        if self.nb_days < 0:
            raise ValueError("nb_days must not be negative.")

        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        start_date = midnight - timedelta(days=self.nb_days)
        # Push the end to tomorrow so today's bars fall inside the window.
        end_date = midnight + timedelta(days=1)

        self.data = yf.download(self.ticker, start=start_date, end=end_date, interval="1h")

        # Fail here with a readable message instead of letting an empty frame
        # break a later processing step with an unrelated error.
        if self.data is None or self.data.empty:
            raise ValueError(f"No data returned for ticker '{self.ticker}'.")
        return self.data
class FinancialDataProcessor:
    """Derives return, scenario and probability columns from price data.

    The constructor copies the DataFrame it receives; every step stores its
    result on ``self.data``.
    """

    def __init__(self, data):
        self.data = data.copy()

    def _flatten_columns(self):
        """Collapse two-level MultiIndex columns into ``"level0_level1"`` names.

        An empty second level yields just the first-level name.
        """
        if isinstance(self.data.columns, pd.MultiIndex):
            self.data.columns = [
                f"{col[0]}_{col[1]}" if col[1] else col[0]
                for col in self.data.columns
            ]

    def _find_close_column(self):
        """Return the label of the column to use as the closing price.

        Preference order: a column named exactly ``'Close'``, then the first
        column containing ``'Close'`` but not ``'Adj'``, then the first column
        containing ``'Close'`` at all.

        Raises:
            ValueError: If no column name contains ``'Close'``.
        """
        candidates = [col for col in self.data.columns if 'Close' in str(col)]
        if not candidates:
            raise ValueError("The 'Close' column is missing in the dataset.")
        if 'Close' in candidates:
            return 'Close'
        # Picking an 'Adj Close' variant while a plain close column exists
        # would silently swap the price series, so prefer the plain one.
        unadjusted = [col for col in candidates if 'Adj' not in str(col)]
        return (unadjusted or candidates)[0]

    def calculate_returns(self, periods_per_year=252):
        """Add log-return, scenario and scaled-return columns.

        Rows whose close is not strictly positive are dropped, as is the
        first row (it has no previous close to compare with).

        Args:
            periods_per_year: Multiplier applied to ``AdjustedLogReturn`` to
                produce ``AnnualizedReturn``. The default of 252 keeps the
                historical behaviour; pass the number of bars per year that
                matches the data frequency for a true annualisation.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If no close column can be found.
        """
        self._flatten_columns()

        close_column = self._find_close_column()
        if close_column != 'Close':
            self.data.rename(columns={close_column: 'Close'}, inplace=True)

        # log() needs strictly positive prices.
        self.data = self.data[self.data['Close'] > 0].copy()
        self.data['LogReturn'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data.replace([np.inf, -np.inf], np.nan, inplace=True)
        self.data.dropna(subset=['LogReturn'], inplace=True)

        self.data['Scenario'] = np.where(self.data['LogReturn'] > 0, 'Buy', 'Sell')
        # Flip the sign on 'Sell' rows so the adjusted return is non-negative.
        self.data['AdjustedLogReturn'] = np.where(
            self.data['Scenario'] == 'Sell', -self.data['LogReturn'], self.data['LogReturn']
        )
        self.data['AnnualizedReturn'] = self.data['AdjustedLogReturn'] * periods_per_year
        return self

    def calculate_probabilities(self):
        """Add ``Buy%`` and ``Sell%`` as ``(1 ± tanh(LogReturn)) / 2``.

        Requires the ``LogReturn`` column created by ``calculate_returns``.

        Returns:
            The processed DataFrame (``self.data``).
        """
        squashed = np.tanh(self.data['LogReturn'])
        self.data['Buy%'] = (1 + squashed) / 2
        self.data['Sell%'] = (1 - squashed) / 2
        return self.data

    def apply_pca_calculations(self, pca_result):
        """Derive PCA-based columns and join them onto ``self.data``.

        Only rows of ``pca_result`` with ``PC1 > 0`` are used; the log-return
        of ``PC1`` is taken between consecutive surviving rows. The join is
        an index-aligned merge with pandas' default (inner) behaviour, so
        ``self.data`` keeps only rows that have a valid ``PCA_LogReturn``.

        Args:
            pca_result: DataFrame with a ``PC1`` column, indexed like
                ``self.data``.

        Returns:
            The merged DataFrame (``self.data``).
        """
        # log() needs strictly positive inputs.
        pca_result = pca_result[pca_result['PC1'] > 0].copy()
        pca_result['PCA_LogReturn'] = np.log(pca_result['PC1'] / pca_result['PC1'].shift(1))
        pca_result.replace([np.inf, -np.inf], np.nan, inplace=True)
        pca_result.dropna(subset=['PCA_LogReturn'], inplace=True)

        pca_result['PCA_Scenario'] = np.where(pca_result['PCA_LogReturn'] > 0, 'Buy', 'Sell')
        pca_squashed = np.tanh(pca_result['PCA_LogReturn'])
        pca_result['PCA_Buy%'] = (1 + pca_squashed) / 2
        pca_result['PCA_Sell%'] = (1 - pca_squashed) / 2

        self.data = self.data.merge(pca_result, left_index=True, right_index=True)
        return self.data
class PCATransformer:
    """Standardises the numeric columns of a DataFrame and projects them with PCA.

    Attributes:
        n_components: Value forwarded to ``PCA(n_components=...)``.
        scaler: ``StandardScaler`` fitted on each ``fit_transform`` call.
        pca: ``PCA`` instance fitted on each ``fit_transform`` call.
    """

    def __init__(self, n_components=1):
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data):
        """Fit scaler and PCA on ``data`` and return the component scores.

        Non-numeric columns are ignored.

        Args:
            data: DataFrame holding at least one numeric column.

        Returns:
            DataFrame with columns ``PC1``, ``PC2``, ... (one per component
            actually produced) sharing the index of ``data``.

        Raises:
            ValueError: If ``data`` has no numeric columns or no rows.
        """
        numeric_data = data.select_dtypes(include=[np.number])
        if numeric_data.empty:
            raise ValueError("PCA needs at least one numeric column and one row.")

        scaled_data = self.scaler.fit_transform(numeric_data)
        components = self.pca.fit_transform(scaled_data)

        # Name the columns from the real output width: n_components is not
        # necessarily an int (PCA also accepts a variance fraction or 'mle').
        column_names = [f'PC{i + 1}' for i in range(components.shape[1])]
        return pd.DataFrame(components, columns=column_names, index=data.index)
class StrategyBuilder:
    """Trains a classifier on the processed data and derives thresholds from SHAP."""

    def __init__(self, data):
        self.data = data.copy()

    def train_model(self, target_column):
        """Fit a random forest that predicts ``target_column``.

        Features are all numeric columns of ``self.data`` except the target
        itself; the target is label-encoded before fitting.

        Args:
            target_column: Name of the column to predict.

        Returns:
            Tuple ``(model, X, y_encoded)``: the fitted classifier, the
            feature DataFrame and the encoded target array.
        """
        X = self.data.select_dtypes(include=[np.number])
        # A numeric target would otherwise be fed to the model as a feature.
        if target_column in X.columns:
            X = X.drop(columns=[target_column])
        # NOTE(review): columns derived from the target (for example a return
        # column that the scenario label was computed from) are still used as
        # features; confirm this is intended.
        y = self.data[target_column]
        y_encoded = LabelEncoder().fit_transform(y)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X, y_encoded)
        return model, X, y_encoded

    def compute_shapley_values(self, model, X):
        """Return the SHAP values of ``model`` for ``X`` via ``shap.TreeExplainer``."""
        explainer = shap.TreeExplainer(model)
        return explainer.shap_values(X)

    def analyze_feature_importance(self, shap_values, feature_names):
        """Rank features by mean absolute SHAP value.

        Args:
            shap_values: A 2-D array ``(samples, features)``, a list of such
                arrays (one per class), or a 3-D array
                ``(samples, features, classes)``. With several classes the
                values of class index 1 are used; with a single class the
                only available one is used.
            feature_names: Names matching the feature axis.

        Returns:
            DataFrame with columns ``Feature`` and ``Mean_Abs_SHAP``, sorted
            from most to least important.

        Raises:
            ValueError: If the number of names differs from the number of
                features in ``shap_values``.
        """
        if isinstance(shap_values, list):
            # Class 1 when present; fall back to the sole class otherwise.
            shap_values = shap_values[min(1, len(shap_values) - 1)]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, min(1, shap_values.shape[2] - 1)]

        mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
        if len(mean_abs_shap) != len(feature_names):
            raise ValueError("Mismatch between SHAP values and feature names.")

        feature_importance = pd.DataFrame({
            'Feature': list(feature_names),
            'Mean_Abs_SHAP': mean_abs_shap,
        }).sort_values(by='Mean_Abs_SHAP', ascending=False)
        return feature_importance

    def refine_thresholds(self, feature_importance, buy_threshold=0.5, sell_threshold=0.5):
        """Raise the thresholds by 10% per matching top-3 feature.

        Each of the three highest-ranked features whose name contains
        ``'Buy%'`` multiplies the buy threshold by 1.1; otherwise, a name
        containing ``'Sell%'`` multiplies the sell threshold by 1.1. The
        PCA-prefixed variants match the same substrings.

        Args:
            feature_importance: DataFrame with a ``Feature`` column, already
                sorted by importance.
            buy_threshold: Starting buy threshold.
            sell_threshold: Starting sell threshold.

        Returns:
            Tuple ``(buy_threshold, sell_threshold)`` after adjustment.
        """
        for feature in feature_importance.head(3)['Feature'].tolist():
            if 'Buy%' in feature:
                buy_threshold *= 1.1
            elif 'Sell%' in feature:
                sell_threshold *= 1.1
        return buy_threshold, sell_threshold
class Backtester:
    """Replays the buy/sell signals over historical rows and tracks equity."""

    def __init__(self, data):
        self.data = data.copy()

    @staticmethod
    def _mark_to_market(equity_at_entry, position, entry_price, price):
        """Value the open position at ``price``, starting from the equity held at entry."""
        if position == 'Buy':
            return equity_at_entry * (price / entry_price)
        if position == 'Sell':
            return equity_at_entry * (entry_price / price)
        return equity_at_entry

    def backtest(self, buy_threshold=0.5, sell_threshold=0.5):
        """Simulate the strategy starting from a portfolio value of 10000.

        The first row is skipped. On every later row a 'Buy' signal fires
        when either the PCA columns or the plain columns say 'Buy' with a
        probability above ``buy_threshold``; otherwise a 'Sell' signal is
        checked the same way against ``sell_threshold``. A signal different
        from the current position closes it at that row's close and opens
        the new one at the same price. Without a signal the position is held.

        The portfolio is valued on every row as
        ``equity_at_entry * close / entry_price`` for 'Buy' and
        ``equity_at_entry * entry_price / close`` for 'Sell'. Valuing from
        the equity at entry (rather than multiplying the running value by
        the since-entry ratio on every row) keeps a gain from being counted
        again on each subsequent bar.

        Args:
            buy_threshold: Minimum ``Buy%`` / ``PCA_Buy%`` for a buy signal.
            sell_threshold: Minimum ``Sell%`` / ``PCA_Sell%`` for a sell signal.

        Returns:
            Tuple ``(portfolio_values, position, entry_price)``: one value per
            processed row, the final position (``'Buy'``, ``'Sell'`` or
            ``None``) and the entry price of that position (``None`` if no
            position was ever opened).
        """
        portfolio_value = 10000
        position = None
        entry_price = None
        equity_at_entry = portfolio_value
        portfolio_values = []

        # Nothing to replay once the first row is skipped.
        if len(self.data) < 2:
            return portfolio_values, position, entry_price

        bars = self.data.iloc[1:]
        rows = zip(
            bars['Close'], bars['Scenario'], bars['Buy%'], bars['Sell%'],
            bars['PCA_Scenario'], bars['PCA_Buy%'], bars['PCA_Sell%'],
        )
        for close, scenario, buy_pct, sell_pct, pca_scenario, pca_buy_pct, pca_sell_pct in rows:
            if (pca_scenario == 'Buy' and pca_buy_pct > buy_threshold) or \
               (scenario == 'Buy' and buy_pct > buy_threshold):
                signal = 'Buy'
            elif (pca_scenario == 'Sell' and pca_sell_pct > sell_threshold) or \
                 (scenario == 'Sell' and sell_pct > sell_threshold):
                signal = 'Sell'
            else:
                signal = None

            if signal is not None and signal != position:
                # Realise the outgoing position at this close before flipping.
                equity_at_entry = self._mark_to_market(equity_at_entry, position, entry_price, close)
                position = signal
                entry_price = close

            portfolio_value = self._mark_to_market(equity_at_entry, position, entry_price, close)
            portfolio_values.append(portfolio_value)

        return portfolio_values, position, entry_price
def run_analysis():
    """Run the whole pipeline and render the results in the Streamlit page.

    Steps: download 50 days of hourly BTC-USD data, derive return and
    probability columns, add the PCA-based columns, train the classifier and
    rank features by SHAP, adjust the thresholds, backtest, then show the
    metrics, the portfolio curve and the feature-importance table.

    Returns:
        True when everything was rendered; False when any step raised, in
        which case the message is shown with ``st.error``.
    """
    try:
        fetcher = DataFetcher(ticker="BTC-USD", nb_days=50)
        data = fetcher.fetch_data()

        processor = FinancialDataProcessor(data)
        processed_data = processor.calculate_returns().calculate_probabilities()

        pca_transformer = PCATransformer(n_components=1)
        pca_result = pca_transformer.fit_transform(processed_data)
        processed_data = processor.apply_pca_calculations(pca_result)

        strategy_builder = StrategyBuilder(processed_data)
        model, X, y_encoded = strategy_builder.train_model(target_column='PCA_Scenario')
        shap_values = strategy_builder.compute_shapley_values(model, X)
        feature_importance = strategy_builder.analyze_feature_importance(shap_values, X.columns)
        buy_threshold, sell_threshold = strategy_builder.refine_thresholds(feature_importance)

        backtester = Backtester(processed_data)
        portfolio_values, final_position, entry_price = backtester.backtest(buy_threshold, sell_threshold)

        # Without this guard an empty backtest surfaces below as an opaque
        # "list index out of range".
        if not portfolio_values:
            raise ValueError("Not enough data to run the backtest.")

        last_row = processed_data.iloc[-1]

        # Display results
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Current Position")
            st.metric("Portfolio Value", f"${portfolio_values[-1]:.2f}")
            st.metric("Position", final_position or "No position")
            if final_position:
                st.metric("Entry Price", f"${entry_price:.2f}")
            st.metric("Latest Close", f"${last_row['Close']:.2f}")
        with col2:
            st.subheader("Decision Metrics")
            st.metric("Buy%", f"{last_row['Buy%']:.4f}")
            st.metric("Sell%", f"{last_row['Sell%']:.4f}")
            st.metric("PCA Buy%", f"{last_row['PCA_Buy%']:.4f}")
            st.metric("PCA Sell%", f"{last_row['PCA_Sell%']:.4f}")

        # Plot portfolio value
        st.subheader("Portfolio Value Over Time")
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            # The backtest skips the first row, hence index[1:].
            ax.plot(processed_data.index[1:], portfolio_values, label='Portfolio Value', color='blue')
            ax.set_title('Portfolio Value Over Time (Backtest)')
            ax.set_xlabel('Date')
            ax.set_ylabel('Portfolio Value ($)')
            ax.grid(True)
            ax.legend()
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure alive until it is closed; release it
            # so repeated runs do not accumulate figures in memory.
            plt.close(fig)

        # Feature importance
        st.subheader("Feature Importance")
        st.dataframe(feature_importance)
        return True
    except Exception as e:
        # Top-level boundary of the page: report the failure to the user
        # instead of letting the script crash.
        st.error(f"An error occurred: {e}")
        return False
# Page setup: browser-tab title and wide layout, then the on-page heading.
st.set_page_config(layout="wide", page_title="Crypto Trading Bot")
st.title("Crypto Trading Analysis Bot")

# Launch the analysis automatically whenever the page loads.
run_analysis()