Spaces:

sklearn-docs
/

IsolationForest-Model-for-Anomaly-Detection

Runtime error

App Files Files Community

Jayabalambika committed on Apr 25, 2023

Commit

776c009

1 Parent(s): f4d3ff0

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -130

app.py DELETED Viewed

@@ -1,130 +0,0 @@
-import os
-import pandas as pd
-from sklearn.ensemble import IsolationForest
-import numpy as np
-from sklearn.model_selection import train_test_split
-import gradio as gr
-import matplotlib.pyplot as plt
-from skops import hub_utils
-import pickle
-import time
-#Data preparation
-# Builds the toy 2-D dataset used by the demo: two Gaussian clusters of
-# n_samples points each plus n_outliers uniformly scattered points.
-# The order of the rng calls below determines the generated values, so
-# do not reorder them.
-n_samples, n_outliers = 120, 40
-# Fixed seed so the dataset is identical on every run.
-rng = np.random.RandomState(0)
-# NOTE: despite its name this matrix is not symmetric; it is applied below
-# as a linear transform that shears/scales the standard-normal samples.
-covariance = np.array([[0.5, -0.1], [0.7, 0.4]])
-cluster_1 = 0.4 * rng.randn(n_samples, 2) @ covariance + np.array([2, 2])  # general deformed cluster
-cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])  # spherical cluster
-# Outliers are drawn uniformly from the square [-4, 4) x [-4, 4).
-outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))
-X = np.concatenate([cluster_1, cluster_2, outliers]) #120+120+40 = 280 with 2D
-# Ground-truth labels aligned with X: +1 for the 2 * n_samples cluster
-# points (inliers), -1 for the trailing n_outliers points.
-y = np.concatenate(
-    [np.ones((2 * n_samples), dtype=int), -np.ones((n_outliers), dtype=int)]
-)
-def load_hf_model_hub():
-    '''
-    Download the model repository from the Hugging Face Hub (if it is not
-    already present locally) and load the pickled model it contains.
-
-    The repository "sklearn-docs/anomaly-detection" is downloaded into the
-    local directory "downloaded-model"; the object stored in
-    "isolation_forest.pkl" inside that directory is unpickled and returned.
-
-    Returns:
-        The object unpickled from isolation_forest.pkl (presumably a fitted
-        IsolationForest -- confirm against the model repository).
-    '''
-    repo_id = "sklearn-docs/anomaly-detection"
-    download_repo = "downloaded-model"
-    model_path = os.path.join(download_repo, "isolation_forest.pkl")
-
-    # Only download when the model file is missing. skops' download is
-    # expected to raise if the destination already exists and is non-empty
-    # (TODO confirm against the skops docs), so calling it unconditionally
-    # would break a second run in the same working directory.
-    if not os.path.isfile(model_path):
-        hub_utils.download(repo_id=repo_id, dst=download_repo)
-        # Kept from the original implementation; presumably gives the
-        # filesystem a moment to settle after the download -- TODO confirm
-        # whether this pause is still needed.
-        time.sleep(2)
-
-    # NOTE(security): unpickling executes arbitrary code from the file.
-    # This is only acceptable because the repository is a trusted,
-    # project-owned source; do not point this at untrusted repositories.
-    with open(model_path, 'rb') as model_file:
-        loaded_model = pickle.load(model_file)
-    return loaded_model
-#Visualize the data as a scatter plot
-def visualize_input_data():
-    '''
-    Draw the toy dataset (module-level X, coloured by the labels y) as a
-    scatter plot and return the matplotlib figure.
-
-    Returns:
-        The matplotlib Figure containing the scatter plot and its legend.
-    '''
-    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
-    # Figure number 1 is reused on every call; clear it so that repeated
-    # button clicks do not stack scatter artists and legends on old axes.
-    fig.clf()
-    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
-    # One legend handle per distinct label value; y holds -1 (outliers) and
-    # 1 (inliers), which is the order the labels below are given in.
-    handles, labels = scatter.legend_elements()
-    plt.axis("square")
-    plt.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
-    plt.title("Gaussian inliers with \nuniformly distributed outliers")
-    return fig
-# Page title; also rendered as the top-level heading of the demo.
-title = " An example using IsolationForest for anomaly detection."
-# NOTE: description1..description3 are not referenced by the layout below;
-# the same text is written inline in the introductory Markdown block.
-description1 = "The isolation forest is an Ensemble of Isolation trees and it isolates the data points using recursive random partitioning."
-# Outliers are isolated with fewer splits (shorter paths) than inliers.
-description2 = "In case of outliers the number of splits required is smaller than that required for inliers."
-description3 = "We will use the toy dataset as given in the scikit-learn page for Isolation Forest."
-# Build the three-tab Gradio UI: input data plot, decision boundary image,
-# and path-length image.
-with gr.Blocks(title=title) as demo:
-    gr.Markdown(f" # {title}")
-    gr.Markdown(
-    """
-    The isolation forest is an ensemble of isolation trees and it isolates the data points using recursive random partitioning.
-    In case of outliers, the number of splits required is smaller than that required for inliers.
-    We will use the toy dataset for our educational demo as given in the scikit-learn page for Isolation Forest.
-    """)
-    gr.Markdown("You can see the associated scikit-learn example [here](https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py).")
-    # The returned model is not used below, but this call downloads the
-    # "downloaded-model" directory that the image tabs read their PNG
-    # files from, so it must run before those components are created.
-    loaded_model = load_hf_model_hub()
-    with gr.Tab("Visualize Input dataset"):
-        btn = gr.Button(value="Visualize input dataset")
-        with gr.Row():
-            # The Plot component is created inside the row so that it sits
-            # next to the explanatory Markdown.
-            btn.click(visualize_input_data, outputs=gr.Plot(label='Visualizing input dataset'))
-            gr.Markdown(
-            """
-            # Data Generation
-            We generate 2 clusters one spherical and the other slightly deformed, from Standard Normal distribution
-            For the sake of consistency inliers are assigned a ground label of 1 and outliers are assigned a label -1.
-            The plot is a visualization of the clusters of the input dataset.
-            """)
-    # Plain text label: Markdown asterisks would be shown literally here.
-    with gr.Tab("Plot Decision Boundary"):
-      with gr.Row():
-        image_decision = gr.Image('./downloaded-model/decision_boundary.png')
-        gr.Markdown(
-        """
-        # Plot the Discrete Decision Boundary
-        We plot the discrete decision boundary.
-        The background colour represents whether a sample in that given area is predicted to be an outlier or not.
-        The scatter plot displays the true labels
-        """)
-    with gr.Tab("Plot Path"):
-      with gr.Row():
-        image_path = gr.Image('./downloaded-model/plot_path.png')
-        gr.Markdown(
-        """
-        # Plot the path length of the decision boundary
-        By setting the response_method="decision_function", the background of the DecisionBoundaryDisplay represents
-        the measure of the normality of an observation.
-        Normality of Observation = path length/(Number_of_forests_of_random trees) - Eqn.1
-        The RHS of the above equation Eqn.1 is given by the number of splits required to isolate a given sample
-        Such score is given by the path length averaged over a forest of random trees, which itself is given by the depth of
-        the leaf (or equivalently the number of splits)
-        required to isolate a given sample.
-        When a forest of random trees collectively produces short path lengths for isolating some particular samples,
-        they are highly likely to be anomalies and the measure of normality is close to 0.
-        Similarly, large paths correspond to values close to 1 and are more likely to be inliers.
-        """)
-    gr.Markdown("## Success")
-demo.launch()