Jayabalambika committed on
Commit
ed7fafe
·
1 Parent(s): 53584da

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from time import time
3
+ from scipy import sparse
4
+ from scipy import linalg
5
+
6
+ from sklearn.datasets import make_regression
7
+ from sklearn.linear_model import Lasso
8
+
9
+
10
def load_dataset():
    """Build the synthetic regression problem used by both comparisons.

    Generates 200 samples with 5000 features (more features than samples)
    using a fixed ``random_state`` of 0.

    Returns:
        A ``(X, X_sp, y)`` tuple: the dense design matrix, the same matrix
        stored as a SciPy COO sparse matrix, and the target vector.
    """
    features, targets = make_regression(
        n_samples=200, n_features=5000, random_state=0
    )
    # Same values as `features`, only the storage format differs.
    features_sparse = sparse.coo_matrix(features)
    return features, features_sparse, targets
15
+
16
def compare_lasso_dense():
    """Fit Lasso on the dense problem stored in sparse and in dense format.

    Reads the module-level ``X`` (dense), ``X_sp`` (sparse copy of ``X``) and
    ``y``. Two identically configured ``Lasso`` estimators are fitted, one per
    storage format, and each fit is timed separately.

    Returns:
        A multi-line string with the sparse fit time, the dense fit time and
        the norm of the difference between the two coefficient vectors.
    """
    alpha = 1
    sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
    dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)

    start = time()
    sparse_lasso.fit(X_sp, y)
    sparse_elapsed = time() - start

    start = time()
    dense_lasso.fit(X, y)
    dense_elapsed = time() - start

    # Both estimators should learn the same model; a small norm confirms it.
    coeff_diff = linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)

    # Report the durations measured above. Re-evaluating `time() - start`
    # here would show the dense timer for both lines.
    return (
        f"Sparse Lasso done in {sparse_elapsed:.3f}s\t\n"
        f"Dense Lasso done in {dense_elapsed:.3f}s\t\n"
        f"Distance between coefficients : {coeff_diff:.2e}\t\n"
    )
35
+
36
def compare_lasso_sparse():
    """Fit Lasso on a sparsified copy of the data in sparse and dense format.

    Reads the module-level ``X`` and ``y``. A copy of ``X`` has every value
    below 2.5 set to zero; it is then fitted once as a CSC sparse matrix and
    once as a dense array, with each fit timed separately. Progress is also
    printed to stdout.

    Returns:
        A multi-line string with the matrix density (percentage of non-zero
        entries), the sparse fit time, the dense fit time and the norm of the
        difference between the two coefficient vectors.
    """
    # Work on a copy so the shared dense dataset is left untouched.
    Xs = X.copy()
    # Make Xs sparse by replacing the values lower than 2.5 with 0s.
    Xs[Xs < 2.5] = 0.0
    Xs_sp = sparse.coo_matrix(Xs).tocsc()

    # Proportion of non-zero entries in the data matrix, as a percentage.
    matrix_density = Xs_sp.nnz / float(X.size) * 100
    print(f"Matrix density : {matrix_density:.3f}%")

    alpha = 0.1
    sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
    dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)

    start = time()
    sparse_lasso.fit(Xs_sp, y)
    sparse_elapsed = time() - start
    print(f"Sparse Lasso done in {sparse_elapsed:.3f}s")

    start = time()
    dense_lasso.fit(Xs, y)
    dense_elapsed = time() - start
    print(f"Dense Lasso done in {dense_elapsed:.3f}s")

    # Both estimators should learn the same model; a small norm confirms it.
    coeff_diff = linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)
    print(f"Distance between coefficients : {coeff_diff:.2e}")

    # Report the stored durations: the timer was restarted for the dense fit,
    # so reading it again here would mislabel the dense time as the sparse
    # one. The dense line is included so the two formats can be compared.
    return (
        f"Matrix density : {matrix_density:.3f}%\t\n"
        f"Sparse Lasso done in {sparse_elapsed:.3f}s\t\n"
        f"Dense Lasso done in {dense_elapsed:.3f}s\t\n"
        f"Distance between coefficients : {coeff_diff:.2e}\t\n"
    )
67
+
68
+
69
# Build the dataset once at import time; the comparison callbacks read these
# module-level names directly.
X, X_sp, y = load_dataset()


# Text shown in the page, in display order.
title = " Lasso on Dense and Sparse data "

info = """**Comparing the two Lasso implementations on Dense data**
We create a linear regression problem that is suitable for the Lasso, that is to say, with more features than samples.
We then store the data matrix in both dense (the usual) and sparse format, and train a Lasso on each. We compute the
runtime of both and check that they learned the same model by
computing the Euclidean norm of the difference between the coefficients they learned.
Because the data is dense, we expect better runtime with a dense data format.
"""

info2 = """***Comparing the two Lasso implementations on Sparse data***
We make the previous problem sparse by replacing all small values with 0
and run the same comparisons as above. Because the data is now sparse,
we expect the implementation that uses the sparse data format to be faster.
"""

conclusion = """**We show that linear_model.Lasso provides
the same results for dense and sparse data
and that in the case of sparse data the speed is improved**.
"""

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(info)

    # Dense section: the button runs the comparison and fills the textbox.
    txt_3 = gr.Textbox(label="Dense Lasso comparison", value="")
    btn = gr.Button(value="Dense Lasso comparison")
    btn.click(compare_lasso_dense, outputs=[txt_3])

    gr.Markdown(info2)

    # Sparse section: same layout, wired to the sparse comparison.
    txt_4 = gr.Textbox(label="Sparse Lasso comparison", value="")
    btn = gr.Button(value="Sparse Lasso comparison")
    btn.click(compare_lasso_sparse, outputs=[txt_4])

    gr.Markdown(conclusion)


if __name__ == "__main__":
    demo.launch()