ibnummuhammad committed on
Commit
25f564f
1 Parent(s): 98dc8b3

Add extraction-yahoo-finance.ipynb

Browse files
Files changed (1) hide show
  1. extraction-yahoo-finance.ipynb +497 -0
extraction-yahoo-finance.ipynb ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import yfinance as yf"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# Define the list of stock symbols\n",
19
+ "stock_symbols = ['JPM', 'GS', 'MS', 'BLK', 'C']"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 3,
25
+ "metadata": {},
26
+ "outputs": [
27
+ {
28
+ "name": "stderr",
29
+ "output_type": "stream",
30
+ "text": [
31
+ "[*********************100%%**********************] 5 of 5 completed\n"
32
+ ]
33
+ }
34
+ ],
35
+ "source": [
36
+ "# Download historical stock price data\n",
37
+ "stock_data = yf.download(stock_symbols, start=\"2020-01-01\", end=\"2023-11-30\")"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 4,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "data": {
47
+ "text/html": [
48
+ "<div>\n",
49
+ "<style scoped>\n",
50
+ " .dataframe tbody tr th:only-of-type {\n",
51
+ " vertical-align: middle;\n",
52
+ " }\n",
53
+ "\n",
54
+ " .dataframe tbody tr th {\n",
55
+ " vertical-align: top;\n",
56
+ " }\n",
57
+ "\n",
58
+ " .dataframe thead tr th {\n",
59
+ " text-align: left;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe thead tr:last-of-type th {\n",
63
+ " text-align: right;\n",
64
+ " }\n",
65
+ "</style>\n",
66
+ "<table border=\"1\" class=\"dataframe\">\n",
67
+ " <thead>\n",
68
+ " <tr>\n",
69
+ " <th>Price</th>\n",
70
+ " <th colspan=\"5\" halign=\"left\">Adj Close</th>\n",
71
+ " <th colspan=\"5\" halign=\"left\">Close</th>\n",
72
+ " <th>...</th>\n",
73
+ " <th colspan=\"5\" halign=\"left\">Open</th>\n",
74
+ " <th colspan=\"5\" halign=\"left\">Volume</th>\n",
75
+ " </tr>\n",
76
+ " <tr>\n",
77
+ " <th>Ticker</th>\n",
78
+ " <th>BLK</th>\n",
79
+ " <th>C</th>\n",
80
+ " <th>GS</th>\n",
81
+ " <th>JPM</th>\n",
82
+ " <th>MS</th>\n",
83
+ " <th>BLK</th>\n",
84
+ " <th>C</th>\n",
85
+ " <th>GS</th>\n",
86
+ " <th>JPM</th>\n",
87
+ " <th>MS</th>\n",
88
+ " <th>...</th>\n",
89
+ " <th>BLK</th>\n",
90
+ " <th>C</th>\n",
91
+ " <th>GS</th>\n",
92
+ " <th>JPM</th>\n",
93
+ " <th>MS</th>\n",
94
+ " <th>BLK</th>\n",
95
+ " <th>C</th>\n",
96
+ " <th>GS</th>\n",
97
+ " <th>JPM</th>\n",
98
+ " <th>MS</th>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>Date</th>\n",
102
+ " <th></th>\n",
103
+ " <th></th>\n",
104
+ " <th></th>\n",
105
+ " <th></th>\n",
106
+ " <th></th>\n",
107
+ " <th></th>\n",
108
+ " <th></th>\n",
109
+ " <th></th>\n",
110
+ " <th></th>\n",
111
+ " <th></th>\n",
112
+ " <th></th>\n",
113
+ " <th></th>\n",
114
+ " <th></th>\n",
115
+ " <th></th>\n",
116
+ " <th></th>\n",
117
+ " <th></th>\n",
118
+ " <th></th>\n",
119
+ " <th></th>\n",
120
+ " <th></th>\n",
121
+ " <th></th>\n",
122
+ " <th></th>\n",
123
+ " </tr>\n",
124
+ " </thead>\n",
125
+ " <tbody>\n",
126
+ " <tr>\n",
127
+ " <th>2020-01-02</th>\n",
128
+ " <td>457.110931</td>\n",
129
+ " <td>68.963692</td>\n",
130
+ " <td>210.857330</td>\n",
131
+ " <td>123.533600</td>\n",
132
+ " <td>45.443264</td>\n",
133
+ " <td>508.980011</td>\n",
134
+ " <td>81.230003</td>\n",
135
+ " <td>234.320007</td>\n",
136
+ " <td>141.089996</td>\n",
137
+ " <td>52.040001</td>\n",
138
+ " <td>...</td>\n",
139
+ " <td>510.000000</td>\n",
140
+ " <td>80.129997</td>\n",
141
+ " <td>231.000000</td>\n",
142
+ " <td>139.789993</td>\n",
143
+ " <td>51.200001</td>\n",
144
+ " <td>560400</td>\n",
145
+ " <td>12728900</td>\n",
146
+ " <td>3736300</td>\n",
147
+ " <td>10803700</td>\n",
148
+ " <td>7808000</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>2020-01-03</th>\n",
152
+ " <td>452.252197</td>\n",
153
+ " <td>67.664719</td>\n",
154
+ " <td>208.391678</td>\n",
155
+ " <td>121.903435</td>\n",
156
+ " <td>44.709740</td>\n",
157
+ " <td>503.570007</td>\n",
158
+ " <td>79.699997</td>\n",
159
+ " <td>231.580002</td>\n",
160
+ " <td>138.339996</td>\n",
161
+ " <td>51.200001</td>\n",
162
+ " <td>...</td>\n",
163
+ " <td>501.720001</td>\n",
164
+ " <td>79.800003</td>\n",
165
+ " <td>231.600006</td>\n",
166
+ " <td>137.500000</td>\n",
167
+ " <td>51.220001</td>\n",
168
+ " <td>337300</td>\n",
169
+ " <td>12437400</td>\n",
170
+ " <td>2274500</td>\n",
171
+ " <td>10386800</td>\n",
172
+ " <td>6706000</td>\n",
173
+ " </tr>\n",
174
+ " <tr>\n",
175
+ " <th>2020-01-06</th>\n",
176
+ " <td>452.638397</td>\n",
177
+ " <td>67.452461</td>\n",
178
+ " <td>210.524353</td>\n",
179
+ " <td>121.806473</td>\n",
180
+ " <td>44.552567</td>\n",
181
+ " <td>504.000000</td>\n",
182
+ " <td>79.449997</td>\n",
183
+ " <td>233.949997</td>\n",
184
+ " <td>138.229996</td>\n",
185
+ " <td>51.020000</td>\n",
186
+ " <td>...</td>\n",
187
+ " <td>500.170013</td>\n",
188
+ " <td>78.720001</td>\n",
189
+ " <td>229.929993</td>\n",
190
+ " <td>136.559998</td>\n",
191
+ " <td>50.669998</td>\n",
192
+ " <td>411300</td>\n",
193
+ " <td>10059500</td>\n",
194
+ " <td>3329300</td>\n",
195
+ " <td>10259000</td>\n",
196
+ " <td>7476700</td>\n",
197
+ " </tr>\n",
198
+ " <tr>\n",
199
+ " <th>2020-01-07</th>\n",
200
+ " <td>455.530243</td>\n",
201
+ " <td>66.866661</td>\n",
202
+ " <td>211.910172</td>\n",
203
+ " <td>119.735695</td>\n",
204
+ " <td>44.465233</td>\n",
205
+ " <td>507.220001</td>\n",
206
+ " <td>78.760002</td>\n",
207
+ " <td>235.490005</td>\n",
208
+ " <td>135.880005</td>\n",
209
+ " <td>50.919998</td>\n",
210
+ " <td>...</td>\n",
211
+ " <td>502.880005</td>\n",
212
+ " <td>79.290001</td>\n",
213
+ " <td>235.000000</td>\n",
214
+ " <td>137.279999</td>\n",
215
+ " <td>51.040001</td>\n",
216
+ " <td>453900</td>\n",
217
+ " <td>10469100</td>\n",
218
+ " <td>5255200</td>\n",
219
+ " <td>10531300</td>\n",
220
+ " <td>4538100</td>\n",
221
+ " </tr>\n",
222
+ " <tr>\n",
223
+ " <th>2020-01-08</th>\n",
224
+ " <td>455.422516</td>\n",
225
+ " <td>67.376060</td>\n",
226
+ " <td>213.952820</td>\n",
227
+ " <td>120.669746</td>\n",
228
+ " <td>45.032845</td>\n",
229
+ " <td>507.100006</td>\n",
230
+ " <td>79.360001</td>\n",
231
+ " <td>237.759995</td>\n",
232
+ " <td>136.940002</td>\n",
233
+ " <td>51.570000</td>\n",
234
+ " <td>...</td>\n",
235
+ " <td>507.769989</td>\n",
236
+ " <td>78.769997</td>\n",
237
+ " <td>235.679993</td>\n",
238
+ " <td>135.699997</td>\n",
239
+ " <td>50.959999</td>\n",
240
+ " <td>726500</td>\n",
241
+ " <td>11292400</td>\n",
242
+ " <td>3564700</td>\n",
243
+ " <td>9695300</td>\n",
244
+ " <td>6185200</td>\n",
245
+ " </tr>\n",
246
+ " <tr>\n",
247
+ " <th>...</th>\n",
248
+ " <td>...</td>\n",
249
+ " <td>...</td>\n",
250
+ " <td>...</td>\n",
251
+ " <td>...</td>\n",
252
+ " <td>...</td>\n",
253
+ " <td>...</td>\n",
254
+ " <td>...</td>\n",
255
+ " <td>...</td>\n",
256
+ " <td>...</td>\n",
257
+ " <td>...</td>\n",
258
+ " <td>...</td>\n",
259
+ " <td>...</td>\n",
260
+ " <td>...</td>\n",
261
+ " <td>...</td>\n",
262
+ " <td>...</td>\n",
263
+ " <td>...</td>\n",
264
+ " <td>...</td>\n",
265
+ " <td>...</td>\n",
266
+ " <td>...</td>\n",
267
+ " <td>...</td>\n",
268
+ " <td>...</td>\n",
269
+ " </tr>\n",
270
+ " <tr>\n",
271
+ " <th>2023-11-22</th>\n",
272
+ " <td>717.865295</td>\n",
273
+ " <td>44.594376</td>\n",
274
+ " <td>333.518433</td>\n",
275
+ " <td>151.506546</td>\n",
276
+ " <td>77.728569</td>\n",
277
+ " <td>727.140015</td>\n",
278
+ " <td>45.020000</td>\n",
279
+ " <td>338.640015</td>\n",
280
+ " <td>153.330002</td>\n",
281
+ " <td>78.489998</td>\n",
282
+ " <td>...</td>\n",
283
+ " <td>727.000000</td>\n",
284
+ " <td>45.310001</td>\n",
285
+ " <td>336.940002</td>\n",
286
+ " <td>153.410004</td>\n",
287
+ " <td>78.790001</td>\n",
288
+ " <td>498100</td>\n",
289
+ " <td>9441200</td>\n",
290
+ " <td>1235200</td>\n",
291
+ " <td>5174500</td>\n",
292
+ " <td>4265400</td>\n",
293
+ " </tr>\n",
294
+ " <tr>\n",
295
+ " <th>2023-11-24</th>\n",
296
+ " <td>720.787537</td>\n",
297
+ " <td>44.802391</td>\n",
298
+ " <td>334.020691</td>\n",
299
+ " <td>151.714050</td>\n",
300
+ " <td>77.887024</td>\n",
301
+ " <td>730.099976</td>\n",
302
+ " <td>45.230000</td>\n",
303
+ " <td>339.149994</td>\n",
304
+ " <td>153.539993</td>\n",
305
+ " <td>78.650002</td>\n",
306
+ " <td>...</td>\n",
307
+ " <td>726.719971</td>\n",
308
+ " <td>44.889999</td>\n",
309
+ " <td>338.799988</td>\n",
310
+ " <td>153.589996</td>\n",
311
+ " <td>78.260002</td>\n",
312
+ " <td>279600</td>\n",
313
+ " <td>5941000</td>\n",
314
+ " <td>460100</td>\n",
315
+ " <td>3496900</td>\n",
316
+ " <td>2398000</td>\n",
317
+ " </tr>\n",
318
+ " <tr>\n",
319
+ " <th>2023-11-27</th>\n",
320
+ " <td>721.360168</td>\n",
321
+ " <td>44.653809</td>\n",
322
+ " <td>332.602478</td>\n",
323
+ " <td>151.368225</td>\n",
324
+ " <td>77.193810</td>\n",
325
+ " <td>730.679993</td>\n",
326
+ " <td>45.080002</td>\n",
327
+ " <td>337.709991</td>\n",
328
+ " <td>153.190002</td>\n",
329
+ " <td>77.949997</td>\n",
330
+ " <td>...</td>\n",
331
+ " <td>727.200012</td>\n",
332
+ " <td>44.959999</td>\n",
333
+ " <td>339.190002</td>\n",
334
+ " <td>153.429993</td>\n",
335
+ " <td>78.269997</td>\n",
336
+ " <td>868100</td>\n",
337
+ " <td>14679700</td>\n",
338
+ " <td>1250500</td>\n",
339
+ " <td>6259100</td>\n",
340
+ " <td>5237800</td>\n",
341
+ " </tr>\n",
342
+ " <tr>\n",
343
+ " <th>2023-11-28</th>\n",
344
+ " <td>727.362610</td>\n",
345
+ " <td>44.435886</td>\n",
346
+ " <td>332.543396</td>\n",
347
+ " <td>151.714050</td>\n",
348
+ " <td>76.134186</td>\n",
349
+ " <td>736.760010</td>\n",
350
+ " <td>44.860001</td>\n",
351
+ " <td>337.649994</td>\n",
352
+ " <td>153.539993</td>\n",
353
+ " <td>76.879997</td>\n",
354
+ " <td>...</td>\n",
355
+ " <td>725.859985</td>\n",
356
+ " <td>45.029999</td>\n",
357
+ " <td>337.380005</td>\n",
358
+ " <td>153.220001</td>\n",
359
+ " <td>77.360001</td>\n",
360
+ " <td>610200</td>\n",
361
+ " <td>14064700</td>\n",
362
+ " <td>1419000</td>\n",
363
+ " <td>6582700</td>\n",
364
+ " <td>9197900</td>\n",
365
+ " </tr>\n",
366
+ " <tr>\n",
367
+ " <th>2023-11-29</th>\n",
368
+ " <td>737.768188</td>\n",
369
+ " <td>45.317474</td>\n",
370
+ " <td>337.865692</td>\n",
371
+ " <td>152.484787</td>\n",
372
+ " <td>77.787994</td>\n",
373
+ " <td>747.299988</td>\n",
374
+ " <td>45.750000</td>\n",
375
+ " <td>340.260010</td>\n",
376
+ " <td>154.320007</td>\n",
377
+ " <td>78.550003</td>\n",
378
+ " <td>...</td>\n",
379
+ " <td>742.280029</td>\n",
380
+ " <td>45.230000</td>\n",
381
+ " <td>337.000000</td>\n",
382
+ " <td>154.169998</td>\n",
383
+ " <td>77.480003</td>\n",
384
+ " <td>750000</td>\n",
385
+ " <td>14027600</td>\n",
386
+ " <td>2038100</td>\n",
387
+ " <td>9126100</td>\n",
388
+ " <td>7420700</td>\n",
389
+ " </tr>\n",
390
+ " </tbody>\n",
391
+ "</table>\n",
392
+ "<p>985 rows × 30 columns</p>\n",
393
+ "</div>"
394
+ ],
395
+ "text/plain": [
396
+ "Price Adj Close \\\n",
397
+ "Ticker BLK C GS JPM MS \n",
398
+ "Date \n",
399
+ "2020-01-02 457.110931 68.963692 210.857330 123.533600 45.443264 \n",
400
+ "2020-01-03 452.252197 67.664719 208.391678 121.903435 44.709740 \n",
401
+ "2020-01-06 452.638397 67.452461 210.524353 121.806473 44.552567 \n",
402
+ "2020-01-07 455.530243 66.866661 211.910172 119.735695 44.465233 \n",
403
+ "2020-01-08 455.422516 67.376060 213.952820 120.669746 45.032845 \n",
404
+ "... ... ... ... ... ... \n",
405
+ "2023-11-22 717.865295 44.594376 333.518433 151.506546 77.728569 \n",
406
+ "2023-11-24 720.787537 44.802391 334.020691 151.714050 77.887024 \n",
407
+ "2023-11-27 721.360168 44.653809 332.602478 151.368225 77.193810 \n",
408
+ "2023-11-28 727.362610 44.435886 332.543396 151.714050 76.134186 \n",
409
+ "2023-11-29 737.768188 45.317474 337.865692 152.484787 77.787994 \n",
410
+ "\n",
411
+ "Price Close ... \\\n",
412
+ "Ticker BLK C GS JPM MS ... \n",
413
+ "Date ... \n",
414
+ "2020-01-02 508.980011 81.230003 234.320007 141.089996 52.040001 ... \n",
415
+ "2020-01-03 503.570007 79.699997 231.580002 138.339996 51.200001 ... \n",
416
+ "2020-01-06 504.000000 79.449997 233.949997 138.229996 51.020000 ... \n",
417
+ "2020-01-07 507.220001 78.760002 235.490005 135.880005 50.919998 ... \n",
418
+ "2020-01-08 507.100006 79.360001 237.759995 136.940002 51.570000 ... \n",
419
+ "... ... ... ... ... ... ... \n",
420
+ "2023-11-22 727.140015 45.020000 338.640015 153.330002 78.489998 ... \n",
421
+ "2023-11-24 730.099976 45.230000 339.149994 153.539993 78.650002 ... \n",
422
+ "2023-11-27 730.679993 45.080002 337.709991 153.190002 77.949997 ... \n",
423
+ "2023-11-28 736.760010 44.860001 337.649994 153.539993 76.879997 ... \n",
424
+ "2023-11-29 747.299988 45.750000 340.260010 154.320007 78.550003 ... \n",
425
+ "\n",
426
+ "Price Open Volume \\\n",
427
+ "Ticker BLK C GS JPM MS BLK \n",
428
+ "Date \n",
429
+ "2020-01-02 510.000000 80.129997 231.000000 139.789993 51.200001 560400 \n",
430
+ "2020-01-03 501.720001 79.800003 231.600006 137.500000 51.220001 337300 \n",
431
+ "2020-01-06 500.170013 78.720001 229.929993 136.559998 50.669998 411300 \n",
432
+ "2020-01-07 502.880005 79.290001 235.000000 137.279999 51.040001 453900 \n",
433
+ "2020-01-08 507.769989 78.769997 235.679993 135.699997 50.959999 726500 \n",
434
+ "... ... ... ... ... ... ... \n",
435
+ "2023-11-22 727.000000 45.310001 336.940002 153.410004 78.790001 498100 \n",
436
+ "2023-11-24 726.719971 44.889999 338.799988 153.589996 78.260002 279600 \n",
437
+ "2023-11-27 727.200012 44.959999 339.190002 153.429993 78.269997 868100 \n",
438
+ "2023-11-28 725.859985 45.029999 337.380005 153.220001 77.360001 610200 \n",
439
+ "2023-11-29 742.280029 45.230000 337.000000 154.169998 77.480003 750000 \n",
440
+ "\n",
441
+ "Price \n",
442
+ "Ticker C GS JPM MS \n",
443
+ "Date \n",
444
+ "2020-01-02 12728900 3736300 10803700 7808000 \n",
445
+ "2020-01-03 12437400 2274500 10386800 6706000 \n",
446
+ "2020-01-06 10059500 3329300 10259000 7476700 \n",
447
+ "2020-01-07 10469100 5255200 10531300 4538100 \n",
448
+ "2020-01-08 11292400 3564700 9695300 6185200 \n",
449
+ "... ... ... ... ... \n",
450
+ "2023-11-22 9441200 1235200 5174500 4265400 \n",
451
+ "2023-11-24 5941000 460100 3496900 2398000 \n",
452
+ "2023-11-27 14679700 1250500 6259100 5237800 \n",
453
+ "2023-11-28 14064700 1419000 6582700 9197900 \n",
454
+ "2023-11-29 14027600 2038100 9126100 7420700 \n",
455
+ "\n",
456
+ "[985 rows x 30 columns]"
457
+ ]
458
+ },
459
+ "execution_count": 4,
460
+ "metadata": {},
461
+ "output_type": "execute_result"
462
+ }
463
+ ],
464
+ "source": [
465
+ "stock_data"
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "code",
470
+ "execution_count": null,
471
+ "metadata": {},
472
+ "outputs": [],
473
+ "source": []
474
+ }
475
+ ],
476
+ "metadata": {
477
+ "kernelspec": {
478
+ "display_name": "py312",
479
+ "language": "python",
480
+ "name": "python3"
481
+ },
482
+ "language_info": {
483
+ "codemirror_mode": {
484
+ "name": "ipython",
485
+ "version": 3
486
+ },
487
+ "file_extension": ".py",
488
+ "mimetype": "text/x-python",
489
+ "name": "python",
490
+ "nbconvert_exporter": "python",
491
+ "pygments_lexer": "ipython3",
492
+ "version": "3.12.2"
493
+ }
494
+ },
495
+ "nbformat": 4,
496
+ "nbformat_minor": 2
497
+ }