LeoWalker committed on
Commit
b78565b
1 Parent(s): 66700bd

Successfully extracted the description information back into a pandas DataFrame. Need to look at how to write to the database after every call instead of doing it in such large batches.

Browse files
notebooks/google_job_rwtest.ipynb CHANGED
@@ -83,17 +83,6 @@
83
  "# return None"
84
  ]
85
  },
86
- {
87
- "cell_type": "code",
88
- "execution_count": 2,
89
- "metadata": {},
90
- "outputs": [],
91
- "source": [
92
- "# job_list= [\"Machine Learning Engineer\", \"Data Scientist\", \"Generative AI Engineer\", \"Solutions Engineer\", \"LLM Engineer\"]\n",
93
- "# simple_city_state_list= [\"Menlo Park CA\", \"Palo Alto CA\", \"San Francisco CA\", \"Mountain View CA\"]\n",
94
- "# sample = main(job_list, simple_city_state_list)"
95
- ]
96
- },
97
  {
98
  "cell_type": "code",
99
  "execution_count": 1,
@@ -109,6 +98,9 @@
109
  "import os\n",
110
  "from sqlalchemy import create_engine\n",
111
  "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
 
 
 
112
  "\n",
113
  "def google_job_search(job_title, city_state, start=0):\n",
114
  " '''\n",
@@ -151,7 +143,7 @@
151
  " return None\n",
152
  "\n",
153
  "def sql_dump(df, table):\n",
154
- " engine = create_engine(f\"postgresql://{os.getenv('MasterName')}:{os.getenv('MasterPass')}@{os.getenv('RDS_EndPoint')}:5432/postgres\")\n",
155
  " with engine.connect() as conn:\n",
156
  " df.to_sql(table, conn, if_exists='append', chunksize=20, method='multi', index=False)\n",
157
  " print(f\"Dumped {df.shape} to SQL table {table}\")\n",
@@ -182,116 +174,15 @@
182
  },
183
  {
184
  "cell_type": "code",
185
- "execution_count": 2,
186
  "metadata": {},
187
- "outputs": [
188
- {
189
- "name": "stdout",
190
- "output_type": "stream",
191
- "text": [
192
- "City: Menlo Park CA Job: Data Engineer Start: 1\n",
193
- "(10, 6)\n",
194
- "City: San Francisco CA Job: Data Engineer Start: 0\n",
195
- "(9, 6)\n",
196
- "City: Mountain View CA Job: Data Analyst Start: 0\n",
197
- "(10, 6)\n",
198
- "City: Menlo Park CA Job: Data Analyst Start: 1\n",
199
- "(10, 6)\n",
200
- "City: Palo Alto CA Job: Data Analyst Start: 1\n",
201
- "(10, 6)\n",
202
- "City: San Francisco CA Job: Data Analyst Start: 0\n",
203
- "(10, 6)\n",
204
- "City: Menlo Park CA Job: Data Engineer Start: 0\n",
205
- "(10, 6)\n",
206
- "City: Palo Alto CA Job: Data Analyst Start: 0\n",
207
- "(10, 6)\n",
208
- "Dumped (10, 7) to SQL table leoTestTable\n",
209
- "Rows affected: None\n",
210
- "Dumped (10, 7) to SQL table leoTestTable\n",
211
- "Rows affected: None\n",
212
- "Dumped (10, 7) to SQL table leoTestTableDumped (9, 7) to SQL table leoTestTable\n",
213
- "Rows affected: None\n",
214
- "\n",
215
- "Rows affected: None\n",
216
- "Dumped (10, 7) to SQL table leoTestTable\n",
217
- "Rows affected: None\n",
218
- "Dumped (10, 7) to SQL table leoTestTable\n",
219
- "Rows affected: None\n",
220
- "Dumped (10, 7) to SQL table leoTestTable\n",
221
- "Rows affected: None\n",
222
- "Dumped (10, 7) to SQL table leoTestTable\n",
223
- "Rows affected: None\n",
224
- "City: Mountain View CA Job: Data Engineer Start: 1\n",
225
- "(10, 6)\n",
226
- "Dumped (10, 7) to SQL table leoTestTable\n",
227
- "Rows affected: None\n",
228
- "City: Mountain View CA Job: Big Data Engineer Start: 0\n",
229
- "(10, 6)\n",
230
- "City: San Francisco CA Job: Big Data Engineer Start: 1\n",
231
- "(10, 6)\n",
232
- "City: Menlo Park CA Job: Big Data Engineer Start: 1\n",
233
- "(10, 6)\n",
234
- "City: Menlo Park CA Job: Big Data Engineer Start: 0\n",
235
- "(10, 6)\n",
236
- "Dumped (10, 7) to SQL table leoTestTableDumped (10, 7) to SQL table leoTestTable\n",
237
- "Rows affected: None\n",
238
- "\n",
239
- "Rows affected: None\n",
240
- "Dumped (10, 7) to SQL table leoTestTable\n",
241
- "Rows affected: None\n",
242
- "Dumped (10, 7) to SQL table leoTestTable\n",
243
- "Rows affected: None\n",
244
- "City: Palo Alto CA Job: Data Engineer Start: 0\n",
245
- "(10, 6)\n",
246
- "Error occurred for search: Data Engineer in Palo Alto CA\n",
247
- "Error message: 'google_jobs_results'\n",
248
- "Error occurred for search: Data Engineer in Mountain View CA\n",
249
- "Error message: 'google_jobs_results'\n",
250
- "Error occurred for search: Data Analyst in Menlo Park CA\n",
251
- "Error message: 'google_jobs_results'\n",
252
- "Error occurred for search: Data Analyst in Mountain View CA\n",
253
- "Error message: 'google_jobs_results'\n",
254
- "Dumped (10, 7) to SQL table leoTestTable\n",
255
- "Rows affected: None\n",
256
- "Error occurred for search: Data Engineer in San Francisco CA\n",
257
- "Error message: 'google_jobs_results'\n",
258
- "City: Palo Alto CA Job: Big Data Engineer Start: 1\n",
259
- "(10, 6)\n",
260
- "Dumped (10, 7) to SQL table leoTestTable\n",
261
- "Rows affected: None\n",
262
- "Error occurred for search: Big Data Engineer in Mountain View CA\n",
263
- "Error message: 'google_jobs_results'\n",
264
- "Error occurred for search: Big Data Engineer in San Francisco CA\n",
265
- "Error message: 'google_jobs_results'\n",
266
- "City: San Francisco CA Job: Data Analyst Start: 1\n",
267
- "(10, 6)\n",
268
- "Dumped (10, 7) to SQL table leoTestTable\n",
269
- "Rows affected: None\n",
270
- "Error occurred for search: Big Data Engineer in Palo Alto CA\n",
271
- "Error message: 'google_jobs_results'\n"
272
- ]
273
- }
274
- ],
275
  "source": [
276
- "job_list = [\"Data Analyst\", \"Data Engineer\", \"Big Data Engineer\"]\n",
277
- "simple_city_state_list = [\"Menlo Park CA\", \"Palo Alto CA\", \"San Francisco CA\", \"Mountain View CA\"]\n",
278
- "main(job_list, simple_city_state_list)"
279
  ]
280
  },
281
- {
282
- "cell_type": "code",
283
- "execution_count": null,
284
- "metadata": {},
285
- "outputs": [],
286
- "source": []
287
- },
288
- {
289
- "cell_type": "code",
290
- "execution_count": null,
291
- "metadata": {},
292
- "outputs": [],
293
- "source": []
294
- },
295
  {
296
  "cell_type": "markdown",
297
  "metadata": {},
@@ -301,7 +192,7 @@
301
  },
302
  {
303
  "cell_type": "code",
304
- "execution_count": 31,
305
  "metadata": {},
306
  "outputs": [],
307
  "source": [
@@ -309,7 +200,7 @@
309
  "from sqlalchemy import create_engine\n",
310
  "\n",
311
  "def read_data_from_db(table_name):\n",
312
- " engine = create_engine(f\"postgresql://{os.getenv('MasterName')}:{os.getenv('MasterPass')}@{os.getenv('RDS_EndPoint')}:5432/postgres\")\n",
313
  " \n",
314
  " try:\n",
315
  " with engine.connect() as conn:\n",
@@ -325,7 +216,7 @@
325
  },
326
  {
327
  "cell_type": "code",
328
- "execution_count": 32,
329
  "metadata": {},
330
  "outputs": [
331
  {
@@ -334,7 +225,7 @@
334
  "(417, 7)"
335
  ]
336
  },
337
- "execution_count": 32,
338
  "metadata": {},
339
  "output_type": "execute_result"
340
  }
@@ -345,7 +236,7 @@
345
  },
346
  {
347
  "cell_type": "code",
348
- "execution_count": 33,
349
  "metadata": {},
350
  "outputs": [
351
  {
@@ -563,7 +454,7 @@
563
  "[417 rows x 7 columns]"
564
  ]
565
  },
566
- "execution_count": 33,
567
  "metadata": {},
568
  "output_type": "execute_result"
569
  }
@@ -574,17 +465,17 @@
574
  },
575
  {
576
  "cell_type": "code",
577
- "execution_count": 34,
578
  "metadata": {},
579
  "outputs": [],
580
  "source": [
581
  "# get the list of unique title, company_name pairs\n",
582
- "title_company = data24_df[['title', 'company_name', 'description']].drop_duplicates()"
583
  ]
584
  },
585
  {
586
  "cell_type": "code",
587
- "execution_count": 35,
588
  "metadata": {},
589
  "outputs": [
590
  {
@@ -612,9 +503,6 @@
612
  " <th>company_name</th>\n",
613
  " <th>location</th>\n",
614
  " <th>description</th>\n",
615
- " <th>extensions</th>\n",
616
- " <th>job_id</th>\n",
617
- " <th>retrieve_date</th>\n",
618
  " </tr>\n",
619
  " </thead>\n",
620
  " <tbody>\n",
@@ -624,9 +512,6 @@
624
  " <td>Nuvolum</td>\n",
625
  " <td>San Francisco, CA</td>\n",
626
  " <td>Nuvolum combines innovative, data-driven strat...</td>\n",
627
- " <td>{\"3 days ago\",Full-time,\"No degree mentioned\"}</td>\n",
628
- " <td>eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2...</td>\n",
629
- " <td>2024-05-04</td>\n",
630
  " </tr>\n",
631
  " <tr>\n",
632
  " <th>1</th>\n",
@@ -634,9 +519,6 @@
634
  " <td>Sunrun</td>\n",
635
  " <td>San Francisco, CA (+1 other)</td>\n",
636
  " <td>Everything we do at Sunrun is driven by a dete...</td>\n",
637
- " <td>{\"12 days ago\",Full-time,\"Health insurance\",\"D...</td>\n",
638
- " <td>eyJqb2JfdGl0bGUiOiJTci4gU3RyYXRlZ3kgYW5kIEJ1c2...</td>\n",
639
- " <td>2024-05-04</td>\n",
640
  " </tr>\n",
641
  " <tr>\n",
642
  " <th>2</th>\n",
@@ -644,9 +526,6 @@
644
  " <td>Side</td>\n",
645
  " <td>Anywhere</td>\n",
646
  " <td>Side, Inc. seeks Business Intelligence Analyst...</td>\n",
647
- " <td>{\"11 days ago\",\"151,736–157,000 a year\",\"Work ...</td>\n",
648
- " <td>eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2...</td>\n",
649
- " <td>2024-05-04</td>\n",
650
  " </tr>\n",
651
  " <tr>\n",
652
  " <th>3</th>\n",
@@ -654,9 +533,6 @@
654
  " <td>TekNavigators Staffing</td>\n",
655
  " <td>San Francisco, CA</td>\n",
656
  " <td>Role: Senior BI Developer\\n\\nLocation: San Fra...</td>\n",
657
- " <td>{\"20 hours ago\",Contractor,\"No degree mentioned\"}</td>\n",
658
- " <td>eyJqb2JfdGl0bGUiOiJTZW5pb3IgQnVzaW5lc3MgSW50ZW...</td>\n",
659
- " <td>2024-05-04</td>\n",
660
  " </tr>\n",
661
  " <tr>\n",
662
  " <th>4</th>\n",
@@ -664,9 +540,6 @@
664
  " <td>FIS Fidelity National Information Services</td>\n",
665
  " <td>San Francisco, CA</td>\n",
666
  " <td>Position Type : Full time Type Of Hire : Exper...</td>\n",
667
- " <td>{\"19 days ago\",Full-time}</td>\n",
668
- " <td>eyJqb2JfdGl0bGUiOiJTZW5pb3IgQnVzaW5lc3MgSW50ZW...</td>\n",
669
- " <td>2024-05-04</td>\n",
670
  " </tr>\n",
671
  " <tr>\n",
672
  " <th>...</th>\n",
@@ -674,9 +547,6 @@
674
  " <td>...</td>\n",
675
  " <td>...</td>\n",
676
  " <td>...</td>\n",
677
- " <td>...</td>\n",
678
- " <td>...</td>\n",
679
- " <td>...</td>\n",
680
  " </tr>\n",
681
  " <tr>\n",
682
  " <th>412</th>\n",
@@ -684,9 +554,6 @@
684
  " <td>Medtronic</td>\n",
685
  " <td>Anywhere</td>\n",
686
  " <td>Careers that Change Lives\\n\\nWe are looking fo...</td>\n",
687
- " <td>{\"10 days ago\",\"Work from home\",Full-time,\"No ...</td>\n",
688
- " <td>eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2...</td>\n",
689
- " <td>2024-05-04</td>\n",
690
  " </tr>\n",
691
  " <tr>\n",
692
  " <th>413</th>\n",
@@ -694,9 +561,6 @@
694
  " <td>Keck Medicine of USC</td>\n",
695
  " <td>Alhambra, CA</td>\n",
696
  " <td>Actively design and develop ETL solutions that...</td>\n",
697
- " <td>{\"13 days ago\",Full-time}</td>\n",
698
- " <td>eyJqb2JfdGl0bGUiOiJJVCBBbmFseXN0LCBCdXNpbmVzcy...</td>\n",
699
- " <td>2024-05-04</td>\n",
700
  " </tr>\n",
701
  " <tr>\n",
702
  " <th>414</th>\n",
@@ -704,9 +568,6 @@
704
  " <td>Deutsch LA</td>\n",
705
  " <td>Los Angeles, CA</td>\n",
706
  " <td>DIRECTOR, BUSINESS INTELLIGENCE\\n\\nWe are seek...</td>\n",
707
- " <td>{\"3 days ago\",Full-time,\"No degree mentioned\"}</td>\n",
708
- " <td>eyJqb2JfdGl0bGUiOiJEaXJlY3RvciwgQnVzaW5lc3MgSW...</td>\n",
709
- " <td>2024-05-04</td>\n",
710
  " </tr>\n",
711
  " <tr>\n",
712
  " <th>415</th>\n",
@@ -714,9 +575,6 @@
714
  " <td>U.S. Bank</td>\n",
715
  " <td>Los Angeles, CA</td>\n",
716
  " <td>At U.S. Bank, we’re on a journey to do our bes...</td>\n",
717
- " <td>{\"3 days ago\",Full-time,\"Health insurance\",\"De...</td>\n",
718
- " <td>eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2...</td>\n",
719
- " <td>2024-05-04</td>\n",
720
  " </tr>\n",
721
  " <tr>\n",
722
  " <th>416</th>\n",
@@ -724,13 +582,10 @@
724
  " <td>BIGO</td>\n",
725
  " <td>Los Angeles, CA</td>\n",
726
  " <td>Location: 10250 Constellation Blvd., Century C...</td>\n",
727
- " <td>{\"1 day ago\",Full-time,\"Health insurance\",\"Den...</td>\n",
728
- " <td>eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2...</td>\n",
729
- " <td>2024-05-04</td>\n",
730
  " </tr>\n",
731
  " </tbody>\n",
732
  "</table>\n",
733
- "<p>417 rows × 7 columns</p>\n",
734
  "</div>"
735
  ],
736
  "text/plain": [
@@ -760,55 +615,38 @@
760
  "415 U.S. Bank Los Angeles, CA \n",
761
  "416 BIGO Los Angeles, CA \n",
762
  "\n",
763
- " description \\\n",
764
- "0 Nuvolum combines innovative, data-driven strat... \n",
765
- "1 Everything we do at Sunrun is driven by a dete... \n",
766
- "2 Side, Inc. seeks Business Intelligence Analyst... \n",
767
- "3 Role: Senior BI Developer\\n\\nLocation: San Fra... \n",
768
- "4 Position Type : Full time Type Of Hire : Exper... \n",
769
- ".. ... \n",
770
- "412 Careers that Change Lives\\n\\nWe are looking fo... \n",
771
- "413 Actively design and develop ETL solutions that... \n",
772
- "414 DIRECTOR, BUSINESS INTELLIGENCE\\n\\nWe are seek... \n",
773
- "415 At U.S. Bank, we’re on a journey to do our bes... \n",
774
- "416 Location: 10250 Constellation Blvd., Century C... \n",
775
  "\n",
776
- " extensions \\\n",
777
- "0 {\"3 days ago\",Full-time,\"No degree mentioned\"} \n",
778
- "1 {\"12 days ago\",Full-time,\"Health insurance\",\"D... \n",
779
- "2 {\"11 days ago\",\"151,736–157,000 a year\",\"Work ... \n",
780
- "3 {\"20 hours ago\",Contractor,\"No degree mentioned\"} \n",
781
- "4 {\"19 days ago\",Full-time} \n",
782
- ".. ... \n",
783
- "412 {\"10 days ago\",\"Work from home\",Full-time,\"No ... \n",
784
- "413 {\"13 days ago\",Full-time} \n",
785
- "414 {\"3 days ago\",Full-time,\"No degree mentioned\"} \n",
786
- "415 {\"3 days ago\",Full-time,\"Health insurance\",\"De... \n",
787
- "416 {\"1 day ago\",Full-time,\"Health insurance\",\"Den... \n",
788
- "\n",
789
- " job_id retrieve_date \n",
790
- "0 eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2... 2024-05-04 \n",
791
- "1 eyJqb2JfdGl0bGUiOiJTci4gU3RyYXRlZ3kgYW5kIEJ1c2... 2024-05-04 \n",
792
- "2 eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2... 2024-05-04 \n",
793
- "3 eyJqb2JfdGl0bGUiOiJTZW5pb3IgQnVzaW5lc3MgSW50ZW... 2024-05-04 \n",
794
- "4 eyJqb2JfdGl0bGUiOiJTZW5pb3IgQnVzaW5lc3MgSW50ZW... 2024-05-04 \n",
795
- ".. ... ... \n",
796
- "412 eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2... 2024-05-04 \n",
797
- "413 eyJqb2JfdGl0bGUiOiJJVCBBbmFseXN0LCBCdXNpbmVzcy... 2024-05-04 \n",
798
- "414 eyJqb2JfdGl0bGUiOiJEaXJlY3RvciwgQnVzaW5lc3MgSW... 2024-05-04 \n",
799
- "415 eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2... 2024-05-04 \n",
800
- "416 eyJqb2JfdGl0bGUiOiJCdXNpbmVzcyBJbnRlbGxpZ2VuY2... 2024-05-04 \n",
801
- "\n",
802
- "[417 rows x 7 columns]"
803
  ]
804
  },
805
- "execution_count": 35,
806
  "metadata": {},
807
  "output_type": "execute_result"
808
  }
809
  ],
810
  "source": [
811
- "data24_df"
 
 
 
 
 
 
 
 
 
812
  ]
813
  },
814
  {
 
83
  "# return None"
84
  ]
85
  },
 
 
 
 
 
 
 
 
 
 
 
86
  {
87
  "cell_type": "code",
88
  "execution_count": 1,
 
98
  "import os\n",
99
  "from sqlalchemy import create_engine\n",
100
  "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
101
+ "from dotenv import load_dotenv\n",
102
+ "\n",
103
+ "load_dotenv()\n",
104
  "\n",
105
  "def google_job_search(job_title, city_state, start=0):\n",
106
  " '''\n",
 
143
  " return None\n",
144
  "\n",
145
  "def sql_dump(df, table):\n",
146
+ " engine = create_engine(f\"postgresql://{os.getenv('PSQL_MASTER_NAME')}:{os.getenv('PSQL_KEY')}@{os.getenv('RDS_ENDPOINT')}:5432/postgres\")\n",
147
  " with engine.connect() as conn:\n",
148
  " df.to_sql(table, conn, if_exists='append', chunksize=20, method='multi', index=False)\n",
149
  " print(f\"Dumped {df.shape} to SQL table {table}\")\n",
 
174
  },
175
  {
176
  "cell_type": "code",
177
+ "execution_count": 17,
178
  "metadata": {},
179
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  "source": [
181
+ "# job_list = [\"Data Analyst\", \"Data Engineer\", \"Big Data Engineer\"]\n",
182
+ "# simple_city_state_list = [\"Menlo Park CA\", \"Palo Alto CA\", \"San Francisco CA\", \"Mountain View CA\"]\n",
183
+ "# main(job_list, simple_city_state_list)"
184
  ]
185
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  {
187
  "cell_type": "markdown",
188
  "metadata": {},
 
192
  },
193
  {
194
  "cell_type": "code",
195
+ "execution_count": 2,
196
  "metadata": {},
197
  "outputs": [],
198
  "source": [
 
200
  "from sqlalchemy import create_engine\n",
201
  "\n",
202
  "def read_data_from_db(table_name):\n",
203
+ " engine = create_engine(f\"postgresql://{os.getenv('PSQL_MASTER_NAME')}:{os.getenv('PSQL_KEY')}@{os.getenv('RDS_ENDPOINT')}:5432/postgres\")\n",
204
  " \n",
205
  " try:\n",
206
  " with engine.connect() as conn:\n",
 
216
  },
217
  {
218
  "cell_type": "code",
219
+ "execution_count": 3,
220
  "metadata": {},
221
  "outputs": [
222
  {
 
225
  "(417, 7)"
226
  ]
227
  },
228
+ "execution_count": 3,
229
  "metadata": {},
230
  "output_type": "execute_result"
231
  }
 
236
  },
237
  {
238
  "cell_type": "code",
239
+ "execution_count": 4,
240
  "metadata": {},
241
  "outputs": [
242
  {
 
454
  "[417 rows x 7 columns]"
455
  ]
456
  },
457
+ "execution_count": 4,
458
  "metadata": {},
459
  "output_type": "execute_result"
460
  }
 
465
  },
466
  {
467
  "cell_type": "code",
468
+ "execution_count": 8,
469
  "metadata": {},
470
  "outputs": [],
471
  "source": [
472
  "# get the list of unique title, company_name pairs\n",
473
+ "title_company = data24_df[['title', 'company_name', 'location', 'description']].drop_duplicates()"
474
  ]
475
  },
476
  {
477
  "cell_type": "code",
478
+ "execution_count": 9,
479
  "metadata": {},
480
  "outputs": [
481
  {
 
503
  " <th>company_name</th>\n",
504
  " <th>location</th>\n",
505
  " <th>description</th>\n",
 
 
 
506
  " </tr>\n",
507
  " </thead>\n",
508
  " <tbody>\n",
 
512
  " <td>Nuvolum</td>\n",
513
  " <td>San Francisco, CA</td>\n",
514
  " <td>Nuvolum combines innovative, data-driven strat...</td>\n",
 
 
 
515
  " </tr>\n",
516
  " <tr>\n",
517
  " <th>1</th>\n",
 
519
  " <td>Sunrun</td>\n",
520
  " <td>San Francisco, CA (+1 other)</td>\n",
521
  " <td>Everything we do at Sunrun is driven by a dete...</td>\n",
 
 
 
522
  " </tr>\n",
523
  " <tr>\n",
524
  " <th>2</th>\n",
 
526
  " <td>Side</td>\n",
527
  " <td>Anywhere</td>\n",
528
  " <td>Side, Inc. seeks Business Intelligence Analyst...</td>\n",
 
 
 
529
  " </tr>\n",
530
  " <tr>\n",
531
  " <th>3</th>\n",
 
533
  " <td>TekNavigators Staffing</td>\n",
534
  " <td>San Francisco, CA</td>\n",
535
  " <td>Role: Senior BI Developer\\n\\nLocation: San Fra...</td>\n",
 
 
 
536
  " </tr>\n",
537
  " <tr>\n",
538
  " <th>4</th>\n",
 
540
  " <td>FIS Fidelity National Information Services</td>\n",
541
  " <td>San Francisco, CA</td>\n",
542
  " <td>Position Type : Full time Type Of Hire : Exper...</td>\n",
 
 
 
543
  " </tr>\n",
544
  " <tr>\n",
545
  " <th>...</th>\n",
 
547
  " <td>...</td>\n",
548
  " <td>...</td>\n",
549
  " <td>...</td>\n",
 
 
 
550
  " </tr>\n",
551
  " <tr>\n",
552
  " <th>412</th>\n",
 
554
  " <td>Medtronic</td>\n",
555
  " <td>Anywhere</td>\n",
556
  " <td>Careers that Change Lives\\n\\nWe are looking fo...</td>\n",
 
 
 
557
  " </tr>\n",
558
  " <tr>\n",
559
  " <th>413</th>\n",
 
561
  " <td>Keck Medicine of USC</td>\n",
562
  " <td>Alhambra, CA</td>\n",
563
  " <td>Actively design and develop ETL solutions that...</td>\n",
 
 
 
564
  " </tr>\n",
565
  " <tr>\n",
566
  " <th>414</th>\n",
 
568
  " <td>Deutsch LA</td>\n",
569
  " <td>Los Angeles, CA</td>\n",
570
  " <td>DIRECTOR, BUSINESS INTELLIGENCE\\n\\nWe are seek...</td>\n",
 
 
 
571
  " </tr>\n",
572
  " <tr>\n",
573
  " <th>415</th>\n",
 
575
  " <td>U.S. Bank</td>\n",
576
  " <td>Los Angeles, CA</td>\n",
577
  " <td>At U.S. Bank, we’re on a journey to do our bes...</td>\n",
 
 
 
578
  " </tr>\n",
579
  " <tr>\n",
580
  " <th>416</th>\n",
 
582
  " <td>BIGO</td>\n",
583
  " <td>Los Angeles, CA</td>\n",
584
  " <td>Location: 10250 Constellation Blvd., Century C...</td>\n",
 
 
 
585
  " </tr>\n",
586
  " </tbody>\n",
587
  "</table>\n",
588
+ "<p>405 rows × 4 columns</p>\n",
589
  "</div>"
590
  ],
591
  "text/plain": [
 
615
  "415 U.S. Bank Los Angeles, CA \n",
616
  "416 BIGO Los Angeles, CA \n",
617
  "\n",
618
+ " description \n",
619
+ "0 Nuvolum combines innovative, data-driven strat... \n",
620
+ "1 Everything we do at Sunrun is driven by a dete... \n",
621
+ "2 Side, Inc. seeks Business Intelligence Analyst... \n",
622
+ "3 Role: Senior BI Developer\\n\\nLocation: San Fra... \n",
623
+ "4 Position Type : Full time Type Of Hire : Exper... \n",
624
+ ".. ... \n",
625
+ "412 Careers that Change Lives\\n\\nWe are looking fo... \n",
626
+ "413 Actively design and develop ETL solutions that... \n",
627
+ "414 DIRECTOR, BUSINESS INTELLIGENCE\\n\\nWe are seek... \n",
628
+ "415 At U.S. Bank, we’re on a journey to do our bes... \n",
629
+ "416 Location: 10250 Constellation Blvd., Century C... \n",
630
  "\n",
631
+ "[405 rows x 4 columns]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
  ]
633
  },
634
+ "execution_count": 9,
635
  "metadata": {},
636
  "output_type": "execute_result"
637
  }
638
  ],
639
  "source": [
640
+ "title_company"
641
+ ]
642
+ },
643
+ {
644
+ "cell_type": "code",
645
+ "execution_count": 10,
646
+ "metadata": {},
647
+ "outputs": [],
648
+ "source": [
649
+ "data24_df.to_csv('data24.csv', index=False)"
650
  ]
651
  },
652
  {
notebooks/parse_description_test.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
sf_recent_jobs.csv ADDED
The diff for this file is too large to render. See raw diff