{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data24.csv parse_description_test.ipynb\n",
      "google_job_rwtest.ipynb\n"
     ]
    }
   ],
   "source": [
    "ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/leowalker/Documents/Projects/ml_project_job_analysis\n"
     ]
    }
   ],
   "source": [
    "cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import sys\n",
    "# sys.path.append('../utils')\n",
    "\n",
    "from utils import parse_description\n",
    "\n",
    "import pprint\n",
    "import os\n",
    "from urllib.parse import quote\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data_from_db(table_name):\n",
    "    \"\"\"Load every row of a PostgreSQL table into a pandas DataFrame.\n",
    "\n",
    "    The user, password and host are read from the PSQL_MASTER_NAME, PSQL_KEY\n",
    "    and RDS_ENDPOINT environment variables; the port (5432) and database\n",
    "    name (postgres) are fixed.\n",
    "\n",
    "    Args:\n",
    "        table_name: Name of the table to read. It is double-quoted in the\n",
    "            query, so PostgreSQL matches it case-sensitively.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame holding every row and column of the table, or None when\n",
    "        a connection setting is missing or the read fails (the reason is\n",
    "        printed).\n",
    "    \"\"\"\n",
    "    user = os.getenv('PSQL_MASTER_NAME')\n",
    "    password = os.getenv('PSQL_KEY')\n",
    "    host = os.getenv('RDS_ENDPOINT')\n",
    "    if not all((user, password, host)):\n",
    "        print(\"Error occurred while reading data from the database: PSQL_MASTER_NAME, PSQL_KEY and RDS_ENDPOINT must all be set\")\n",
    "        return None\n",
    "\n",
    "    # Percent-encode the credentials so characters such as '@', ':' or '/'\n",
    "    # cannot be mistaken for URL delimiters.\n",
    "    user = quote(user, safe='')\n",
    "    password = quote(password, safe='')\n",
    "    engine = create_engine(f\"postgresql://{user}:{password}@{host}:5432/postgres\")\n",
    "\n",
    "    try:\n",
    "        with engine.connect() as conn:\n",
    "            # Double any embedded double quote so the name stays one quoted identifier.\n",
    "            quoted_table = table_name.replace('\"', '\"\"')\n",
    "            query = f'SELECT * FROM \"{quoted_table}\"'\n",
    "            return pd.read_sql(query, conn)\n",
    "    except Exception as e:\n",
    "        print(f\"Error occurred while reading data from the database: {str(e)}\")\n",
    "        return None\n",
    "    finally:\n",
    "        # A new engine is built on every call, so release its connection pool\n",
    "        # here instead of leaking one pool per cell re-run.\n",
    "        engine.dispose()\n",
    "\n",
    "data24_df = read_data_from_db('usajobstest')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "extensions | \n", "job_id | \n", "retrieve_date | \n", "
---|---|---|---|---|---|---|---|
330 | \n", "Data Scientist 3 | \n", "United Launch Alliance | \n", "Denver, CO | \n", "Your Role: What you'll be doing\\n\\nULA is look... | \n", "{\"111,700–174,504 a year\",Full-time,\"Paid time... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCAzIiwiY2... | \n", "2024-05-07 | \n", "
399 | \n", "VP ENG, Gen AI | \n", "Voicera | \n", "San Francisco, CA | \n", "Job description\\n\\nJob Title: VP Engineering, ... | \n", "{\"6 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJWUCBFTkcsIEdlbiBBSSIsImNvbX... | \n", "2024-05-07 | \n", "
409 | \n", "Fraud Strategy Data Scientist | \n", "Softworld, Inc. | \n", "Mountain View, CA | \n", "Job Title: Fraud Strategy Data Scientist\\n\\nJo... | \n", "{\"1 day ago\",Contractor} | \n", "eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF... | \n", "2024-05-07 | \n", "
420 | \n", "Data Scientist at Remedly in Mountain View, CA | \n", "Remedly | \n", "Mountain View, CA | \n", "Rhombus is purposefully transforming the natio... | \n", "{\"22 hours ago\",\"20–28 an hour\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW... | \n", "2024-05-07 | \n", "
421 | \n", "Principal Data Scientist | \n", "Microsoft | \n", "Mountain View, CA | \n", "The Search + Distribution (S+D) team is the le... | \n", "{\"24 days ago\",\"134K–257K a year\",Full-time,\"H... | \n", "eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn... | \n", "2024-05-07 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
907 | \n", "Data Engineer | \n", "Bonfy.AI | \n", "Mountain View, CA | \n", "At Bonfy.AI, we're working behind the scenes o... | \n", "{\"21 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "2024-05-07 | \n", "
908 | \n", "Data Engineer - Onsite - Mountain View, CA | \n", "MethodHub | \n", "Mountain View, CA | \n", "Job Details\\n\\nRequirements...\\n• Bachelor s d... | \n", "{\"27 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX... | \n", "2024-05-07 | \n", "
909 | \n", "Data Engineer | \n", "Ampcus Incorporated | \n", "Mountain View, CA | \n", "Location: Mountain View, CA (Hybrid)\\n\\nExperi... | \n", "{\"22 days ago\",Contractor,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "2024-05-07 | \n", "
910 | \n", "AWS Data Engineer (Mountainview, CA; ) | \n", "CEDENT | \n", "Mountain View, CA | \n", "5+ years of data engineer experience in develo... | \n", "{\"4 days ago\",Contractor,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW... | \n", "2024-05-07 | \n", "
911 | \n", "Data Infrastructure Engineer | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for infrastru... | \n", "{Full-time,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE... | \n", "2024-05-07 | \n", "
495 rows × 7 columns
\n", "\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "
---|---|---|---|---|
409 | \n", "Fraud Strategy Data Scientist | \n", "Softworld, Inc. | \n", "Mountain View, CA | \n", "Job Title: Fraud Strategy Data Scientist\\n\\nJo... | \n", "
420 | \n", "Data Scientist at Remedly in Mountain View, CA | \n", "Remedly | \n", "Mountain View, CA | \n", "Rhombus is purposefully transforming the natio... | \n", "
421 | \n", "Principal Data Scientist | \n", "Microsoft | \n", "Mountain View, CA | \n", "The Search + Distribution (S+D) team is the le... | \n", "
422 | \n", "Software Engineer - Agent AI | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for a softwar... | \n", "
423 | \n", "DATA SCIENTIST | \n", "Mythical Games | \n", "Mountain View, CA | \n", "We are looking for a highly-skilled Data Scien... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
907 | \n", "Data Engineer | \n", "Bonfy.AI | \n", "Mountain View, CA | \n", "At Bonfy.AI, we're working behind the scenes o... | \n", "
908 | \n", "Data Engineer - Onsite - Mountain View, CA | \n", "MethodHub | \n", "Mountain View, CA | \n", "Job Details\\n\\nRequirements...\\n• Bachelor s d... | \n", "
909 | \n", "Data Engineer | \n", "Ampcus Incorporated | \n", "Mountain View, CA | \n", "Location: Mountain View, CA (Hybrid)\\n\\nExperi... | \n", "
910 | \n", "AWS Data Engineer (Mountainview, CA; ) | \n", "CEDENT | \n", "Mountain View, CA | \n", "5+ years of data engineer experience in develo... | \n", "
911 | \n", "Data Infrastructure Engineer | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for infrastru... | \n", "
76 rows × 4 columns
\n", "\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "
---|---|---|---|---|
401 | \n", "Senior Business Intelligence Analyst, Operatio... | \n", "Rivian | \n", "Palo Alto, CA | \n", "About Rivian:\\n\\nRivian is on a mission to kee... | \n", "
217 | \n", "Generative AI Engineer | \n", "Knitit.ai | \n", "Palo Alto, CA | \n", "We are looking for a AI/ML Engineer to join a ... | \n", "
235 | \n", "Senior Data Analyst | \n", "DynPro Inc. | \n", "Mountain View, CA | \n", "Duration: 6 Months\\n\\nLocation: Bay Area, CA M... | \n", "