{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import psycopg2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pandas\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_22016\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
      " df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n"
     ]
    }
   ],
   "source": [
    "# db_connection -----------------------------------------------------------\n",
    "# Connection settings are read from the standard libpq environment variables\n",
    "# so credentials do not have to live in the notebook. The fallbacks are the\n",
    "# local development values that were previously hardcoded here.\n",
    "con_details = {\n",
    "    \"host\": os.environ.get(\"PGHOST\", \"localhost\"),\n",
    "    \"database\": os.environ.get(\"PGDATABASE\", \"next\"),\n",
    "    \"user\": os.environ.get(\"PGUSER\", \"postgres\"),\n",
    "    \"password\": os.environ.get(\"PGPASSWORD\", \"postgres\"),\n",
    "    \"port\": os.environ.get(\"PGPORT\", \"5432\"),\n",
    "}\n",
    "# NOTE(review): `con` is left open in case later cells reuse it;\n",
    "# call con.close() once no further queries are needed.\n",
    "con = psycopg2.connect(**con_details)\n",
    "\n",
    "# get data tables ---------------------------------------------------------\n",
    "# One row per speech that has a matching faction (INNER JOIN); the faction\n",
    "# abbreviation is exposed as the `party` column.\n",
    "# pandas emits a UserWarning here because `con` is a raw psycopg2 (DBAPI2)\n",
    "# connection rather than an SQLAlchemy connectable.\n",
    "df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n",
    " FROM open_discourse.speeches AS s\n",
    " INNER JOIN open_discourse.factions AS f ON\n",
    " s.faction_id = f.id;\"\"\", con)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Cleaning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Z', 'FDP', 'GB/BHE', 'DIE LINKE.', 'DRP', 'WAV', 'Fraktionslos', 'NR', 'BP', 'not found', 'SPD', 'Gast', 'FU', 'SSW', 'KPD', 'DA', 'FVP', 'AfD', 'Grüne', 'DP', 'CDU/CSU', 'PDS'}\n"
     ]
    }
   ],
   "source": [
    "# Unique parties\n",
    "# .unique() deduplicates inside pandas instead of first materialising every\n",
    "# row of the column as a Python list.\n",
    "print(set(df['party'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | id | \n", "speech_content | \n", "date | \n", "party | \n", "
---|---|---|---|---|
126 | \n", "121 | \n", "Meine Damen und Herren, die Zentrumsfraktion, ... | \n", "1949-09-22 | \n", "Z | \n", "
192 | \n", "181 | \n", "Meine Damen und Herren! Der Herr Bundeskanzler... | \n", "1949-09-22 | \n", "Z | \n", "
208 | \n", "196 | \n", "Die Zentrumsfraktion des Deutschen Bundestags ... | \n", "1949-09-27 | \n", "Z | \n", "
210 | \n", "198 | \n", "Den Antrag habe ich hier.\\n({0})\\n- Ich begrün... | \n", "1949-09-27 | \n", "Z | \n", "
211 | \n", "199 | \n", "Ich werde Ihnen, Herr Präsident, also den Antr... | \n", "1949-09-27 | \n", "Z | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
16480 | \n", "16412 | \n", "Meine Damen und Herren! Das, was Herr Kollege ... | \n", "1951-12-06 | \n", "Z | \n", "
16558 | \n", "16496 | \n", "Herr Präsident! Meine sehr verehrten Damen und... | \n", "1951-12-12 | \n", "Z | \n", "
16592 | \n", "16526 | \n", "Herr Präsident! Meine Damen und Herren! Der He... | \n", "1951-12-12 | \n", "Z | \n", "
16622 | \n", "16580 | \n", "Herr Präsident! Meine Herren und Damen! Entgeg... | \n", "1951-12-12 | \n", "Z | \n", "
16699 | \n", "16634 | \n", "Herr Präsident! Meine Damen und Herren! Die Ze... | \n", "1951-12-13 | \n", "Z | \n", "
420 rows × 4 columns
\n", "\n", " | id | \n", "speech_content | \n", "date | \n", "party | \n", "
---|---|---|---|---|
0 | \n", "0 | \n", "Meine Damen und Herren! Ich eröffne die 2. Sit... | \n", "1949-09-12 | \n", "not found | \n", "
1 | \n", "1 | \n", "Der Bundesrat ist versammelt, Herr Präsident.\\n | \n", "1949-09-12 | \n", "not found | \n", "
2 | \n", "2 | \n", "Ich danke für diese Erklärung. Ich stelle dami... | \n", "1949-09-12 | \n", "not found | \n", "
3 | \n", "3 | \n", "Ja, ich habe den Wunsch.\\n | \n", "1949-09-12 | \n", "not found | \n", "
4 | \n", "4 | \n", "Ich erteile dem Herrn Bundespräsidenten das Wo... | \n", "1949-09-12 | \n", "not found | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
930955 | \n", "1084268 | \n", "\\n\\nWir sind zwar Kollegen. | \n", "2022-12-16 | \n", "not found | \n", "
930956 | \n", "1084269 | \n", "\\n\\nLiebe, sehr geehrte Frau Präsidentin! | \n", "2022-12-16 | \n", "CDU/CSU | \n", "
930957 | \n", "1084270 | \n", "\\n\\nVielen Dank. | \n", "2022-12-16 | \n", "not found | \n", "
930958 | \n", "1084272 | \n", "\\n\\nDen Abschluss dieser Aktuellen Stunde bild... | \n", "2022-12-16 | \n", "not found | \n", "
930959 | \n", "1084273 | \n", "\\n\\nSehr geehrte Frau Präsidentin! Werte Kolle... | \n", "2022-12-16 | \n", "SPD | \n", "
930960 rows × 4 columns
\n", "