{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import psycopg2\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pandas\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_22016\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", " df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n" ] } ], "source": [ "# db_connection -----------------------------------------------------------\n", "con_details = {\n", " \"host\" : \"localhost\",\n", " \"database\" : \"next\",\n", " \"user\" : \"postgres\",\n", " \"password\" : \"postgres\",\n", " \"port\" : \"5432\"\n", "}\n", "con = psycopg2.connect(**con_details)\n", "\n", "# get data tables ---------------------------------------------------------\n", "df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n", " FROM open_discourse.speeches AS s\n", " INNER JOIN open_discourse.factions AS f ON\n", " s.faction_id = f.id;\"\"\", con)\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data Cleaning" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'Z', 'FDP', 'GB/BHE', 'DIE LINKE.', 'DRP', 'WAV', 'Fraktionslos', 'NR', 'BP', 'not found', 'SPD', 'Gast', 'FU', 'SSW', 'KPD', 'DA', 'FVP', 'AfD', 'Grüne', 'DP', 'CDU/CSU', 'PDS'}\n" ] } ], "source": [ "# Unique partys\n", "print(set(df['party'].to_list()))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idspeech_contentdateparty
126121Meine Damen und Herren, die Zentrumsfraktion, ...1949-09-22Z
192181Meine Damen und Herren! Der Herr Bundeskanzler...1949-09-22Z
208196Die Zentrumsfraktion des Deutschen Bundestags ...1949-09-27Z
210198Den Antrag habe ich hier.\\n({0})\\n- Ich begrün...1949-09-27Z
211199Ich werde Ihnen, Herr Präsident, also den Antr...1949-09-27Z
...............
1648016412Meine Damen und Herren! Das, was Herr Kollege ...1951-12-06Z
1655816496Herr Präsident! Meine sehr verehrten Damen und...1951-12-12Z
1659216526Herr Präsident! Meine Damen und Herren! Der He...1951-12-12Z
1662216580Herr Präsident! Meine Herren und Damen! Entgeg...1951-12-12Z
1669916634Herr Präsident! Meine Damen und Herren! Die Ze...1951-12-13Z
\n", "

420 rows × 4 columns

\n", "
" ], "text/plain": [ " id speech_content date \\\n", "126 121 Meine Damen und Herren, die Zentrumsfraktion, ... 1949-09-22 \n", "192 181 Meine Damen und Herren! Der Herr Bundeskanzler... 1949-09-22 \n", "208 196 Die Zentrumsfraktion des Deutschen Bundestags ... 1949-09-27 \n", "210 198 Den Antrag habe ich hier.\\n({0})\\n- Ich begrün... 1949-09-27 \n", "211 199 Ich werde Ihnen, Herr Präsident, also den Antr... 1949-09-27 \n", "... ... ... ... \n", "16480 16412 Meine Damen und Herren! Das, was Herr Kollege ... 1951-12-06 \n", "16558 16496 Herr Präsident! Meine sehr verehrten Damen und... 1951-12-12 \n", "16592 16526 Herr Präsident! Meine Damen und Herren! Der He... 1951-12-12 \n", "16622 16580 Herr Präsident! Meine Herren und Damen! Entgeg... 1951-12-12 \n", "16699 16634 Herr Präsident! Meine Damen und Herren! Die Ze... 1951-12-13 \n", "\n", " party \n", "126 Z \n", "192 Z \n", "208 Z \n", "210 Z \n", "211 Z \n", "... ... \n", "16480 Z \n", "16558 Z \n", "16592 Z \n", "16622 Z \n", "16699 Z \n", "\n", "[420 rows x 4 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['party'] == 'Z']\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idspeech_contentdateparty
00Meine Damen und Herren! Ich eröffne die 2. Sit...1949-09-12not found
11Der Bundesrat ist versammelt, Herr Präsident.\\n1949-09-12not found
22Ich danke für diese Erklärung. Ich stelle dami...1949-09-12not found
33Ja, ich habe den Wunsch.\\n1949-09-12not found
44Ich erteile dem Herrn Bundespräsidenten das Wo...1949-09-12not found
...............
9309551084268\\n\\nWir sind zwar Kollegen.2022-12-16not found
9309561084269\\n\\nLiebe, sehr geehrte Frau Präsidentin!2022-12-16CDU/CSU
9309571084270\\n\\nVielen Dank.2022-12-16not found
9309581084272\\n\\nDen Abschluss dieser Aktuellen Stunde bild...2022-12-16not found
9309591084273\\n\\nSehr geehrte Frau Präsidentin! Werte Kolle...2022-12-16SPD
\n", "

930960 rows × 4 columns

\n", "
" ], "text/plain": [ " id speech_content \\\n", "0 0 Meine Damen und Herren! Ich eröffne die 2. Sit... \n", "1 1 Der Bundesrat ist versammelt, Herr Präsident.\\n \n", "2 2 Ich danke für diese Erklärung. Ich stelle dami... \n", "3 3 Ja, ich habe den Wunsch.\\n \n", "4 4 Ich erteile dem Herrn Bundespräsidenten das Wo... \n", "... ... ... \n", "930955 1084268 \\n\\nWir sind zwar Kollegen. \n", "930956 1084269 \\n\\nLiebe, sehr geehrte Frau Präsidentin! \n", "930957 1084270 \\n\\nVielen Dank. \n", "930958 1084272 \\n\\nDen Abschluss dieser Aktuellen Stunde bild... \n", "930959 1084273 \\n\\nSehr geehrte Frau Präsidentin! Werte Kolle... \n", "\n", " date party \n", "0 1949-09-12 not found \n", "1 1949-09-12 not found \n", "2 1949-09-12 not found \n", "3 1949-09-12 not found \n", "4 1949-09-12 not found \n", "... ... ... \n", "930955 2022-12-16 not found \n", "930956 2022-12-16 CDU/CSU \n", "930957 2022-12-16 not found \n", "930958 2022-12-16 not found \n", "930959 2022-12-16 SPD \n", "\n", "[930960 rows x 4 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"speech_content\"].replace(\"\\({\\d+}\\)\", \"\", inplace=True, regex=True) #removing keys from interruptions\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df.to_pickle(\"speeches_1949_09_12\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }