Spaces:

TomData
/

PoliticsToYou

Runtime error

App Files Files Community

TomData committed on Jun 3, 2024

Commit

a3f5633

1 Parent(s): b5a209d

update about tab

Browse files

Files changed (3) hide show

Home.py +5 -3
src/Speeches/querry.ipynb +174 -2
src/vectordatabase.py +1 -1

Home.py CHANGED Viewed

@@ -3,7 +3,7 @@ from src.chatbot import chatbot, keyword_search
 from gradio_calendar import Calendar
 from datetime import datetime
 legislature_periods = [
     "All",
     "20. Legislaturperiode",
@@ -28,6 +28,8 @@ legislature_periods = [
     "1. Legislaturperiode"
 ]
 with gr.Blocks() as App:
@@ -57,7 +59,7 @@ with gr.Blocks() as App:
                 # Row orientation
                 with gr.Row() as additional_input:
                     n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
-                    party_dopdown = gr.Dropdown(value='All', choices=['All','CDU/CSU','SPD','FDP','Grüne','not found','DIE LINKE.','PDS','KPD'], label='Party') # change choices to all possible options
                     # ToDo: Add date or legislature filter as input
                     #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
                     #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)
@@ -109,7 +111,7 @@ with gr.Blocks() as App:
             )
     with gr.Tab("About"):
-        gr.Markdown(text="""**Motivation:**
                     The idea of this project is a combination of my curiosity in LLM application and my affection for speech data, that I developed during my bachelor thesis on measuring populism in text data.
                     I would like to allow people to discover interesting discussions, opinions and positions that were communicated in the German parliament throughout the years.
                     **Development status:**

 from gradio_calendar import Calendar
 from datetime import datetime
+# Define important variables
 legislature_periods = [
     "All",
     "20. Legislaturperiode",
     "1. Legislaturperiode"
 ]
+partys = ['All','CDU/CSU','SPD','AfD','Grüne','FDP','DIE LINKE.','GB/BHE','DRP', 'WAV', 'NR', 'BP', 'FU', 'SSW', 'KPD', 'DA', 'FVP','DP','Z', 'PDS','Fraktionslos', 'Gast','not found', 'Gast']
 with gr.Blocks() as App:
                 # Row orientation
                 with gr.Row() as additional_input:
                     n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
+                    party_dopdown = gr.Dropdown(value='All', choices=partys, label='Party')
                     # ToDo: Add date or legislature filter as input
                     #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
                     #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)
             )
     with gr.Tab("About"):
+        gr.Markdown("""**Motivation:**
                     The idea of this project is a combination of my curiosity in LLM application and my affection for speech data, that I developed during my bachelor thesis on measuring populism in text data.
                     I would like to allow people to discover interesting discussions, opinions and positions that were communicated in the German parliament throughout the years.
                     **Development status:**

src/Speeches/querry.ipynb CHANGED Viewed

@@ -19,14 +19,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_32\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
       "  df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n"
      ]
     }
@@ -58,6 +58,178 @@
     "### Data Cleaning"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,

   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_22016\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
       "  df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n"
      ]
     }
     "### Data Cleaning"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'Z', 'FDP', 'GB/BHE', 'DIE LINKE.', 'DRP', 'WAV', 'Fraktionslos', 'NR', 'BP', 'not found', 'SPD', 'Gast', 'FU', 'SSW', 'KPD', 'DA', 'FVP', 'AfD', 'Grüne', 'DP', 'CDU/CSU', 'PDS'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Unique partys\n",
+    "print(set(df['party'].to_list()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>speech_content</th>\n",
+       "      <th>date</th>\n",
+       "      <th>party</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>126</th>\n",
+       "      <td>121</td>\n",
+       "      <td>Meine Damen und Herren, die Zentrumsfraktion, ...</td>\n",
+       "      <td>1949-09-22</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>192</th>\n",
+       "      <td>181</td>\n",
+       "      <td>Meine Damen und Herren! Der Herr Bundeskanzler...</td>\n",
+       "      <td>1949-09-22</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>208</th>\n",
+       "      <td>196</td>\n",
+       "      <td>Die Zentrumsfraktion des Deutschen Bundestags ...</td>\n",
+       "      <td>1949-09-27</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>210</th>\n",
+       "      <td>198</td>\n",
+       "      <td>Den Antrag habe ich hier.\\n({0})\\n- Ich begrün...</td>\n",
+       "      <td>1949-09-27</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>211</th>\n",
+       "      <td>199</td>\n",
+       "      <td>Ich werde Ihnen, Herr Präsident, also den Antr...</td>\n",
+       "      <td>1949-09-27</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16480</th>\n",
+       "      <td>16412</td>\n",
+       "      <td>Meine Damen und Herren! Das, was Herr Kollege ...</td>\n",
+       "      <td>1951-12-06</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16558</th>\n",
+       "      <td>16496</td>\n",
+       "      <td>Herr Präsident! Meine sehr verehrten Damen und...</td>\n",
+       "      <td>1951-12-12</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16592</th>\n",
+       "      <td>16526</td>\n",
+       "      <td>Herr Präsident! Meine Damen und Herren! Der He...</td>\n",
+       "      <td>1951-12-12</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16622</th>\n",
+       "      <td>16580</td>\n",
+       "      <td>Herr Präsident! Meine Herren und Damen! Entgeg...</td>\n",
+       "      <td>1951-12-12</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16699</th>\n",
+       "      <td>16634</td>\n",
+       "      <td>Herr Präsident! Meine Damen und Herren! Die Ze...</td>\n",
+       "      <td>1951-12-13</td>\n",
+       "      <td>Z</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>420 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          id                                     speech_content        date  \\\n",
+       "126      121  Meine Damen und Herren, die Zentrumsfraktion, ...  1949-09-22   \n",
+       "192      181  Meine Damen und Herren! Der Herr Bundeskanzler...  1949-09-22   \n",
+       "208      196  Die Zentrumsfraktion des Deutschen Bundestags ...  1949-09-27   \n",
+       "210      198  Den Antrag habe ich hier.\\n({0})\\n- Ich begrün...  1949-09-27   \n",
+       "211      199  Ich werde Ihnen, Herr Präsident, also den Antr...  1949-09-27   \n",
+       "...      ...                                                ...         ...   \n",
+       "16480  16412  Meine Damen und Herren! Das, was Herr Kollege ...  1951-12-06   \n",
+       "16558  16496  Herr Präsident! Meine sehr verehrten Damen und...  1951-12-12   \n",
+       "16592  16526  Herr Präsident! Meine Damen und Herren! Der He...  1951-12-12   \n",
+       "16622  16580  Herr Präsident! Meine Herren und Damen! Entgeg...  1951-12-12   \n",
+       "16699  16634  Herr Präsident! Meine Damen und Herren! Die Ze...  1951-12-13   \n",
+       "\n",
+       "      party  \n",
+       "126       Z  \n",
+       "192       Z  \n",
+       "208       Z  \n",
+       "210       Z  \n",
+       "211       Z  \n",
+       "...     ...  \n",
+       "16480     Z  \n",
+       "16558     Z  \n",
+       "16592     Z  \n",
+       "16622     Z  \n",
+       "16699     Z  \n",
+       "\n",
+       "[420 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df[df['party'] == 'Z']\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,

src/vectordatabase.py CHANGED Viewed

@@ -18,7 +18,7 @@ import os
 # from dotenv import load_dotenv
 # load_dotenv()
-# Global variables
 embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
 db_all = FAISS.load_local(folder_path="./src/FAISS", index_name="speeches_1949_09_12",
                                             embeddings=embeddings, allow_dangerous_deserialization=True)

 # from dotenv import load_dotenv
 # load_dotenv()
+# Define important variables
 embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
 db_all = FAISS.load_local(folder_path="./src/FAISS", index_name="speeches_1949_09_12",
                                             embeddings=embeddings, allow_dangerous_deserialization=True)