TomData committed on
Commit
a3f5633
·
1 Parent(s): b5a209d

update about tab

Browse files
Files changed (3) hide show
  1. Home.py +5 -3
  2. src/Speeches/querry.ipynb +174 -2
  3. src/vectordatabase.py +1 -1
Home.py CHANGED
@@ -3,7 +3,7 @@ from src.chatbot import chatbot, keyword_search
3
  from gradio_calendar import Calendar
4
  from datetime import datetime
5
 
6
-
7
  legislature_periods = [
8
  "All",
9
  "20. Legislaturperiode",
@@ -28,6 +28,8 @@ legislature_periods = [
28
  "1. Legislaturperiode"
29
  ]
30
 
 
 
31
 
32
 
33
  with gr.Blocks() as App:
@@ -57,7 +59,7 @@ with gr.Blocks() as App:
57
  # Row orientation
58
  with gr.Row() as additional_input:
59
  n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
60
- party_dopdown = gr.Dropdown(value='All', choices=['All','CDU/CSU','SPD','FDP','Grüne','not found','DIE LINKE.','PDS','KPD'], label='Party') # change choices to all possible options
61
  # ToDo: Add date or legislature filter as input
62
  #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
63
  #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)
@@ -109,7 +111,7 @@ with gr.Blocks() as App:
109
  )
110
 
111
  with gr.Tab("About"):
112
- gr.Markdown(text="""**Motivation:**
113
  The idea of this project is a combination of my curiosity in LLM application and my affection for speech data, that I developed during my bachelor thesis on measuring populism in text data.
114
  I would like to allow people to discover interesting discussions, opinions and positions that were communicated in the German parliament throughout the years.
115
  **Development status:**
 
3
  from gradio_calendar import Calendar
4
  from datetime import datetime
5
 
6
+ # Define important variables
7
  legislature_periods = [
8
  "All",
9
  "20. Legislaturperiode",
 
28
  "1. Legislaturperiode"
29
  ]
30
 
31
+ partys = ['All','CDU/CSU','SPD','AfD','Grüne','FDP','DIE LINKE.','GB/BHE','DRP', 'WAV', 'NR', 'BP', 'FU', 'SSW', 'KPD', 'DA', 'FVP','DP','Z', 'PDS','Fraktionslos', 'Gast','not found', 'Gast']
32
+
33
 
34
 
35
  with gr.Blocks() as App:
 
59
  # Row orientation
60
  with gr.Row() as additional_input:
61
  n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
62
+ party_dopdown = gr.Dropdown(value='All', choices=partys, label='Party')
63
  # ToDo: Add date or legislature filter as input
64
  #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
65
  #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)
 
111
  )
112
 
113
  with gr.Tab("About"):
114
+ gr.Markdown("""**Motivation:**
115
  The idea of this project is a combination of my curiosity in LLM application and my affection for speech data, that I developed during my bachelor thesis on measuring populism in text data.
116
  I would like to allow people to discover interesting discussions, opinions and positions that were communicated in the German parliament throughout the years.
117
  **Development status:**
src/Speeches/querry.ipynb CHANGED
@@ -19,14 +19,14 @@
19
  },
20
  {
21
  "cell_type": "code",
22
- "execution_count": 3,
23
  "metadata": {},
24
  "outputs": [
25
  {
26
  "name": "stderr",
27
  "output_type": "stream",
28
  "text": [
29
- "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_32\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
30
  " df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n"
31
  ]
32
  }
@@ -58,6 +58,178 @@
58
  "### Data Cleaning"
59
  ]
60
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  {
62
  "cell_type": "code",
63
  "execution_count": 4,
 
19
  },
20
  {
21
  "cell_type": "code",
22
+ "execution_count": 2,
23
  "metadata": {},
24
  "outputs": [
25
  {
26
  "name": "stderr",
27
  "output_type": "stream",
28
  "text": [
29
+ "C:\\Users\\Tom\\AppData\\Local\\Temp\\ipykernel_22016\\2374447718.py:12: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
30
  " df = pd.read_sql_query(\"\"\"SELECT s.id,s.speech_content,s.date,f.abbreviation AS party\n"
31
  ]
32
  }
 
58
  "### Data Cleaning"
59
  ]
60
  },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 3,
64
+ "metadata": {},
65
+ "outputs": [
66
+ {
67
+ "name": "stdout",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "{'Z', 'FDP', 'GB/BHE', 'DIE LINKE.', 'DRP', 'WAV', 'Fraktionslos', 'NR', 'BP', 'not found', 'SPD', 'Gast', 'FU', 'SSW', 'KPD', 'DA', 'FVP', 'AfD', 'Grüne', 'DP', 'CDU/CSU', 'PDS'}\n"
71
+ ]
72
+ }
73
+ ],
74
+ "source": [
75
+ "# Unique partys\n",
76
+ "print(set(df['party'].to_list()))"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 7,
82
+ "metadata": {},
83
+ "outputs": [
84
+ {
85
+ "data": {
86
+ "text/html": [
87
+ "<div>\n",
88
+ "<style scoped>\n",
89
+ " .dataframe tbody tr th:only-of-type {\n",
90
+ " vertical-align: middle;\n",
91
+ " }\n",
92
+ "\n",
93
+ " .dataframe tbody tr th {\n",
94
+ " vertical-align: top;\n",
95
+ " }\n",
96
+ "\n",
97
+ " .dataframe thead th {\n",
98
+ " text-align: right;\n",
99
+ " }\n",
100
+ "</style>\n",
101
+ "<table border=\"1\" class=\"dataframe\">\n",
102
+ " <thead>\n",
103
+ " <tr style=\"text-align: right;\">\n",
104
+ " <th></th>\n",
105
+ " <th>id</th>\n",
106
+ " <th>speech_content</th>\n",
107
+ " <th>date</th>\n",
108
+ " <th>party</th>\n",
109
+ " </tr>\n",
110
+ " </thead>\n",
111
+ " <tbody>\n",
112
+ " <tr>\n",
113
+ " <th>126</th>\n",
114
+ " <td>121</td>\n",
115
+ " <td>Meine Damen und Herren, die Zentrumsfraktion, ...</td>\n",
116
+ " <td>1949-09-22</td>\n",
117
+ " <td>Z</td>\n",
118
+ " </tr>\n",
119
+ " <tr>\n",
120
+ " <th>192</th>\n",
121
+ " <td>181</td>\n",
122
+ " <td>Meine Damen und Herren! Der Herr Bundeskanzler...</td>\n",
123
+ " <td>1949-09-22</td>\n",
124
+ " <td>Z</td>\n",
125
+ " </tr>\n",
126
+ " <tr>\n",
127
+ " <th>208</th>\n",
128
+ " <td>196</td>\n",
129
+ " <td>Die Zentrumsfraktion des Deutschen Bundestags ...</td>\n",
130
+ " <td>1949-09-27</td>\n",
131
+ " <td>Z</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>210</th>\n",
135
+ " <td>198</td>\n",
136
+ " <td>Den Antrag habe ich hier.\\n({0})\\n- Ich begrün...</td>\n",
137
+ " <td>1949-09-27</td>\n",
138
+ " <td>Z</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>211</th>\n",
142
+ " <td>199</td>\n",
143
+ " <td>Ich werde Ihnen, Herr Präsident, also den Antr...</td>\n",
144
+ " <td>1949-09-27</td>\n",
145
+ " <td>Z</td>\n",
146
+ " </tr>\n",
147
+ " <tr>\n",
148
+ " <th>...</th>\n",
149
+ " <td>...</td>\n",
150
+ " <td>...</td>\n",
151
+ " <td>...</td>\n",
152
+ " <td>...</td>\n",
153
+ " </tr>\n",
154
+ " <tr>\n",
155
+ " <th>16480</th>\n",
156
+ " <td>16412</td>\n",
157
+ " <td>Meine Damen und Herren! Das, was Herr Kollege ...</td>\n",
158
+ " <td>1951-12-06</td>\n",
159
+ " <td>Z</td>\n",
160
+ " </tr>\n",
161
+ " <tr>\n",
162
+ " <th>16558</th>\n",
163
+ " <td>16496</td>\n",
164
+ " <td>Herr Präsident! Meine sehr verehrten Damen und...</td>\n",
165
+ " <td>1951-12-12</td>\n",
166
+ " <td>Z</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>16592</th>\n",
170
+ " <td>16526</td>\n",
171
+ " <td>Herr Präsident! Meine Damen und Herren! Der He...</td>\n",
172
+ " <td>1951-12-12</td>\n",
173
+ " <td>Z</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>16622</th>\n",
177
+ " <td>16580</td>\n",
178
+ " <td>Herr Präsident! Meine Herren und Damen! Entgeg...</td>\n",
179
+ " <td>1951-12-12</td>\n",
180
+ " <td>Z</td>\n",
181
+ " </tr>\n",
182
+ " <tr>\n",
183
+ " <th>16699</th>\n",
184
+ " <td>16634</td>\n",
185
+ " <td>Herr Präsident! Meine Damen und Herren! Die Ze...</td>\n",
186
+ " <td>1951-12-13</td>\n",
187
+ " <td>Z</td>\n",
188
+ " </tr>\n",
189
+ " </tbody>\n",
190
+ "</table>\n",
191
+ "<p>420 rows × 4 columns</p>\n",
192
+ "</div>"
193
+ ],
194
+ "text/plain": [
195
+ " id speech_content date \\\n",
196
+ "126 121 Meine Damen und Herren, die Zentrumsfraktion, ... 1949-09-22 \n",
197
+ "192 181 Meine Damen und Herren! Der Herr Bundeskanzler... 1949-09-22 \n",
198
+ "208 196 Die Zentrumsfraktion des Deutschen Bundestags ... 1949-09-27 \n",
199
+ "210 198 Den Antrag habe ich hier.\\n({0})\\n- Ich begrün... 1949-09-27 \n",
200
+ "211 199 Ich werde Ihnen, Herr Präsident, also den Antr... 1949-09-27 \n",
201
+ "... ... ... ... \n",
202
+ "16480 16412 Meine Damen und Herren! Das, was Herr Kollege ... 1951-12-06 \n",
203
+ "16558 16496 Herr Präsident! Meine sehr verehrten Damen und... 1951-12-12 \n",
204
+ "16592 16526 Herr Präsident! Meine Damen und Herren! Der He... 1951-12-12 \n",
205
+ "16622 16580 Herr Präsident! Meine Herren und Damen! Entgeg... 1951-12-12 \n",
206
+ "16699 16634 Herr Präsident! Meine Damen und Herren! Die Ze... 1951-12-13 \n",
207
+ "\n",
208
+ " party \n",
209
+ "126 Z \n",
210
+ "192 Z \n",
211
+ "208 Z \n",
212
+ "210 Z \n",
213
+ "211 Z \n",
214
+ "... ... \n",
215
+ "16480 Z \n",
216
+ "16558 Z \n",
217
+ "16592 Z \n",
218
+ "16622 Z \n",
219
+ "16699 Z \n",
220
+ "\n",
221
+ "[420 rows x 4 columns]"
222
+ ]
223
+ },
224
+ "execution_count": 7,
225
+ "metadata": {},
226
+ "output_type": "execute_result"
227
+ }
228
+ ],
229
+ "source": [
230
+ "df[df['party'] == 'Z']\n"
231
+ ]
232
+ },
233
  {
234
  "cell_type": "code",
235
  "execution_count": 4,
src/vectordatabase.py CHANGED
@@ -18,7 +18,7 @@ import os
18
  # from dotenv import load_dotenv
19
  # load_dotenv()
20
 
21
- # Global variables
22
  embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
23
  db_all = FAISS.load_local(folder_path="./src/FAISS", index_name="speeches_1949_09_12",
24
  embeddings=embeddings, allow_dangerous_deserialization=True)
 
18
  # from dotenv import load_dotenv
19
  # load_dotenv()
20
 
21
+ # Define important variables
22
  embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
23
  db_all = FAISS.load_local(folder_path="./src/FAISS", index_name="speeches_1949_09_12",
24
  embeddings=embeddings, allow_dangerous_deserialization=True)