File size: 7,409 Bytes
f3d0f1e
 
d0fd192
 
85df319
a3f5633
85df319
c98215f
85df319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7c7ddc
a3f5633
f3d0f1e
 
 
 
723fbc0
d7c7ddc
 
 
 
d0fd192
 
 
 
6e5c3a6
 
 
192315d
 
 
6e5c3a6
192315d
d0fd192
 
33014c1
d0fd192
f3d0f1e
33014c1
85df319
33014c1
 
 
 
85df319
33014c1
d0fd192
a3f5633
b5a209d
 
 
33014c1
 
 
 
 
 
85df319
33014c1
 
 
 
 
 
85df319
 
33014c1
 
0d7e513
33014c1
 
 
 
 
 
 
 
85df319
33014c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5a209d
 
192315d
456a93d
963621e
d0fd192
6e5c3a6
963621e
6e5c3a6
d0fd192
d7c7ddc
d0fd192
192315d
 
 
 
 
 
 
 
d0fd192
33014c1
192315d
33014c1
192315d
 
 
f3d0f1e
d0fd192
f3d0f1e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
from src.chatbot import chatbot, keyword_search
#from gradio_calendar import Calendar
#from datetime import datetime

# Define important variables
# Choices for the legislature dropdown: the catch-all "All" entry first,
# followed by every legislature period counted down from the 20th to the 1st.
legislature_periods = ["All"] + [
    f"{number}. Legislaturperiode" for number in range(20, 0, -1)
]

# Choices offered by the party dropdown of the keyword search.
partys = [
    "All",
    "CDU/CSU",
    "SPD",
    "AfD",
    "Grüne",
    "FDP",
    "DIE LINKE.",
    "GB/BHE",
    "DRP",
    "WAV",
    "NR",
    "BP",
    "FU",
    "SSW",
    "KPD",
    "DA",
    "FVP",
    "DP",
    "Z",
    "PDS",
    "Fraktionslos",
    "not found",
    "Gast",
]



# Root container of the UI; every tab below is created inside it.
with gr.Blocks() as App:
    # Tab 1: chat interface driven by the `chatbot` callback imported from
    # src.chatbot (a RAG pipeline, according to the original author's note).
    with gr.Tab("ChatBot"):
        with gr.Blocks(fill_height=True):
            # Filter controls, collapsed by default (open=False)
            with gr.Accordion(open=False, label="Filter database"):
                # Both dropdowns are handed to `chatbot` through `additional_inputs` below
                db_inputs = gr.Dropdown(choices=legislature_periods, value="All", multiselect=True, label="Legislature", info="Select a combination of legislatures as basis for the chatbot's replies", show_label=True)
                prompt_language = gr.Dropdown(choices=["DE", "EN"], value="DE",label="Language", info="Choose output language", multiselect=False)
           
            # Each example row is [message, legislature, language], i.e. the chat
            # message followed by one value per entry of `additional_inputs`.
            gr.ChatInterface(chatbot,
                        title="PoliticsToYou",
                        description= "Ask anything about your favorite political topic from any legislature period",
                        examples=[
                            ["Wie steht die CDU zur Cannabislegalisierung?", "All", "DE"],
                            ["Wie steht die FDP zur Rente?", "All", "DE"],
                            ["Was sagten die Parteien in der ersten Legislaturperiode über die nazi Vergangenheit?", "1. Legislaturperiode", "DE"],
                            ["Wie wird die Ehe für alle diskutiert?", "18. Legislaturperiode", "DE"],
                            ["How is the GDR perceived?", "11. Legislaturperiode", "EN"]
                            ], 
                        cache_examples=True,  # NOTE: True increases loading time (original author's remark)
                        additional_inputs = [db_inputs, prompt_language],
                        )
        
    # Tab 2: keyword search with optional filters and a download section
    with gr.Tab("KeywordSearch"):
        
        with gr.Blocks() as Block:
            # Free-text keyword input
            keyword_box = gr.Textbox(label='keyword')

            # Additional filters, collapsed by default (open=False)
            with gr.Accordion('Detailed filters', open=False):
                # Lay the filter widgets out side by side
                with gr.Row() as additional_input:
                    n_slider = gr.Slider(label="Number of Results",info="Other filters reduces the returned results", minimum=1, maximum=100, step=1, value=10)
                    # NOTE: the name `party_dopdown` (sic) is also used by the click wiring of the search button
                    party_dopdown = gr.Dropdown(value='All', choices=partys, label='Party') 
                    # ToDo: Add date or legislature filter as input
                    #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
                    #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)

            search_btn = gr.Button('Search')

            # Results area: created hidden; the `search` callback returns a visible
            # column for it together with the result table.
            with gr.Column(visible=False) as output_col:
                results_df = gr.Dataframe(label='Results', interactive=False)

                # Download results from keyword search
                with gr.Accordion('Would you like to download your results?', open=False) as download_row:
                    with gr.Row():
                        ftype_dropdown = gr.Dropdown(choices=["csv","excel","json"], label="Format")
                        export_btn = gr.Button('Export')
                        # Created hidden; the `export` callback returns a visible file component
                        file = gr.File(file_types=[".xlsx", ".csv", ".json"], visible=False)
    
            # Keyword Search on click
            def search(keyword, n, party):
                """Run the keyword search and reveal the results area.

                Parameters
                ----------
                keyword : content of the keyword text box.
                n : value of the "Number of Results" slider.
                party : selection of the party dropdown.

                Returns
                -------
                dict
                    Updates keyed by component: ``output_col`` becomes visible and
                    ``results_df`` receives whatever ``keyword_search`` returns.
                """
                # TODO: pass a date or legislature filter once the UI offers one
                shown_column = gr.Column(visible=True)
                hits = keyword_search(query=keyword, n=n, party_filter=party)
                return {output_col: shown_column, results_df: hits}

            # Clicking "Search" calls `search` with the keyword, the result count and
            # the party filter; `outputs` lists the components that the dict
            # returned by `search` updates.
            search_btn.click(
                fn=search,
                inputs=[keyword_box, n_slider, party_dopdown],
                outputs=[output_col, results_df],
            )
           
            # Export data to a downloadable format
            def export(df, keyword, ftype=None):
                """Write the current search results to a file and offer it for download.

                Parameters
                ----------
                df : result table taken from the results component; it must provide
                    ``to_csv`` / ``to_json`` / ``to_excel`` (presumably a pandas
                    DataFrame - TODO confirm the Dataframe component's type).
                keyword : search term; a sanitized form of it becomes the file name.
                ftype : "csv" or "json"; any other value, including ``None`` when no
                    format was selected, produces an Excel file.

                Returns
                -------
                gr.File
                    A visible file component pointing at the written file.
                """
                import re

                # The keyword is free user input. Replace path separators, dots and
                # other characters that are unsafe in file names so the export can
                # never be written outside the working directory, and fall back to
                # a fixed name when nothing usable remains (an empty keyword would
                # otherwise produce a hidden dot-file such as ".csv").
                stem = re.sub(r"[^\w\- ]+", "_", str(keyword or "")).strip(" ._")
                if not stem:
                    stem = "results"

                # `path` instead of `file`: the latter is the name of the gr.File
                # component in the enclosing scope.
                if ftype == "csv":
                    path = f"{stem}.csv"
                    df.to_csv(path, index=False)
                elif ftype == "json":
                    path = f"{stem}.json"
                    df.to_json(path, index=True)
                else:
                    # "excel" and a missing selection both end up here
                    path = f"{stem}.xlsx"
                    df.to_excel(path, index=True)

                return gr.File(value=path, visible=True)
            
            # Clicking "Export" calls `export` with the result table, the keyword
            # (used for the file name) and the chosen format; the returned file
            # component replaces the hidden `file` placeholder.
            export_btn.click(
                fn=export,
                inputs=[results_df, keyword_box, ftype_dropdown],
                outputs=[file],
            )
            
    # Tab 3: static project description, written as HTML inside a Markdown component
    with gr.Tab("About"):
        gr.Markdown("""

                    <h2>Welcome to <strong>PoliticsToYou</strong> - your playground for investigating the heart of politics in Germany</h2>

                    <ul>

                    <p>Would you like to gain insights into political debates or reveal party positions on specific topics from any legislature?</p>

                    <p>You can use the ChatBot to ask all your questions or search for related speech content in the Keyword Search section.</p>

                    </ul>

                    <p>Looking forward to your feedback!</p>



                    <h3>Further improvements & Ideas:</h3>

                    <ul>

                        <li>Experiment with different LLMs and Templates</li>

                        <li>Include chat history in RAG</li>

                        <li>Add a date or legislature filter to KeywordSearch</li>

                        <li>Exclude short document splits when creating the vectorstore</li>

                        <li>Improve inference time</li>

                        <li>Add analytic tools for party manifestos</li>

                        <li>Expand the scope to different countries</li>

                        <li>Update vector databases with new speech entries every n months (currently static)</li>

                    </ul>



                    <p>Big thank you to the OpenDiscourse team for creating the underlying speeches corpus. Check out their website <a href="https://opendiscourse.de/">here</a>.</p>



                    """
                    )
        
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # share=True is not supported on Hugging Face Spaces, so sharing stays off
    App.launch(share=False)