from model_functions import *
from preprocessor import *
import streamlit as st
import pandas as pd


@st.cache_data
def load_example_file(file):
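    # Read the example zip once; st.cache_data reuses the bytes across Streamlit reruns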
    with open(file, "rb") as f:
        return f.read()


def main():
    st.markdown("""
        <style>
        [data-testid-"stAppViewContainer"]{
                background-color: #e6fedb;
            }
        </style>""",unsafe_allow_html=True)
    # Load models
    tokenizer_sentiment, model_sentiment = load_sentiment_analyzer()
    tokenizer_summary, model_summary = load_summarizer()
    pipe_ner = load_NER()
    
    st.title("WhatsApp Analysis Tool")
    st.markdown("This app summarizes Whatsapp chats and provides named entity recognition as well as sentiment analysis for the conversation")
    st.markdown("**NOTE**: *This app can only receive chats downloaded from IOS as the downloaded chat format is different than from Android.*")
    st.markdown("Download your whatsapp chat by going to Settings > Chats > Export Chat and there select the chat you want to summarize (download 'Without Media').")

    st.markdown("**Example Files**: Download example zip files to test the app:")
    example_files = {
        "Example 1": "example1.zip",
        "Example 2": "example2.zip",
        "Example 3": "example3.zip"
    }
    
    for name, file in example_files.items():
        data = load_example_file(file)
        st.download_button(label=name, data=data, file_name=file, mime="application/zip")
    

    # File uploader
    uploaded_file = st.file_uploader("Choose a file (.zip)", type=['zip'])
    
    if uploaded_file is not None:
        file_type = detect_file_type(uploaded_file.name)
        if file_type == "zip":
            # Process the file
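            # preprocess_whatsapp_messages (from preprocessor) is expected to return a DataFrame of parsed messages with a datetime 'date' column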
            data = preprocess_whatsapp_messages(uploaded_file, file_type)
            if data.empty:
                st.write("No messages found or the file could not be processed.")
            else:
                # Date selector
                date_options = data['date'].dt.strftime('%Y-%m-%d').unique()
                selected_date = st.selectbox("Select a date for analysis:", date_options)

                if selected_date:
                    text_for_analysis = get_dated_input(data, selected_date)
                    with st.expander("Show/Hide Original Conversation"):
                        st.markdown(f"```\n{text_for_analysis}\n```", unsafe_allow_html=True)
                    process = st.button('Process')
                    if process:
                        # Perform analysis
                        sentiment = get_sentiment_analysis(text_for_analysis, tokenizer_sentiment, model_sentiment)
                        summary = generate_summary(text_for_analysis, tokenizer_summary, model_summary)
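                        # Title-case the summary before NER, assuming the pipeline relies on capitalisation to spot entity names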
                        ner_results = get_NER(summary.title(), pipe_ner)
    
                        # Display results
                        st.subheader("Sentiment Analysis")
                        st.write("Sentiment:", sentiment)
    
                        st.subheader("Summary")
                        st.write("Summary:", summary)
    
                        st.subheader("Named Entity Recognition")
                        ner_df = pd.DataFrame(ner_results, columns=["Word", "Entity Group"])
                        st.write(ner_df)
        else:
            st.error("Unsupported file type. Please upload a .txt or .zip file.")
    else:
        st.info("Please upload a file to proceed.")

if __name__ == "__main__":
    main()