import io
import random
import time

import pandas as pd
import plotly.express as px
import spacy
import streamlit as st
from huggingface_hub import InferenceClient

# Load the spaCy English language model
nlp = spacy.load("en_core_web_sm")


def get_location(sentence):
    """Return the first LOC or GPE entity found in the text, if any."""
    doc = nlp(str(sentence))
    for ent in doc.ents:
        if ent.label_ in ("LOC", "GPE"):
            return ent.text
    return None


class JobPosting:
    def __init__(self, description):
        self.description = description

    def extract(self):
        # Placeholder: replace with actual responsibilities-extraction logic
        responsibilities = "Define responsibilities here"
        return responsibilities


class CSVFileUploader:
    def __init__(self):
        self.file = None
        self.selected_column = None
        self.selected_row = None

    def upload_file(self):
        uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
        if uploaded_file is not None:
            self.file = pd.read_csv(uploaded_file)
            df = self.file

            # Refine the Location column down to its first LOC/GPE entity
            df["Location"] = df["Location"].apply(get_location)

            st.header("Data Preview")
            st.write(df.head())

            st.header("Data Information")
            # df.info() prints to stdout and returns None, so capture its output
            buffer = io.StringIO()
            df.info(buf=buffer)
            st.text(buffer.getvalue())

            st.header("Descriptive Statistics")
            st.write(df.describe())

            st.header("Column Distribution")
            self.selected_column = st.selectbox("Select a column", df.columns)
            counts = df[self.selected_column].value_counts().reset_index()
            counts.columns = [self.selected_column, "Count"]
            fig = px.bar(counts, x=self.selected_column, y="Count")
            st.plotly_chart(fig)

    def plot_locations(self):
        if self.file is not None:
            df = self.file.dropna(subset=["Location"])
            locations = df["Location"].value_counts().reset_index()
            locations.columns = ["Location", "Count"]
            fig = px.bar(locations, x="Location", y="Count")
            fig.update_layout(xaxis={"categoryorder": "total descending"})
            st.header("Location Distribution")
            st.plotly_chart(fig)

    def select_row(self):
        if self.file is not None:
            self.selected_row = st.selectbox("Select row", list(range(len(self.file))))

    def display_data(self):
        if self.file is not None:
            if self.selected_column is not None:
                st.write(self.file[self.selected_column])
            if self.selected_row is not None:
                location = self.file.iloc[self.selected_row]["Location"]
                description = self.file.iloc[self.selected_row]["Description"]

                st.markdown("---")
                st.markdown("**Location:**")
                st.write(
                    "Just give me the location for this job description, no other "
                    "words, and remove 'on-site' or 'remote' if mentioned. "
                    "For example, 'Boston, MA, USA': " + str(location)
                )

                st.markdown("---")
                st.markdown("**Description for selected row:**")
                st.write("What are the qualifications required: " + str(description))

                # Automatically populate the prompt and show the model output.
                # render_chat is defined below, after the inference helpers; it is
                # called here so that `description` is in scope.
                render_chat(description)
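
# Illustrative sketch, not part of the original app: JobPosting.extract above is
# a placeholder, and one possible (assumed) implementation is to keep sentences
# whose root verb reads like a duty. The verb list here is an assumption.
def extract_responsibilities(description):
    """Return sentences that look like responsibilities, joined into one string."""
    action_verbs = {"manage", "lead", "develop", "maintain", "design", "coordinate"}
    doc = nlp(str(description))
    duties = [sent.text.strip() for sent in doc.sents
              if sent.root.lemma_.lower() in action_verbs]
    return " ".join(duties)
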
# Hugging Face inference clients for the available Gemma models
clients = {
    "google/gemma-7b": InferenceClient("google/gemma-7b"),
    "google/gemma-7b-it": InferenceClient("google/gemma-7b-it"),
    "google/gemma-2b": InferenceClient("google/gemma-2b"),
    "google/gemma-2b-it": InferenceClient("google/gemma-2b-it"),
}


def format_prompt(message, history):
    """Build a Gemma chat prompt from the history and the new message."""
    prompt = ""
    if history:
        for user_prompt, bot_response in history:
            prompt += f"<start_of_turn>user\n{user_prompt}<end_of_turn>\n"
            prompt += f"<start_of_turn>model\n{bot_response}<end_of_turn>\n"
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
    return prompt


def chat_inf(system_prompt, prompt, history, client_choice, seed, temp, tokens, top_p, rep_p):
    """Stream a completion from the selected model and display it."""
    client = clients[client_choice]  # client_choice is the model name, used as a dict key
    if not history:
        history = []

    generate_kwargs = dict(
        temperature=temp,
        max_new_tokens=tokens,
        top_p=top_p,
        repetition_penalty=rep_p,
        do_sample=True,
        seed=seed,
    )

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )

    output = []
    for response in stream:
        output.append(response.token.text)
    history.append((prompt, "".join(output)))
    st.write("".join(output))  # Display the accumulated output
    return history


def clear_fn():
    return None


rand_val = random.randint(1, 1111111111111111)


def check_rand(inp, val):
    """Render the seed slider, defaulting to a random value when requested."""
    if inp:
        return st.slider("Seed", 1, 1111111111111111, rand_val)
    return st.slider("Seed", 1, 1111111111111111, int(val))


def render_chat(description):
    """Render the Gemma chat controls, pre-populated with the selected description."""
    st.title("Google Gemma Models")
    client_choice = st.selectbox("Models", list(clients.keys()))  # Model names are the choices
    rand = st.checkbox("Random Seed", True)
    seed = check_rand(rand, rand_val)
    tokens = st.slider("Max new tokens", 0, 8000, 6400, 64)
    temp = st.slider("Temperature", 0.01, 1.0, 0.9, step=0.01)
    top_p = st.slider("Top-P", 0.01, 1.0, 0.9, step=0.01)
    rep_p = st.slider("Repetition Penalty", 0.1, 2.0, 1.0, step=0.1)

    sys_inp = st.text_input("System Prompt (optional)")
    inp = st.text_input(
        "Prompt for Description",
        value=f"What are the qualifications required: {description}",
    )
    btn = st.button("Chat")
    clear_btn = st.button("Clear")

    if btn:
        st.session_state.history = chat_inf(
            sys_inp, inp, st.session_state.get("history"), client_choice,
            seed, temp, tokens, top_p, rep_p,
        )
    if clear_btn:
        st.session_state.history = clear_fn()


if __name__ == "__main__":
    st.title("Market Hipocrisy")
    uploader = CSVFileUploader()
    uploader.upload_file()
    uploader.select_row()
    uploader.plot_locations()

    # Progress bar example (replace with actual scraping logic)
    with st.spinner("Scraping data..."):
        time.sleep(5)  # Simulate a long-running process
    st.success("Scraping complete!")

    uploader.display_data()
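
# Usage note (assumptions: the file is saved as app.py and the packages are
# installed): the spaCy model must be downloaded once, then the app is run with
#   python -m spacy download en_core_web_sm
#   streamlit run app.py
# Calls to the hosted Gemma endpoints may also need a Hugging Face access token,
# e.g. InferenceClient("google/gemma-7b-it", token="hf_...").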