|
import io
import random
import re
import time

import pandas as pd
import plotly.express as px
import spacy
import streamlit as st
from huggingface_hub import InferenceClient
|
|
|
|
|
nlp = spacy.load("en_core_web_sm") |
|
|
|
def get_location(sentence):
    """Return the first location-like named entity in *sentence*, or None.

    Runs the module-level spaCy pipeline and picks the first entity tagged
    as a geopolitical entity (GPE) or physical location (LOC).
    """
    for ent in nlp(sentence).ents:
        if ent.label_ in ("LOC", "GPE"):
            return ent.text
    return None
|
|
|
class JobPosting:
    """Thin wrapper around a raw job-description string."""

    def __init__(self, description):
        # Raw text of the posting as supplied by the caller.
        self.description = description

    def extract(self):
        """Return the responsibilities text for this posting.

        Currently a stub: real extraction logic has not been implemented,
        so a fixed placeholder string is returned.
        """
        return "Define responsibilities here"
|
|
|
class CSVFileUploader:
    """Streamlit helper: upload a job-postings CSV, explore it, chart locations.

    Expected columns (from the visible code): "Location" and "Description"
    — TODO(review): confirm against the actual data files.
    """

    def __init__(self):
        # Parsed DataFrame of the uploaded CSV; None until a file is uploaded.
        self.file = None
        # Column picked in the distribution selectbox; None until picked.
        self.selected_column = None
        # Row index picked in select_row(); None until picked.
        self.selected_row = None

    def upload_file(self):
        """Render the upload widget; on upload, normalize locations and show EDA panels."""
        uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
        if uploaded_file is None:
            return
        self.file = pd.read_csv(uploaded_file)
        df = self.file

        # Reduce each free-text location to the first LOC/GPE entity spaCy finds.
        # NOTE(review): a CSV without a "Location" column raises KeyError here.
        df["Location"] = df["Location"].apply(get_location)

        st.header("Data Preview")
        st.write(df.head())

        st.header("Data Information")
        # BUG FIX: df.info() prints to stdout and returns None, so the original
        # st.write(df.info()) rendered "None". Capture the summary instead.
        buf = io.StringIO()
        df.info(buf=buf)
        st.text(buf.getvalue())

        st.header("Descriptive Statistics")
        st.write(df.describe())

        st.header("Column Distribution")
        # BUG FIX: persist the choice on self so display_data() can use it —
        # the original kept it in a local and self.selected_column stayed None.
        self.selected_column = st.selectbox("Select a column", df.columns)
        fig = px.bar(df, x=df[self.selected_column],
                     labels={'index': 'Value', self.selected_column: 'Count'})
        st.plotly_chart(fig)

    def plot_locations(self):
        """Bar-chart how often each extracted location appears, most common first."""
        if self.file is None:
            return
        df = self.file.dropna(subset=["Location"])
        locations = df["Location"].value_counts().reset_index()
        locations.columns = ["Location", "Count"]
        fig = px.bar(locations, x="Location", y="Count",
                     labels={'index': 'Value', 'Location': 'Location', 'Count': 'Count'})
        fig.update_layout(xaxis={'categoryorder': 'total descending'})
        st.header("Location Distribution")
        st.plotly_chart(fig)

    def select_row(self):
        """Let the user pick a row index from the uploaded file."""
        if self.file is not None:
            self.selected_row = st.selectbox("Select row", list(range(len(self.file))))

    def display_data(self):
        """Show the selected column and the location/description of the selected row."""
        if self.file is None:
            return
        if self.selected_column is not None:
            st.write(self.file[self.selected_column])
        if self.selected_row is not None:
            location = self.file.iloc[self.selected_row]["Location"]
            description = self.file.iloc[self.selected_row]["Description"]
            st.markdown("---")
            # Plain string literals (the originals were f-strings with no placeholders).
            st.markdown("<span style='background-color: #f4a261; padding: 2px 4px; border-radius: 4px;'>Location:</span>", unsafe_allow_html=True)
            st.write("Just give me the location for this job description, no other words and remove 'on-site' or 'remote' if mentioned. For example, 'Boston, MA, USA': "
                     + str(location))
            st.markdown("---")
            st.markdown("<span style='background-color: #f4a261; padding: 2px 4px; border-radius: 4px;'>Description for selected row:</span>", unsafe_allow_html=True)
            st.write("What are the qualifications required: " + str(description))
|
|
|
|
|
|
|
# One reusable HF inference client per supported Gemma checkpoint,
# keyed by the model id shown in the UI selectbox.
GEMMA_MODELS = (
    "google/gemma-7b",
    "google/gemma-7b-it",
    "google/gemma-2b",
    "google/gemma-2b-it",
)
clients = {model_id: InferenceClient(model_id) for model_id in GEMMA_MODELS}
|
|
|
def format_prompt(message, history):
    """Build a Gemma chat prompt from prior turns plus the new user message.

    Parameters:
        message: the new user message to append.
        history: iterable of (user_prompt, bot_response) pairs, or None/empty.

    Returns the concatenated prompt, ending with an open model turn for the
    model to complete.
    """
    prompt = ""
    # Empty/None history simply contributes nothing.
    for user_prompt, bot_response in history or []:
        prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
        # BUG FIX: close the model turn too — the original left it
        # unterminated, corrupting multi-turn context per Gemma's
        # <start_of_turn>/<end_of_turn> chat format.
        prompt += f"<start_of_turn>model{bot_response}<end_of_turn>"
    prompt += f"<start_of_turn>user{message}<end_of_turn><start_of_turn>model"
    return prompt
|
|
|
def chat_inf(system_prompt, prompt, history, client_choice, seed, temp, tokens, top_p, rep_p):
    """Stream a completion from the chosen Gemma model and render it via Streamlit.

    Parameters:
        system_prompt: optional system instruction, prepended to the prompt.
        prompt: the user prompt.
        history: list of (user, model) turns, or None for a fresh conversation.
        client_choice: key into the module-level `clients` dict.
        seed/temp/tokens/top_p/rep_p: generation hyperparameters.

    Side effects: appends (prompt, response) to *history* and writes the full
    response to the page once streaming finishes.
    """
    client = clients[client_choice]
    if not history:
        history = []
    # NOTE: the original also computed an unused `hist_len`; removed as dead code.

    generate_kwargs = dict(
        temperature=temp,
        max_new_tokens=tokens,
        top_p=top_p,
        repetition_penalty=rep_p,
        do_sample=True,
        seed=seed,
    )
    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs,
                                    stream=True, details=True,
                                    return_full_text=False)

    # Collect streamed tokens and join once at the end (avoids quadratic +=).
    output = []
    for response in stream:
        output.append(response.token.text)
    history.append((prompt, "".join(output)))
    st.write("".join(output))
|
|
|
def clear_fn():
    """Return the reset value (None) used to clear the chat history state."""
    return None
|
|
|
# Default seed suggested when "Random Seed" is checked (fresh per script run).
rand_val = random.randint(1, 1111111111111111)


def check_rand(inp, val):
    """Render the seed slider.

    Parameters:
        inp: the "Random Seed" checkbox value; when truthy the slider
             defaults to the per-run random seed, otherwise to *val*.
        val: fallback seed (coerced to int).

    Returns the seed chosen on the slider.
    """
    # Idiom fix: truthiness instead of the original `inp is True`
    # identity check (st.checkbox returns a bool, so behavior is unchanged).
    if inp:
        return st.slider("Seed", 1, 1111111111111111, rand_val)
    return st.slider("Seed", 1, 1111111111111111, int(val))
|
|
|
st.title("Google Gemma Models")

# Model picker over the clients defined above.
client_choice = st.selectbox("Models", list(clients.keys()))

# Generation hyperparameter controls.
rand = st.checkbox("Random Seed", True)
seed = check_rand(rand, rand_val)
tokens = st.slider("Max new tokens", 0, 8000, 6400, 64)
temp = st.slider("Temperature", 0.01, 1.0, 0.9, step=0.01)
top_p = st.slider("Top-P", 0.01, 1.0, 0.9, step=0.01)
rep_p = st.slider("Repetition Penalty", 0.1, 2.0, 1.0, step=0.1)

sys_inp = st.text_input("System Prompt (optional)")
# BUG FIX: the original default value interpolated a module-level `description`
# variable that was never defined (it exists only as a local inside
# CSVFileUploader.display_data), so the script crashed with NameError on every
# run. Default to the bare question; the user can paste the description in.
inp = st.text_input("Prompt for Description", value="what is qualifications required: ")
btn = st.button("Chat")
clear_btn = st.button("Clear")

if btn:
    # Fresh conversation per click; chat_inf creates its own history list.
    chat_inf(sys_inp, inp, None, client_choice, seed, temp, tokens, top_p, rep_p)
if clear_btn:
    st.session_state.history = clear_fn()
|
|
|
if __name__ == "__main__":
    st.title("Market Hipocrisy")

    # Drive the CSV exploration UI: upload, row selection, location chart.
    app = CSVFileUploader()
    app.upload_file()
    app.select_row()
    app.plot_locations()

    # Cosmetic progress indicator only — no scraping actually happens here.
    with st.spinner("Scraping data..."):
        time.sleep(5)
    st.success("Scraping complete!")

    app.display_data()
|
|