Spaces:
Sleeping
Sleeping
File size: 7,155 Bytes
711cd18 357410f 711cd18 357410f 711cd18 357410f e0d7678 357410f 711cd18 357410f 711cd18 e0d7678 357410f 711cd18 357410f 711cd18 357410f 711cd18 e0d7678 711cd18 357410f 0c23cf6 82f988d 0c23cf6 357410f 711cd18 357410f 711cd18 e0d7678 711cd18 e0d7678 357410f e0d7678 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import streamlit as st
import numpy as np
import nltk
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
import os
import re
import tempfile
# Function to get credentials from environment variable and create a temporary file
def get_credentials():
    """Write the credentials JSON from the environment to a temporary file.

    Reads the ``JSONSTR`` environment variable (the credentials JSON stored
    as a string), writes it verbatim to a newly created ``.json`` temporary
    file and returns the path of that file.

    Returns:
        str: Path of the temporary credentials file. The file is created
        with ``delete=False``, so it remains on disk after this function
        returns; nothing in this function removes it.

    Raises:
        ValueError: If the ``JSONSTR`` environment variable is not set.
    """
    creds_json_str = os.getenv("JSONSTR")
    if creds_json_str is None:
        # Name the variable that is actually read so the message is actionable.
        raise ValueError(
            "JSONSTR not found in environment "
            "(expected the Google application credentials JSON as a string)"
        )

    # delete=False keeps the file around after the handle is closed, which is
    # required because the path is consumed later by other code.
    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
        temp.write(creds_json_str)
    return temp.name
# Expose the temporary credentials file through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()

# Model-loading style settings. NOTE(review): nothing in the visible part of
# this file reads these three values -- confirm whether they are still needed.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Ensure the NLTK 'punkt' tokenizer data is available (download it when the
# lookup fails), then load the English sentence splitter used on user input.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
text_split_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# Function to predict emotions using the custom trained model
def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-east4",
    api_endpoint: str = "us-east4-aiplatform.googleapis.com",
) -> List[str]:
    """Request predictions from a deployed endpoint and extract emotion labels.

    Args:
        project: Project identifier used to build the endpoint path.
        endpoint_id: Identifier of the endpoint to query.
        instances: One instance dict, or a list of instance dicts, to send.
        location: Location used to build the endpoint path.
        api_endpoint: Host name the prediction client connects to.

    Returns:
        The whitespace-separated tokens found in the string predictions that
        are also values of the module-level ``d_emotion`` mapping, in the
        order they were returned. Non-string predictions are printed and
        skipped.
    """
    prediction_client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # Accept a single instance as well as a list of them.
    if not isinstance(instances, list):
        instances = [instances]
    request_instances = [json_format.ParseDict(item, Value()) for item in instances]
    request_parameters = json_format.ParseDict({}, Value())

    endpoint_path = prediction_client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = prediction_client.predict(
        endpoint=endpoint_path,
        instances=request_instances,
        parameters=request_parameters,
    )

    tokens = []
    for prediction in response.predictions:
        if not isinstance(prediction, str):
            print(" prediction (unknown type, skipping):", prediction)
            continue
        # Drop newlines and the listed marker words from the generated text
        # before splitting it into candidate labels.
        stripped = re.sub(r'(\n|Origin|###|Optimization|Response:)', '', prediction)
        tokens += stripped.split()

    # Keep only tokens that are known emotion labels.
    known_emotions = d_emotion.values()
    return [token for token in tokens if token in known_emotions]
# Integer id -> emotion label lookup (28 labels, ids 0 through 27). The ids
# come from each label's position in the tuple, so the order must not change.
d_emotion = dict(enumerate((
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
    'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
    'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
    'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise', 'neutral',
)))
st.write("Write or paste any number of document texts to analyse the emotion percentage with your document")

# Sample passage that can be loaded into the input box with one click.
sample_text = (
    "Once, in a small village nestled in the rolling hills of Tuscany, lived an elderly woman named Isabella. "
    "She had spent her entire life in this village, raising her children and caring for her garden, which was the most "
    "beautiful in the region. Her husband, Marco, had passed away many years ago, leaving her with a heart full of memories "
    "and a small, quaint house that overlooked the lush vineyards."
)

# Clicking the button stores the sample under the text area's session-state
# key *before* the text area is created, so the widget is pre-filled. Because
# the value lives in session state, it survives the rerun triggered by the
# "Analyze" button (a button only reads True on the run right after a click).
# The previous approach called st.text_input without its required label and
# lost the sample text on the next rerun.
if st.button("Use Sample Text"):
    st.session_state["user_input"] = sample_text
user_input = st.text_area('Enter Text to Analyze', key="user_input")

button = st.button("Analyze")
# Prompt template sent to the model for every sentence; the single `{}` slot
# receives the sentence text. The wording is part of the model input, so it
# is kept verbatim.
alpaca_prompt = """Below is a conversation between a human and an AI agent. write a response based on the input.
### Instruction:
predict the emotion word or words
### Input:
{}
### Response:
"""


@st.cache_data
def get_emotion_chart(predictions):
    """Build a bar chart with the number of occurrences of each emotion.

    Args:
        predictions: List of predicted emotion labels (one entry per hit).

    Returns:
        A Plotly ``Figure`` with one bar per distinct emotion.
    """
    emotion_counts = pd.Series(predictions).value_counts().reset_index()
    emotion_counts.columns = ['Emotion', 'Count']
    fig_bar = go.Figure()
    fig_bar.add_trace(go.Bar(
        x=emotion_counts['Emotion'],
        y=emotion_counts['Count'],
        marker_color='indianred',
    ))
    fig_bar.update_layout(
        title='Count of Each Emotion in Given Text',
        xaxis_title='Emotion',
        yaxis_title='Count',
    )
    return fig_bar


@st.cache_data
def get_emotion_heatmap(predictions):
    """Build a heatmap whose diagonal holds the count of each emotion.

    Both axes list every label of ``d_emotion``; only the cell where an
    emotion meets itself is filled, all other cells stay 0.

    Args:
        predictions: List of predicted emotion labels (one entry per hit).

    Returns:
        A Plotly ``Figure`` containing the heatmap.
    """
    emotion_counts = pd.Series(predictions).value_counts().reset_index()
    emotion_counts.columns = ['Emotion', 'Count']

    # Materialise the labels once; a plain list is an unambiguous axis spec.
    labels = list(d_emotion.values())
    heatmap_matrix = pd.DataFrame(0, index=labels, columns=labels)
    for emotion, count in zip(emotion_counts['Emotion'], emotion_counts['Count']):
        heatmap_matrix.at[emotion, emotion] = count

    fig = go.Figure(data=go.Heatmap(
        z=heatmap_matrix.values,
        x=heatmap_matrix.columns.tolist(),
        y=heatmap_matrix.index.tolist(),
        text=heatmap_matrix.values,
        hovertemplate="Count: %{text}",
        colorscale='Viridis',
    ))
    fig.update_layout(
        title='Emotion Heatmap',
        xaxis_title='Predicted Emotion',
        yaxis_title='Predicted Emotion',
    )
    return fig


if button and user_input:
    # One request instance per non-empty sentence of the input text.
    input_array = text_split_tokenizer.tokenize(user_input)
    instances = [
        {
            "inputs": alpaca_prompt.format(sentence.strip()),
            "parameters": {
                "max_new_tokens": 4,
                "temperature": 0.00001,
                "top_p": 0.9,
                "top_k": 10,
            },
        }
        for sentence in input_array
        if sentence.strip()
    ]

    # Whitespace-only input is truthy but yields no sentences; do not send an
    # empty request to the endpoint in that case.
    predictions = []
    if instances:
        location = os.environ["location"]
        predictions = predict_custom_trained_model_sample(
            project=os.environ["project"],
            endpoint_id=os.environ["endpoint_id"],
            location=location,
            # Keep the API host in the same region as the endpoint path
            # instead of relying on the function's hard-coded default.
            api_endpoint=f"{location}-aiplatform.googleapis.com",
            instances=instances,
        )

    if not predictions:
        # Nothing recognisable came back: say so instead of drawing empty charts.
        st.warning("No emotions could be detected in the provided text.")
    else:
        # Share of each emotion among all predicted labels, in percent.
        emotion_counts = pd.Series(predictions).value_counts(normalize=True).reset_index()
        emotion_counts.columns = ['Emotion', 'Percentage']
        emotion_counts['Percentage'] *= 100
        fig_pie = px.pie(
            emotion_counts,
            values='Percentage',
            names='Emotion',
            title='Percentage of Emotions in Given Text',
        )
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')

        fig_bar = get_emotion_chart(predictions)
        fig_heatmap = get_emotion_heatmap(predictions)

        tab1, tab2, tab3 = st.tabs(["Emotion Analysis", "Emotion Counts Distribution", "Heatmap"])
        with tab1:
            st.plotly_chart(fig_pie)
        with tab2:
            st.plotly_chart(fig_bar)
        with tab3:
            st.plotly_chart(fig_heatmap)
|