Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files
- app.py +28 -21
- utils/models.py +3 -3
app.py
CHANGED
@@ -28,7 +28,7 @@ st.title("Instructor XL Embeddings")
|
|
28 |
|
29 |
|
30 |
st.write(
|
31 |
-
"The app compares the performance of the Instructor-XL Embedding Model on the text from AMD's Q1 2020 Earnings Call Transcript.
|
32 |
)
|
33 |
|
34 |
data = get_data()
|
@@ -114,29 +114,36 @@ index_mapping = {
|
|
114 |
"Represent the earnings call transcript answer for retrieval:": "week14-instructor-xl-amd-ecta-6",
|
115 |
}
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
)
|
123 |
-
|
124 |
-
pinecone_index_name = index_mapping[text_embedding_instruction]
|
125 |
-
pinecone.init(
|
126 |
-
api_key=st.secrets[f"pinecone_{pinecone_index_name}"],
|
127 |
-
environment="asia-southeast1-gcp-free",
|
128 |
-
)
|
129 |
-
|
130 |
-
pinecone_index = pinecone.Index(pinecone_index_name)
|
131 |
-
|
132 |
-
submitted = st.form_submit_button("Submit")
|
133 |
-
if submitted:
|
134 |
-
matches = query_pinecone(
|
135 |
-
dense_embedding, num_results, pinecone_index, indices
|
136 |
)
|
137 |
-
context = format_query(matches)
|
138 |
-
output_text = format_context(context)
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
tab1 = st.tabs(["View transcript"])
|
142 |
|
|
|
28 |
|
29 |
|
30 |
st.write(
|
31 |
+
"The app compares the performance of the Instructor-XL Embedding Model on the text from AMD's Q1 2020 Earnings Call Transcript."
|
32 |
)
|
33 |
|
34 |
data = get_data()
|
|
|
114 |
"Represent the earnings call transcript answer for retrieval:": "week14-instructor-xl-amd-ecta-6",
|
115 |
}
|
116 |
|
117 |
+
with col2:
|
118 |
+
with st.form("my_form"):
|
119 |
+
text_embedding_instruction = st.selectbox(
|
120 |
+
"Select instruction for Text Embedding",
|
121 |
+
text_embedding_instructions_choice,
|
122 |
+
)
|
123 |
|
124 |
+
pinecone_index_name = index_mapping[text_embedding_instruction]
|
125 |
+
pinecone.init(
|
126 |
+
api_key=st.secrets[f"pinecone_{pinecone_index_name}"],
|
127 |
+
environment="asia-southeast1-gcp-free",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
)
|
|
|
|
|
129 |
|
130 |
+
pinecone_index = pinecone.Index(pinecone_index_name)
|
131 |
+
|
132 |
+
submitted = st.form_submit_button("Submit")
|
133 |
+
if submitted:
|
134 |
+
matches = query_pinecone(
|
135 |
+
dense_embedding, num_results, pinecone_index, indices
|
136 |
+
)
|
137 |
+
context = format_query(matches)
|
138 |
+
output_text = format_context(context)
|
139 |
+
|
140 |
+
st.subheader("Retrieved Text:")
|
141 |
+
for output in output_text:
|
142 |
+
output = f"""{output}"""
|
143 |
+
st.write(
|
144 |
+
f"<ul><li><p>{output}</p></li></ul>",
|
145 |
+
unsafe_allow_html=True,
|
146 |
+
)
|
147 |
|
148 |
tab1 = st.tabs(["View transcript"])
|
149 |
|
utils/models.py
CHANGED
@@ -37,19 +37,19 @@ def preprocess_text(text):
|
|
37 |
return preprocessed_text
|
38 |
|
39 |
|
40 |
-
@st.
|
41 |
def get_data():
|
42 |
data = pd.read_csv("AMD_Q1_2020_earnings_call_data_keywords.csv")
|
43 |
return data
|
44 |
|
45 |
|
46 |
-
@st.
|
47 |
def get_instructor_embedding_model():
|
48 |
client = Client("https://awinml-api-instructor-xl-1.hf.space/")
|
49 |
return client
|
50 |
|
51 |
|
52 |
-
@st.
|
53 |
def get_bm25_model(data):
|
54 |
corpus = data.Text.tolist()
|
55 |
corpus_clean = [preprocess_text(x) for x in corpus]
|
|
|
37 |
return preprocessed_text
|
38 |
|
39 |
|
40 |
+
@st.cache_resource
|
41 |
def get_data():
|
42 |
data = pd.read_csv("AMD_Q1_2020_earnings_call_data_keywords.csv")
|
43 |
return data
|
44 |
|
45 |
|
46 |
+
@st.cache_resource
|
47 |
def get_instructor_embedding_model():
|
48 |
client = Client("https://awinml-api-instructor-xl-1.hf.space/")
|
49 |
return client
|
50 |
|
51 |
|
52 |
+
@st.cache_resource
|
53 |
def get_bm25_model(data):
|
54 |
corpus = data.Text.tolist()
|
55 |
corpus_clean = [preprocess_text(x) for x in corpus]
|