awinml's picture
Upload 7 files (#5)
92808fd
raw
history blame
1.11 kB
import numpy as np
def query_pinecone(dense_vec, top_k, index, indices):
    """Run a filtered vector query against ``index`` and return its matches.

    Args:
        dense_vec: Query vector, forwarded as ``vector``.
        top_k: Number of results requested, forwarded as ``top_k``.
        index: Object exposing a ``query(...)`` method (presumably a Pinecone
            index -- confirm against the caller).
        indices: Values allowed for the ``"index"`` metadata field.

    Returns:
        The ``"matches"`` entry of the query response.
    """
    # Keep only records flagged as answers whose "index" metadata is one of
    # the caller-supplied values.
    metadata_filter = {
        "QA_Flag": {"$eq": "Answer"},
        "index": {"$in": indices},
    }
    response = index.query(
        vector=dense_vec,
        top_k=top_k,
        filter=metadata_filter,
        include_metadata=True,
    )
    return response["matches"]
def format_query(query_results):
    """Reduce search results to ``(text, score)`` pairs.

    Args:
        query_results: Iterable of result mappings, each providing
            ``result["metadata"]["Text"]`` and ``result["score"]``.

    Returns:
        A list of ``(text, score)`` tuples in the same order as the input.
    """
    pairs = []
    for match in query_results:
        passage = match["metadata"]["Text"]
        pairs.append((passage, match["score"]))
    return pairs
def format_context(context):
    """Render ``(text, score)`` pairs as display strings.

    Args:
        context: Iterable of ``(text, score)`` pairs.

    Returns:
        A list with one two-line string per pair: the text on the first
        line and the score, labelled "Cosine Similarity", on the second.
    """
    return [
        f"Text: {passage}\nCosine Similarity: {similarity}"
        for passage, similarity in context
    ]
def get_bm25_search_hits(corpus, sparse_scores, top_n=50):
    """Return at most ``top_n`` leading entries of ``sparse_scores``.

    Args:
        corpus: Accepted for interface compatibility; it is not consulted.
        sparse_scores: Iterable of corpus indices, presumably already ordered
            by relevance -- confirm against the caller.
        top_n: Maximum number of indices to return. Values <= 0 yield an
            empty list.

    Returns:
        A list holding the first ``top_n`` items of ``sparse_scores`` (fewer
        if the iterable is shorter), in their original order.
    """
    indices = []
    for idx in sparse_scores:
        # Stop as soon as the limit is reached; the previous ``<=`` check
        # let one extra item through and kept scanning the whole iterable.
        if len(indices) >= top_n:
            break
        indices.append(idx)
    return indices
def retrieve_transcript():
    """Read and return the full text of ``2020-Apr-28-AMD.txt``.

    The path is relative, so it is resolved against the current working
    directory.

    Returns:
        The file's entire contents as a single string.

    Raises:
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError`` when it does not exist).
    """
    # The context manager closes the handle even if the read fails; the
    # previous version never closed it.
    with open("2020-Apr-28-AMD.txt", "r") as transcript_file:
        return transcript_file.read()