import numpy as np


def query_pinecone(dense_vec, top_k, index, indices):
    """Query a vector index for the closest "Answer" records.

    Args:
        dense_vec: Query vector passed to ``index.query`` as ``vector``.
        top_k: Maximum number of matches requested from the index.
        index: Object exposing a ``query(...)`` method (presumably a
            Pinecone index handle -- confirm against the caller).
        indices: Collection of allowed values for the ``"index"`` metadata
            field; only records whose value is in this collection match.

    Returns:
        The ``"matches"`` entry of the query response.
    """
    response = index.query(
        vector=dense_vec,
        top_k=top_k,
        # Restrict hits to answer records that belong to the given indices.
        filter={"QA_Flag": {"$eq": "Answer"}, "index": {"$in": indices}},
        include_metadata=True,
    )
    return response["matches"]


def format_query(query_results):
    """Extract ``(text, score)`` pairs from search results.

    Args:
        query_results: Iterable of result mappings, each providing
            ``result["metadata"]["Text"]`` and ``result["score"]``.

    Returns:
        A list of ``(text, score)`` tuples in the same order as the input.
    """
    return [
        (result["metadata"]["Text"], result["score"])
        for result in query_results
    ]


def format_context(context):
    """Render ``(text, score)`` pairs as display strings.

    Args:
        context: Iterable of ``(text, score)`` pairs, e.g. the output of
            ``format_query``.

    Returns:
        A list with one ``"Text: ...\\nCosine Similarity: ..."`` string per
        input pair.
    """
    return [
        f"Text: {text}\nCosine Similarity: {score}" for text, score in context
    ]


def get_bm25_search_hits(corpus, sparse_scores, top_n=50):
    """Return the first ``top_n`` indices from ``sparse_scores``.

    Args:
        corpus: Unused; retained so existing callers keep working.
        sparse_scores: Iterable of corpus indices (presumably already
            ordered by BM25 relevance -- confirm against the caller).
        top_n: Maximum number of indices to return. Values <= 0 yield an
            empty list.

    Returns:
        A list holding at most ``top_n`` indices, in iteration order.
    """
    indices = []
    for idx in sparse_scores:
        # The previous ``<=`` comparison let one extra hit through
        # (top_n + 1 results); stop as soon as the limit is reached.
        if len(indices) >= top_n:
            break
        indices.append(idx)
    return indices


def retrieve_transcript(path="2020-Apr-28-AMD.txt"):
    """Read a transcript file and return its full contents.

    Args:
        path: File to read. Defaults to the transcript file name that was
            previously hard-coded.

    Returns:
        The entire file contents as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(path, "r") as transcript_file:
        return transcript_file.read()