"""Milvus vector-search helpers that embed the query text with an Ollama model."""

from typing import Sequence

from langchain_community.embeddings.ollama import OllamaEmbeddings
from pymilvus import AnnSearchRequest, MilvusClient, RRFRanker, WeightedRanker

# Not referenced anywhere in this file; kept at module level in case other
# modules import it.
reranker = RRFRanker(k=10)

# Fields queried by hybrid_search, in the order their weights are given.
# NOTE(review): "tags" is searched with the same dense query embedding as the
# two *_vector fields -- confirm it really is a vector field in the schema.
_HYBRID_ANNS_FIELDS = ("title_vector", "chunk_vector", "tags")


def _cosine_ann_param() -> dict:
    """Return a fresh per-request search parameter dict (COSINE metric)."""
    return {"metric_type": "COSINE", "params": {"nprobe": 10, "level": 3}}


def _like_ratio(hit) -> float:
    """Return likes / (dislikes + 1) for a search hit, treating None as 0."""
    entity = hit["entity"]
    likes = entity["likes"] or 0
    dislikes = entity["dislikes"] or 0
    # The +1 keeps the ratio defined when there are no dislikes.
    return likes / (dislikes + 1)


def hybrid_search(
    query,
    embed_model: OllamaEmbeddings,
    collection_name,
    client: MilvusClient,
    *,
    limit: int = 3,
    weights: Sequence[float] = (0.3, 0.6, 0.1),
):
    """Run a weighted multi-field ANN search for ``query``.

    The query is embedded once and the same embedding is searched against
    each field in ``_HYBRID_ANNS_FIELDS``. The per-field results are fused
    with a ``WeightedRanker``.

    Args:
        query: Text to embed and search for.
        embed_model: Embedding model; only ``embed_query`` is called.
        collection_name: Name of the Milvus collection to search.
        client: Connected Milvus client.
        limit: Number of hits requested from each field and returned after
            reranking.
        weights: One weight per searched field, passed positionally to
            ``WeightedRanker`` in the same order as the fields.

    Returns:
        Whatever ``client.hybrid_search`` returns, with the ``title``,
        ``chunk`` and ``link`` output fields requested.

    Raises:
        ValueError: If ``weights`` does not have one entry per searched field.
    """
    if len(weights) != len(_HYBRID_ANNS_FIELDS):
        raise ValueError(
            f"expected {len(_HYBRID_ANNS_FIELDS)} weights, got {len(weights)}"
        )

    query_embedding = embed_model.embed_query(query)

    # One request per field; each gets its own param dict so that no state is
    # shared between requests.
    requests = [
        AnnSearchRequest(
            data=[query_embedding],
            anns_field=field,
            param=_cosine_ann_param(),
            limit=limit,
        )
        for field in _HYBRID_ANNS_FIELDS
    ]

    return client.hybrid_search(
        collection_name=collection_name,
        ranker=WeightedRanker(*weights),
        reqs=requests,
        limit=limit,
        output_fields=["title", "chunk", "link"],
    )


def single_vector_search(
    query,
    embed_model: OllamaEmbeddings,
    collection_name,
    client: MilvusClient,
    anns_field,
    *,
    limit: int = 20,
    filter_expr: str = "content_type == 'A'",
) -> list:
    """Search one vector field and order the hits by like/dislike ratio.

    Args:
        query: Text to embed and search for.
        embed_model: Embedding model; only ``embed_query`` is called.
        collection_name: Name of the Milvus collection to search.
        client: Connected Milvus client.
        anns_field: Name of the vector field to search.
        limit: Maximum number of hits to request.
        filter_expr: Filter expression passed to ``client.search``.

    Returns:
        The hits for the query, sorted by ``likes / (dislikes + 1)`` in
        descending order. A ``None`` count is treated as 0 so that it does
        not break the sort.
    """
    query_embedding = embed_model.embed_query(query)
    # A single query vector is sent, so its hits are the first result entry.
    hits = client.search(
        collection_name=collection_name,
        data=[query_embedding],
        limit=limit,
        search_params={"metric_type": "COSINE", "params": {}},
        anns_field=anns_field,
        filter=filter_expr,
        output_fields=[
            "title",
            "content",
            "url",
            "content_type",
            "likes",
            "dislikes",
        ],
    )[0]
    return sorted(hits, key=_like_ratio, reverse=True)


def main() -> None:
    """Run a sample hybrid search and print the matching chunks."""
    embed_model = OllamaEmbeddings(model="bge-m3")
    client = MilvusClient(uri="http://192.168.5.103:19530")
    query = "How to make a good blow job"

    # For the question/answer collection, call single_vector_search with
    # "t_sur_sex_ed_question_answer_spider" and the "content_vector" field.
    hits = hybrid_search(
        query, embed_model, "t_sur_sex_ed_article_spider", client
    )[0]
    for hit in hits:
        print(hit["entity"]["chunk"])
        print("\n #############################")


if __name__ == "__main__":
    main()