Spaces:

ajaynagotha
/

bhagvad-gita-llm-v2

Sleeping

App Files Files Community

ajaynagotha commited on Nov 23, 2024

Commit

86e6f42

verified ·

1 Parent(s): 0757937

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -12

app.py CHANGED Viewed

@@ -1,27 +1,53 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 model_name = "google/flan-t5-xl"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-gita_context = """
-The Bhagavad Gita is a 700-verse Hindu scripture that is part of the Indian epic Mahabharata. It is a dialogue between Prince Arjuna and Lord Krishna, who serves as his charioteer. The Gita's core message includes:
-1. The immortality of the soul (Atman)
-2. The nature of action (Karma) and duty (Dharma)
-3. The importance of devotion (Bhakti)
-4. The pursuit of knowledge (Jnana) and wisdom
-5. Different types of Yoga: Karma Yoga, Bhakti Yoga, Jnana Yoga, and Raja Yoga
-6. The concept of detachment from the fruits of one's actions
-7. The divine nature of Krishna as an avatar of Vishnu
-Key teachings include performing one's duty without attachment to results, the importance of self-realization, and the path to liberation (Moksha).
-"""
 def generate_response(question):
-    prompt = f"Based on the following context about the Bhagavad Gita, answer the question.\n\nContext: {gita_context}\n\nQuestion: {question}\n\nAnswer:"
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 iface = gr.Interface(

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from sentence_transformers import SentenceTransformer, util
+import torch
+from datasets import load_dataset
+# Load the model and tokenizer
 model_name = "google/flan-t5-xl"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Load the Gita dataset
+ds = load_dataset("knowrohit07/gita_dataset")
+chapters = ds['train']['Chapter']
+sentence_ranges = ds['train']['sentence_range']
+texts = ds['train']['Text']
+# Load a sentence transformer model for semantic search
+sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Encode all texts for faster similarity search
+text_embeddings = sentence_model.encode(texts, convert_to_tensor=True)
+def find_relevant_texts(query, top_k=3):
+    query_embedding = sentence_model.encode(query, convert_to_tensor=True)
+    cos_scores = util.cos_sim(query_embedding, text_embeddings)[0]
+    top_results = torch.topk(cos_scores, k=top_k)
+    relevant_texts = []
+    for score, idx in zip(top_results[0], top_results[1]):
+        relevant_texts.append(f"Chapter {chapters[idx]}, Verses {sentence_ranges[idx]}: {texts[idx]}")
+    return "\n\n".join(relevant_texts)
 def generate_response(question):
+    relevant_texts = find_relevant_texts(question)
+    prompt = f"""Based on the following excerpts from the Bhagavad Gita, answer the question.
+Relevant excerpts:
+{relevant_texts}
+Question: {question}
+Answer:"""
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 iface = gr.Interface(