ajaynagotha committed on
Commit
86e6f42
·
verified ·
1 Parent(s): 0757937

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -12
app.py CHANGED
@@ -1,27 +1,53 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
3
 
 
4
  model_name = "google/flan-t5-xl"
5
  tokenizer = AutoTokenizer.from_pretrained(model_name)
6
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
7
 
8
- gita_context = """
9
- The Bhagavad Gita is a 700-verse Hindu scripture that is part of the Indian epic Mahabharata. It is a dialogue between Prince Arjuna and Lord Krishna, who serves as his charioteer. The Gita's core message includes:
10
- 1. The immortality of the soul (Atman)
11
- 2. The nature of action (Karma) and duty (Dharma)
12
- 3. The importance of devotion (Bhakti)
13
- 4. The pursuit of knowledge (Jnana) and wisdom
14
- 5. Different types of Yoga: Karma Yoga, Bhakti Yoga, Jnana Yoga, and Raja Yoga
15
- 6. The concept of detachment from the fruits of one's actions
16
- 7. The divine nature of Krishna as an avatar of Vishnu
17
- Key teachings include performing one's duty without attachment to results, the importance of self-realization, and the path to liberation (Moksha).
18
- """
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def generate_response(question):
21
- prompt = f"Based on the following context about the Bhagavad Gita, answer the question.\n\nContext: {gita_context}\n\nQuestion: {question}\n\nAnswer:"
 
 
 
 
 
 
 
 
 
 
22
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
23
  outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
24
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
25
  return response
26
 
27
  iface = gr.Interface(
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import torch
5
+ from datasets import load_dataset
6
 
7
# Load the generator: FLAN-T5 XL together with its matching tokenizer.
model_name = "google/flan-t5-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Load the Gita dataset. The three columns are parallel: position i in each
# one describes the same passage. list() materialises them once so that the
# integer indexing done during retrieval works on plain Python lists,
# whatever column type the installed `datasets` version returns.
ds = load_dataset("knowrohit07/gita_dataset")
chapters = list(ds['train']['Chapter'])
sentence_ranges = list(ds['train']['sentence_range'])
texts = list(ds['train']['Text'])

# Sentence-transformer model used for semantic search over the passages.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every passage once at start-up so that each query only needs to
# embed the question itself before the similarity search.
text_embeddings = sentence_model.encode(texts, convert_to_tensor=True)
23
+
24
def find_relevant_texts(query, top_k=3):
    """Return the Gita passages most semantically similar to ``query``.

    The query is embedded with ``sentence_model`` and compared by cosine
    similarity against the precomputed ``text_embeddings``.

    Args:
        query: Free-text question to search for.
        top_k: Maximum number of passages to return. Values larger than the
            number of available passages are clamped to that number.

    Returns:
        A single string containing the selected passages, highest similarity
        first, each formatted as ``"Chapter <c>, Verses <range>: <text>"``
        and separated from the next by a blank line.
    """
    query_embedding = sentence_model.encode(query, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, text_embeddings)[0]

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = min(top_k, cos_scores.shape[0])
    # Convert to plain ints: they index the parallel column sequences
    # reliably, unlike 0-dim tensors.
    best_indices = torch.topk(cos_scores, k=k).indices.tolist()

    return "\n\n".join(
        f"Chapter {chapters[i]}, Verses {sentence_ranges[i]}: {texts[i]}"
        for i in best_indices
    )
34
 
35
  def generate_response(question):
36
+ relevant_texts = find_relevant_texts(question)
37
+
38
+ prompt = f"""Based on the following excerpts from the Bhagavad Gita, answer the question.
39
+
40
+ Relevant excerpts:
41
+ {relevant_texts}
42
+
43
+ Question: {question}
44
+
45
+ Answer:"""
46
+
47
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
48
  outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
49
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
+
51
  return response
52
 
53
  iface = gr.Interface(