Spaces:

DarkAngel
/

BhagavadGita-LLama8b

Runtime error

App Files Community

DarkAngel committed on Dec 30, 2024

Commit

b2f4773

verified ·

1 Parent(s): bf11651

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -9

app.py CHANGED Viewed

@@ -1,12 +1,17 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from peft import PeftModel
-# Use a smaller model to reduce memory usage
-base_model = AutoModelForCausalLM.from_pretrained("meta/llama-2-7b-hf")  # Smaller model
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
-tokenizer = AutoTokenizer.from_pretrained("meta/llama-2-7b-hf")  # Use the tokenizer for the smaller model
 def generate_response(shloka, transliteration):
     """
     Generates the response using the fine-tuned LLaMA model.
@@ -17,14 +22,14 @@ def generate_response(shloka, transliteration):
             "content": f"Shloka: {shloka} Transliteration: {transliteration}"
         }
     ]
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt"
-    ).to("cpu")  # Ensure CPU usage
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
@@ -36,7 +41,6 @@ def generate_response(shloka, transliteration):
     )
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
@@ -44,17 +48,19 @@ def generate_response(shloka, transliteration):
         hindi_meaning = hindi_and_word[0].strip()
         word_meaning = hindi_and_word[1].strip()
         formatted_response = (
             f"English Meaning:\n{english_meaning}\n\n"
             f"Hindi Meaning:\n{hindi_meaning}\n\n"
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
         formatted_response = raw_response
     return formatted_response
-# Gradio interface
 interface = gr.Interface(
     fn=generate_response,
     inputs=[
@@ -68,4 +74,4 @@ interface = gr.Interface(
 # Launch the interface
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+# Load the fine-tuned model and tokenizer
+# model, tokenizer = FastLanguageModel.from_pretrained("lora_model")
 from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 def generate_response(shloka, transliteration):
     """
     Generates the response using the fine-tuned LLaMA model.
             "content": f"Shloka: {shloka} Transliteration: {transliteration}"
         }
     ]
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt"
+    ).to("cpu")
+    # Generate response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
     )
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
         hindi_meaning = hindi_and_word[0].strip()
         word_meaning = hindi_and_word[1].strip()
         formatted_response = (
             f"English Meaning:\n{english_meaning}\n\n"
             f"Hindi Meaning:\n{hindi_meaning}\n\n"
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
         formatted_response = raw_response
     return formatted_response
 interface = gr.Interface(
     fn=generate_response,
     inputs=[
 # Launch the interface
 if __name__ == "__main__":
+    interface.launch()