Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Sleeping

App Files Files Community

VanguardAI committed on Jun 10, 2024

Commit

55f4e8e

verified ·

1 Parent(s): 8933f51

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -35

app.py CHANGED Viewed

@@ -1,26 +1,39 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import LoraConfig, PeftModel
 import gradio as gr
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
-# Load base model
-base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit")
-# Apply LoRA adapters
-lora_config = LoraConfig(
     r=16,
     lora_alpha=16,
-    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj",],
     lora_dropout=0,
     bias="none",
     task_type="CAUSAL_LM"
 )
-model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=lora_config)
-condition= '''
 ALWAYS provide output in a JSON format.
 '''
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -34,6 +47,7 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 ### Response:
 {}"""
 @spaces.GPU(duration=300)
 def chunk_it(inventory_list, user_input_text):
     inputs = tokenizer(
@@ -41,7 +55,7 @@ def chunk_it(inventory_list, user_input_text):
             alpaca_prompt.format(
                 '''
                 You will receive text input that you need to analyze to perform the following tasks:
                 transaction: Record the details of an item transaction.
                 last n days transactions: Retrieve transaction records for a specified time period.
                 view risk inventory: View inventory items based on a risk category.
@@ -49,33 +63,33 @@ def chunk_it(inventory_list, user_input_text):
                 new items: Add new items to the inventory.
                 report generation: Generate various inventory reports.
                 delete item: Delete an existing Item.
                 Required Parameters:
                 Each task requires specific parameters to execute correctly:
                 transaction:
-                    ItemName (string)
-                    ItemQt (quantity - integer)
-                    Type (string: "sale" or "purchase" or "return")
-                    ReorderPoint (integer)
                 last n days transactions:
-                    ItemName (string)
-                    Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
                 view risk inventory:
-                    RiskType (string: "overstock", "understock", or "Null" for all risk types)
                 view inventory:
-                    ItemName (string)
                 new items:
-                    ItemName (string)
-                    SellingPrice (number)
-                    CostPrice (number)
                 report generation:
-                    ItemName (string)
-                    Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
-                    ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
                 The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
-                ''' + inventory_list +
                 '''
                 ALWAYS provide output in a JSON format.
                 ''',  # instruction
@@ -83,16 +97,14 @@ def chunk_it(inventory_list, user_input_text):
                 "",  # output - leave this blank for generation!
             )
         ], return_tensors="pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
     content = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     return content[0]
-iface=gr.Interface(fn=chunk_it,
-                  inputs="text",
-                  outputs="text",
-                  title="Bhashini_LLaMa_LoRA",
-                  )
 iface = gr.Interface(
     fn=chunk_it,
     inputs=[
@@ -102,4 +114,5 @@ iface = gr.Interface(
     outputs="text",
     title="Formatter Pro",
 )
 iface.launch(inline=False)

 import torch
+import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import LoraConfig, PeftModel, get_peft_model
 import gradio as gr
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
+# Configuration for 4-bit quantization
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+# Load the base model with the 4-bit quantization config defined above
+base_model = AutoModelForCausalLM.from_pretrained(
+    "meta-llama/Meta-Llama-3-8B-Instruct", # Replace with actual base model
+    quantization_config=bnb_config,
+)
+# Apply LoRA adapters
+peft_config = LoraConfig(
     r=16,
     lora_alpha=16,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
     lora_dropout=0,
     bias="none",
     task_type="CAUSAL_LM"
 )
+model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=peft_config)
+condition = '''
 ALWAYS provide output in a JSON format.
 '''
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Response:
 {}"""
 @spaces.GPU(duration=300)
 def chunk_it(inventory_list, user_input_text):
     inputs = tokenizer(
             alpaca_prompt.format(
                 '''
                 You will receive text input that you need to analyze to perform the following tasks:
                 transaction: Record the details of an item transaction.
                 last n days transactions: Retrieve transaction records for a specified time period.
                 view risk inventory: View inventory items based on a risk category.
                 new items: Add new items to the inventory.
                 report generation: Generate various inventory reports.
                 delete item: Delete an existing Item.
                 Required Parameters:
                 Each task requires specific parameters to execute correctly:
                 transaction:
+                  ItemName (string)
+                  ItemQt (quantity - integer)
+                  Type (string: "sale" or "purchase" or "return")
+                  ReorderPoint (integer)
                 last n days transactions:
+                  ItemName (string)
+                  Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
                 view risk inventory:
+                  RiskType (string: "overstock", "understock", or "Null" for all risk types)
                 view inventory:
+                  ItemName (string)
                 new items:
+                  ItemName (string)
+                  SellingPrice (number)
+                  CostPrice (number)
                 report generation:
+                  ItemName (string)
+                  Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
+                  ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
                 The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
+                ''' + inventory_list +
                 '''
                 ALWAYS provide output in a JSON format.
                 ''',  # instruction
                 "",  # output - leave this blank for generation!
             )
         ], return_tensors="pt").to("cuda")
+    # Generate up to 216 new tokens with the KV cache enabled
+    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
     content = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     return content[0]
+# Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
     inputs=[
     outputs="text",
     title="Formatter Pro",
 )
 iface.launch(inline=False)