FlawedLLM committed
Commit be551a6
1 Parent(s): 42fc25c

Update app.py

Files changed (1):
  1. app.py +17 -16
app.py CHANGED
@@ -2,22 +2,23 @@ import re
 import spaces
 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # from peft import PeftModel, PeftConfig
 
 
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
-# quantization_config = BitsAndBytesConfig(
-#     load_in_4bit=True,
-#     bnb_4bit_use_double_quant=True,
-#     bnb_4bit_quant_type="nf4",
-#     bnb_4bit_compute_dtype=torch.float16)
-# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/BhashiniLLM",
-#                                              device_map="auto",
-#                                              quantization_config=quantization_config,
-#                                              torch_dtype=torch.float16,
-#                                              low_cpu_mem_usage=True,
-#                                              use_safetensors=True,
-#                                              )
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16)
+model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00",
+                                             device_map="auto",
+                                             quantization_config=quantization_config,
+                                             torch_dtype=torch.float16,
+                                             low_cpu_mem_usage=True,
+                                             use_safetensors=True,
+                                             )
 
 # # Assuming you have your HF repository in this format: "your_username/your_model_name"
 # model_id = "FlawedLLM/BhashiniLLM"
@@ -58,10 +59,10 @@ import torch
 # # torch_dtype =torch.float16
 # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9",config=config, ignore_mismatched_sizes=True).to('cuda')
 # Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
-model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00").to('cuda')
+
+# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
+# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00").to('cuda')
 
 @spaces.GPU(duration=300)
 def chunk_it(input_command):
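
For reference, a minimal sketch of exercising the tokenizer and 4-bit model that this commit loads at import time; the prompt text and generation settings below are illustrative assumptions, not part of the commit:

# Illustrative only: relies on the `tokenizer` and `model` objects created
# by the quantized load above; prompt and decoding settings are made up.
inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))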
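The diff cuts off at the chunk_it signature, so its body is not shown. A hypothetical sketch, assuming the handler tokenizes the incoming command, generates with the quantized model, and is wired to a plain Gradio text interface; nothing below the decorator is confirmed by this commit:

# Hypothetical completion: the real body of chunk_it is outside this diff.
@spaces.GPU(duration=300)
def chunk_it(input_command):
    # Encode the user command and generate a response with the 4-bit model.
    inputs = tokenizer(input_command, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Typical Space entry point (also an assumption, not shown in the diff).
iface = gr.Interface(fn=chunk_it, inputs="text", outputs="text")
iface.launch()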