Spaces:
Runtime error
crazyTransBitch #1
by captainkyd - opened

app.py CHANGED
@@ -2,7 +2,8 @@ import spaces
 import gradio as gr
 import torch
 import transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import accelerate
 import os
 
 title = """# Welcome to 🌟Tonic's🐇🥷🏻Trinity
@@ -23,6 +24,11 @@ Answer the Question by exploring multiple reasoning paths as follows:
 - Please note that while the focus is on the final answer in the response, it should also include intermediate thoughts inline to illustrate the deliberative reasoning process.
 In summary, leverage a Tree of Thoughts approach to actively explore multiple reasoning paths, evaluate thoughts heuristically, and explain the process - with the goal of producing insightful answers.
 """
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
 
 model_path = "WhiteRabbitNeo/Trinity-13B"
 
@@ -32,10 +38,9 @@ if not hf_token:
 
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
-
-
-
-    trust_remote_code=True,
+    device_map="auto",
+    trust_remote_code=True,
+    quantization_config=quantization_config
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
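For context, here is a minimal sketch of how the model-loading portion of app.py reads once this diff is applied. Only the hunks above are known; the hf_token check, the title/prompt text, and the Gradio UI are assumed unchanged and are elided here.

```python
# Minimal sketch (not the full app.py): the model-loading section after this diff.
# Assumes the bitsandbytes and accelerate packages are installed and a CUDA GPU
# is available for 4-bit loading.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit quantization: double quantization plus bfloat16 compute keeps the
# 13B checkpoint within a single-GPU memory budget.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_path = "WhiteRabbitNeo/Trinity-13B"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",          # let accelerate place the quantized layers automatically
    trust_remote_code=True,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
```

Since the change adds `import accelerate` and relies on bitsandbytes for the 4-bit load, both packages would also need to appear in the Space's requirements for the revised code to import cleanly.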