Spaces:
Runtime error
Runtime error
vaishakgkumar
committed on
Commit
•
2241485
1
Parent(s):
443561d
Update app.py
Browse files
app.py
CHANGED
@@ -11,44 +11,50 @@ from huggingface_hub import login
|
|
11 |
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
|
12 |
login(hf_token)
|
13 |
|
14 |
-
# Define the device
|
15 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
|
17 |
# Load tokenizer and model
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
model
|
|
|
|
|
|
|
24 |
|
25 |
class ChatBot:
|
26 |
def __init__(self):
|
27 |
self.history = []
|
28 |
|
29 |
-
def predict(self, user_input, system_prompt="You are an expert analyst
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
)
|
44 |
-
|
45 |
-
#
|
46 |
-
|
47 |
-
|
48 |
-
# Free up memory
|
49 |
-
del tokens
|
50 |
-
torch.cuda.empty_cache()
|
51 |
|
|
|
|
|
52 |
return response_text
|
53 |
|
54 |
bot = ChatBot()
|
|
|
# --- Hub authentication ---------------------------------------------------
# Token is read from the environment; login() registers it for all hub calls.
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)

# Select GPU when available, otherwise fall back to CPU.
# NOTE(review): `device` is defined but the model is never moved to it in the
# original code — confirm whether `.to(device)` is intended downstream.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Base model ID and the PEFT adapter repository trained on top of it.
base_model_id = "stabilityai/stablelm-3b-4e1t"
model_directory = "vaishakgkumar/stablemedv1"

# Instantiate the tokenizer from the base model.
# Left padding so generated continuations align at the sequence end.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Load the PEFT adapter config, the base causal-LM weights, then wrap the
# base model with the adapter. Reuses base_model_id / model_directory instead
# of repeating the string literals (they were duplicated in the original).
peft_config = PeftConfig.from_pretrained(model_directory, token=hf_token)
peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, model_directory, token=hf_token)
class ChatBot:
    """Minimal stateful chat wrapper around the PEFT model.

    Keeps a running token-id history so each call to ``predict`` sees the
    full conversation so far.
    """

    def __init__(self):
        # Conversation history: an empty list before the first turn, then a
        # (1, seq_len) token-id tensor afterwards.
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
        """Generate a reply to ``user_input`` and fold it into the history.

        Returns the decoded text of the full generated sequence (prompt
        included, special tokens stripped).
        """
        # NOTE(review): the system prompt is appended *after* the user input,
        # exactly as in the original code — confirm the intended ordering.
        formatted_input = f"{user_input}{system_prompt}"

        # Encode the new turn.
        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")

        # Prepend the conversation so far, if any.
        if len(self.history) > 0:
            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
        else:
            chat_history_ids = user_input_ids

        # Generate; ``generate`` returns prompt + new tokens as one tensor.
        response = peft_model.generate(input_ids=chat_history_ids, max_length=1200, pad_token_id=tokenizer.eos_token_id)

        # BUG FIX: store the *full* generated sequence (prompt + reply) so the
        # model's own responses become part of subsequent context. The
        # original kept only ``chat_history_ids`` (the prompt side), silently
        # dropping every reply from the conversation history.
        self.history = response

        # Decode and return the response text.
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
        return response_text


bot = ChatBot()