File size: 3,391 Bytes
fe1ad0f c1ec7d4 cae450a fe1ad0f c1ec7d4 7c4cfa7 c1ec7d4 7c4cfa7 b1db2e4 fe1ad0f d4fef3a b1db2e4 d4fef3a b1db2e4 d4fef3a b1db2e4 d4fef3a b1db2e4 d4fef3a b1db2e4 405bbd1 b1db2e4 d4fef3a b1db2e4 d4fef3a 9d965a7 d4fef3a b1db2e4 d4fef3a c1ec7d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
---
tags:
- autotrain
- meta-llama
- meta-llama/Llama-2-7b-hf
inference: true
widget:
- text: >
instruction: "If you are a doctor, please answer the medical questions based
on the patient's description."
input: "Hi, I had a subarachnoid bleed and coiling of brain aneurysm last
year. I am having some major bilateral temple pain along with numbness that
comes and goes in my left arm/hand/fingers. I have had headaches since the
aneurysm, but this is different. Also, my moods have been horrible for the
past few weeks."
response: ''
library_name: peft
pipeline_tag: text-generation
---
```python
import transformers
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from torch import cuda, bfloat16
# Base checkpoint that the adapter below is loaded on top of.
base_model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Device string used in the status message below: the current CUDA device
# when one is available, otherwise the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization with double quantization and a bfloat16 compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

# Placeholder value: substitute your own Hugging Face access token here.
# NOTE(review): recent transformers releases appear to prefer `token=` over
# `use_auth_token=` -- confirm against the installed version before switching.
hf_auth = "your-huggingface-access-token"

model_config = transformers.AutoConfig.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth,
)

# device_map='auto' lets the loader decide where the quantized weights live.
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth,
)

# Adapter configuration; not consumed further down in this snippet, kept so
# the adapter's metadata can be inspected interactively.
config = PeftConfig.from_pretrained("Ashishkr/llama2_medical_consultation")

# Attach the adapter to the quantized base model. The model is deliberately
# NOT moved with `.to(device)` afterwards: the weights were already placed by
# device_map='auto', and relocating a quantized, dispatched model is redundant
# and can conflict with that placement.
model = PeftModel.from_pretrained(model, "Ashishkr/llama2_medical_consultation")
model.eval()
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth,
)
```
```python
def llama_generate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92,
) -> str:
    """Generate a continuation of *prompt* and return only the new text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A value greater than zero enables
            sampling; zero (or less) falls back to greedy decoding.

    Returns:
        The decoded text of the newly generated tokens, without the prompt.
    """
    from contextlib import nullcontext

    # Put the inputs wherever the model lives instead of relying on a
    # module-level ``device`` variable.
    target_device = model.device
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(target_device)
    prompt_token_count = inputs["input_ids"].shape[1]

    # Some tokenizers define no pad token; reuse EOS so that ``generate``
    # does not receive ``pad_token_id=None``.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # ``temperature`` only takes effect when sampling is switched on, so
    # enable it explicitly; otherwise request plain greedy decoding.
    if temperature > 0:
        sampling_kwargs = {"do_sample": True, "temperature": temperature}
    else:
        sampling_kwargs = {"do_sample": False}

    # Mixed-precision autocast is only meaningful on CUDA. On other devices
    # skip it entirely (and avoid querying CUDA capabilities).
    if target_device.type == "cuda":
        # Check if bfloat16 is supported, otherwise use float16
        dtype_to_use = (
            torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        )
        autocast_context = torch.autocast("cuda", dtype=dtype_to_use)
    else:
        autocast_context = nullcontext()

    with autocast_context:
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
            **sampling_kwargs,
        )

    # Drop the prompt by token count rather than by character count: the
    # decoded text is not guaranteed to reproduce the prompt string exactly.
    generated_tokens = response["sequences"][0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
# Example prompt in the instruction / input / response layout shown in the
# model card's widget example. The quoted `input` field is closed with a
# double quote so the quoting is balanced, as it is in that example.
prompt = """
instruction: "If you are a doctor, please answer the medical questions based on the patient's description." \n
input: "Hi, I had a subarachnoid bleed and coiling of brain aneurysm last year.
I am having some major bilateral temple pain along with numbness that comes and
goes in my left arm/hand/fingers. I have had headaches since the aneurysm,
but this is different. Also, my moods have been horrible for the past few weeks."\n
response: """

# Run generation with the model and tokenizer loaded earlier and print the
# text produced after the prompt.
response = llama_generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=100,
    temperature=0.92,
)
print(response)
``` |