Installing Libraries

Before running the example below, install the following libraries (run each command in your environment):

  • pip install -q sentencepiece
  • pip install -q transformers
  • pip install -q accelerate
  • pip install --upgrade -q bitsandbytes

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id. It is defined once and passed to both the
# tokenizer and the model load so the two can never point at different
# checkpoints.
model_path = "Neurai/llama7b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # load_in_8bit=True,  # optional 8-bit loading; left disabled here
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",  # device placement is chosen automatically
)
# Put the model in evaluation mode before generating.
model.eval()
print('model loaded')

# Example prompt in Persian; roughly "How many years does a giraffe live?".
# NOTE(review): despite the name, this string is passed to the model as the
# whole input prompt, not as a separate system prompt.
SYS_PROMPT = "زرافه چند سال عمر میکند؟"

def response_generate(input_prompt):
    """Generate a sampled completion for ``input_prompt``.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        input_prompt: Prompt text to tokenize and feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on "cuda". Send the encoded prompt to the model's own device
    # instead of hard-coding one.
    encoded = tokenizer(input_prompt, return_tensors="pt").to(model.device)

    # The tokenizer may not define a pad token, in which case pad_token_id
    # is None. Fall back to the EOS id so generate() gets a valid value.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        inputs=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        do_sample=True,
        temperature=0.3,
        top_k=50,
        top_p=0.9,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )
    # Only one prompt is submitted, so take the single decoded sequence.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# Run the example prompt through the model and print the decoded text.
print(response_generate(SYS_PROMPT))
Downloads last month
10
Safetensors
Model size
6.9B params
Tensor type
F32
·
BF16
·
I8
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.