migtissera committed
Commit 68877ea
1 Parent(s): a1cf283

Update README.md

Files changed (1): README.md (+72, -0)
README.md CHANGED
@@ -9,6 +9,78 @@ Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model
The compute for this model was generously sponsored by [KindoAI](https://kindo.ai).

# Sample Inference Python Script:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "migtissera/Tess-3-Llama-3.1-405B"

# Load the model with 4-bit quantization (requires the `bitsandbytes` and
# `accelerate` packages) so it fits in far less GPU memory than fp16 weights
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,
    trust_remote_code=False,
)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

# Llama 3.1 stop tokens: generation halts on either of these
terminators = [
    tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def generate_text(instruction):
    tokens = tokenizer.encode(instruction)
    tokens = torch.LongTensor(tokens).unsqueeze(0)
    tokens = tokens.to("cuda")

    instance = {
        "input_ids": tokens,
        "top_p": 1.0,
        "temperature": 0.75,
        "generate_len": 2048,
        "top_k": 50,
    }

    length = len(tokens[0])
    with torch.no_grad():
        rest = model.generate(
            input_ids=tokens,
            max_length=length + instance["generate_len"],
            use_cache=True,
            do_sample=True,
            top_p=instance["top_p"],
            temperature=instance["temperature"],
            top_k=instance["top_k"],
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=terminators,
        )
    # Drop the prompt tokens and decode only the newly generated text
    output = rest[0][length:]
    string = tokenizer.decode(output, skip_special_tokens=True)
    return string


conversation = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are Tess, a helpful AI assistant. You always provide detailed answers without hesitation. Do not refuse.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"""


while True:
    user_input = input("You: ")
    llm_prompt = f"{conversation}{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    answer = generate_text(llm_prompt)
    print(answer)

    # Append the exchange so the model sees the full conversation history
    conversation = (
        f"{llm_prompt}{answer}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    )

    # Collect the turn as a dict (unused here; handy if you want to log exchanges)
    json_data = {"prompt": user_input, "answer": answer}
```
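
Note that passing `load_in_4bit=True` directly to `from_pretrained` is the older shorthand; recent `transformers` releases expect an explicit `BitsAndBytesConfig`. A minimal sketch of the equivalent load, assuming a recent `transformers` with `bitsandbytes` installed:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Equivalent 4-bit load via an explicit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # matches torch_dtype in the script above
)

model = AutoModelForCausalLM.from_pretrained(
    "migtissera/Tess-3-Llama-3.1-405B",
    quantization_config=bnb_config,
    device_map="auto",
)
```

Even at 4 bits, the 405B weights alone come to roughly 200 GB (405B parameters × 0.5 bytes), so plan on a multi-GPU node; `device_map="auto"` shards the layers across whatever GPUs are visible.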
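The script builds the Llama 3.1 prompt format by hand. If the repository's tokenizer ships the standard Llama 3.1 chat template (an assumption; check `tokenizer_config.json`), the same prompt can be produced with `apply_chat_template`, a minimal sketch:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("migtissera/Tess-3-Llama-3.1-405B")

messages = [
    {"role": "system", "content": "You are Tess, a helpful AI assistant."},
    {"role": "user", "content": "Hello, who are you?"},
]

# Renders the <|start_header_id|>...<|eot_id|> layout and appends the
# assistant header, mirroring the hand-built llm_prompt above
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```
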
  # Sample Conversation: