Ahanaas committed on
Commit
cc412dc
·
verified ·
1 Parent(s): b1b22ff

Update README.md

Files changed (1):
  1. README.md +99 -3
README.md CHANGED
@@ -1,3 +1,99 @@
- ---
- license: mit
- ---
---
license: mit
language:
- en
base_model:
- NousResearch/Hermes-3-Llama-3.1-8B
---

## Inference

Install `transformers` from source at a 4.47.0.dev0 commit, along with the quantization and PEFT dependencies:
```py
!git clone https://github.com/huggingface/transformers.git
%cd transformers
!git checkout <commit_id_for_4.47.0.dev0>
!pip install .
!pip install -q accelerate==0.34.2 bitsandbytes==0.44.1 peft==0.13.1
```
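After installing, it can help to confirm that the expected versions are importable (a minimal check; the exact `transformers` dev version string depends on the commit you checked out):

```py
import transformers, accelerate, bitsandbytes, peft

# A source install should report a 4.47.0.dev0-style version for transformers
print(transformers.__version__, accelerate.__version__, bitsandbytes.__version__, peft.__version__)
```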
#### Importing libs

```py
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    logging,
)
```

#### Bits&Bytes Config

```py
# Activate 4-bit loading of the base model
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested (double) quantization for 4-bit base models
use_nested_quant = False

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
```
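The model below is placed on GPU 0 via `device_map=0`, so a CUDA device needs to be visible; a quick optional check:

```py
import torch

# device_map=0 below requires at least one visible CUDA device
print(torch.cuda.is_available(), torch.cuda.device_count())
```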
#### Loading Model

```py
# Load base model
model_name = 'Ahanaas/HermesWithYou'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=0,
)
```
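As an optional sanity check, `get_memory_footprint()` on the loaded model shows how much memory the quantized weights take; for an 8B model in 4-bit this should be a few GB rather than the ~16 GB an fp16 copy would need:

```py
# Rough check that 4-bit quantization took effect (expect a few GB, not ~16 GB)
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
```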

#### Loading Tokenizer

```py
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
```
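The Predictions section below writes the ChatML tags by hand. If the tokenizer ships a chat template (Hermes-3 tokenizers generally do, but treat this as an assumption), `tokenizer.apply_chat_template` can build the same prompt string for you:

```py
# Alternative prompt construction via the tokenizer's chat template, if one is defined
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt_text)
```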

# Predictions

```py
# Run the text-generation pipeline with the loaded model
system_prompt = ''  # fill in your system prompt
prompt = ''         # fill in your user prompt

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,  # Increase this to allow for longer outputs
    temperature=0.5,     # Encourages more varied outputs
    top_k=50,            # Limits sampling to the top 50 tokens
    do_sample=True,      # Enables sampling
    return_full_text=True,
)

result = pipe(f"<|im_start|>system\n {system_prompt}\n<|im_end|>\n<|im_start|>user\n{prompt}\n<|im_end|>\n<|im_start|>assistant\n")
generated_text = result[0]['generated_text']

# Print the generated text (prompt plus response, since return_full_text=True)
print(generated_text)
```
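Because `return_full_text=True`, `generated_text` still contains the system and user turns. A small post-processing sketch (assuming the model closes its turn with `<|im_end|>`) pulls out just the assistant's reply:

```py
# Keep only the text generated after the assistant tag, trimming the end-of-turn token
assistant_tag = "<|im_start|>assistant\n"
reply = generated_text.split(assistant_tag, 1)[-1]
reply = reply.split("<|im_end|>", 1)[0].strip()
print(reply)
```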