lokinfey commited on
Commit
0f309dd
·
verified ·
1 Parent(s): 42a92be

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +75 -3
README.md CHANGED
@@ -1,3 +1,75 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+
5
+ # **ChatGLM-4-onnx-cpu-int4**
6
+
7
+ <b><u>Note: This is an unofficial version, just for test and dev.</u></b>
8
+
9
+ This is the ONNX-format INT4 quantized version of the glm-4-9b model.
10
+
11
+ 1. Install
12
+
13
+ ```bash
14
+
15
+ pip install torch transformers onnx onnxruntime
16
+
17
+ pip install --pre onnxruntime-genai
18
+
19
+ ```
20
+ 2. Sample
21
+
22
+ ```python
23
+
24
+ import onnxruntime_genai as og
25
+ import numpy as np
26
+ import os
27
+
28
+
29
+ model_folder = r".\chatglm-onnx\model"
30
+
31
+
32
+ model = og.Model(model_folder)
33
+
34
+
35
+ tokenizer = og.Tokenizer(model)
36
+ tokenizer_stream = tokenizer.create_stream()
37
+
38
+
39
+ search_options = {}
40
+ search_options['max_length'] = 2048
41
+ search_options['past_present_share_buffer'] = False
42
+
43
+
44
+ chat_template = "<|user|>{input}<|assistant|>"
45
+
46
+
47
+ text = """介绍一下华南师范大学?"""
48
+
49
+
50
+ prompt = f'{chat_template.format(input=text)}'
51
+
52
+
53
+ input_tokens = tokenizer.encode(prompt)
54
+
55
+
56
+ params = og.GeneratorParams(model)
57
+
58
+
59
+ params.set_search_options(**search_options)
60
+ params.input_ids = input_tokens
61
+
62
+
63
+ generator = og.Generator(model, params)
64
+
65
+
66
+ while not generator.is_done():
67
+ generator.compute_logits()
68
+ generator.generate_next_token()
69
+
70
+ new_token = generator.get_next_tokens()[0]
71
+ print(tokenizer_stream.decode(new_token), end='', flush=True)
72
+
73
+
74
+ ```
75
+