xu song committed
Commit c38b609 · 1 Parent(s): 8988bbf
Files changed (2)
  1. log_util.py +12 -0
  2. models/cpp_qwen2.py +24 -18
log_util.py ADDED
@@ -0,0 +1,12 @@
+
+import logging
+
+logging.basicConfig(
+    format='[%(asctime)s] [%(levelname)s] [%(process)d:%(thread)d] [%(filename)s:%(lineno)d:%(funcName)s] %(message)s',
+    level=logging.INFO,
+    datefmt="%Y-%m-%d %H:%M:%S",
+
+)
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
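
The new module centralizes the logging configuration; models/cpp_qwen2.py below imports the shared logger from it. A minimal usage sketch from a hypothetical caller (the caller file and message are illustrative, not part of this commit):

    from log_util import logger

    logger.info("model loaded")
    # emits a line shaped roughly like:
    # [2024-01-01 00:00:00] [INFO] [12345:140123456789] [caller.py:3:<module>] model loaded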
models/cpp_qwen2.py CHANGED
@@ -15,34 +15,38 @@ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-C
 
 """
 
+import json
 from simulator import Simulator
 import llama_cpp
 # import llama_cpp.llama_tokenizer
 from transformers import AutoTokenizer
+from log_util import logger
 
 
 class Qwen2Simulator(Simulator):
 
-    def __init__(self, model_name_or_path=None):
-        self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Chat")
-        self.llm = llama_cpp.Llama.from_pretrained(
-            repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
-            filename="*fp16.gguf",
-            tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
-            verbose=False,
-        )
+    def __init__(self, from_local=False):
+        if from_local:
+            self.hf_tokenizer = AutoTokenizer.from_pretrained("/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
+            self.llm = llama_cpp.Llama(
+                model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
+                verbose=False,
+            )
+        else:
+            self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+            self.llm = llama_cpp.Llama.from_pretrained(
+                repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
+                filename="*fp16.gguf",
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
+                verbose=False,
+            )
+        logger.info(f"llm has been initialized: {self.llm}")
+        # warmup
+
 
     ### local
-    # self.hf_tokenizer = AutoTokenizer.from_pretrained("/workspace/xusong/huggingface/models/Qwen2-0.5B-Chat/")
-    # self.llm = Llama(
-    #     model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Chat-GGUF/qwen2-0_5b-chat-q8_0.gguf",
-    #     # model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Chat/Qwen2-0.5B-Chat-F16.gguf",
-    #     # n_gpu_layers=-1, # Uncomment to use GPU acceleration
-    #     # seed=1337, # Uncomment to set a specific seed
-    #     # n_ctx=2048, # Uncomment to increase the context window
-    #     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
-    #     verbose=False,
-    # )
+
 
 
     def generate_query(self, messages):
@@ -51,6 +55,7 @@ class Qwen2Simulator(Simulator):
         :return:
         """
         assert messages[-1]["role"] != "user"
+        logger.info(f"generating {json.dumps(messages)}")
         inputs = self.hf_tokenizer.apply_chat_template(
             messages,
             tokenize=False,
@@ -63,6 +68,7 @@ class Qwen2Simulator(Simulator):
 
     def generate_response(self, messages):
         assert messages[-1]["role"] == "user"
+        logger.info(f"generating {json.dumps(messages)}")
        inputs = self.hf_tokenizer.apply_chat_template(
             messages,
             tokenize=False,
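
Taken together, the change moves Qwen2Simulator to the Qwen2-0.5B-Instruct checkpoints, adds a from_local switch between a local GGUF file and the Hub download, and logs each generation request. A minimal sketch of how the reworked class might be driven; the import path and the assumption that generate_response returns the generated text are not shown in this diff and are only illustrative:

    from models.cpp_qwen2 import Qwen2Simulator

    # from_local=False pulls *fp16.gguf from Qwen/Qwen2-0.5B-Instruct-GGUF on the Hub
    simulator = Qwen2Simulator(from_local=False)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
    ]
    # last message role is "user", so the assert in generate_response passes,
    # and the request is logged as: generating [{"role": ...}, ...]
    response = simulator.generate_response(messages)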