llmixer committed on
Commit
dc13cd1
1 Parent(s): e47221b

Added control vectors parameter and wrapper

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. exl2_wrapper.py +85 -0
  3. test_inference.py +5 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
exl2_wrapper.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import torch
3
+ from gguf.gguf_reader import GGUFReader
4
+
5
class ExLlamaV2ModuleWrapper:
    """Proxy around a model module that adds a control vector to its output.

    The wrapper stands in for the wrapped module inside ``model.modules``:
    ``forward`` is redirected to :meth:`wrapped_forward`, and every other
    attribute read is forwarded to the wrapped module first, falling back to
    the wrapper itself only when the module does not have that attribute.
    """

    # Names that always resolve on the wrapper itself. Without this, a wrapped
    # object that happens to expose the same names (for example another
    # wrapper) would shadow them and wrapped_forward would use the wrong
    # module / control vector.
    _OWN_ATTRS = frozenset(('module', 'control_vector', 'wrapped_forward'))

    @classmethod
    def wrap(cls, model, vector_configs):
        """Load control vectors from disk and wrap the model's MLP modules.

        Vector files are searched with the glob
        ``str(model.config.model_dir) + '-vectors/*.gguf'``. For each file,
        the text after the last ``-`` (minus ``.gguf``) is split on ``__``
        into ``<vector>`` and ``<direction>``. Each tensor in a file is stored
        at the layer index given by the last ``.``-separated component of the
        tensor's name.

        Args:
            model: Object exposing ``config.model_dir``,
                ``config.num_hidden_layers``, ``config.hidden_size`` and a
                mutable ``modules`` list whose items have a ``name``.
            vector_configs: Comma-separated ``vector:direction:weight`` specs.
                ``vector`` is matched as a substring of the loaded vector
                names (first match wins). A non-float ``weight`` falls back
                to 1.0. Specs that are malformed or cannot be resolved are
                reported and skipped.

        Raises:
            SystemExit: If the first tensor of a vector file does not have
                ``model.config.hidden_size`` elements.
        """
        import sys  # local import: the file's import block does not provide sys

        shape = (model.config.num_hidden_layers, model.config.hidden_size)

        # vectors[name][direction] -> (num_hidden_layers, hidden_size) tensor
        vectors = {}
        for path in glob.glob(str(model.config.model_dir) + '-vectors/*.gguf'):
            base = path.rsplit('-', 1)[-1].replace('.gguf', '')
            vector, direction = base.split('__')
            print(f"Loaded control vector: {vector}, Direction: {direction}")
            reader = GGUFReader(path)

            if reader.tensors[0].n_elements != model.config.hidden_size:
                # Was missing the f-prefix (placeholders were printed
                # literally) and called sys.exit() without importing sys.
                print(f' ## Control vector n_elements ({reader.tensors[0].n_elements}) != model.config.hidden_size ({model.config.hidden_size})')
                sys.exit(1)

            layers = torch.zeros(shape, dtype=torch.float32)
            for tensor in reader.tensors:
                idx = int(tensor.name.split('.')[-1])
                # copy() detaches the data from the reader's buffer
                layers[idx] = torch.from_numpy(tensor.data.copy())
            vectors.setdefault(vector, {})[direction] = layers

        # Sum of every requested debias + weighted direction, per layer.
        control_vector = torch.zeros(shape, dtype=torch.float32)
        for vector_config in vector_configs.split(','):
            parts = vector_config.split(':')
            if len(parts) != 3:
                # Report and skip, like every other unusable spec below,
                # instead of failing on tuple unpacking.
                print(' !! Error: Malformed control vector config "%s" (expected topic:direction:weight)' % vector_config)
                continue
            vector, direction, weight = parts

            # Substring match against loaded vector names; first hit wins.
            vector_dirs = None
            for k, v in vectors.items():
                if vector in k:
                    vector = k
                    vector_dirs = v
                    break
            if vector_dirs is None:
                print(' !! Error: No vector for "%s" (%s)' % (vector, vector_config))
                continue

            debias_layers = vector_dirs.get('debias', None)
            if debias_layers is None:
                print(' !! Error: No debias for "%s" (%s)' % (vector, vector_config))
                continue

            direction_layers = vector_dirs.get(direction, None)
            if direction_layers is None:
                print(' !! Error: No "%s" for "%s" (%s)' % (direction, vector, vector_config))
                continue

            try:
                weight = float(weight)
            except ValueError:
                print(' !! Non float weight %s (%s)' % (weight, vector_config))
                weight = 1.0

            print(' -- Applying %s debias and %s * %s' % (vector, direction, weight))
            control_vector += debias_layers
            control_vector += direction_layers * weight

        # Wrap modules named 'MLP', leaving the first module and the last two
        # modules of the list untouched.
        last_wrappable = len(model.modules) - 2
        for idx, module in enumerate(model.modules):
            if idx == 0 or idx >= last_wrappable or module.name != 'MLP':
                continue
            model.modules[idx] = cls(module, control_vector)

    def __init__(self, module, control_vector):
        """Store the wrapped module and the per-layer control vector tensor."""
        self.module = module
        self.control_vector = control_vector

    def __getattribute__(self, name):
        """Resolve attribute reads, delegating to the wrapped module.

        ``forward`` maps to :meth:`wrapped_forward`; the wrapper's own state
        resolves on the wrapper; anything else is looked up on the wrapped
        module first and on the wrapper only if the module lacks it.
        """
        if name == 'forward':
            return object.__getattribute__(self, 'wrapped_forward')
        if name in ExLlamaV2ModuleWrapper._OWN_ATTRS:
            return object.__getattribute__(self, name)

        try:
            return getattr(object.__getattribute__(self, 'module'), name)
        except AttributeError:
            pass
        return object.__getattribute__(self, name)

    def wrapped_forward(self, *args, **kwargs):
        """Run the wrapped module's forward and add this layer's control vector.

        The row ``control_vector[module.layer_idx]`` is moved to the output's
        device and added in place to the output. If ``layer_idx`` is outside
        the control vector, the output is returned unchanged.
        """
        x = self.module.forward(*args, **kwargs)
        try:
            # The indexed row is only read here, so no clone() is needed.
            x += self.control_vector[self.module.layer_idx].to(x.device)
        except IndexError:
            pass
        return x
85
+
test_inference.py CHANGED
@@ -64,6 +64,7 @@ parser.add_argument("-sl", "--stream_layers", action = "store_true", help = "Loa
64
  parser.add_argument("-sp", "--standard_perplexity", choices = ["wiki2"], help = "Run standard (HF) perplexity test, stride 512 (experimental)")
65
  parser.add_argument("-rr", "--rank_reduce", type = str, help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)")
66
  parser.add_argument("-mol", "--max_output_len", type = int, help = "Set max output chunk size (incompatible with ppl tests)")
 
67
 
68
  # Initialize model and tokenizer
69
 
@@ -107,6 +108,10 @@ model, tokenizer = model_init.init(
107
  )
108
  cache = None
109
 
 
 
 
 
110
  # Auto split
111
 
112
  if not model.loaded and not args.stream_layers:
 
64
  parser.add_argument("-sp", "--standard_perplexity", choices = ["wiki2"], help = "Run standard (HF) perplexity test, stride 512 (experimental)")
65
  parser.add_argument("-rr", "--rank_reduce", type = str, help = "Rank-reduction for MLP layers of model, in reverse order (for experimentation)")
66
  parser.add_argument("-mol", "--max_output_len", type = int, help = "Set max output chunk size (incompatible with ppl tests)")
67
+ parser.add_argument("-cv", "--control_vectors", type = str, help = "List of control vectors to apply. Format: topic:direction:weight, e.g. -cv language:simple:0.5")
68
 
69
  # Initialize model and tokenizer
70
 
 
108
  )
109
  cache = None
110
 
111
+ if args.control_vectors:
112
+ from exl2_wrapper import ExLlamaV2ModuleWrapper
113
+ ExLlamaV2ModuleWrapper.wrap(model, args.control_vectors)
114
+
115
  # Auto split
116
 
117
  if not model.loaded and not args.stream_layers: