RyanMullins committed on
Commit
2b6fecd
1 Parent(s): dcd31e5

Update to Transformers v4.46.0

Browse files
Files changed (2) hide show
  1. app.py +33 -21
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,12 +1,18 @@
1
  from collections.abc import Sequence
2
- import json
3
  import random
4
- from typing import Optional, Tuple
5
 
6
  import gradio as gr
7
  import spaces
8
  import torch
9
- import transformers
 
 
 
 
 
 
 
10
 
11
  # If the watermark is not detected, consider the use case. Could be because of
12
  # the nature of the task (e.g., factual responses are lower entropy) or it could
@@ -15,7 +21,7 @@ import transformers
15
  _MODEL_IDENTIFIER = 'google/gemma-2b-it'
16
  _DETECTOR_IDENTIFIER = 'google/synthid-spaces-demo-detector'
17
 
18
- _PROMPTS: tuple[str] = (
19
  'Write an essay about my pets, a cat named Mika and a dog named Cleo.',
20
  'Tell me everything you can about Portugal.',
21
  'What is Hugging Face?',
@@ -24,7 +30,7 @@ _PROMPTS: tuple[str] = (
24
  _TORCH_DEVICE = (
25
  torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
26
  )
27
- _ANSWERS: list[tuple[str, str]] = []
28
 
29
  _WATERMARK_CONFIG_DICT = dict(
30
  ngram_len=5,
@@ -65,27 +71,27 @@ _WATERMARK_CONFIG_DICT = dict(
65
  context_history_size=1024,
66
  )
67
 
68
- _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
69
  **_WATERMARK_CONFIG_DICT
70
  )
71
 
72
- tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER, padding_side="left")
 
 
73
  tokenizer.pad_token_id = tokenizer.eos_token_id
74
 
75
- model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
76
  model.to(_TORCH_DEVICE)
77
 
78
- logits_processor = transformers.generation.SynthIDTextWatermarkLogitsProcessor(
79
  **_WATERMARK_CONFIG_DICT,
80
  device=_TORCH_DEVICE,
81
  )
82
 
83
- detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
84
- _DETECTOR_IDENTIFIER,
85
- )
86
  detector_module.to(_TORCH_DEVICE)
87
 
88
- detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
89
  detector_module=detector_module,
90
  logits_processor=logits_processor,
91
  tokenizer=tokenizer,
@@ -94,12 +100,12 @@ detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
94
 
95
  @spaces.GPU
96
  def generate_outputs(
97
- prompts: Sequence[str],
98
- watermarking_config: Optional[
99
- transformers.generation.SynthIDTextWatermarkingConfig
100
- ] = None,
101
  ) -> Tuple[Sequence[str], torch.Tensor]:
102
- tokenized_prompts = tokenizer(prompts, return_tensors='pt', padding="longest").to(_TORCH_DEVICE)
 
 
103
  input_length = tokenized_prompts.input_ids.shape[1]
104
  output_sequences = model.generate(
105
  **tokenized_prompts,
@@ -110,8 +116,10 @@ def generate_outputs(
110
  )
111
  output_sequences = output_sequences[:, input_length:]
112
  detections = detector(output_sequences)
113
- print(detections)
114
- return (tokenizer.batch_decode(output_sequences, skip_special_tokens=True), detections)
 
 
115
 
116
 
117
  with gr.Blocks() as demo:
@@ -163,7 +171,11 @@ with gr.Blocks() as demo:
163
  ```json
164
  {
165
  "ngram_len": 5,
166
- "keys": [654, 400, 836, 123, 340, 443, 597, 160, 57,29, 590, 639, 13,715, 468, 990, 966, 226, 324, 585, 118, 504, 421, 521, 129, 669, 732, 225, 90, 960],
 
 
 
 
167
  "sampling_table_size": 65536,
168
  "sampling_table_seed": 0,
169
  "context_history_size": 1024
 
1
  from collections.abc import Sequence
 
2
  import random
3
+ from typing import Optional, List, Tuple
4
 
5
  import gradio as gr
6
  import spaces
7
  import torch
8
+ from transformers import (
9
+ AutoModelForCausalLM,
10
+ AutoTokenizer,
11
+ BayesianDetectorModel,
12
+ SynthIDTextWatermarkingConfig,
13
+ SynthIDTextWatermarkDetector,
14
+ SynthIDTextWatermarkLogitsProcessor,
15
+ )
16
 
17
  # If the watermark is not detected, consider the use case. Could be because of
18
  # the nature of the task (e.g., factual responses are lower entropy) or it could
 
21
  _MODEL_IDENTIFIER = 'google/gemma-2b-it'
22
  _DETECTOR_IDENTIFIER = 'google/synthid-spaces-demo-detector'
23
 
24
+ _PROMPTS: Tuple[str] = (
25
  'Write an essay about my pets, a cat named Mika and a dog named Cleo.',
26
  'Tell me everything you can about Portugal.',
27
  'What is Hugging Face?',
 
30
  _TORCH_DEVICE = (
31
  torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
32
  )
33
+ _ANSWERS: List[Tuple[str, str]] = []
34
 
35
  _WATERMARK_CONFIG_DICT = dict(
36
  ngram_len=5,
 
71
  context_history_size=1024,
72
  )
73
 
74
+ _WATERMARK_CONFIG = SynthIDTextWatermarkingConfig(
75
  **_WATERMARK_CONFIG_DICT
76
  )
77
 
78
+ tokenizer = AutoTokenizer.from_pretrained(
79
+ _MODEL_IDENTIFIER, padding_side="left"
80
+ )
81
  tokenizer.pad_token_id = tokenizer.eos_token_id
82
 
83
+ model = AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
84
  model.to(_TORCH_DEVICE)
85
 
86
+ logits_processor = SynthIDTextWatermarkLogitsProcessor(
87
  **_WATERMARK_CONFIG_DICT,
88
  device=_TORCH_DEVICE,
89
  )
90
 
91
+ detector_module = BayesianDetectorModel.from_pretrained(_DETECTOR_IDENTIFIER)
 
 
92
  detector_module.to(_TORCH_DEVICE)
93
 
94
+ detector = SynthIDTextWatermarkDetector(
95
  detector_module=detector_module,
96
  logits_processor=logits_processor,
97
  tokenizer=tokenizer,
 
100
 
101
  @spaces.GPU
102
  def generate_outputs(
103
+ prompts: Sequence[str],
104
+ watermarking_config: Optional[SynthIDTextWatermarkingConfig] = None,
 
 
105
  ) -> Tuple[Sequence[str], torch.Tensor]:
106
+ tokenized_prompts = tokenizer(
107
+ prompts, return_tensors='pt', padding="longest"
108
+ ).to(_TORCH_DEVICE)
109
  input_length = tokenized_prompts.input_ids.shape[1]
110
  output_sequences = model.generate(
111
  **tokenized_prompts,
 
116
  )
117
  output_sequences = output_sequences[:, input_length:]
118
  detections = detector(output_sequences)
119
+ return (
120
+ tokenizer.batch_decode(output_sequences, skip_special_tokens=True),
121
+ detections
122
+ )
123
 
124
 
125
  with gr.Blocks() as demo:
 
171
  ```json
172
  {
173
  "ngram_len": 5,
174
+ "keys": [
175
+ 654, 400, 836, 123, 340, 443, 597, 160, 57, 29,
176
+ 590, 639, 13, 715, 468, 990, 966, 226, 324, 585,
177
+ 118, 504, 421, 521, 129, 669, 732, 225, 90, 960
178
+ ],
179
  "sampling_table_size": 65536,
180
  "sampling_table_seed": 0,
181
  "context_history_size": 1024
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio
2
  spaces
3
- transformers @ git+https://github.com/sumedhghaisas2/transformers_private
4
 
5
  --extra-index-url https://download.pytorch.org/whl/cu113
6
  torch
 
1
  gradio
2
  spaces
3
+ transformers>=4.46.0
4
 
5
  --extra-index-url https://download.pytorch.org/whl/cu113
6
  torch