RyanMullins committed on
Commit
80c639a
1 Parent(s): 2a04008

Committing in broken state for sharing with HF

Browse files
Files changed (2) hide show
  1. app.py +77 -6
  2. requirements.txt +1 -1
app.py CHANGED
@@ -11,7 +11,8 @@ import transformers
11
  # the nature of the task (e.g., factual responses are lower entropy) or it could
12
  # be another
13
 
14
- _MODEL_IDENTIFIER = 'hf-internal-testing/tiny-random-gpt2'
 
15
 
16
  _PROMPTS: tuple[str] = (
17
  'prompt 1',
@@ -25,7 +26,7 @@ _TORCH_DEVICE = (
25
  torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
26
  )
27
 
28
- _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
29
  ngram_len=5,
30
  keys=[
31
  654,
@@ -64,12 +65,32 @@ _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
64
  context_history_size=1024,
65
  )
66
 
 
 
 
67
 
68
  tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
69
  tokenizer.pad_token_id = tokenizer.eos_token_id
 
70
  model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
71
  model.to(_TORCH_DEVICE)
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  @spaces.GPU
75
  def generate_outputs(
@@ -86,10 +107,50 @@ def generate_outputs(
86
  max_length=500,
87
  top_k=40,
88
  )
 
 
89
  return tokenizer.batch_decode(output_sequences)
90
 
91
 
92
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  prompt_inputs = [
94
  gr.Textbox(value=prompt, lines=4, label='Prompt')
95
  for prompt in _PROMPTS
@@ -97,6 +158,11 @@ with gr.Blocks() as demo:
97
  generate_btn = gr.Button('Generate')
98
 
99
  with gr.Column(visible=False) as generations_col:
 
 
 
 
 
100
  generations_grp = gr.CheckboxGroup(
101
  label='All generations, in random order',
102
  info='Select the generations you think are watermarked!',
@@ -104,6 +170,11 @@ with gr.Blocks() as demo:
104
  reveal_btn = gr.Button('Reveal', visible=False)
105
 
106
  with gr.Column(visible=False) as detections_col:
 
 
 
 
 
107
  revealed_grp = gr.CheckboxGroup(
108
  label='Ground truth for all generations',
109
  info=(
@@ -160,10 +231,10 @@ with gr.Blocks() as demo:
160
  value.append(choice)
161
 
162
  return {
163
- reveal_btn: gr.Button(visible=False),
164
- detections_col: gr.Column(visible=True),
165
- revealed_grp: gr.CheckboxGroup(choices=choices, value=value),
166
- detect_btn: gr.Button(visible=True),
167
  }
168
 
169
  reveal_btn.click(
 
11
  # the nature of the task (e.g., factual responses are lower entropy) or it could
12
  # be another
13
 
14
+ _MODEL_IDENTIFIER = 'google/gemma-2b'
15
+ _DETECTOR_IDENTIFIER = 'gg-hf/detector_2b_1.0_demo'
16
 
17
  _PROMPTS: tuple[str] = (
18
  'prompt 1',
 
26
  torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
27
  )
28
 
29
+ _WATERMARK_CONFIG_DICT = dict(
30
  ngram_len=5,
31
  keys=[
32
  654,
 
65
  context_history_size=1024,
66
  )
67
 
68
+ _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
69
+ **_WATERMARK_CONFIG_DICT
70
+ )
71
 
72
  tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
73
  tokenizer.pad_token_id = tokenizer.eos_token_id
74
+
75
  model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
76
  model.to(_TORCH_DEVICE)
77
 
78
+ logits_processor = transformers.generation.SynthIDTextWatermarkLogitsProcessor(
79
+ **_WATERMARK_CONFIG_DICT,
80
+ device=_TORCH_DEVICE,
81
+ )
82
+
83
+ detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
84
+ _DETECTOR_IDENTIFIER,
85
+ )
86
+ detector_module.to(_TORCH_DEVICE)
87
+
88
+ detector = transformers.generation.watermarking.BayesianDetectorModel(
89
+ detector_module=detector_module,
90
+ logits_processor=logits_processor,
91
+ tokenizer=tokenizer,
92
+ )
93
+
94
 
95
  @spaces.GPU
96
  def generate_outputs(
 
107
  max_length=500,
108
  top_k=40,
109
  )
110
+ detections = detector(output_sequences)
111
+ print(detections)
112
  return tokenizer.batch_decode(output_sequences)
113
 
114
 
115
  with gr.Blocks() as demo:
116
+ gr.Markdown(
117
+ '''
118
+ # Using SynthID Text in your Generative AI projects
119
+
120
+ [SynthID][synthid] is a Google DeepMind technology that watermarks and
121
+ identifies AI-generated content by embedding digital watermarks directly
122
+ into AI-generated images, audio, text or video.
123
+
124
+ SynthID Text is an open source implementation of this technology available
125
+ in Hugging Face Transformers that has two major components:
126
+
127
+ * A [logits processor][synthid-hf-logits-processor] that is
128
+ [configured][synthid-hf-config] on a per-model basis and activated when
129
+ calling `.generate()`; and
130
+ * A [detector][synthid-hf-detector] trained to recognize watermarked text
131
+ generated by a specific model with a specific configuration.
132
+
133
+ This Space demonstrates:
134
+
135
+ 1. How to use SynthID Text to apply a watermark to text generated by your
136
+ model; and
137
+ 1. How to identify that text using a ready-made detector.
138
+
139
+ Note that this detector is trained specifically for this demonstration. You
140
+ should maintain a specific watermarking configuration for every model you
141
+ use and protect that configuration as you would any other secret. See the
142
+ [end-to-end guide][synthid-hf-detector-e2e] for more on training your own
143
+ detectors, and the [SynthID Text documentation][raitk-synthid] for more on
144
+ how this technology works.
145
+
146
+ [raitk-synthid]: /responsible/docs/safeguards/synthid
147
+ [synthid]: https://deepmind.google/technologies/synthid/
148
+ [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
149
+ [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
150
+ [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
151
+ [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
152
+ '''
153
+ )
154
  prompt_inputs = [
155
  gr.Textbox(value=prompt, lines=4, label='Prompt')
156
  for prompt in _PROMPTS
 
158
  generate_btn = gr.Button('Generate')
159
 
160
  with gr.Column(visible=False) as generations_col:
161
+ gr.Markdown(
162
+ '''
163
+ # SynthID: Tool
164
+ '''
165
+ )
166
  generations_grp = gr.CheckboxGroup(
167
  label='All generations, in random order',
168
  info='Select the generations you think are watermarked!',
 
170
  reveal_btn = gr.Button('Reveal', visible=False)
171
 
172
  with gr.Column(visible=False) as detections_col:
173
+ gr.Markdown(
174
+ '''
175
+ # SynthID: Tool
176
+ '''
177
+ )
178
  revealed_grp = gr.CheckboxGroup(
179
  label='Ground truth for all generations',
180
  info=(
 
231
  value.append(choice)
232
 
233
  return {
234
+ reveal_btn: gr.Button(visible=False),
235
+ detections_col: gr.Column(visible=True),
236
+ revealed_grp: gr.CheckboxGroup(choices=choices, value=value),
237
+ detect_btn: gr.Button(visible=True),
238
  }
239
 
240
  reveal_btn.click(
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio
2
  spaces
3
- transformers @ git+https://github.com/sumedhghaisas2/transformers_private@synthid_text
4
 
5
  --extra-index-url https://download.pytorch.org/whl/cu113
6
  torch
 
1
  gradio
2
  spaces
3
+ transformers>=4.46.0
4
 
5
  --extra-index-url https://download.pytorch.org/whl/cu113
6
  torch