Spaces:
Running
on
L40S
Running
on
L40S
RyanMullins
commited on
Commit
•
80c639a
1
Parent(s):
2a04008
Committing in broken state for sharing with HF
Browse files- app.py +77 -6
- requirements.txt +1 -1
app.py
CHANGED
@@ -11,7 +11,8 @@ import transformers
|
|
11 |
# the nature of the task (e.g., factual responses are lower entropy) or it could
|
12 |
# be another
|
13 |
|
14 |
-
_MODEL_IDENTIFIER = '
|
|
|
15 |
|
16 |
_PROMPTS: tuple[str] = (
|
17 |
'prompt 1',
|
@@ -25,7 +26,7 @@ _TORCH_DEVICE = (
|
|
25 |
torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
26 |
)
|
27 |
|
28 |
-
|
29 |
ngram_len=5,
|
30 |
keys=[
|
31 |
654,
|
@@ -64,12 +65,32 @@ _WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
|
|
64 |
context_history_size=1024,
|
65 |
)
|
66 |
|
|
|
|
|
|
|
67 |
|
68 |
tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
|
69 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
|
|
70 |
model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
|
71 |
model.to(_TORCH_DEVICE)
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
@spaces.GPU
|
75 |
def generate_outputs(
|
@@ -86,10 +107,50 @@ def generate_outputs(
|
|
86 |
max_length=500,
|
87 |
top_k=40,
|
88 |
)
|
|
|
|
|
89 |
return tokenizer.batch_decode(output_sequences)
|
90 |
|
91 |
|
92 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
prompt_inputs = [
|
94 |
gr.Textbox(value=prompt, lines=4, label='Prompt')
|
95 |
for prompt in _PROMPTS
|
@@ -97,6 +158,11 @@ with gr.Blocks() as demo:
|
|
97 |
generate_btn = gr.Button('Generate')
|
98 |
|
99 |
with gr.Column(visible=False) as generations_col:
|
|
|
|
|
|
|
|
|
|
|
100 |
generations_grp = gr.CheckboxGroup(
|
101 |
label='All generations, in random order',
|
102 |
info='Select the generations you think are watermarked!',
|
@@ -104,6 +170,11 @@ with gr.Blocks() as demo:
|
|
104 |
reveal_btn = gr.Button('Reveal', visible=False)
|
105 |
|
106 |
with gr.Column(visible=False) as detections_col:
|
|
|
|
|
|
|
|
|
|
|
107 |
revealed_grp = gr.CheckboxGroup(
|
108 |
label='Ground truth for all generations',
|
109 |
info=(
|
@@ -160,10 +231,10 @@ with gr.Blocks() as demo:
|
|
160 |
value.append(choice)
|
161 |
|
162 |
return {
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
}
|
168 |
|
169 |
reveal_btn.click(
|
|
|
11 |
# the nature of the task (e.g., factual responses are lower entropy) or it could
|
12 |
# be another
|
13 |
|
14 |
+
_MODEL_IDENTIFIER = 'google/gemma-2b'
|
15 |
+
_DETECTOR_IDENTIFIER = 'gg-hf/detector_2b_1.0_demo'
|
16 |
|
17 |
_PROMPTS: tuple[str] = (
|
18 |
'prompt 1',
|
|
|
26 |
torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
27 |
)
|
28 |
|
29 |
+
_WATERMARK_CONFIG_DICT = dict(
|
30 |
ngram_len=5,
|
31 |
keys=[
|
32 |
654,
|
|
|
65 |
context_history_size=1024,
|
66 |
)
|
67 |
|
68 |
+
_WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
|
69 |
+
**_WATERMARK_CONFIG_DICT
|
70 |
+
)
|
71 |
|
72 |
tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER)
|
73 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
74 |
+
|
75 |
model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
|
76 |
model.to(_TORCH_DEVICE)
|
77 |
|
78 |
+
logits_processor = transformers.generation.SynthIDTextWatermarkLogitsProcessor(
|
79 |
+
**_WATERMARK_CONFIG_DICT,
|
80 |
+
device=_TORCH_DEVICE,
|
81 |
+
)
|
82 |
+
|
83 |
+
detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
|
84 |
+
_DETECTOR_IDENTIFIER,
|
85 |
+
)
|
86 |
+
detector_module.to(_TORCH_DEVICE)
|
87 |
+
|
88 |
+
detector = transformers.generation.watermarking.BayesianDetectorModel(
|
89 |
+
detector_module=detector_module,
|
90 |
+
logits_processor=logits_processor,
|
91 |
+
tokenizer=tokenizer,
|
92 |
+
)
|
93 |
+
|
94 |
|
95 |
@spaces.GPU
|
96 |
def generate_outputs(
|
|
|
107 |
max_length=500,
|
108 |
top_k=40,
|
109 |
)
|
110 |
+
detections = detector(output_sequences)
|
111 |
+
print(detections)
|
112 |
return tokenizer.batch_decode(output_sequences)
|
113 |
|
114 |
|
115 |
with gr.Blocks() as demo:
|
116 |
+
gr.Markdown(
|
117 |
+
'''
|
118 |
+
# Using SynthID Text in your Generative AI projects
|
119 |
+
|
120 |
+
[SynthID][synthid] is a Google DeepMind technology that watermarks and
|
121 |
+
identifies AI-generated content by embedding digital watermarks directly
|
122 |
+
into AI-generated images, audio, text or video.
|
123 |
+
|
124 |
+
SynthID Text is an open source implementation of this technology available
|
125 |
+
in Hugging Face Transformers that has two major components:
|
126 |
+
|
127 |
+
* A [logits processor][synthid-hf-logits-processor] that is
|
128 |
+
[configured][synthid-hf-config] on a per-model basis and activated when
|
129 |
+
calling `.generate()`; and
|
130 |
+
* A [detector][synthid-hf-detector] trained to recognize watermarked text
|
131 |
+
generated by a specific model with a specific configuration.
|
132 |
+
|
133 |
+
This Space demonstrates:
|
134 |
+
|
135 |
+
1. How to use SynthID Text to apply a watermark to text generated by your
|
136 |
+
model; and
|
137 |
+
1. How to identify that text using a ready-made detector.
|
138 |
+
|
139 |
+
Note that this detector is trained specifically for this demonstration. You
|
140 |
+
should maintain a specific watermarking configuration for every model you
|
141 |
+
use and protect that configuration as you would any other secret. See the
|
142 |
+
[end-to-end guide][synthid-hf-detector-e2e] for more on training your own
|
143 |
+
detectors, and the [SynthID Text documentation][raitk-synthid] for more on
|
144 |
+
how this technology works.
|
145 |
+
|
146 |
+
[raitk-synthid]: /responsible/docs/safeguards/synthid
|
147 |
+
[synthid]: https://deepmind.google/technologies/synthid/
|
148 |
+
[synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
|
149 |
+
[synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
|
150 |
+
[synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
|
151 |
+
[synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
|
152 |
+
'''
|
153 |
+
)
|
154 |
prompt_inputs = [
|
155 |
gr.Textbox(value=prompt, lines=4, label='Prompt')
|
156 |
for prompt in _PROMPTS
|
|
|
158 |
generate_btn = gr.Button('Generate')
|
159 |
|
160 |
with gr.Column(visible=False) as generations_col:
|
161 |
+
gr.Markdown(
|
162 |
+
'''
|
163 |
+
# SynthID: Tool
|
164 |
+
'''
|
165 |
+
)
|
166 |
generations_grp = gr.CheckboxGroup(
|
167 |
label='All generations, in random order',
|
168 |
info='Select the generations you think are watermarked!',
|
|
|
170 |
reveal_btn = gr.Button('Reveal', visible=False)
|
171 |
|
172 |
with gr.Column(visible=False) as detections_col:
|
173 |
+
gr.Markdown(
|
174 |
+
'''
|
175 |
+
# SynthID: Tool
|
176 |
+
'''
|
177 |
+
)
|
178 |
revealed_grp = gr.CheckboxGroup(
|
179 |
label='Ground truth for all generations',
|
180 |
info=(
|
|
|
231 |
value.append(choice)
|
232 |
|
233 |
return {
|
234 |
+
reveal_btn: gr.Button(visible=False),
|
235 |
+
detections_col: gr.Column(visible=True),
|
236 |
+
revealed_grp: gr.CheckboxGroup(choices=choices, value=value),
|
237 |
+
detect_btn: gr.Button(visible=True),
|
238 |
}
|
239 |
|
240 |
reveal_btn.click(
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
gradio
|
2 |
spaces
|
3 |
-
transformers
|
4 |
|
5 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
6 |
torch
|
|
|
1 |
gradio
|
2 |
spaces
|
3 |
+
transformers>=4.46.0
|
4 |
|
5 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
6 |
torch
|