Spaces:

google
/

synthid-text

Running on L40S

App Files Files Community

RyanMullins committed on Oct 31, 2024

Commit

2b6fecd

1 Parent(s): dcd31e5

Update to Transformers v4.46.0

Browse files

Files changed (2) hide show

app.py +33 -21
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,12 +1,18 @@
 from collections.abc import Sequence
-import json
 import random
-from typing import Optional, Tuple
 import gradio as gr
 import spaces
 import torch
-import transformers
 # If the watermark is not detected, consider the use case. Could be because of
 # the nature of the task (e.g., factual responses are lower entropy) or it could
@@ -15,7 +21,7 @@ import transformers
 _MODEL_IDENTIFIER = 'google/gemma-2b-it'
 _DETECTOR_IDENTIFIER = 'google/synthid-spaces-demo-detector'
-_PROMPTS: tuple[str] = (
     'Write an essay about my pets, a cat named Mika and a dog named Cleo.',
     'Tell me everything you can about Portugal.',
     'What is Hugging Face?',
@@ -24,7 +30,7 @@ _PROMPTS: tuple[str] = (
 _TORCH_DEVICE = (
     torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 )
-_ANSWERS: list[tuple[str, str]] = []
 _WATERMARK_CONFIG_DICT = dict(
     ngram_len=5,
@@ -65,27 +71,27 @@ _WATERMARK_CONFIG_DICT = dict(
     context_history_size=1024,
 )
-_WATERMARK_CONFIG = transformers.generation.SynthIDTextWatermarkingConfig(
     **_WATERMARK_CONFIG_DICT
 )
-tokenizer = transformers.AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER, padding_side="left")
 tokenizer.pad_token_id = tokenizer.eos_token_id
-model = transformers.AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
 model.to(_TORCH_DEVICE)
-logits_processor = transformers.generation.SynthIDTextWatermarkLogitsProcessor(
     **_WATERMARK_CONFIG_DICT,
     device=_TORCH_DEVICE,
 )
-detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
-    _DETECTOR_IDENTIFIER,
-)
 detector_module.to(_TORCH_DEVICE)
-detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
     detector_module=detector_module,
     logits_processor=logits_processor,
     tokenizer=tokenizer,
@@ -94,12 +100,12 @@ detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
 @spaces.GPU
 def generate_outputs(
-  prompts: Sequence[str],
-  watermarking_config: Optional[
-      transformers.generation.SynthIDTextWatermarkingConfig
-  ] = None,
 ) -> Tuple[Sequence[str], torch.Tensor]:
-  tokenized_prompts = tokenizer(prompts, return_tensors='pt', padding="longest").to(_TORCH_DEVICE)
   input_length = tokenized_prompts.input_ids.shape[1]
   output_sequences = model.generate(
       **tokenized_prompts,
@@ -110,8 +116,10 @@ def generate_outputs(
   )
   output_sequences = output_sequences[:, input_length:]
   detections = detector(output_sequences)
-  print(detections)
-  return (tokenizer.batch_decode(output_sequences, skip_special_tokens=True), detections)
 with gr.Blocks() as demo:
@@ -163,7 +171,11 @@ with gr.Blocks() as demo:
     ```json
     {
         "ngram_len": 5,
-        "keys": [654, 400, 836, 123, 340, 443, 597, 160, 57,29, 590, 639, 13,715, 468, 990, 966, 226, 324, 585, 118, 504, 421, 521, 129, 669, 732, 225, 90, 960],
         "sampling_table_size": 65536,
         "sampling_table_seed": 0,
         "context_history_size": 1024

 from collections.abc import Sequence
 import random
+from typing import Optional, List, Tuple
 import gradio as gr
 import spaces
 import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BayesianDetectorModel,
+    SynthIDTextWatermarkingConfig,
+    SynthIDTextWatermarkDetector,
+    SynthIDTextWatermarkLogitsProcessor,
+)
 # If the watermark is not detected, consider the use case. Could be because of
 # the nature of the task (e.g., factual responses are lower entropy) or it could
 _MODEL_IDENTIFIER = 'google/gemma-2b-it'
 _DETECTOR_IDENTIFIER = 'google/synthid-spaces-demo-detector'
+_PROMPTS: Tuple[str] = (
     'Write an essay about my pets, a cat named Mika and a dog named Cleo.',
     'Tell me everything you can about Portugal.',
     'What is Hugging Face?',
 _TORCH_DEVICE = (
     torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 )
+_ANSWERS: List[Tuple[str, str]] = []
 _WATERMARK_CONFIG_DICT = dict(
     ngram_len=5,
     context_history_size=1024,
 )
+_WATERMARK_CONFIG = SynthIDTextWatermarkingConfig(
     **_WATERMARK_CONFIG_DICT
 )
+tokenizer = AutoTokenizer.from_pretrained(
+    _MODEL_IDENTIFIER, padding_side="left"
+)
 tokenizer.pad_token_id = tokenizer.eos_token_id
+model = AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER)
 model.to(_TORCH_DEVICE)
+logits_processor = SynthIDTextWatermarkLogitsProcessor(
     **_WATERMARK_CONFIG_DICT,
     device=_TORCH_DEVICE,
 )
+detector_module = BayesianDetectorModel.from_pretrained(_DETECTOR_IDENTIFIER)
 detector_module.to(_TORCH_DEVICE)
+detector = SynthIDTextWatermarkDetector(
     detector_module=detector_module,
     logits_processor=logits_processor,
     tokenizer=tokenizer,
 @spaces.GPU
 def generate_outputs(
+    prompts: Sequence[str],
+    watermarking_config: Optional[SynthIDTextWatermarkingConfig] = None,
 ) -> Tuple[Sequence[str], torch.Tensor]:
+  tokenized_prompts = tokenizer(
+      prompts, return_tensors='pt', padding="longest"
+  ).to(_TORCH_DEVICE)
   input_length = tokenized_prompts.input_ids.shape[1]
   output_sequences = model.generate(
       **tokenized_prompts,
   )
   output_sequences = output_sequences[:, input_length:]
   detections = detector(output_sequences)
+  return (
+      tokenizer.batch_decode(output_sequences, skip_special_tokens=True),
+      detections
+  )
 with gr.Blocks() as demo:
     ```json
     {
         "ngram_len": 5,
+        "keys": [
+            654, 400, 836, 123, 340, 443, 597, 160,  57,  29,
+            590, 639,  13, 715, 468, 990, 966, 226, 324, 585,
+            118, 504, 421, 521, 129, 669, 732, 225,  90, 960
+        ],
         "sampling_table_size": 65536,
         "sampling_table_seed": 0,
         "context_history_size": 1024

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 gradio
 spaces
-transformers @ git+https://github.com/sumedhghaisas2/transformers_private
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch

 gradio
 spaces
+transformers>=4.46.0
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch