radames committed
Commit 8eae2b8
1 parent: b788820

Upload 21 files
server/.DS_Store ADDED
Binary file (6.15 kB)
 
server/config.py CHANGED
@@ -21,18 +21,11 @@ class Config:
     port: int = 9090
     workers: int = 1
 
-    ####################################################################
-    # Generation configuration
-    ####################################################################
-    # The threshold for the Levenstein distance.
-    levenstein_distance_threshold: int = 3
-
     ####################################################################
     # Model configuration
     ####################################################################
     # SD1.x variant model
     model_id: str = "SimianLuo/LCM_Dreamshaper_v7"
-
     # LCM-LORA model
     lcm_lora_id: str = "latent-consistency/lcm-lora-sdv1-5"
     # TinyVAE model
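
With the generation block removed, the prompt-similarity gate moves out of the server config and into the React client (see view/src/App.tsx below). For orientation, a minimal sketch of the dataclass after this change, assuming the fields outside the hunk keep the defaults shown in the context and leaving the TinyVAE id out because the hunk cuts off before it:

from dataclasses import dataclass

@dataclass
class Config:
    # Server configuration (unchanged context)
    port: int = 9090
    workers: int = 1

    ####################################################################
    # Model configuration
    ####################################################################
    # SD1.x variant model
    model_id: str = "SimianLuo/LCM_Dreamshaper_v7"
    # LCM-LORA model
    lcm_lora_id: str = "latent-consistency/lcm-lora-sdv1-5"
    # TinyVAE model id follows in the full file (not shown in this hunk)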
server/main.py CHANGED
@@ -32,7 +32,7 @@ class PredictResponseModel(BaseModel):
     The response model for the /predict endpoint.
     """
 
-    base64_images: list[str]
+    base64_image: str
 
 
 class UpdatePromptResponseModel(BaseModel):
@@ -86,7 +86,6 @@ class Api:
         self._update_prompt_lock = asyncio.Lock()
 
         self.last_prompt: str = ""
-        self.last_images: list[str] = [""]
 
     async def _predict(self, inp: PredictInputModel) -> PredictResponseModel:
         """
@@ -103,15 +102,7 @@ class Api:
             The prediction result.
         """
         async with self._predict_lock:
-            if (
-                self._calc_levenstein_distance(inp.prompt, self.last_prompt)
-                < self.config.levenstein_distance_threshold
-            ):
-                logger.info("Using cached images")
-                return PredictResponseModel(base64_images=self.last_images)
-            self.last_prompt = inp.prompt
-            self.last_images = [self._pil_to_base64(image) for image in self.stream_diffusion(inp.prompt)]
-            return PredictResponseModel(base64_images=self.last_images)
+            return PredictResponseModel(base64_image=self._pil_to_base64(self.stream_diffusion(inp.prompt)))
 
     def _pil_to_base64(self, image: Image.Image, format: str = "JPEG") -> bytes:
         """
@@ -152,52 +143,6 @@ class Api:
         base64_image = base64_image.split("base64,")[1]
         return Image.open(BytesIO(base64.b64decode(base64_image))).convert("RGB")
 
-    def _calc_levenstein_distance(self, a: str, b: str) -> int:
-        """
-        Calculate the Levenstein distance between two strings.
-
-        Parameters
-        ----------
-        a : str
-            The first string.
-
-        b : str
-            The second string.
-
-        Returns
-        -------
-        int
-            The Levenstein distance.
-        """
-        if a == b:
-            return 0
-        a_k = len(a)
-        b_k = len(b)
-        if a == "":
-            return b_k
-        if b == "":
-            return a_k
-        matrix = [[] for i in range(a_k + 1)]
-        for i in range(a_k + 1):
-            matrix[i] = [0 for j in range(b_k + 1)]
-        for i in range(a_k + 1):
-            matrix[i][0] = i
-        for j in range(b_k + 1):
-            matrix[0][j] = j
-        for i in range(1, a_k + 1):
-            ac = a[i - 1]
-            for j in range(1, b_k + 1):
-                bc = b[j - 1]
-                cost = 0 if (ac == bc) else 1
-                matrix[i][j] = min(
-                    [
-                        matrix[i - 1][j] + 1,
-                        matrix[i][j - 1] + 1,
-                        matrix[i - 1][j - 1] + cost,
-                    ]
-                )
-        return matrix[a_k][b_k]
-
 
 if __name__ == "__main__":
     from config import Config
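
The /predict handler now generates on every request and returns a single base64_image string instead of the cached base64_images list. A quick smoke test, assuming the API is reachable through the front end's /api/predict route on the default port 9090 (both the URL and the sample prompt are illustrative):

import base64

import requests

resp = requests.post(
    "http://localhost:9090/api/predict",  # assumed route; matches the fetch() in view/src/App.tsx
    json={"prompt": "a watercolor fox in a forest"},
)
resp.raise_for_status()

# The response model now carries exactly one image
with open("out.jpg", "wb") as f:
    f.write(base64.b64decode(resp.json()["base64_image"]))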
server/requirements.txt CHANGED
@@ -9,4 +9,5 @@ torchvision
 torchaudio
 triton
 # https://github.com/chengzeyi/stable-fast --index-url https://download.pytorch.org/whl/cu121
-https://github.com/chengzeyi/stable-fast/releases/download/v0.0.14/stable_fast-0.0.14+torch210cu121-cp310-cp310-manylinux2014_x86_64.whl
+# https://github.com/chengzeyi/stable-fast/releases/download/v0.0.14/stable_fast-0.0.14+torch210cu121-cp310-cp310-manylinux2014_x86_64.whl
+https://github.com/chengzeyi/stable-fast/releases/download/v0.0.15.post1/stable_fast-0.0.15.post1+torch211cu121-cp310-cp310-manylinux2014_x86_64.whl
server/wrapper.py CHANGED
@@ -8,7 +8,6 @@ import torch
 from diffusers import AutoencoderTiny, StableDiffusionPipeline
 
 from streamdiffusion import StreamDiffusion
-from streamdiffusion.acceleration.sfast import accelerate_with_stable_fast
 from streamdiffusion.image_utils import postprocess_image
 
 
@@ -33,6 +32,7 @@ class StreamDiffusionWrapper:
         self.device = device
         self.dtype = dtype
         self.prompt = ""
+        self.batch_size = len(t_index_list)
 
         self.stream = self._load_model(
             model_id=model_id,
@@ -44,13 +44,18 @@
         self.safety_checker = None
         if safety_checker:
             from transformers import CLIPFeatureExtractor
-            from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+            from diffusers.pipelines.stable_diffusion.safety_checker import (
+                StableDiffusionSafetyChecker,
+            )
+
             self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-                "CompVis/stable-diffusion-safety-checker").to(self.device)
+                "CompVis/stable-diffusion-safety-checker"
+            ).to(self.device)
             self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
-                "openai/clip-vit-base-patch32")
-            self.nsfw_fallback_img = PIL.Image.new(
-                "RGB", (512, 512), (0, 0, 0))
+                "openai/clip-vit-base-patch32"
+            )
+            self.nsfw_fallback_img = PIL.Image.new("RGB", (512, 512), (0, 0, 0))
+            self.stream.prepare("")
 
     def _load_model(
         self,
@@ -61,13 +66,13 @@
         warmup: int,
     ):
         if os.path.exists(model_id):
-            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_single_file(model_id).to(
-                device=self.device, dtype=self.dtype
-            )
+            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_single_file(
+                model_id
+            ).to(device=self.device, dtype=self.dtype)
         else:
-            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(model_id).to(
-                device=self.device, dtype=self.dtype
-            )
+            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(
+                model_id
+            ).to(device=self.device, dtype=self.dtype)
 
         stream = StreamDiffusion(
             pipe=pipe,
@@ -77,8 +82,32 @@
         )
         stream.load_lcm_lora(lcm_lora_id)
         stream.fuse_lora()
-        stream.vae = AutoencoderTiny.from_pretrained(vae_id).to(device=pipe.device, dtype=pipe.dtype)
-        stream = accelerate_with_stable_fast(stream)
+        stream.vae = AutoencoderTiny.from_pretrained(vae_id).to(
+            device=pipe.device, dtype=pipe.dtype
+        )
+
+        try:
+            from streamdiffusion.acceleration.tensorrt import accelerate_with_tensorrt
+
+            stream = accelerate_with_tensorrt(
+                stream,
+                "engines",
+                max_batch_size=self.batch_size,
+                engine_build_options={"build_static_batch": False},
+            )
+            print("TensorRT acceleration enabled.")
+        except Exception:
+            print("TensorRT acceleration has failed. Trying to use Stable Fast.")
+            try:
+                from streamdiffusion.acceleration.sfast import (
+                    accelerate_with_stable_fast,
+                )
+
+                stream = accelerate_with_stable_fast(stream)
+                print("StableFast acceleration enabled.")
+            except Exception:
+                print("StableFast acceleration has failed. Using normal mode.")
+                pass
 
         stream.prepare(
             "",
@@ -99,37 +128,27 @@
 
         return stream
 
-    def __call__(self, prompt: str) -> List[PIL.Image.Image]:
-        self.stream.prepare("")
-
-        images = []
-        for i in range(9 + 3):
-            start = torch.cuda.Event(enable_timing=True)
-            end = torch.cuda.Event(enable_timing=True)
-
-            start.record()
-
-            if self.prompt != prompt:
-                self.stream.update_prompt(prompt)
-                self.prompt = prompt
-
-            x_output = self.stream.txt2img()
-            if i >= 3:
-                image = postprocess_image(x_output, output_type="pil")[0]
-                if self.safety_checker:
-                    safety_checker_input = self.feature_extractor(
-                        image, return_tensors="pt").to(self.device)
-                    _, has_nsfw_concept = self.safety_checker(
-                        images=x_output, clip_input=safety_checker_input.pixel_values.to(
-                            self.dtype)
-                    )
-                    image = self.nsfw_fallback_img if has_nsfw_concept[0] else image
-                images.append(image)
-            end.record()
-
-            torch.cuda.synchronize()
+    def __call__(self, prompt: str) -> PIL.Image.Image:
+        if self.prompt != prompt:
+            self.stream.update_prompt(prompt)
+            self.prompt = prompt
+        for i in range(self.batch_size):
+            x_output = self.stream.txt2img()
+
+        x_output = self.stream.txt2img()
+        image = postprocess_image(x_output, output_type="pil")[0]
+
+        if self.safety_checker:
+            safety_checker_input = self.feature_extractor(
+                image, return_tensors="pt"
+            ).to(self.device)
+            _, has_nsfw_concept = self.safety_checker(
+                images=x_output,
+                clip_input=safety_checker_input.pixel_values.to(self.dtype),
+            )
+            image = self.nsfw_fallback_img if has_nsfw_concept[0] else image
 
-        return images
+        return image
 
 
 if __name__ == "__main__":
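
The hard stable-fast import is gone from the module level; acceleration is now best effort, preferring TensorRT and degrading gracefully. The same pattern in isolation, a sketch assuming only what the hunk shows (each accelerate_* helper returns the wrapped stream or raises):

def accelerate(stream, batch_size: int):
    """Try TensorRT first, then stable-fast, else return the stream unmodified."""
    try:
        from streamdiffusion.acceleration.tensorrt import accelerate_with_tensorrt

        # Engines are cached under ./engines; dynamic batch so batch_size can vary
        return accelerate_with_tensorrt(
            stream,
            "engines",
            max_batch_size=batch_size,
            engine_build_options={"build_static_batch": False},
        )
    except Exception:
        print("TensorRT acceleration has failed. Trying to use Stable Fast.")
    try:
        from streamdiffusion.acceleration.sfast import accelerate_with_stable_fast

        return accelerate_with_stable_fast(stream)
    except Exception:
        print("StableFast acceleration has failed. Using normal mode.")
        return stream

Note the new __call__ also loops batch_size calls to txt2img() before taking the frame it returns, apparently to flush StreamDiffusion's batched denoising queue so the output reflects the current prompt.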
view/.DS_Store ADDED
Binary file (6.15 kB)
 
view/src/App.tsx CHANGED
@@ -1,28 +1,75 @@
-import React, { useCallback, useEffect, useState } from "react";
-import { TextField, Grid, Paper } from "@mui/material";
+import React, { useCallback, useState } from "react";
+import { TextField, Grid } from "@mui/material";
 
 function App() {
   const [inputPrompt, setInputPrompt] = useState("");
-  const [images, setImages] = useState(Array(9).fill("images/white.jpg"));
+  const [lastPrompt, setLastPrompt] = useState("");
+  const [images, setImages] = useState(Array(16).fill("images/white.jpg"));
 
-  const fetchImages = useCallback(async () => {
-    try {
-      const response = await fetch("/api/predict", {
-        method: "POST",
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ prompt: inputPrompt })
-      });
-      const data = await response.json();
-      const imageUrls = data.base64_images.map((base64: string) => `data:image/jpeg;base64,${base64}`);
-      setImages(imageUrls);
-    } catch (error) {
-      console.error("Error fetching images:", error);
+  const calculateEditDistance = (a: string, b: string) => {
+    if (a.length === 0) return b.length;
+    if (b.length === 0) return a.length;
+
+    const matrix = [];
+
+    for (let i = 0; i <= b.length; i++) {
+      matrix[i] = [i];
+    }
+    for (let i = 0; i <= a.length; i++) {
+      matrix[0][i] = i;
     }
-  }, [inputPrompt]);
+
+    for (let i = 1; i <= b.length; i++) {
+      for (let j = 1; j <= a.length; j++) {
+        if (b.charAt(i - 1) === a.charAt(j - 1)) {
+          matrix[i][j] = matrix[i - 1][j - 1];
+        } else {
+          matrix[i][j] = Math.min(
+            matrix[i - 1][j - 1] + 1,
+            Math.min(matrix[i][j - 1] + 1, matrix[i - 1][j] + 1)
+          );
+        }
+      }
+    }
+
+    return matrix[b.length][a.length];
+  };
+
+  const fetchImage = useCallback(
+    async (index: number) => {
+      try {
+        const response = await fetch("/api/predict", {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ prompt: inputPrompt }),
+        });
+        const data = await response.json();
+        const imageUrl = `data:image/jpeg;base64,${data.base64_image}`;
+
+        setImages((prevImages) => {
+          const newImages = [...prevImages];
+          newImages[index] = imageUrl;
+          return newImages;
+        });
+      } catch (error) {
+        console.error("Error fetching image:", error);
+      }
+    },
+    [inputPrompt]
+  );
 
   const handlePromptChange = (event: React.ChangeEvent<HTMLInputElement>) => {
     setInputPrompt(event.target.value);
-    fetchImages();
+    const newPrompt = event.target.value;
+    const editDistance = calculateEditDistance(lastPrompt, newPrompt);
+
+    if (editDistance >= 2) {
+      setInputPrompt(newPrompt);
+      setLastPrompt(newPrompt);
+      for (let i = 0; i < 16; i++) {
+        fetchImage(i);
+      }
+    }
   };
 
   return (
@@ -48,20 +95,38 @@ function App() {
           flexDirection: "column",
         }}
       >
-        <Grid container spacing={2}>
-          {images.map((image, index) => (
-            <Grid item xs={4} key={index}>
-              <Paper style={{ padding: "10px", textAlign: "center" }}>
-                <img src={image} alt={`Generated ${index}`} style={{ maxWidth: "100%", maxHeight: "200px", borderRadius: "10px" }} />
-              </Paper>
-            </Grid>
-          ))}
-        </Grid>
+        <Grid
+          container
+          spacing={1}
+          style={{ maxWidth: "50%", maxHeight: "70%" }}
+        >
+          {images.map((image, index) => (
+            <Grid item xs={3} key={index}>
+              <img
+                src={image}
+                alt={`Generated ${index}`}
+                style={{
+                  maxWidth: "100%",
+                  maxHeight: "150px",
+                  borderRadius: "10px",
+                }}
+              />
+            </Grid>
+          ))}
+        </Grid>
         <TextField
          variant="outlined"
          value={inputPrompt}
          onChange={handlePromptChange}
-          style={{ marginBottom: "20px", marginTop: "20px", width: "640px", color: "#ffffff", borderColor: "#ffffff", borderRadius: "10px", backgroundColor: "#ffffff" }}
+          style={{
+            marginBottom: "20px",
+            marginTop: "20px",
+            width: "640px",
+            color: "#ffffff",
+            borderColor: "#ffffff",
+            borderRadius: "10px",
+            backgroundColor: "#ffffff",
+          }}
          placeholder="Enter a prompt"
        />
      </div>
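
The gating that main.py dropped reappears here: a prompt change only refreshes the 16-image grid once it sits at least 2 edits away from the last prompt sent. For comparison with the removed _calc_levenstein_distance, the same gate sketched in Python (the sample prompts are illustrative):

def edit_distance(a: str, b: str) -> int:
    # Wagner-Fischer with two rows; same result as calculateEditDistance in App.tsx
    if not a:
        return len(b)
    if not b:
        return len(a)
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        cur = [i]
        for j, cb in enumerate(b, start=1):
            cost = 0 if ca == cb else 1
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + cost))
        prev = cur
    return prev[len(b)]

last_prompt = "a cat"
new_prompt = "a cat in a hat"
if edit_distance(last_prompt, new_prompt) >= 2:
    last_prompt = new_prompt  # then refetch the grid, as handlePromptChange does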