Spaces: Runtime error
change file name #2
by RoniFinTech - opened
- Dockerfile → Dockerfile.hf +4 -7
- README.md +0 -4
- __init__.py +0 -0
- cache/__init__.py +0 -0
- cache/local_cache.py +0 -42
- config.py +0 -15
- main.py +56 -30
- requirements.txt +1 -6
- routers/__init__.py +0 -0
- routers/intference/__init__.py +0 -0
- routers/intference/stable_diffusion.py +0 -70
Dockerfile → Dockerfile.hf
RENAMED
@@ -1,12 +1,9 @@
-# Use
-FROM
+# Use the official Python 3.9 image
+FROM python:3.9
 
 # Set the working directory to /code
 WORKDIR /code
 
-# Install Python
-RUN apt-get update && apt-get install -y python3.10 python3-pip
-
 # Copy the current directory contents into the container at /code
 COPY ./requirements.txt /code/requirements.txt
 
@@ -18,7 +15,7 @@ RUN useradd -m -u 1000 user
 # Switch to the "user" user
 USER user
 # Set home to the user's home directory
-ENV HOME=/home/user
+ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
 # Set the working directory to the user's home directory
@@ -28,4 +25,4 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 
 # Start the FastAPI app on port 7860, the default port expected by Spaces
-CMD ["uvicorn", "
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
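Note that the new CMD points uvicorn at app:app, but the entry module changed in this PR is main.py and no app.py appears in the file list above. Unless an app.py exists elsewhere in the repo, the container will exit at startup with an import error, which would match the Space's "Runtime error" status. Assuming main.py is the intended entry module, the line would read:

    CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]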
README.md
CHANGED
@@ -9,10 +9,6 @@ colorTo: gray
 
 sdk: docker
 
-python_version: 3.9
-
-suggested_hardware: a10g-small
-
 pinned: false
 
 license: openrail
__init__.py
DELETED
File without changes

cache/__init__.py
DELETED
File without changes
cache/local_cache.py
DELETED
@@ -1,42 +0,0 @@
-from datetime import datetime, timedelta
-from functools import wraps
-from io import BytesIO
-
-from fastapi.responses import StreamingResponse
-
-CACHE_SIZE = 50
-
-_cache = {}
-_cache_time = {}
-
-
-def ttl_cache(key_name, media_type=None, ttl_secs=20):
-    def decorator(func):
-        @wraps(func)
-        async def wrapper(*args, **kwargs):
-            # Assuming the prompt is the key for caching, change as necessary
-            key = kwargs.get(key_name)
-            ttl = timedelta(seconds=ttl_secs)
-            # Check cache
-            if key in _cache:
-                if datetime.now() - _cache_time[key] > ttl:
-                    # Cache has expired
-                    del _cache[key]
-                    del _cache_time[key]
-                else:
-                    # if media_type == 'image/png':
-                    #     return StreamingResponse(BytesIO(_cache[key]), media_type=media_type)
-                    # else:
-                    return StreamingResponse(BytesIO(_cache[key]), media_type="image/png")
-
-            # Call the actual function if not in cache or expired
-            response, image_data = await func(*args, **kwargs)
-            # Cache the content of the response's body.
-            _cache[key] = image_data
-            _cache_time[key] = datetime.now()
-
-            return response
-
-        return wrapper
-
-    return decorator
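The decorator above cached raw PNG bytes keyed by one keyword argument and expected the wrapped route to return a (response, image_bytes) tuple, as the deleted routers/intference/stable_diffusion.py did; note that CACHE_SIZE was declared but never enforced, so entries only left the cache on TTL expiry. A minimal usage sketch (the /thumbnail route and render_png helper are hypothetical, for illustration only):

    from io import BytesIO

    from fastapi import APIRouter
    from fastapi.responses import StreamingResponse
    from PIL import Image

    from cache.local_cache import ttl_cache

    router = APIRouter()


    def render_png(prompt: str) -> bytes:
        # Hypothetical stand-in for real image generation: a 1x1 black PNG.
        buf = BytesIO()
        Image.new("RGB", (1, 1)).save(buf, format="PNG")
        return buf.getvalue()


    @router.get("/thumbnail")
    @ttl_cache(key_name="prompt", media_type="image/png", ttl_secs=20)
    async def thumbnail(prompt: str):
        image_data = render_png(prompt)
        # ttl_cache stores image_data under kwargs["prompt"] and forwards the
        # response; within the TTL it short-circuits and streams the cached bytes.
        return StreamingResponse(BytesIO(image_data), media_type="image/png"), image_data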
config.py
DELETED
@@ -1,15 +0,0 @@
-import os
-
-from pydantic import BaseModel
-
-
-class Settings(BaseModel):
-    hf_token: str = os.environ.get("hf_token")
-    base_sd_model: str = os.environ.get("base_sd_model") or "stabilityai/stable-diffusion-xl-base-1.0"
-    refiner_sd_model: str = os.environ.get("refiner_sd_model") or "stabilityai/stable-diffusion-xl-refiner-1.0"
-    version: str = "0.1.0"
-    url_version: str = "v1"
-    prefix: str = "/v1/unik-ml"
-
-
-settings = Settings()
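For reference, these settings fed the old main.py below: login(settings.hf_token) and the settings.prefix-based documentation URLs. The diff below truncates the surrounding FastAPI(...) call, so the following is a hypothetical sketch of that pattern, not a recovery of the original lines:

    from fastapi import FastAPI
    from huggingface_hub import login

    from config import settings

    login(settings.hf_token)

    # Hypothetical reconstruction; only the three URL kwargs are attested in the diff.
    app = FastAPI(
        version=settings.version,
        docs_url=f"{settings.prefix}/docs",
        redoc_url=f"{settings.prefix}/redoc",
        swagger_ui_oauth2_redirect_url=f"{settings.prefix}/docs/oauth2-redirect")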
main.py
CHANGED
@@ -1,38 +1,64 @@
-import
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from huggingface_hub import login
-
-from config import settings
-from routers.intference import stable_diffusion
-
-login(settings.hf_token)
+from io import BytesIO
 
-
-
-
-
-    docs_url=f"{settings.prefix}/docs",
-    redoc_url=f"{settings.prefix}/redoc",
-    swagger_ui_oauth2_redirect_url=f"{settings.prefix}/docs/oauth2-redirect")
+import torch
+from diffusers import DiffusionPipeline
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
 
-
-
-
-
-
-
+# load both base & refiner
+base = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+)
+base.to("cuda")
+# base.enable_model_cpu_offload()
+base.enable_attention_slicing()
+refiner = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-refiner-1.0",
+    text_encoder_2=base.text_encoder_2,
+    vae=base.vae,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
+    variant="fp16",
 )
+refiner.to("cuda")
+# refiner.enable_model_cpu_offload()
+refiner.enable_attention_slicing()
 
+# Create a new FastAPI app instance
+app = FastAPI()
 
-@app.get("/")
-async def root():
-    return {"message": "UNIK ML API"}
 
+# Define a function to handle the GET request at `/generate`
+# The generate() function is defined as a FastAPI route that takes a
+# string parameter called text. The function generates text based on the # input using the pipeline() object, and returns a JSON response
+# containing the generated text under the key "output"
+@app.get("/generate")
+def generate(text: str):
+    """
+    generate image
+    """
+    # Define how many steps and what % of steps to be run on each experts (80/20) here
+    n_steps = 40
+    high_noise_frac = 0.8
+    negative = "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly. bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, disgusting, poorly drawn hands, missing limb, floating limbs, disconnected limbs, malformed hands, blurry, mutated hands and fingers, watermark, watermarked, oversaturated, censored, distorted hands, amputation, missing hands, obese, doubled face, double hands, two women, anime style, cartoon, toon."
+    prompt = "Designs should play with different textures and layering but stick to a monochrome palette. Think leather jackets over mesh tops, or satin draped over matte cotton. in a studio. zoomed-in. single model."
 
-
+    # run both experts
+    image = base(
+        prompt=prompt,
+        negative_prompt=negative,
+        num_inference_steps=n_steps,
+        denoising_end=high_noise_frac,
+        output_type="latent",
+    ).images
+    final_image = refiner(
+        prompt=prompt,
+        negative_prompt=negative,
+        num_inference_steps=n_steps,
+        denoising_start=high_noise_frac,
+        image=image,
+    ).images[0]
 
-
-#
-#
-#
+    return StreamingResponse(BytesIO(final_image), media_type="image/png")
+    # Return the generated text in a JSON response
+    # return {"output": output[0]["generated_text"]}
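Two problems in the new main.py deserve attention before merge. First, the text query parameter is accepted but never used; the prompt and negative prompt are hardcoded, so every request renders the same image. Second, refiner(...).images[0] is a PIL image, and BytesIO() only accepts bytes-like objects, so the final StreamingResponse line raises a TypeError at request time; the deleted router encoded the image to PNG bytes first. A corrected tail for generate(), assuming the rest of the function stays as committed:

    # Encode the PIL image to PNG bytes before streaming it back.
    buffer = BytesIO()
    final_image.save(buffer, format="PNG")
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

With that fix in place, GET /generate?text=anything on port 7860 returns a PNG stream.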
requirements.txt
CHANGED
@@ -1,12 +1,7 @@
-# --extra-index-url https://download.pytorch.org/whl/cu118
 fastapi==0.100.1
 pydantic==2.1.1
 pylint==2.17.5
 uvicorn>=0.23.2
 torch==2.0.1
 transformers==4.31.0
-accelerate==0.21.0
-diffusers==0.19.3
-torchvision==0.15.2
-safetensors==0.3.1
-huggingface-hub==0.16.4
+accelerate==0.21.0
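The trimmed requirements keep torch and transformers but drop diffusers, torchvision, safetensors, and huggingface-hub, while the new main.py still runs from diffusers import DiffusionPipeline with use_safetensors=True. Unless those packages are installed transitively, the app will fail at import time; at minimum the two pins the code exercises directly would need restoring, e.g. at the previously pinned versions:

    diffusers==0.19.3
    safetensors==0.3.1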
routers/__init__.py
DELETED
File without changes

routers/intference/__init__.py
DELETED
File without changes
routers/intference/stable_diffusion.py
DELETED
@@ -1,70 +0,0 @@
-# load both base & refiner
-from io import BytesIO
-
-import torch
-from diffusers import DiffusionPipeline
-from fastapi import APIRouter
-from fastapi.responses import StreamingResponse
-
-from cache.local_cache import ttl_cache
-from config import settings
-
-router = APIRouter()
-
-base = DiffusionPipeline.from_pretrained(
-    settings.base_sd_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
-)
-
-base.to("cuda")
-# base.enable_model_cpu_offload()
-base.enable_attention_slicing()
-refiner = DiffusionPipeline.from_pretrained(
-    settings.refiner_sd_model,
-    text_encoder_2=base.text_encoder_2,
-    vae=base.vae,
-    torch_dtype=torch.float16,
-    use_safetensors=True,
-    variant="fp16",
-)
-refiner.to("cuda")
-# refiner.enable_model_cpu_offload()
-refiner.enable_attention_slicing()
-
-
-@router.get("/generate")
-@ttl_cache(key_name='prompt', media_type="image/png", ttl_secs=20)
-async def generate(prompt: str):
-    """
-    generate image
-    """
-    # Define how many steps and what % of steps to be run on each experts (80/20) here
-    n_steps = 40
-    high_noise_frac = 0.8
-    negative = "disfigured, ugly, bad, immature, cartoon, anime, 3d, painting, b&w, sketch, blurry, deformed, bad anatomy, poorly drawn face, mutation, multiple people."
-
-    prompt = f"single image. single model. {prompt}. zoomed in. full-body. real person. realistic. 4k. best quality."
-    print(prompt)
-
-    # run both experts
-    image = base(
-        prompt=prompt,
-        negative_prompt=negative,
-        num_inference_steps=n_steps,
-        denoising_end=high_noise_frac,
-        output_type="latent",
-    ).images[0]
-    final_image = refiner(
-        prompt=prompt,
-        negative_prompt=negative,
-        num_inference_steps=n_steps,
-        denoising_start=high_noise_frac,
-        image=image,
-    ).images[0]
-
-    memory_stream = BytesIO()
-    final_image.save(memory_stream, format="PNG")
-    image_data = memory_stream.getvalue()  # get bytes of the image
-
-    memory_stream.seek(0)
-    return StreamingResponse(memory_stream, media_type="image/png"), image_data
-