# Avatar quality scoring demo: CLIP image embeddings fed to a pretrained
# AutoGluon TabularPredictor, served through a Gradio web interface.
import os
import clip
import torch
import logging
import json
import pandas as pd
from PIL import Image
import gradio as gr
from autogluon.tabular import TabularPredictor
# Load the pretrained AutoGluon model from the local artifact directory.
# require_py_version_match=False: allow loading an artifact trained under a
# different Python minor version (NOTE(review): can mask pickle
# incompatibilities — confirm the serving Python is close to the training one).
predictor = TabularPredictor.load("ag-20240618_230402", require_py_version_match=False)
# set logging level
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Named logger used by the prediction path ("AQ" = avatar quality).
logger = logging.getLogger("AQ")
# CLIP backbone used to embed input images; loaded on CPU for inference.
CLIP_MODEL_NAME = "ViT-B/32"
clip_model, preprocess = clip.load(CLIP_MODEL_NAME, device="cpu")
def predict_fn(input_img):
    """Score an avatar image and return the result as a JSON string.

    Args:
        input_img: numpy array of shape (H, W, 3), as supplied by Gradio's
            image input component.

    Returns:
        str: JSON of the form ``{"quality_score": <float>}``.
    """
    # Gradio hands us a numpy array; convert to PIL for CLIP's preprocessor.
    pil_img = Image.fromarray(input_img.astype("uint8"), "RGB")
    image = preprocess(pil_img).unsqueeze(0)
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        image_features = clip_model.encode_image(image).numpy()
    # Single-row frame with positional integer columns 0..N-1.
    # NOTE(review): assumes the predictor was trained on identically named
    # columns — confirm against the training pipeline.
    input_df = pd.DataFrame(image_features[0].reshape(1, -1))
    quality_score = float(predictor.predict(input_df).iloc[0])
    logger.info("decision: %s", quality_score)
    # Bug fix: return a str, not bytes. The original encoded to UTF-8 bytes,
    # which Gradio's "text" output renders as the bytes repr (b'{...}').
    decision_json = json.dumps({"quality_score": quality_score})
    logger.info("decision_json: %s", decision_json)
    return decision_json
# Wire the prediction function into a simple Gradio demo:
# image in -> JSON quality score out.
iface = gr.Interface(
    fn=predict_fn,
    inputs="image",
    outputs="text",
    # Fixed user-facing copy: "returns quality score ... visual apeal"
    # -> "returns a quality score ... visual appeal".
    description="""
    The model returns a quality score for an avatar based on visual appeal and humanoid appearance.
    """,
    # Require an explicit click before a sample is flagged for review.
    allow_flagging="manual",
)
iface.launch()