File size: 1,348 Bytes
05b1066
 
 
 
 
 
 
 
 
 
272c24b
05b1066
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0029af0
05b1066
 
 
 
 
 
 
 
 
 
 
 
0029af0
05b1066
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import clip
import torch
import logging
import json
import pandas as pd
from PIL import Image
import gradio as gr
from autogluon.tabular import TabularPredictor

# Static configuration: where the trained AutoGluon predictor lives, which
# CLIP variant produces the image embeddings, and how log records are laid out.
PREDICTOR_PATH = "ag-20240618_230402"
CLIP_MODEL_NAME = "ViT-B/32"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Load the tabular predictor once at start-up. require_py_version_match=False
# is passed through to AutoGluon (see its TabularPredictor.load docs).
predictor = TabularPredictor.load(PREDICTOR_PATH, require_py_version_match=False)

# Emit INFO-and-above records using the format defined above.
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("AQ")

# clip.load yields the model together with its matching preprocessing
# callable; both are kept on the CPU.
clip_model, preprocess = clip.load(CLIP_MODEL_NAME, device="cpu")


def predict_fn(input_img):
    """Score one avatar image and return the result as JSON bytes.

    The image is run through the module-level CLIP ``preprocess`` callable and
    ``clip_model.encode_image``; the resulting embedding is reshaped into a
    one-row DataFrame and handed to the module-level AutoGluon ``predictor``.

    Args:
        input_img: Pixel data exposing ``astype`` that ``Image.fromarray`` can
            read as RGB (presumably the H x W x 3 NumPy array supplied by the
            Gradio "image" input -- confirm against the Gradio version in use).

    Returns:
        UTF-8 encoded JSON bytes of the form ``{"quality_score": <float>}``.

    Raises:
        ValueError: If ``input_img`` is ``None`` (no image was supplied).
    """
    # Fail early with an explicit message instead of an AttributeError on
    # ``None.astype`` further down.
    if input_img is None:
        raise ValueError("No image provided: upload an image before submitting.")

    pil_image = Image.fromarray(input_img.astype("uint8"), "RGB")
    # unsqueeze(0) adds the leading batch dimension before encoding.
    batch = preprocess(pil_image).unsqueeze(0)
    with torch.no_grad():
        image_features = clip_model.encode_image(batch).numpy()
        # The predictor is fed a single row holding the flattened embedding.
        input_df = pd.DataFrame(image_features[0].reshape(1, -1))
        quality_score = float(predictor.predict(input_df).iloc[0])

    logger.info("decision: %s", quality_score)
    # NOTE(review): the result is returned as bytes, not str; a text output
    # will presumably render it via its string conversion (b'...') -- confirm
    # this is intended before changing the return type.
    decision_json = json.dumps({"quality_score": quality_score}).encode("utf-8")
    logger.info("decision_json: %s", decision_json)
    return decision_json


# Gradio front end: a single image input wired to predict_fn, with the result
# shown in a text output and manual flagging enabled.
iface = gr.Interface(
    fn=predict_fn,
    inputs="image",
    outputs="text",
    description="""
    The model returns a quality score for an avatar based on visual appeal and humanoid appearance.
    """,
    allow_flagging="manual",
)
# Launch unconditionally, exactly as the script did before, so running or
# importing this module still starts the app.
iface.launch()