Spaces:

MusIre
/

Dissertation

Sleeping

App Files Community

MusIre committed on 12 days ago

Commit

b7c2afa

verified ·

1 Parent(s): f23dba2

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -24

app.py CHANGED Viewed

@@ -86,8 +86,7 @@ scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, ver
 # Load GPT-Neo and CLIP
 model_clip = open_clip.create_model('ViT-B/32', pretrained='openai').to(device)
-image_size = (224, 224)
-preprocess_clip = open_clip.image_transform(image_size=image_size, is_train=False)
 tokenizer_clip = open_clip.get_tokenizer('ViT-B/32')
 model_clip.eval()
@@ -95,32 +94,43 @@ model_name = "EleutherAI/gpt-neo-1.3B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model_gptneo = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-# Generate prediction using ResNet and CLIP
-def predict(image_path):
     image = Image.open(image_path).convert("RGB")
-    image_tensor = data_transforms(image).unsqueeze(0).to(device)
-    # Predict with ResNet
-    style_logits, artist_logits = model_resnet(image_tensor)
-    style_idx = torch.argmax(style_logits, dim=1).item()
-    artist_idx = torch.argmax(artist_logits, dim=1).item()
-    predicted_style = list(label_map_style.keys())[list(label_map_style.values()).index(style_idx)]
-    predicted_artist = list(label_map_artist.keys())[list(label_map_artist.values()).index(artist_idx)]
-    # Enrich prompt with additional information
-    prompt = enrich_prompt(predicted_artist, predicted_style)
-    # Generate text description using GPT-Neo
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-    output = model_gptneo.generate(input_ids, max_length=350, num_return_sequences=1)
-    description = tokenizer.decode(output[0], skip_special_tokens=True)
-    return predicted_style, predicted_artist, description
 # Gradio interface
 def gradio_interface(image):
-    predicted_style, predicted_artist, description = predict(image)
     return f"Predicted Style: {predicted_style}\nPredicted Artist: {predicted_artist}\n\nDescription:\n{description}"
 iface = gr.Interface(

 # Load GPT-Neo and CLIP
 model_clip = open_clip.create_model('ViT-B/32', pretrained='openai').to(device)
+preprocess_clip = open_clip.image_transform((224, 224), is_train=False)
 tokenizer_clip = open_clip.get_tokenizer('ViT-B/32')
 model_clip.eval()
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model_gptneo = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+def generate_description(image_path):
     image = Image.open(image_path).convert("RGB")
+    image_resnet = data_transforms(image).unsqueeze(0).to(device)
+    model_resnet.eval()
+    with torch.no_grad():
+        outputs_style, outputs_artist = model_resnet(image_resnet)
+        _, predicted_style_idx = torch.max(outputs_style, 1)
+        _, predicted_artist_idx = torch.max(outputs_artist, 1)
+    idx_to_style = {v: k for k, v in label_map_style.items()}
+    idx_to_artist = {v: k for k, v in label_map_artist.items()}
+    predicted_style = idx_to_style[predicted_style_idx.item()]
+    predicted_artist = idx_to_artist[predicted_artist_idx.item()]
+    enriched_prompt = enrich_prompt(predicted_artist, predicted_style)
+    full_prompt = (
+        f"This is an artwork created by {predicted_artist} in the style of {predicted_style}. {enriched_prompt} "
+        "Describe its distinctive features, considering both the artist's techniques and the artistic style."
+    )
+    input_ids = tokenizer.encode(full_prompt, return_tensors="pt").to(device)
+    output = model_gptneo.generate(
+        input_ids=input_ids,
+        max_length=300,
+        temperature=0.7,
+        top_p=0.9,
+        repetition_penalty=1.2
+    )
+    description_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    return predicted_style, predicted_artist, description_text
 # Gradio interface
 def gradio_interface(image):
+    predicted_style, predicted_artist, description = generate_description(image)
     return f"Predicted Style: {predicted_style}\nPredicted Artist: {predicted_artist}\n\nDescription:\n{description}"
 iface = gr.Interface(