# Page header captured with the source ("Spaces: Running") - not part of the program.
import streamlit as st | |
from ultralytics import YOLO | |
import tensorflow as tf # Change this to import TensorFlow | |
import numpy as np | |
from PIL import Image, ImageOps, ImageDraw, ImageFont | |
import pandas as pd | |
import time | |
from paddleocr import PaddleOCR, draw_ocr | |
import re | |
import dateparser | |
import os | |
import matplotlib.pyplot as plt | |
#######Llama3bi integration######## | |
import torch | |
from transformers import pipeline | |
model_id = "meta-llama/Llama-3.2-3B-Instruct"


@st.cache_resource
def _load_text_generation_pipeline(model_name):
    """Build the text-generation pipeline for *model_name*.

    Streamlit re-executes this script from the top on every widget
    interaction. Wrapping the construction in ``st.cache_resource`` keeps one
    pipeline per server process instead of rebuilding it on each rerun.

    Args:
        model_name: Model identifier passed to ``pipeline(model=...)``.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    return pipeline(
        "text-generation",
        model=model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )


pipe = _load_text_generation_pipeline(model_id)

# Conversation prefix sent to the LLM: a single system message describing the
# extraction task. User messages are added by the code that calls `pipe`.
messages = [
    {"role": "system", "content": """Your task is to get the product details out of the text given. The text given will be raw text from OCR of social media images of products,
and the goal is to get product details and description so that it can be used for amazon product listing. """},
]
# Function to get Instagram post details | |
import instaloader | |
def _extract_instagram_shortcode(post_url):
    """Return the shortcode that follows /p/, /reel/, /reels/ or /tv/ in *post_url*, or None."""
    # Ignore the query string and fragment so "?igsh=..." style suffixes do
    # not end up inside (or in place of) the shortcode.
    path = post_url.split('?', 1)[0].split('#', 1)[0]
    match = re.search(r'/(?:p|reels?|tv)/([A-Za-z0-9_-]+)', path)
    return match.group(1) if match else None


def get_instagram_post_details(post_url):
    """Fetch the caption and image URL of an Instagram post.

    The shortcode is parsed from the URL itself, so the URL may or may not
    end with a slash and may carry a query string.

    Args:
        post_url: URL of the post as entered by the user.

    Returns:
        ``(caption, image_url)`` on success. On any failure the first item is
        an error message string and the second item is ``None``.

    Note:
        Reads the module-level Instaloader instance named ``L``, which must
        have been created before this function is called.
    """
    shortcode = _extract_instagram_shortcode(post_url) if post_url else None
    if shortcode is None:
        return "Could not find a post shortcode in the given URL.", None
    try:
        post = instaloader.Post.from_shortcode(L.context, shortcode)
        # Retrieve caption and image URL
        return post.caption, post.url
    except Exception as e:
        # Best-effort lookup: report the failure to the caller instead of
        # raising, which is the contract callers already rely on.
        return str(e), None
# Initialize the PaddleOCR model once per server process. Streamlit reruns the
# whole script on every interaction, so the engine is built inside a cached
# factory instead of being constructed again on each rerun.
# NOTE(review): the cached engine is shared by all sessions - confirm that
# concurrent OCR calls are acceptable for this deployment.
@st.cache_resource
def _load_ocr_engine():
    """Create the PaddleOCR engine (angle classifier enabled, English)."""
    return PaddleOCR(use_angle_cls=True, lang='en')


ocr = _load_ocr_engine()
# Team roster shown on the "Team Details" page: one dict per member with the
# display name and the path of the member's photo.
team_members = [
    {"name": member_name, "image": photo_file}
    for member_name, photo_file in (
        ("Aman Deep", "aman.jpg"),
        ("Nandini", "myimage.jpg"),
        ("Abhay Sharma", "gaurav.jpg"),
        ("Ratan Prakash Mishra", "anandimg.jpg"),
    )
]
# Function to preprocess the images for the model | |
from PIL import Image | |
import numpy as np | |
def preprocess_image(image):
    """
    Preprocess the input image for model prediction.

    The image is converted to RGB first, so grayscale, palette, RGBA and CMYK
    inputs all produce exactly three channels, then resized to 128x128 and
    scaled to the [0, 1] range.

    Args:
        image (PIL.Image): Input image in PIL format.
    Returns:
        np.ndarray: Array of shape (1, 128, 128, 3) ready for prediction, or
        None if the image could not be processed.
    """
    try:
        # Normalise the colour mode before anything else; this replaces the
        # per-shape channel fix-ups and also covers 4-channel inputs.
        rgb_image = image.convert("RGB")
        # Resize image to match model input size (LANCZOS for quality)
        resized = rgb_image.resize((128, 128), Image.LANCZOS)
        # Normalize pixel values to [0, 1] range
        img_array = np.array(resized) / 255.0
        # Add batch dimension -> shape (1, 128, 128, 3)
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        # Callers receive None on failure, so log the reason and do not raise.
        print(f"Error processing image: {e}")
        return None
# Resize an image and give it a circular alpha channel
def make_image_circular1(img, size=(256, 256)):
    """Return *img* resized to *size* with everything outside the inscribed ellipse transparent.

    Args:
        img: PIL image to process.
        size: Target ``(width, height)`` tuple.

    Returns:
        The fitted image with the elliptical mask applied as its alpha band.
    """
    resized = img.resize(size, Image.LANCZOS)

    # White ellipse on black: white pixels stay opaque once used as alpha.
    alpha_mask = Image.new("L", size, 0)
    ImageDraw.Draw(alpha_mask).ellipse((0, 0) + size, fill=255)

    fitted = ImageOps.fit(resized, alpha_mask.size, centering=(0.5, 0.5))
    fitted.putalpha(alpha_mask)  # Apply the mask as transparency
    return fitted
# Check whether a path refers to an existing regular file
def file_exists(file_path):
    """Return True when *file_path* is an existing regular file, False otherwise."""
    is_regular_file = os.path.isfile(file_path)
    return is_regular_file
def make_image_circular(image):
    """Return an RGB copy of *image* that is visible only inside its inscribed ellipse.

    The result keeps the size of the input; pixels outside the ellipse keep
    the default fill of the new RGB canvas.
    """
    width, height = image.size[0], image.size[1]

    # Mask: 255 inside the ellipse spanning the full image, 0 elsewhere.
    ellipse_mask = Image.new("L", image.size, 0)
    ImageDraw.Draw(ellipse_mask).ellipse((0, 0, width, height), fill=255)

    # Paste the picture through the mask onto a fresh canvas.
    canvas = Image.new("RGB", image.size)
    rgba_source = image.convert("RGBA")
    canvas.paste(rgba_source, (0, 0), ellipse_mask)
    return canvas
# Pick out date-like OCR texts (regex pre-filter) and parse them with dateparser
def extract_dates_with_dateparser(texts, result):
    """Find dates among recognized texts.

    Args:
        texts: Recognized text strings; ``texts[i]`` is looked up as
            ``result[0][i]``.
        result: OCR result; ``result[0][i][0]`` is read as the bounding box
            and ``result[0][i][1][1]`` as the confidence score.

    Returns:
        Four parallel lists: parsed dates formatted ``YYYY-MM-DD``, the
        original texts, their bounding boxes and their confidence scores.
    """
    date_shape = re.compile(
        r'^(0[1-9]|[12][0-9]|3[01])[-/.]?(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|'
        r'^(0[1-9]|[12][0-9]|3[01])[-/.]?[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$|'
        r'^(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|'
        r'^[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$'
    )

    def is_potential_date(text):
        return date_shape.match(text) is not None

    dates_found, date_texts, date_boxes, date_scores = [], [], [], []
    for index, candidate in enumerate(texts):
        # Only texts shaped like a date are handed to the parser
        if not is_potential_date(candidate):
            continue
        parsed = dateparser.parse(candidate, settings={'DATE_ORDER': 'DMY'})
        if not parsed:
            continue
        dates_found.append(parsed.strftime('%Y-%m-%d'))  # Stored as 'YYYY-MM-DD'
        date_texts.append(candidate)
        detection = result[0][index]
        date_boxes.append(detection[0])       # bounding box
        date_scores.append(detection[1][1])   # confidence score
    return dates_found, date_texts, date_boxes, date_scores
# Lay images out in rows of Streamlit columns
def display_images_in_grid(images, max_images_per_row=4):
    """Render *images* row by row, at most *max_images_per_row* per row.

    Each row gets exactly as many columns as it has images, so a shorter
    last row is split into fewer, wider columns.
    """
    total = len(images)
    row_count = (total + max_images_per_row - 1) // max_images_per_row  # ceiling division
    for row_index in range(row_count):
        first = row_index * max_images_per_row
        row_images = images[first:first + max_images_per_row]
        columns = st.columns(min(max_images_per_row, total - first))
        for column, picture in zip(columns, row_images):
            with column:
                st.image(picture, use_column_width=True)
# Function to display team members in circular format
def display_team_members(members, max_members_per_row=4):
    """Render team members in a grid: circular photo with the name below it.

    A member whose photo file is missing is still listed; a warning is shown
    in place of the picture instead of the whole page failing.

    Args:
        members: Sequence of dicts with ``"name"`` and ``"image"`` (file path).
        max_members_per_row: Maximum number of members per grid row.
    """
    for start in range(0, len(members), max_members_per_row):
        row_members = members[start:start + max_members_per_row]
        # One column per member actually present in this row
        columns = st.columns(len(row_members))
        for column, member in zip(columns, row_members):
            with column:
                photo_path = member["image"]
                if os.path.isfile(photo_path):
                    # The context manager releases the file handle; the
                    # circular copy is fully built before the file closes.
                    with Image.open(photo_path) as photo:
                        circular_img = make_image_circular(photo)
                    st.image(circular_img, use_column_width=True)
                else:
                    st.warning(f"Image not found: {photo_path}")
                st.write(member["name"])  # Name below the image
# Main page title
st.title("Amazon Smbhav")

# Sidebar: branding, team name and the page selector. The selected value is
# kept in `app_mode`, which the page-routing branches below compare against.
with st.sidebar:
    st.title("Amazon Smbhav")
    st.write("DELHI TECHNOLOGICAL UNIVERSITY")
    st.title("Navigation")
    st.write("Team Name: sadhya")
    app_mode = st.selectbox("Choose the task", ["Welcome","Project Details", "Task 1","Team Details"])
# ---------------------------------------------------------------------------
# Page routing: exactly one branch runs per script execution, chosen by the
# sidebar selectbox value stored in `app_mode`.
# ---------------------------------------------------------------------------
if app_mode == "Welcome":
    st.write("# Welcome to Amazon Smbhav! 🎉")

    # Read the local intro video; the context manager closes the file handle.
    with open('Finalist.mp4', 'rb') as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes)

    # Welcome banner
    welcome_image = Image.open("grid_banner.jpg")
    st.image(welcome_image, use_column_width=True)

elif app_mode == "Project Details":
    st.markdown("""
## Navigation
- [Project Overview](#project-overview)
- [Proposal Round](#proposal-round)
- [Problem Statement](#problem-statement)
- [Proposed Solution](#proposed-solution)
""")
    # Project Overview. Blank lines surround every `---` so Markdown renders a
    # horizontal rule instead of turning the preceding paragraph into a heading.
    st.write("## Project Overview:")
    st.write("""
### Problem Statement
_Develop a system that automates Amazon product listings from social media content, extracting and organizing details from posts to generate accurate, engaging, and optimized listings._

---

### Solution Overview
Our system simplifies the listing process by analyzing social media content, using OCR, image recognition, LLMs, and internet data to create professional Amazon listings.

---

### Task Breakdown
#### Task 1: OCR for Image and Label Details
**Objective:** Extract core product details from images, labels, and packaging found in social media posts.
- **Tools:** PaddleOCR, LLMs.
- **Approach:**
- Use PaddleOCR to scan images for text, identifying product names, brands, and key features.
- Apply LLMs to refine extracted data, categorize key information (product name, type, features), and enhance product descriptions.
- Integrate internet sources to cross-verify product details, retrieve additional information, and collect metadata like the brand background or product specs.

---

#### Additional Task: Image Recognition & Object Counting
**Objective:** Quantify objects within social media images for batch products or multi-item listings.
- **Tools:** YOLOv8.
- **Approach:**
- Train YOLOv8 on a relevant dataset to recognize specific product types or packaging layouts.
- Use object detection counts to provide quantitative data (e.g., "3-item bundle"), enhancing accuracy in listings.

---

#### Task 2: Data Validation & Structuring
**Objective:** Organize and validate extracted information, ensuring it’s formatted to meet Amazon’s listing requirements.
- **Tools:** Regex, LLMs.
- **Approach:**
- Format and validate extracted details into Amazon-compliant structures (titles, descriptions, bullet points).
- Use regex and parser tools for accuracy checks.
- Leverage LLMs to create compelling descriptions and marketing brochures.
- Search online for supplementary media (images/videos) to enrich the listing.

---

#### Task 3: Amazon API Integration
**Objective:** Connect with Amazon’s API to publish fully formed product listings directly.
- **Tools:** Amazon MWS or Selling Partner API.
- **Approach:**
- Send structured listing data (text, media, product details) to Amazon’s API endpoints.
- Handle feedback for submission errors and make necessary adjustments.
- Develop a UI/dashboard for users to preview and edit listings before publishing.

---

### Future Enhancements
- **Model Improvement:** Further refine OCR and parsing accuracy.
- **Dashboard Development:** Enable users to preview and customize listings.
- **Multi-Market Compatibility:** Expand support to other e-commerce platforms.

This approach automates listing creation directly from social media content, helping sellers quickly launch optimized Amazon product pages.
""")

elif app_mode == "Team Details":
    st.write("## Meet Our Team:")
    display_team_members(team_members)
    st.write("Delhi Technological University")

elif app_mode == "Task 1":
    st.write("## Task 1: 🖼️ OCR to Extract Details 📄")
    st.write("Using OCR to extract details from product packaging material, including brand name and pack size.")

    # --- Helpers used by the analysis step below ---------------------------

    def simulate_progress():
        """Animate a progress bar from 1% to 100% (about two seconds in total)."""
        progress_bar = st.progress(0)
        for percent_complete in range(100):
            time.sleep(0.02)
            progress_bar.progress(percent_complete + 1)

    def clean_text(text):
        """Drop every character that is not a letter, digit or whitespace, then trim the ends."""
        return re.sub(r'[^a-zA-Z0-9\s]', '', text).strip()

    def extract_product_info(results):
        """Split OCR detections into a product name and the remaining details.

        Only detections with confidence above 0.7 whose cleaned text is longer
        than two characters are considered. The text inside the tallest box is
        taken as the product name (the first one wins a tie); every other
        accepted text is kept, in reading order, as the details.

        Args:
            results: OCR output: a list with one entry per image, each entry
                being a list of ``[box, (text, confidence)]`` items or
                ``None`` when nothing was detected.

        Returns:
            ``(product_name, product_details)``; both are empty strings when
            nothing usable was detected.
        """
        candidates = []  # (box_height, cleaned_text) in reading order
        for line in results or []:
            # A page without detections is reported as None, not as [].
            for box in line or []:
                text, confidence = box[1][0], box[1][1]
                # Box height: y of the third corner minus y of the first corner
                text_size = box[0][2][1] - box[0][0][1]
                cleaned = clean_text(text)
                if confidence > 0.7 and len(cleaned) > 2:
                    candidates.append((text_size, cleaned))
        if not candidates:
            return "", ""

        # max() returns the first maximal item, so an earlier box wins a tie.
        name_index = max(range(len(candidates)), key=lambda i: candidates[i][0])
        if candidates[name_index][0] <= 0:
            # No box with a positive height: nothing qualifies as the name.
            name_index = None
        product_name = "" if name_index is None else candidates[name_index][1]
        details = [text for i, (_, text) in enumerate(candidates) if i != name_index]
        return product_name, " ".join(details)

    # --- Instagram post lookup ---------------------------------------------

    # get_instagram_post_details() reads this module-level loader as `L`.
    L = instaloader.Instaloader()

    st.title("Instagram Post Details Extractor")
    post_url = st.text_input("Enter Instagram Post URL:")
    if post_url:
        caption, image_url = get_instagram_post_details(post_url)
        # On failure the helper returns None as the second item.
        if image_url:
            st.subheader("Caption:")
            st.write(caption)
            st.subheader("Image:")
            # `image_url` is a remote URL, not a local file, so hand it to
            # st.image directly; there is no temporary file to open or delete.
            st.image(image_url, use_column_width=True)
        else:
            st.error("Failed to retrieve the post details. Please check the URL.")

    # --- Uploaded product images -------------------------------------------

    # File uploader for images (supports multiple files)
    uploaded_files = st.file_uploader("Upload images of products", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
    if uploaded_files:
        st.write("### Uploaded Images in Circular Format:")
        circular_images = [make_image_circular(Image.open(uploaded_file)) for uploaded_file in uploaded_files]
        # Display the circular images in a matrix/grid format
        display_images_in_grid(circular_images, max_images_per_row=4)

        if st.button("Start Analysis"):
            simulate_progress()
            for uploaded_image in uploaded_files:
                # Convert to RGB so palette/RGBA uploads reach the OCR engine
                # as a plain 3-channel array.
                image = Image.open(uploaded_image).convert("RGB")
                img_array = np.array(image)

                st.write(f"Extracting details from {uploaded_image.name}...")
                result = ocr.ocr(img_array, cls=True)

                # Send the recognized text to the LLM. A fresh conversation is
                # built per image so the shared `messages` prefix never grows.
                ocr_text = "\n".join(
                    box[1][0] for line in result or [] for box in line or []
                )
                if ocr_text:
                    conversation = messages + [{"role": "user", "content": ocr_text}]
                    outputs = pipe(
                        conversation,
                        max_new_tokens=256,
                    )
                    # The last chat entry is the assistant message dict; only
                    # its text is rendered.
                    product_listing_content = outputs[0]["generated_text"][-1]["content"]
                    st.markdown(product_listing_content)

                # Process the OCR result to extract product name and properties
                product_name, product_details = extract_product_info(result)

                # UI display for single image product details
                st.markdown("---")
                st.markdown(f"### **Product Name:** `{product_name}`")
                st.write(f"**Product Properties:** {product_details}")
                st.markdown("---")
    else:
        st.write("Please upload images to extract product details.")
def make_image_circular1(image):
    """Return an RGB copy of *image* showing only the area inside its inscribed ellipse.

    NOTE: this definition rebinds the name of the earlier
    ``make_image_circular1(img, size=...)`` defined above in this file.
    """
    full_extent = (0, 0, image.size[0], image.size[1])

    # Elliptical mask: 255 inside, 0 outside
    stencil = Image.new("L", image.size, 0)
    ImageDraw.Draw(stencil).ellipse(full_extent, fill=255)

    # Copy the picture through the mask onto a new RGB canvas
    result = Image.new("RGB", image.size)
    result.paste(image.convert("RGBA"), (0, 0), stencil)
    return result
def display_images_in_grid1(images, max_images_per_row=4):
    """Render *images* in rows that always have *max_images_per_row* columns.

    Every row is created with the full number of columns, so images in a
    shorter last row keep the same width as those in full rows; the unused
    columns stay empty. Each image is converted to RGB before display.
    """
    for start in range(0, len(images), max_images_per_row):
        row_images = images[start:start + max_images_per_row]
        columns = st.columns(max_images_per_row)
        # zip stops at the shorter sequence, so no index check is needed.
        for column, img in zip(columns, row_images):
            column.image(img.convert("RGB"), use_column_width=True)
# Page footer: a centered copyright line that fades in via a CSS animation.
# Raw HTML/CSS is injected, hence unsafe_allow_html=True.
_FOOTER_HTML = """
<style>
@keyframes fade-in {
from { opacity: 0; }
to { opacity: 1;}
}
.footer {
text-align: center;
font-size: 1.1em;
animation: fade-in 2s;
padding-top: 2rem;
}
</style>
<div class="footer">
<p>© 2024 Amazon Smbhav Challenge. All rights reserved.</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)