Spaces:

AlexCool2024
/

video_screen

Running

File size: 2,827 Bytes

64fa793
 
 
a24791f
b71294f
 
 
 
 
c3906ca
 
 
 
 
 
e65e6e5
a24791f
64fa793
e65e6e5
64fa793
 
e65e6e5
4504b4a
 
64fa793
 
 
 
e65e6e5
64fa793
 
 
 
4504b4a
 
 
 
 
e65e6e5
4504b4a
b71294f
 
 
 
4504b4a
b71294f
 
 
 
 
 
9572852
b71294f
 
 
 
 
 
 
 
a24791f
b71294f
 
64fa793
4504b4a
64fa793
4504b4a
e65e6e5
4504b4a
 
07ecc9d

import os
import tempfile

import cv2
import numpy as np
import streamlit as st
from gradio_client import Client
from PIL import Image

# Initialise the client for the new OCR API.
client = Client("https://pragnakalp-ocr-image-to-text.hf.space/--replicas/lhzf3/")

# Sample request against the "/predict" endpoint. Both positional arguments
# must be string literals; left unquoted they are a syntax error that keeps
# the whole script from loading.
# NOTE(review): this is a module-level network call, so it runs every time
# the script is executed — consider removing it once the endpoint is verified.
result = client.predict(
    "PaddleOCR",  # 'Method' Radio component: "PaddleOCR", "EasyOCR" or "KerasOCR"
    "https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png",  # filepath for the 'img' Image component
    api_name="/predict",
)
print(result)

# Application title.
st.title("Video Frame to Image Description")

# Video file upload widget.
uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])

cap = None  # Stays None until a capture object has been created.

if uploaded_file is not None:
    video_path = None  # Temp copy of the uploaded video, removed in `finally`.
    image_path = None  # Temp JPEG of the selected frame, removed in `finally`.
    try:
        # cv2.VideoCapture is given a filesystem path, so the upload is copied
        # to a temporary file. The original extension is kept for the decoder,
        # and the `with` block closes the file so every byte is flushed to
        # disk before OpenCV opens it.
        video_suffix = os.path.splitext(uploaded_file.name)[1]
        with tempfile.NamedTemporaryFile(suffix=video_suffix, delete=False) as tfile:
            tfile.write(uploaded_file.read())
            video_path = tfile.name

        # Open the video and read its reported frame count.
        cap = cv2.VideoCapture(video_path)
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if length > 0:
            # Seek to a random frame index in [0, length) and decode it.
            random_frame = np.random.randint(length)
            cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame)
            ret, frame = cap.read()

            if ret:
                # Convert the OpenCV frame from BGR to RGB and wrap it as a PIL Image.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pil_image = Image.fromarray(frame_rgb)

                # Show the selected frame.
                st.image(pil_image, caption=f"Random Frame {random_frame}")

                # Write the frame to a temporary JPEG; the API call below is
                # given the path of this file.
                with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as buf:
                    pil_image.save(buf, format='JPEG')
                    image_path = buf.name

                # Send the image to the OCR API.
                try:
                    result = client.predict(
                        "PaddleOCR",  # OCR method to use
                        image_path,  # Path to the image
                        api_name="/predict"
                    )
                    # NOTE(review): gradio_client's predict() is documented to
                    # return the endpoint output itself rather than a
                    # {'data': ...} payload — confirm against the Space. Both
                    # shapes are accepted so a plain string is not indexed.
                    if isinstance(result, dict) and 'data' in result:
                        description = result['data']
                    else:
                        description = result
                    st.success(f"Generated Description: {description}")
                except Exception as e:
                    # UI boundary: report any failure of the request to the user.
                    st.error(f"Error: Could not get a response from the model. {str(e)}")
            else:
                st.error("Error: Could not read a frame from the video.")
        else:
            st.error("Error: Video file does not contain any frames.")
    finally:
        # Release the capture first so the video file is no longer held open,
        # then delete the temporary files created above.
        if cap is not None:
            cap.release()
        for leftover_path in (video_path, image_path):
            if leftover_path is None:
                continue
            try:
                os.unlink(leftover_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the result.
                pass