Spaces:

WpythonW
/

interface

Sleeping

File size: 6,052 Bytes

8ae97f5

import os
import cv2
import torch
import pandas as pd
from tqdm import tqdm
from ultralytics import YOLO
from PIL import Image
import pillow_heif
import numpy as np

class MediaProcessor:
    """Run a YOLO detector over images and videos and save annotated copies.

    Annotated media are written into ``output_path``; per-video detection
    tables are written as CSV files into a ``metadata`` directory relative to
    the current working directory.
    """

    # Fallback box colour (RGB) for class ids that are missing from ``colors``.
    _DEFAULT_COLOR = (0, 255, 0)
    # Extensions that are decoded with pillow_heif instead of OpenCV.
    _HEIF_EXTENSIONS = ('.heic', '.heif')
    # Directory that receives the per-video detection CSV files.
    _METADATA_DIR = 'metadata'

    def __init__(self, output_path, model_path, batch_size=16):
        """Load the model and remember where results should be written.

        Args:
            output_path: Directory for annotated images and videos.
            model_path: Path of the YOLO weights passed to ``YOLO(...)``.
            batch_size: Number of video frames sent to the model per call.
        """
        self.output_path = output_path
        self.model_path = model_path
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = YOLO(self.model_path).to(self.device)
        # Colours are stored as RGB triples keyed by class id; they are
        # converted to BGR at draw time because OpenCV images are BGR.
        # NOTE(review): class names below come from the inline annotations,
        # confirm them against ``self.model.names``.
        self.colors = {
            0: (255, 0, 0),    # quadrotor - red
            1: (0, 255, 0),    # airplane - green
            2: (0, 0, 255),    # helicopter - blue
            3: (255, 255, 0),  # bird - yellow
            4: (255, 0, 255)   # uav-plane - magenta
        }
        self.batch_size = batch_size

    @staticmethod
    def _rgb_to_bgr(color):
        """Return the RGB triple ``color`` reordered as a BGR tuple."""
        return tuple(color[::-1])

    @classmethod
    def _is_heif(cls, path):
        """Return True when ``path`` has a HEIC/HEIF extension (any case)."""
        return path.lower().endswith(cls._HEIF_EXTENSIONS)

    def _annotate(self, image, result, thickness):
        """Draw every detection of ``result`` onto ``image`` in place.

        Args:
            image: BGR image array that is modified in place.
            result: One element of the list returned by ``self.model(...)``.
            thickness: Line thickness for both the box and the label text.

        Returns:
            A list of ``(class_name, confidence, x1, y1, x2, y2)`` tuples,
            one per drawn box, with integer pixel coordinates.
        """
        detections = []
        names = self.model.names
        for box in result.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            conf = box.conf[0].item()
            cls_id = int(box.cls[0].item())
            name = names[cls_id]
            color = self._rgb_to_bgr(self.colors.get(cls_id, self._DEFAULT_COLOR))
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
            cv2.putText(image, f'{name} {conf:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
            detections.append((name, conf, x1, y1, x2, y2))
        return detections

    def _flush_video_batch(self, frames, first_frame_num, fps, writer, rows, pbar):
        """Detect, annotate and write one batch of consecutive video frames.

        ``first_frame_num`` is the 1-based index of ``frames[0]`` in the
        video. One row per detection is appended to ``rows``.
        """
        results = self.model(frames, verbose=False)
        for offset, (frame, result) in enumerate(zip(frames, results)):
            frame_num = first_frame_num + offset
            # Some containers report fps as 0; record NaN instead of crashing.
            timestamp = frame_num / fps if fps > 0 else float('nan')
            for name, conf, x1, y1, x2, y2 in self._annotate(frame, result, thickness=1):
                rows.append([frame_num, timestamp, name, conf, x1, y1, x2, y2])
            writer.write(frame)
            pbar.update(1)

    def process_single_video(self, video_path):
        """Annotate one video and save its detections as CSV.

        The annotated video is written to ``output_path`` under the input's
        base name, and a ``<name>_detection_results.csv`` table is written to
        the ``metadata`` directory. Frame numbers in the table are 1-based.

        Args:
            video_path: Path of the video to process.

        Returns:
            Path of the annotated output video.
        """
        os.makedirs(self.output_path, exist_ok=True)
        os.makedirs(self._METADATA_DIR, exist_ok=True)

        video_name = os.path.basename(video_path)
        output_video_path = os.path.join(self.output_path, video_name)
        columns = ['frame_num', 'timestamp', 'class', 'confidence', 'x1', 'y1', 'x2', 'y2']
        rows = []

        cap = cv2.VideoCapture(video_path)
        writer = None
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fourcc = cv2.VideoWriter_fourcc(*'avc1')
            writer = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

            frames = []
            frames_read = 0
            # The reported frame count is only used for the progress bar; an
            # unknown (non-positive) count becomes an open-ended bar.
            bar_total = total_frames if total_frames > 0 else None
            with tqdm(total=bar_total, desc=f"Processing Video {video_name}", position=0, leave=True) as pbar:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    frames.append(frame)
                    frames_read += 1

                    if len(frames) >= self.batch_size:
                        self._flush_video_batch(frames, frames_read - len(frames) + 1,
                                                fps, writer, rows, pbar)
                        frames = []

                # Flush the final partial batch based on what was actually
                # read, not on the container's reported frame count.
                if frames:
                    self._flush_video_batch(frames, frames_read - len(frames) + 1,
                                            fps, writer, rows, pbar)
        finally:
            # Release the capture and writer even when inference fails.
            cap.release()
            if writer is not None:
                writer.release()
        # Close any HighGUI windows left open (none are created here).
        cv2.destroyAllWindows()

        df = pd.DataFrame(rows, columns=columns)
        df.to_csv(os.path.join(self._METADATA_DIR, f"{video_name}_detection_results.csv"), index=False)
        print(df)
        return output_video_path

    def load_image(self, path):
        """Load an image file as a BGR array.

        HEIC/HEIF files are decoded with pillow_heif; everything else goes
        through ``cv2.imread``.

        Returns:
            The BGR image array, or ``None`` when ``cv2.imread`` cannot read
            the file.
        """
        if self._is_heif(path):
            heif_file = pillow_heif.open_heif(path)
            image = Image.frombytes(
                heif_file.mode,
                heif_file.size,
                heif_file.data,
                "raw",
                heif_file.mode,
                heif_file.stride,
            )
            # Normalise to 3-channel RGB first so images with an alpha
            # channel do not break the RGB -> BGR conversion.
            return cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
        return cv2.imread(path)

    def process_images(self, input_paths):
        """Annotate a batch of images and save each one as PNG.

        Images that cannot be read are reported and skipped, so the returned
        list may be shorter than ``input_paths``.

        Args:
            input_paths: Paths of the images to process.

        Returns:
            Paths of the saved PNG files, in input order.
        """
        input_paths = list(input_paths)
        if not input_paths:
            return []

        os.makedirs(self.output_path, exist_ok=True)

        loaded = []
        for path in input_paths:
            image = self.load_image(path)
            if image is None:
                print(f"Skipping unreadable image: {path}")
                continue
            loaded.append((path, image))
        if not loaded:
            return []

        results = self.model([image for _, image in loaded], verbose=False)
        processed_images = []

        for (path, image), result in zip(loaded, results):
            self._annotate(image, result, thickness=2)

            # Every image is saved in PNG format.
            stem = os.path.splitext(os.path.basename(path))[0]
            processed_image_path = os.path.join(self.output_path, str(stem) + '.png')
            print(f"Сохранение изображения по пути: {processed_image_path}")
            processed_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            processed_image.save(processed_image_path, format='PNG')
            processed_images.append(processed_image_path)

        return processed_images

    def process_videos(self, input_paths):
        """Process each video in turn and return the output video paths."""
        return [self.process_single_video(video_path) for video_path in input_paths]

def process_media(input_paths, processor):
    """Split paths into images and videos by extension and process each group.

    Paths whose (case-insensitive) extension matches neither list are
    ignored. The processor is only called for a group that is non-empty.

    Args:
        input_paths: Paths of the media files to process.
        processor: Object providing ``process_images`` and ``process_videos``.

    Returns:
        A ``(images, videos)`` tuple holding whatever the processor returned
        for each group, or an empty list for a group with no matching paths.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.heic', '.heif', '.webp')
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm')

    def _with_extension(extensions):
        # Keep the caller's original spelling; only the comparison is lowercased.
        return [candidate for candidate in input_paths
                if candidate.lower().endswith(extensions)]

    image_paths = _with_extension(image_extensions)
    video_paths = _with_extension(video_extensions)

    imgs = processor.process_images(image_paths) if image_paths else []
    vids = processor.process_videos(video_paths) if video_paths else []
    return imgs, vids