|
from enum import Enum |
|
from types import MappingProxyType |
|
from typing import List, Set, Mapping |
|
|
|
""" |
|
Base Enums/Consts |
|
""" |
|
|
|
|
|
class FileType(Enum):
    """Closed set of file formats known to this module.

    Members are declared in roughly alphabetical order. Every member's value
    is identical to its name, with one exception: ``THREE_GPP`` carries the
    value ``"3GPP"`` (a Python identifier cannot begin with a digit).
    """

    AAC = "AAC"
    CSV = "CSV"
    DOC = "DOC"
    DOCX = "DOCX"
    FLAC = "FLAC"
    FLV = "FLV"
    GIF = "GIF"
    GOOGLE_DOC = "GOOGLE_DOC"
    GOOGLE_DRAWINGS = "GOOGLE_DRAWINGS"
    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    GOOGLE_SLIDES = "GOOGLE_SLIDES"
    HEIC = "HEIC"
    HEIF = "HEIF"
    HTML = "HTML"
    JPEG = "JPEG"
    JSON = "JSON"
    M4A = "M4A"
    M4V = "M4V"
    MOV = "MOV"
    MP3 = "MP3"
    MP4 = "MP4"
    MPEG = "MPEG"
    MPEGPS = "MPEGPS"
    MPG = "MPG"
    MPA = "MPA"
    MPGA = "MPGA"
    OGG = "OGG"
    OPUS = "OPUS"
    PDF = "PDF"
    PCM = "PCM"
    PNG = "PNG"
    PPT = "PPT"
    PPTX = "PPTX"
    RTF = "RTF"
    THREE_GPP = "3GPP"  # value differs from the member name
    TXT = "TXT"
    WAV = "WAV"
    WEBM = "WEBM"
    WEBP = "WEBP"
    WMV = "WMV"
    XLS = "XLS"
    XLSX = "XLSX"
|
|
|
|
|
# Filename extensions (lowercase, no leading dot) recognised for each FileType.
# The FIRST entry of every list is the primary extension: it is what the
# lookup helpers in this module return via ``FILE_EXTENSIONS[file_type][0]``.
# Any further entries are aliases accepted on input only.
FILE_EXTENSIONS: Mapping[FileType, List[str]] = MappingProxyType(
    {
        FileType.AAC: ["aac"],
        FileType.CSV: ["csv"],
        FileType.DOC: ["doc"],
        FileType.DOCX: ["docx"],
        FileType.FLAC: ["flac"],
        FileType.FLV: ["flv"],
        FileType.GIF: ["gif"],
        FileType.GOOGLE_DOC: ["gdoc"],
        FileType.GOOGLE_DRAWINGS: ["gdraw"],
        FileType.GOOGLE_SHEETS: ["gsheet"],
        FileType.GOOGLE_SLIDES: ["gslides"],
        FileType.HEIC: ["heic"],
        FileType.HEIF: ["heif"],
        FileType.HTML: ["html", "htm"],
        FileType.JPEG: ["jpeg", "jpg"],
        FileType.JSON: ["json"],
        FileType.M4A: ["m4a"],
        FileType.M4V: ["m4v"],
        FileType.MOV: ["mov"],
        FileType.MP3: ["mp3"],
        FileType.MP4: ["mp4"],
        FileType.MPEG: ["mpeg"],
        FileType.MPEGPS: ["mpegps"],
        FileType.MPG: ["mpg"],
        FileType.MPA: ["mpa"],
        FileType.MPGA: ["mpga"],
        FileType.OGG: ["ogg"],
        FileType.OPUS: ["opus"],
        FileType.PDF: ["pdf"],
        FileType.PCM: ["pcm"],
        FileType.PNG: ["png"],
        FileType.PPT: ["ppt"],
        FileType.PPTX: ["pptx"],
        FileType.RTF: ["rtf"],
        # "3gp" is the extension most 3GPP files actually carry; it is listed
        # second so the primary extension stays "3gpp" for existing callers.
        FileType.THREE_GPP: ["3gpp", "3gp"],
        FileType.TXT: ["txt"],
        FileType.WAV: ["wav"],
        FileType.WEBM: ["webm"],
        FileType.WEBP: ["webp"],
        FileType.WMV: ["wmv"],
        FileType.XLS: ["xls"],
        FileType.XLSX: ["xlsx"],
    }
)
|
|
|
# MIME type string associated with each FileType (exactly one per type).
# The mapping is wrapped in MappingProxyType, so it is read-only.
FILE_MIME_TYPES: Mapping[FileType, str] = MappingProxyType(
    {
        FileType.AAC: "audio/aac",
        FileType.CSV: "text/csv",
        FileType.DOC: "application/msword",
        FileType.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        FileType.FLAC: "audio/flac",
        FileType.FLV: "video/x-flv",
        FileType.GIF: "image/gif",
        FileType.GOOGLE_DOC: "application/vnd.google-apps.document",
        FileType.GOOGLE_DRAWINGS: "application/vnd.google-apps.drawing",
        FileType.GOOGLE_SHEETS: "application/vnd.google-apps.spreadsheet",
        FileType.GOOGLE_SLIDES: "application/vnd.google-apps.presentation",
        FileType.HEIC: "image/heic",
        FileType.HEIF: "image/heif",
        FileType.HTML: "text/html",
        FileType.JPEG: "image/jpeg",
        FileType.JSON: "application/json",
        FileType.M4A: "audio/x-m4a",
        FileType.M4V: "video/x-m4v",
        FileType.MOV: "video/quicktime",
        FileType.MP3: "audio/mpeg",
        FileType.MP4: "video/mp4",
        FileType.MPEG: "video/mpeg",
        FileType.MPEGPS: "video/mpegps",
        FileType.MPG: "video/mpg",
        # NOTE(review): "audio/m4a" reads like an M4A type rather than MPA;
        # left untouched because callers may depend on it - confirm intent.
        FileType.MPA: "audio/m4a",
        FileType.MPGA: "audio/mpga",
        FileType.OGG: "audio/ogg",
        FileType.OPUS: "audio/opus",
        FileType.PDF: "application/pdf",
        FileType.PCM: "audio/pcm",
        FileType.PNG: "image/png",
        FileType.PPT: "application/vnd.ms-powerpoint",
        FileType.PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        FileType.RTF: "application/rtf",
        FileType.THREE_GPP: "video/3gpp",
        FileType.TXT: "text/plain",
        FileType.WAV: "audio/wav",
        FileType.WEBM: "video/webm",
        FileType.WEBP: "image/webp",
        FileType.WMV: "video/wmv",
        FileType.XLS: "application/vnd.ms-excel",
        FileType.XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
)
|
|
|
""" |
|
Util Functions |
|
""" |
|
|
|
|
|
def get_file_extension_from_mime_type(mime_type: str) -> str:
    """Return the primary file extension registered for a MIME type.

    Matching is case-insensitive. Surrounding whitespace and any parameters
    following the media type are ignored, so ``"Text/HTML; charset=utf-8"``
    resolves exactly like ``"text/html"``.

    Args:
        mime_type: A MIME type string such as ``"image/png"``.

    Returns:
        The first extension listed in ``FILE_EXTENSIONS`` for the first file
        type in ``FILE_MIME_TYPES`` whose MIME string matches (no leading dot).

    Raises:
        ValueError: If no entry in ``FILE_MIME_TYPES`` matches ``mime_type``.
    """
    # Normalise once, outside the loop: keep only the "type/subtype" part,
    # then trim and lowercase it.
    wanted = mime_type.split(";", 1)[0].strip().lower()
    for file_type, mime in FILE_MIME_TYPES.items():
        if mime.lower() == wanted:
            return FILE_EXTENSIONS[file_type][0]
    # Report the caller's original input, not the normalised form.
    raise ValueError(f"Unknown extension for mime type: {mime_type}")
|
|
|
|
|
def get_file_type_from_extension(extension: str) -> FileType:
    """Return the ``FileType`` that lists ``extension`` in ``FILE_EXTENSIONS``.

    Matching is case-insensitive. Surrounding whitespace and leading dots are
    ignored, so ``".PDF"`` (the form ``os.path.splitext`` produces) resolves
    the same way as ``"pdf"``.

    Args:
        extension: A filename extension, with or without a leading dot.

    Returns:
        The first ``FileType`` whose extension list contains the normalised
        extension.

    Raises:
        ValueError: If no file type lists the extension.
    """
    # Normalise once instead of lower-casing on every loop iteration.
    normalized = extension.strip().lstrip(".").lower()
    for file_type, extensions in FILE_EXTENSIONS.items():
        if normalized in extensions:
            return file_type
    # Report the caller's original input, not the normalised form.
    raise ValueError(f"Unknown file type for extension: {extension}")
|
|
|
|
|
def get_file_extension_for_file_type(file_type: FileType) -> str:
    """Return the primary (first-listed) extension for ``file_type``.

    Raises:
        KeyError: If ``file_type`` has no entry in ``FILE_EXTENSIONS``.
    """
    known_extensions = FILE_EXTENSIONS[file_type]
    return known_extensions[0]
|
|
|
|
|
def get_file_mime_type_for_file_type(file_type: FileType) -> str:
    """Return the MIME type string registered for ``file_type``.

    Raises:
        KeyError: If ``file_type`` has no entry in ``FILE_MIME_TYPES``.
    """
    mime_type = FILE_MIME_TYPES[file_type]
    return mime_type
|
|
|
|
|
def get_file_mime_type_from_extension(extension: str) -> str:
    """Return the MIME type string for a filename extension.

    The extension is first resolved to a ``FileType`` through
    ``get_file_type_from_extension``; that type's MIME string is then looked
    up through ``get_file_mime_type_for_file_type``.

    Raises:
        ValueError: Propagated from ``get_file_type_from_extension`` when the
            extension is not recognised.
    """
    return get_file_mime_type_for_file_type(
        get_file_type_from_extension(extension)
    )
|
|
|
|
|
""" |
|
FileType Type Groupings (Videos, Images, etc) |
|
""" |
|
|
|
|
|
# File types treated as still images by is_image_file_type().
IMAGE_FILE_TYPES: Set[FileType] = {
    FileType.PNG,
    FileType.JPEG,
    FileType.GIF,
    FileType.WEBP,
    FileType.HEIC,
    FileType.HEIF,
}
|
|
|
|
|
def is_image_file_type(file_type: "FileType") -> bool:
    """Return True when ``file_type`` is a member of ``IMAGE_FILE_TYPES``."""
    is_image = file_type in IMAGE_FILE_TYPES
    return is_image
|
|
|
|
|
|
|
# File types treated as video by is_video_file_type().
VIDEO_FILE_TYPES: Set[FileType] = {
    FileType.MOV,
    FileType.MP4,
    FileType.MPEG,
    FileType.M4V,
    FileType.FLV,
    FileType.MPEGPS,
    FileType.MPG,
    FileType.WEBM,
    FileType.WMV,
    FileType.THREE_GPP,
}
|
|
|
|
|
def is_video_file_type(file_type: "FileType") -> bool:
    """Return True when ``file_type`` is a member of ``VIDEO_FILE_TYPES``."""
    is_video = file_type in VIDEO_FILE_TYPES
    return is_video
|
|
|
|
|
|
|
# File types treated as audio by is_audio_file_type().
# M4A and OGG are included because this module maps them to audio MIME types
# ("audio/x-m4a" and "audio/ogg"); they were previously missing from the set.
AUDIO_FILE_TYPES: Set[FileType] = {
    FileType.AAC,
    FileType.FLAC,
    FileType.M4A,
    FileType.MP3,
    FileType.MPA,
    FileType.MPGA,
    FileType.OGG,
    FileType.OPUS,
    FileType.PCM,
    FileType.WAV,
}
|
|
|
|
|
def is_audio_file_type(file_type: "FileType") -> bool:
    """Return True when ``file_type`` is a member of ``AUDIO_FILE_TYPES``."""
    is_audio = file_type in AUDIO_FILE_TYPES
    return is_audio
|
|
|
|
|
|
|
# File types treated as text by is_text_file_type().
TEXT_FILE_TYPES: Set[FileType] = {
    FileType.CSV,
    FileType.HTML,
    FileType.RTF,
    FileType.TXT,
}
|
|
|
|
|
def is_text_file_type(file_type: "FileType") -> bool:
    """Return True when ``file_type`` is a member of ``TEXT_FILE_TYPES``."""
    is_text = file_type in TEXT_FILE_TYPES
    return is_text
|
|
|
|
|
""" |
|
Other FileType Groupings |
|
""" |
|
|
|
|
|
# File types checked by is_gemini_1_5_accepted_file_type().
# NOTE(review): presumably mirrors the formats Gemini 1.5 accepts as input -
# verify against the current model documentation before extending.
GEMINI_1_5_ACCEPTED_FILE_TYPES: Set[FileType] = {
    # Images
    FileType.PNG,
    FileType.JPEG,
    # Audio
    FileType.AAC,
    FileType.FLAC,
    FileType.MP3,
    FileType.MPA,
    FileType.MPGA,
    FileType.OPUS,
    FileType.PCM,
    FileType.WAV,
    # Video
    FileType.FLV,
    FileType.MOV,
    FileType.MPEG,
    FileType.MPEGPS,
    FileType.MPG,
    FileType.MP4,
    FileType.WEBM,
    FileType.WMV,
    FileType.THREE_GPP,
    # Documents
    FileType.PDF,
}
|
|
|
|
|
def is_gemini_1_5_accepted_file_type(file_type: FileType) -> bool:
    """Return True when ``file_type`` is in ``GEMINI_1_5_ACCEPTED_FILE_TYPES``."""
    is_accepted = file_type in GEMINI_1_5_ACCEPTED_FILE_TYPES
    return is_accepted
|
|