Spaces:

Sabbah13
/

Video_Call_Recording_and_Transcription_API_Service

Runtime error

App Files Files Community

Sabbah13 committed on Sep 12

Commit

f37f889

•

1 Parent(s): a24ffe8

Create main.py

Browse files

Files changed (1) hide show

main.py +176 -0

main.py ADDED Viewed

	@@ -0,0 +1,176 @@

+from fastapi import FastAPI, HTTPException, Body, Request, File, UploadFile, BackgroundTasks, Form, Depends
+from pydantic import BaseModel, constr
+from huggingface_hub import HfApi
+from fastapi.security import OAuth2PasswordBearer
+from typing import Optional, Dict
+import httpx
+import os
+import asyncio
+import logging
+from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens, process_transcribation_with_gigachat
+from openai_requests import get_completion_from_openai, process_transcribation_with_assistant
+# Name of the Hugging Face Space used for transcription, read from the
+# HF_SPACE_NAME environment variable (None when the variable is unset).
+repo_id = os.getenv('HF_SPACE_NAME')
+# Hub client used further down to query and restart that Space.
+api = HfApi()
+# Bearer-token scheme that verify_token depends on.
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+# Logger setup
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# Create a handler that writes log records to the console
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+console_handler.setFormatter(formatter)
+logger.addHandler(console_handler)
+# ASGI application object; the route decorators below register on it.
+app = FastAPI()
+# Request data model for /send_transcript: a finished transcript plus the
+# options send_to_llm uses to post-process and summarize it.
+class TranscriptRequest(BaseModel):
+    # Plain-text transcript that is passed to the LLM helpers.
+    transcript: str
+    # Structured transcript; forwarded unchanged in the POST to final_url.
+    json_transcript: Dict
+    # URL that receives the final POST (transcript, processed transcript, summary).
+    final_url: str
+    # Backend selector; send_to_llm branches on 'GigaChat' and 'ChatGPT'.
+    llm: str
+    # Prompt prepended to the transcript when the summary is requested.
+    base_prompt: str
+    # Prompt handed to the transcript-processing helper.
+    proccess_prompt: str
+    # When true, the transcript is processed first and the summary is built from the processed text.
+    need_proccessing: bool
+# Request data model for the /test endpoint. Its fields have the same names as
+# the JSON keys that send_to_llm posts to final_url.
+class FinalRequest(BaseModel):
+    # Original transcript text.
+    transcript: str
+    # Transcript after the optional processing step.
+    proccessed_transcript: str
+    # Summary text.
+    summary: str
+    # Structured transcript.
+    json_transcript: Dict
+def verify_token(token: str = Depends(oauth2_scheme)):
+    if token != os.environ.get("AUTH_TOKEN"):
+        raise HTTPException(status_code=401, detail="Invalid token")
+# Главная страница с текстом "server is running"
+@app.get("/")
+async def read_root():
+    return {"text": "server is running"}
+@app.post("/test")
+def echo_text(text_request: FinalRequest):
+    logger.info(f"Final endpoint received transcript! Transcript: {text_request.transcript}.\n Proccessed transcript: {text_request.proccessed_transcript}.\n Json transcript: {text_request.json_transcript} Summary: {text_request.summary}")
+    return {"transcript": text_request.transcript, "summary": text_request.summary}
+async def send_to_llm(transcript_request: TranscriptRequest):
+    transcript = transcript_request.transcript
+    base_prompt = transcript_request.base_prompt
+    llm = transcript_request.llm
+    need_proccessing = transcript_request.need_proccessing
+    processing_prompt = transcript_request.proccess_prompt
+    proccessed_transcript = ''
+    if (llm == 'GigaChat'):
+        access_token = get_access_token()
+        logger.info('Got access token for GigaChat')
+    if (need_proccessing):
+        logger.info('Strarting proccessing')
+        if (llm == 'GigaChat'):
+            number_of_tokens = get_number_of_tokens(transcript, access_token)
+            logger.info('Количество токенов в транскрибации: ' + str(number_of_tokens))
+            proccessed_transcript = process_transcribation_with_gigachat(processing_prompt, transcript, number_of_tokens + 1000, access_token)
+            logger.info('Proccessed transcript: ' + transcript)
+        elif (llm == 'ChatGPT'):
+            proccessed_transcript = process_transcribation_with_assistant(processing_prompt, transcript)
+            logger.info('Proccessed transcript: ' + transcript)
+    logger.info('Strarting summarization')
+    transcript_for_summary = proccessed_transcript if need_proccessing else transcript
+    # Получение саммари
+    if (llm == 'GigaChat'):
+        summary_answer = get_completion_from_gigachat(base_prompt + transcript_for_summary, 1024, access_token)
+    elif (llm == 'ChatGPT'):
+        summary_answer = get_completion_from_openai(base_prompt + transcript_for_summary, 1024)
+    async with httpx.AsyncClient() as client:
+        response = await client.post(transcript_request.final_url, json={"transcript": transcript, "json_transcript": transcript_request.json_transcript, 'proccessed_transcript': proccessed_transcript, "summary": summary_answer})
+@app.post("/send_transcript")
+async def send_transcript(transcript_request: TranscriptRequest, background_tasks: BackgroundTasks = BackgroundTasks(), token: str = Depends(verify_token)):
+    logger.info('Got transcript, starting summarization. Your llm is ' + transcript_request.llm)
+    background_tasks.add_task(send_to_llm, transcript_request)
+    return {"message": "Transcript received, sending to llm"}
+async def restart_and_check_space(repo_id):
+    # Перезапускаем пространство
+    logger.info('Restarting space')
+    api.restart_space(repo_id=repo_id)
+    # Проверяем статус каждые 15 секунд
+    while True:
+        run_time = api.get_space_runtime(repo_id=repo_id)
+        if run_time.stage == 'RUNNING':
+            logger.info('Transcribation space is running, sending file')
+            break
+        else:
+            logger.info('Waiting for space to be running...')
+            await asyncio.sleep(15)
+async def send_file_to_transcribation(url: str, file: UploadFile, llm: str, base_prompt: str, proccess_prompt: str, need_proccessing: bool, max_speakes: int, min_speakers: int):
+    run_time = api.get_space_runtime(repo_id=repo_id)
+    if run_time.stage != 'RUNNING':
+        await restart_and_check_space(repo_id)
+    else:
+        logger.info('Transcribation space is running, sending file')
+    async with httpx.AsyncClient() as client:
+        # Считываем содержимое файла в байты
+        file_content = await file.read()
+        files = {
+            'file': ('file', file_content, file.content_type),
+            'transcript_url': (None, os.getenv('HF_TRANSCRIPT_URL')),
+        }
+        data = {
+            'final_url': str(url),
+            'llm': str(llm),
+            'base_prompt': str(base_prompt),
+            'proccess_prompt': str(proccess_prompt),
+            'need_proccessing': need_proccessing,
+            'max_speakers': max_speakes,
+            'min_speakers': min_speakers,
+        }
+        headers = {
+            'Authorization': f'Bearer {os.environ.get("HF_TOKEN")}'
+        }
+        response = await client.post(os.getenv('HF_TRANSCRIBATION_SPACE_URL'), headers=headers, files=files, data=data)
+        logger.info(f"Status code: {response.status_code}, Data: {response.text}")
+@app.post("/upload")
+def upload_file(file: UploadFile = File(...),
+                url: str = Form(...),
+                llm: str = Form(...),
+                base_prompt: str = Form(...),
+                proccess_prompt: str = Form(...),
+                need_proccessing: bool = Form(...),
+                max_speakers: Optional[int] = Form(None),
+                min_speakers: Optional[int] = Form(None),
+                background_tasks: BackgroundTasks = BackgroundTasks(),
+                token: str = Depends(verify_token)):
+    if (llm != 'GigaChat' and llm != 'ChatGPT'):
+        raise HTTPException(status_code=422, detail='Llm must be GigaChat or ChatGPT')
+    background_tasks.add_task(send_file_to_transcribation, url, file, llm, base_prompt, proccess_prompt, need_proccessing, max_speakers, min_speakers)
+    return {"message": "Got file with name: " + file.filename + ', After proccessing, transcript will be sent to ' + url }