Spaces:

astro21
/

resume-revealer

Sleeping

File size: 3,428 Bytes

da7be98
 
 
 
 
 
 
 
 
 
 
 
ad8446f
 
 
da7be98

import os
import shutil
from dedoc import DedocManager
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from ResumeStructure import ResumeStructure
from fastapi import UploadFile
from prompt_template import template_format_instructions, template
from typing import List

# Directory where uploaded files are staged temporarily while they are parsed.
TEMP_DIR = "/code/temp_files"
# NOTE: nothing at import time creates this directory; the creation code
# below is commented out.
# if not os.path.exists(TEMP_DIR):
#     os.makedirs(TEMP_DIR)


async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The upload is validated by extension first, then copied into TEMP_DIR,
    parsed with Dedoc, and the temporary copy is removed again whether or
    not parsing succeeds.

    Args:
    - file: The UploadFile object to be processed.

    Returns:
    - The result of ``output.to_api_schema().model_dump()`` if the file is
      processed successfully, None if the file name is missing or its
      extension is not in the supported list.

    Raises:
    - Any exception raised while writing the temporary file or by Dedoc
      while parsing is propagated to the caller (after cleanup).
    """
    supported_formats = ('jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc')

    print(f"Processing file '{file.filename}'...")

    # The file name is client-controlled: keep only its final path component
    # so that names like "../../x.pdf" or "/etc/x.pdf" cannot escape TEMP_DIR.
    # A missing name (None) is treated as an empty name.
    safe_name = os.path.basename(file.filename or "")

    # Extract the extension without the leading dot, lowercased
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Reject unsupported formats before anything is written to disk, so no
    # temporary file is left behind for rejected uploads.
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    manager = DedocManager()

    # Make sure the staging directory exists before writing into it
    os.makedirs(TEMP_DIR, exist_ok=True)
    file_path = os.path.join(TEMP_DIR, safe_name)
    # NOTE(review): two concurrent uploads with the same name would share
    # this path — consider unique temp names if that can happen.

    try:
        # Save the uploaded file to the temporary directory
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Remove the temporary file even when writing or parsing failed
        if os.path.isfile(file_path):
            os.remove(file_path)


async def extract_text_from_all_levels(data):
    """
    Collect the text of every subparagraph, at any depth, from parsed data.

    Args:
    - data: The JSON-like data; its ``data['content']['structure']`` entry
      is inspected for a 'subparagraphs' list.

    Returns:
    - The concatenated text of all nested subparagraphs, or an empty string
      when the structure has no 'subparagraphs' entry.
    """
    structure = data['content']['structure']

    # Nothing to walk: the top-level structure carries no subparagraphs
    if 'subparagraphs' not in structure:
        return ""

    return await extract_text_from_subparagraphs(structure['subparagraphs'])


async def extract_text_from_subparagraphs(subparagraphs):
    """
    Walk a list of subparagraphs depth-first and gather their text.

    Args:
    - subparagraphs: A list of subparagraph dicts; each has a 'text' entry
      and may have its own nested 'subparagraphs' list.

    Returns:
    - A string with each paragraph's text followed by a newline, parents
      before their children, in document order.
    """
    pieces = []
    for node in subparagraphs:
        # The node's own text comes first, newline-terminated
        pieces.append(node['text'] + "\n")
        # Then everything nested below it, if the node has children
        if 'subparagraphs' in node:
            nested = await extract_text_from_subparagraphs(node['subparagraphs'])
            pieces.append(nested)
    return "".join(pieces)


def generate_formatted_resume(resume, chat_llm):
    """
    Run the resume text through the formatting prompt and the chat model.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The ``content`` attribute of the model's response.
    """
    # Prompt piped straight into the model forms the chain
    pipeline = PromptTemplate(template=template, input_variables=["text"]) | chat_llm

    response = pipeline.invoke({"text": resume})
    return response.content


def generate_json_structured_resume(resume, chat_llm):
    """
    Run the resume text through the JSON-structuring prompt, the chat model
    and a JSON output parser built around ResumeStructure.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - Whatever the JsonOutputParser yields for the model's response
      (presumably a dict shaped like ResumeStructure — confirm with caller).
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)

    # The parser's format instructions are baked into the prompt up front
    format_hint = json_parser.get_format_instructions()
    structured_prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={"format_instructions": format_hint},
    )

    pipeline = structured_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})