"""Helpers for parsing uploaded resume files with Dedoc and reshaping them via a chat LLM."""

import os
import shutil
from typing import List

from dedoc import DedocManager
from fastapi import UploadFile
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

from ResumeStructure import ResumeStructure
from prompt_template import template_format_instructions, template

# Directory used to store uploaded files while they are being parsed.
# It is created on demand by process_file_with_dedoc().
TEMP_DIR = "/temp_files"

# File extensions (lowercase, without the leading dot) accepted for parsing.
SUPPORTED_FORMATS = frozenset(
    {'jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc'}
)


async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The upload is written to TEMP_DIR, parsed, and the temporary copy is
    removed again whether or not parsing succeeds.

    Args:
    - file: The UploadedFile object to be processed.

    Returns:
    - Output data if the file is processed successfully, None if the file
      has no usable name or an unsupported extension.

    Raises:
    - Whatever DedocManager.parse() or the file-system calls raise; the
      temporary file is still cleaned up in that case.
    """
    print(f"Processing file '{file.filename}'...")

    # The filename comes from the client: keep only its final component so a
    # name such as "../../etc/passwd" cannot escape TEMP_DIR.
    safe_name = os.path.basename(file.filename or "")

    # Extract the extension, drop the leading dot and normalise the case.
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Validate *before* touching the disk so rejected uploads leave no
    # temporary file behind.
    if not safe_name or file_extension not in SUPPORTED_FORMATS:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    # Make sure the temporary directory exists before writing into it.
    os.makedirs(TEMP_DIR, exist_ok=True)

    # NOTE(review): two concurrent uploads with the same name share this
    # path; confirm whether callers can run in parallel.
    file_path = os.path.join(TEMP_DIR, safe_name)
    try:
        # Save the uploaded file to the temporary directory.
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc.
        manager = DedocManager()
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Remove the temporary file even if saving or parsing failed.
        if os.path.exists(file_path):
            os.remove(file_path)


async def extract_text_from_all_levels(data):
    """
    Extract text from all levels of subparagraphs in the JSON data.

    Args:
    - data: The JSON data containing subparagraphs; it must provide
      data['content']['structure'].

    Returns:
    - A string containing the text from all levels of subparagraphs, or an
      empty string when the structure has no 'subparagraphs' key.
    """
    structure = data['content']['structure']
    if 'subparagraphs' not in structure:
        return ""
    return await extract_text_from_subparagraphs(structure['subparagraphs'])


async def extract_text_from_subparagraphs(subparagraphs):
    """
    Recursively extract text from subparagraphs.

    Each subparagraph contributes its 'text' followed by a newline, then the
    text of its own nested 'subparagraphs' (if that key is present).

    Args:
    - subparagraphs: A list of subparagraphs.

    Returns:
    - A string containing the text from all subparagraphs.
    """
    # Collect the pieces and join once instead of growing a string in a loop.
    pieces = []
    for subpara in subparagraphs:
        pieces.append(subpara['text'] + "\n")
        if 'subparagraphs' in subpara:
            pieces.append(
                await extract_text_from_subparagraphs(subpara['subparagraphs'])
            )
    return "".join(pieces)


def generate_formatted_resume(resume, chat_llm):
    """
    Run the `template` prompt over the resume text with the given chat model.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The `content` attribute of the model's response.
    """
    prompt = PromptTemplate(
        template=template,
        input_variables=["text"],
    )
    chain = prompt | chat_llm
    result = chain.invoke({"text": resume})
    return result.content


def generate_json_structured_resume(resume, chat_llm):
    """
    Run the `template_format_instructions` prompt and parse the reply as JSON.

    The parser's format instructions (derived from ResumeStructure) are
    injected into the prompt as the "format_instructions" variable.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The output of JsonOutputParser for the model's response.
    """
    parser = JsonOutputParser(pydantic_object=ResumeStructure)
    prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={
            "format_instructions": parser.get_format_instructions()
        },
    )
    chain = prompt | chat_llm | parser
    return chain.invoke({"text": resume})


def delete_files_in_directory(directory):
    """
    Deletes all files in the specified directory.

    Only regular files directly inside the directory are removed;
    subdirectories are left untouched.

    Args:
        directory (str): The path to the directory containing files to be deleted.

    Returns:
        None
    """
    # Check if the directory exists
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # Iterate over each entry and delete it if it is a file
    for entry in os.listdir(directory):
        file_path = os.path.join(directory, entry)
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted file: {file_path}")