File size: 4,113 Bytes
da7be98
 
 
 
 
 
 
 
 
 
 
1b6cc04
 
da7be98
7366e67
ad8446f
 
da7be98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b6cc04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import shutil
from dedoc import DedocManager
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from ResumeStructure import ResumeStructure
from fastapi import UploadFile
from prompt_template import template_format_instructions, template
from typing import List



# Directory into which uploaded files are written while they are being parsed.
TEMP_DIR = "/temp_files"
# NOTE: this module does not create the directory at import time; the
# creation code below is disabled.
# NOTE(review): presumably the deployment environment provisions it — confirm.
# if not os.path.exists(TEMP_DIR):
#     os.makedirs(TEMP_DIR)


async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The upload is validated first, then copied into TEMP_DIR, parsed with
    Dedoc, and the temporary copy is removed again whether or not parsing
    succeeds.

    Args:
    - file: The UploadedFile object to be processed.

    Returns:
    - Output data if the file is processed successfully, None if the file
      has no usable name or its extension is not supported.

    Raises:
    - Any exception raised while writing the temporary file or by Dedoc
      itself is propagated after the temporary file has been cleaned up.
    """
    supported_formats = ('jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc')

    print(f"Processing file '{file.filename}'...")

    # The filename is supplied by the client: keep only its last path
    # component so values like "../../x" or "/etc/x" cannot escape TEMP_DIR.
    # A missing filename is treated as an empty one.
    safe_name = os.path.basename(file.filename or "")

    # Extract the extension without the leading dot, in lowercase
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Validate before touching the disk so rejected uploads leave nothing behind
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    manager = DedocManager()

    # Make sure the temporary directory is there before writing into it
    os.makedirs(TEMP_DIR, exist_ok=True)
    file_path = os.path.join(TEMP_DIR, safe_name)

    try:
        # Save the uploaded file to the temporary directory
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc
        # NOTE: this is a synchronous call inside an async function.
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Remove the temporary file even when writing or parsing failed
        if os.path.exists(file_path):
            os.remove(file_path)


async def extract_text_from_all_levels(data):
    """
    Collect the text of every subparagraph found under data['content']['structure'].

    Args:
    - data: A mapping with a 'content' entry that itself has a 'structure' entry.

    Returns:
    - The text gathered by extract_text_from_subparagraphs, or an empty
      string when the structure has no 'subparagraphs' key.
    """
    structure = data['content']['structure']

    # Nothing to walk: the top-level structure carries no subparagraphs
    if 'subparagraphs' not in structure:
        return ""

    return await extract_text_from_subparagraphs(structure['subparagraphs'])


async def extract_text_from_subparagraphs(subparagraphs):
    """
    Walk a list of subparagraphs depth-first and gather their text.

    Args:
    - subparagraphs: A list of mappings, each with a 'text' entry and
      optionally a nested 'subparagraphs' list.

    Returns:
    - One string in which every paragraph's text is followed by a newline,
      with nested paragraphs placed right after their parent.
    """
    pieces = []
    for paragraph in subparagraphs:
        pieces.append(paragraph['text'] + "\n")
        if 'subparagraphs' in paragraph:
            # Children are emitted immediately after their parent's own text
            nested = await extract_text_from_subparagraphs(paragraph['subparagraphs'])
            pieces.append(nested)
    return "".join(pieces)


def generate_formatted_resume(resume, chat_llm):
    """
    Send the resume text through the formatting prompt and return the model's reply.

    Args:
    - resume: Value substituted for the "text" variable of the prompt template.
    - chat_llm: The model object the prompt is piped into.

    Returns:
    - The `content` attribute of the result produced by invoking the chain.
    """
    formatting_prompt = PromptTemplate(
        template=template,
        input_variables=["text"],
    )

    # Pipe the prompt into the model and run it in one go
    response = (formatting_prompt | chat_llm).invoke({"text": resume})
    return response.content


def generate_json_structured_resume(resume, chat_llm):
    """
    Send the resume text through the format-instructions prompt and parse the reply.

    Args:
    - resume: Value substituted for the "text" variable of the prompt template.
    - chat_llm: The model object the prompt is piped into.

    Returns:
    - Whatever the JsonOutputParser at the end of the chain produces.
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)

    # The parser supplies the instructions that are baked into the prompt
    format_instructions = json_parser.get_format_instructions()

    structured_prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
    )

    pipeline = structured_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})


def delete_files_in_directory(directory):
    """
    Deletes all files in the specified directory.

    Only regular files directly inside the directory are removed;
    subdirectories and their contents are left untouched.

    Args:
    directory (str): The path to the directory containing files to be deleted.

    Returns:
    None
    """
    # Check if the directory exists
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # A path that exists but is not a directory (e.g. a regular file) cannot
    # be listed; report it instead of letting os.listdir raise.
    if not os.path.isdir(directory):
        print(f"'{directory}' is not a directory.")
        return

    # Iterate over each entry and delete it if it is a file
    for name in os.listdir(directory):
        file_path = os.path.join(directory, name)
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted file: {file_path}")