Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,110 @@
|
|
1 |
from dotenv import load_dotenv
|
|
|
2 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
load_dotenv()
|
5 |
|
6 |
-
|
|
|
|
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from dotenv import load_dotenv
|
2 |
+
import io
|
3 |
import streamlit as st
|
4 |
+
from langchain.prompts import PromptTemplate
|
5 |
+
from langchain.output_parsers import PydanticOutputParser
|
6 |
+
from langchain_community.chat_models import ChatAnthropic
|
7 |
+
from langchain_openai import ChatOpenAI
|
8 |
+
from pydantic import ValidationError
|
9 |
+
from resume_template import Resume
|
10 |
+
from json import JSONDecodeError
|
11 |
+
import PyPDF2
|
12 |
+
import json
|
13 |
|
14 |
# Load environment variables (e.g. OpenAI/Anthropic API keys) from a local .env file.
load_dotenv()
|
15 |
|
16 |
+
def pdf_to_string(file):
    """
    Convert a PDF file to a single text string.

    Parameters:
        file (io.BytesIO): A file-like object containing the PDF data.

    Returns:
        str: The concatenated text extracted from every page.

    Note:
        The input file object is closed before returning, so callers
        cannot reuse it afterwards.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate pages directly instead of indexing via range(len(...)), and
    # build the result with one ''.join pass instead of quadratic `+=`.
    # extract_text() may return None for pages with no extractable text,
    # hence the `or ''` guard (the original would raise TypeError there).
    text = ''.join((page.extract_text() or '') for page in pdf_reader.pages)
    file.close()
    return text
34 |
+
|
35 |
+
def extract_resume_fields(full_text, model):
    """
    Analyze resume text and extract structured information using a language model.

    Parameters:
        full_text (str): The text content of the resume.
        model (str): Key into the module-level ``llm_dict`` selecting the
            chat model; unknown keys fall back to a ``ChatOpenAI`` instance
            built with the given model name.

    Returns:
        str: A JSON string of the parsed ``Resume`` fields on success, or
        the raw model output content if parsing fails.
    """
    # The Resume schema is imported from the local resume_template module.
    with open("prompts/resume_extraction.prompt", "r", encoding="utf-8") as f:
        template = f.read()

    parser = PydanticOutputParser(pydantic_object=Resume)

    prompt_template = PromptTemplate(
        template=template,
        input_variables=["resume"],
        partial_variables={"response_template": parser.get_format_instructions()},
    )

    # Resolve the model key to a chat client; fall back to a fresh ChatOpenAI
    # for keys not present in llm_dict (a module-level dict defined below).
    llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))

    # Invoke the language model on the formatted prompt.
    formatted_input = prompt_template.format_prompt(resume=full_text)
    output = llm.invoke(formatted_input.to_string())

    # The original had two identical copy-paste except blocks; catch both
    # failure modes in one clause and report the exception class name.
    try:
        parsed_output = parser.parse(output.content)
        json_output = parsed_output.json()
        print(json_output)
        return json_output
    except (ValidationError, JSONDecodeError) as e:
        print(f"{type(e).__name__}: {e}")
        print(output)
        return output.content
81 |
+
|
82 |
+
# ---- Streamlit UI ----------------------------------------------------------
st.title("Resume Parser")

# Available chat models, keyed by the label shown in the dropdown.
# NOTE: llm_dict is also read by extract_resume_fields as a module-level
# lookup, so its name and contents must stay as-is.
llm_dict = {
    "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
    "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
    "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000),
}

# Model picker and PDF uploader widgets.
selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

# Run the pipeline only once a file is present AND the user clicks the button
# (the button widget is only rendered when a file has been uploaded, exactly
# as with the original nested ifs — `and` short-circuits identically).
if uploaded_file is not None and st.button("Convert PDF to Text"):
    # Extract raw text from the uploaded PDF.
    resume_text = pdf_to_string(uploaded_file)

    # Parse the resume with the selected model and render the result as JSON.
    parsed_fields = extract_resume_fields(resume_text, selected_model)
    st.json(parsed_fields)