LeoWalker committed
Commit 5ee2f5f
1 Parent(s): 8065b35

Update app.py

Files changed (1)
app.py: +104 -1
app.py CHANGED
@@ -1,7 +1,110 @@
 from dotenv import load_dotenv
+import io
 import streamlit as st
+from langchain.prompts import PromptTemplate
+from langchain.output_parsers import PydanticOutputParser
+from langchain_community.chat_models import ChatAnthropic
+from langchain_openai import ChatOpenAI
+from pydantic import ValidationError
+from resume_template import Resume
+from json import JSONDecodeError
+import PyPDF2
+import json
 
 load_dotenv()
 
-st.title("Resume Parser via LLM")
+def pdf_to_string(file):
+    """
+    Convert a PDF file to a string.
 
+    Parameters:
+    file (io.BytesIO): A file-like object representing the PDF file.
+
+    Returns:
+    str: The extracted text from the PDF.
+    """
+    pdf_reader = PyPDF2.PdfReader(file)
+    num_pages = len(pdf_reader.pages)
+    text = ''
+    for i in range(num_pages):
+        page = pdf_reader.pages[i]
+        text += page.extract_text()
+    file.close()
+    return text
+
+def extract_resume_fields(full_text, model):
+    """
+    Analyze a resume text and extract structured information using a specified language model.
+
+    Parameters:
+    full_text (str): The text content of the resume.
+    model (str): The language model object to use for processing the text.
+
+    Returns:
+    dict: A dictionary containing structured information extracted from the resume.
+    """
+    # The Resume object is imported from the local resume_template file
+
+    with open("prompts/resume_extraction.prompt", "r") as f:
+        template = f.read()
+
+    parser = PydanticOutputParser(pydantic_object=Resume)
+
+    prompt_template = PromptTemplate(
+        template=template,
+        input_variables=["resume"],
+        partial_variables={"response_template": parser.get_format_instructions()},
+    )
+    # Invoke the language model and process the resume
+    formatted_input = prompt_template.format_prompt(resume=full_text)
+    llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
+    # print("llm", llm)
+    output = llm.invoke(formatted_input.to_string())
+
+    # print(output) # Print the output object for debugging
+
+    try:
+        parsed_output = parser.parse(output.content)
+        json_output = parsed_output.json()
+        print(json_output)
+        return json_output
+
+    except ValidationError as e:
+        print(f"Validation error: {e}")
+        print(output)
+        return output.content
+
+    except JSONDecodeError as e:
+        print(f"JSONDecodeError error: {e}")
+        print(output)
+        return output.content
+
+st.title("Resume Parser")
+
+# Set up the LLM dictionary
+llm_dict = {
+    "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
+    "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
+    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
+    "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
+    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000)
+}
+
+# Add a Streamlit dropdown menu for model selection
+selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
+
+# Add a file uploader
+uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+
+# Check if a file is uploaded
+if uploaded_file is not None:
+    # Add a button to trigger the conversion
+    if st.button("Convert PDF to Text"):
+        # Convert the uploaded file to a string
+        text = pdf_to_string(uploaded_file)
+
+        # Extract resume fields using the selected model
+        extracted_fields = extract_resume_fields(text, selected_model)
+
+        # Display the extracted fields on the Streamlit app
+        st.json(extracted_fields)
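Note on files referenced but not included in this commit: extract_resume_fields relies on resume_template.py (the Resume Pydantic model handed to PydanticOutputParser) and on prompts/resume_extraction.prompt, which must expose the {resume} and {response_template} placeholders used in the PromptTemplate above. The sketch below is a minimal illustration of how those pieces fit together; the Resume field names and the prompt wording are assumptions, not the repository's actual definitions.

# Illustrative sketch only: the real Resume model and prompt file are not part
# of this diff, so the field names and prompt wording below are assumptions.
from typing import List, Optional
from pydantic import BaseModel
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

class Resume(BaseModel):
    # Hypothetical fields; the actual model lives in resume_template.py.
    name: Optional[str] = None
    email: Optional[str] = None
    skills: List[str] = []

parser = PydanticOutputParser(pydantic_object=Resume)

# prompts/resume_extraction.prompt must contain the same two variables app.py uses:
# {resume} for the raw text and {response_template} for the parser's format instructions.
template = (
    "Extract the candidate's details from the resume below.\n"
    "{response_template}\n\n"
    "Resume:\n{resume}"
)

prompt = PromptTemplate(
    template=template,
    input_variables=["resume"],
    partial_variables={"response_template": parser.get_format_instructions()},
)

print(prompt.format_prompt(resume="Jane Doe, jane@example.com, Python, SQL").to_string())

To exercise the new UI end to end, the usual provider environment variables (OPENAI_API_KEY, plus ANTHROPIC_API_KEY for the Claude options) need to be available to load_dotenv(), after which the app runs with streamlit run app.py.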