LeoWalker committed
Commit 828df56
1 Parent(s): 024bd40

Updated the Streamlit app and the Resume object to use newer libraries, and updated requirements.txt accordingly. Works with OpenAI and Anthropic.

Files changed (3)
  1. app.py +55 -45
  2. requirements.txt +82 -10
  3. resume_template.py +3 -3
app.py CHANGED
@@ -2,15 +2,22 @@ from dotenv import load_dotenv
 import io
 import streamlit as st
 from langchain.prompts import PromptTemplate
-from langchain.output_parsers import PydanticOutputParser
-from langchain_community.chat_models import ChatAnthropic
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_anthropic import ChatAnthropic
 from langchain_openai import ChatOpenAI
 from pydantic import ValidationError
+from langchain_core.pydantic_v1 import BaseModel, Field
 from resume_template import Resume
 from json import JSONDecodeError
 import PyPDF2
 import json
 import time
+import os
+# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
+os.environ['LANGCHAIN_TRACING_V2'] = 'true'
+
+# Set the LANGCHAIN_PROJECT environment variable to the desired project name
+os.environ['LANGCHAIN_PROJECT'] = 'Resume_Project'
 
 load_dotenv()
 
@@ -33,14 +40,13 @@ def pdf_to_string(file):
     file.close()
     return text
 
+
 def extract_resume_fields(full_text, model):
     """
     Analyze a resume text and extract structured information using a specified language model.
-
     Parameters:
     full_text (str): The text content of the resume.
     model (str): The language model object to use for processing the text.
-
     Returns:
     dict: A dictionary containing structured information extracted from the resume.
     """
@@ -57,71 +63,75 @@ def extract_resume_fields(full_text, model):
        partial_variables={"response_template": parser.get_format_instructions()},
    )
    # Invoke the language model and process the resume
-    formatted_input = prompt_template.format_prompt(resume=full_text)
+    # formatted_input = prompt_template.format_prompt(resume=full_text)
     llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
     # print("llm", llm)
-    output = llm.invoke(formatted_input.to_string())
-
+    # output = llm.invoke(formatted_input.to_string())
+    chain = prompt_template | llm | parser
+    output = chain.invoke(full_text)
     # print(output) # Print the output object for debugging
+    print(output)
+    return output
+    # try:
+    #     parsed_output = parser.parse(output.content)
+    #     json_output = parsed_output.json()
+    #     print(json_output)
+    #     return json_output
 
-    try:
-        parsed_output = parser.parse(output.content)
-        json_output = parsed_output.json()
-        print(json_output)
-        return json_output
-
-    except ValidationError as e:
-        print(f"Validation error: {e}")
-        print(output)
-        return output.content
+    # except ValidationError as e:
+    #     print(f"Validation error: {e}")
+    #     print(output)
+    #     return output.content
 
-    except JSONDecodeError as e:
-        print(f"JSONDecodeError error: {e}")
-        print(output)
-        return output.content
+    # except JSONDecodeError as e:
+    #     print(f"JSONDecodeError error: {e}")
+    #     print(output)
+    #     return output.content
+
+def display_extracted_fields(obj, section_title=None, indent=0):
+    if section_title:
+        st.subheader(section_title)
+
+    for field_name, field_value in obj:
+        if isinstance(field_value, BaseModel):
+            display_extracted_fields(field_value, field_name, indent + 1)
+        elif isinstance(field_value, list):
+            st.write(" " * indent + field_name + ":")
+            for item in field_value:
+                if isinstance(item, BaseModel):
+                    display_extracted_fields(item, None, indent + 1)
+                else:
+                    st.write(" " * (indent + 1) + "- " + str(item))
+        else:
+            st.write(" " * indent + field_name + ": " + str(field_value))
+
 
 st.title("Resume Parser")
 
-# Set up the LLM dictionary
 llm_dict = {
-    # "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
-    # "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
-    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
-    # "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
-    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000)
+    "gpt-3.5-turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
+    "sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
 }
 
-# Add a Streamlit dropdown menu for model selection
 selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
 
-# Add a file uploader
 uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 
-# Check if a file is uploaded
 if uploaded_file is not None:
-    # Add a button to trigger the conversion
     if st.button("Convert PDF to Text"):
-        start_time = time.time()  # Start the timer
+        start_time = time.time()
 
-        # Convert the uploaded file to a string
         text = pdf_to_string(uploaded_file)
 
-        # Extract resume fields using the selected model
        extracted_fields = extract_resume_fields(text, selected_model)
 
-        end_time = time.time()  # Stop the timer
-        elapsed_time = end_time - start_time  # Calculate the elapsed time
+        end_time = time.time()
+        elapsed_time = end_time - start_time
 
-        # Display the elapsed time
        st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
-
-        # # Display the extracted fields on the Streamlit app
-        # st.json(extracted_fields)
 
-        # If extracted_fields is a JSON string, convert it to a dictionary
-        if isinstance(extracted_fields, str):
-            extracted_fields = json.loads(extracted_fields)
+        display_extracted_fields(extracted_fields, "Extracted Resume Fields")
 
-        for key, value in extracted_fields.items():
-            st.write(f"{key}: {value}")
+        # for key, value in extracted_fields.items():
+        #     st.write(f"{key}: {value}")
 
requirements.txt CHANGED
@@ -1,10 +1,82 @@
-streamlit
-python-dotenv
-pydantic
-PyPDF2
-
-openai
-anthropic
-langchain
-langchain-community
-langchain_openai
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anthropic==0.25.7
+anyio==4.3.0
+attrs==23.2.0
+blinker==1.8.1
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.5
+defusedxml==0.7.1
+distro==1.9.0
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.3.1
+gitdb==4.0.11
+GitPython==3.1.43
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.7
+Jinja2==3.1.3
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+langchain==0.1.16
+langchain-anthropic==0.1.11
+langchain-community==0.0.34
+langchain-core==0.1.46
+langchain-openai==0.1.4
+langchain-text-splitters==0.0.1
+langsmith==0.1.52
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.1
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+numpy==1.26.4
+openai==1.24.0
+orjson==3.10.1
+packaging==23.2
+pandas==2.2.2
+pillow==10.3.0
+protobuf==4.25.3
+pyarrow==16.0.0
+pydantic==2.7.1
+pydantic_core==2.18.2
+pydeck==0.9.0
+Pygments==2.17.2
+PyPDF2==3.0.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.0
+regex==2024.4.28
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+SQLAlchemy==2.0.29
+streamlit==1.33.0
+tenacity==8.2.3
+tiktoken==0.6.0
+tokenizers==0.19.1
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4
+tqdm==4.66.2
+typing-inspect==0.9.0
+typing_extensions==4.11.0
+tzdata==2024.1
+urllib3==2.2.1
+yarl==1.9.4
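Since the requirements are now pinned to exact versions, a quick check of the installed environment against the key pins can be useful. This is a convenience sketch, not part of the commit, and the package names below are just the commit's most relevant pins.

# Convenience sketch (not part of this commit): report installed versions of
# the key pinned packages so they can be compared against requirements.txt.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("langchain", "langchain-core", "langchain-openai", "langchain-anthropic",
            "streamlit", "pydantic", "PyPDF2"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")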
resume_template.py CHANGED
@@ -1,5 +1,5 @@
-from pydantic import BaseModel, Field, ValidationError
 from typing import List, Optional, Dict
+from langchain_core.pydantic_v1 import BaseModel, Field
 
 # The following classes are for the resume template
 
@@ -9,8 +9,8 @@ class ContactInfo(BaseModel):
     linkedin: Optional[str] = None
 
 class PersonalDetails(BaseModel):
-    full_name: str
-    contact_info: ContactInfo
+    full_name: str = None
+    contact_info: ContactInfo
     professional_summary: Optional[str] = None
 
 class Education(BaseModel):
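To illustrate why the models moved to langchain_core.pydantic_v1, here is a standalone sketch of PydanticOutputParser working against the nested models above. Only fields visible in this diff are used, and the sample JSON string is invented for the example; the real app parses into the full Resume class.

# Standalone sketch (not part of this commit): PydanticOutputParser consuming
# the nested pydantic_v1 models shown in this diff.
from typing import Optional

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel

class ContactInfo(BaseModel):
    linkedin: Optional[str] = None

class PersonalDetails(BaseModel):
    full_name: str = None
    contact_info: ContactInfo
    professional_summary: Optional[str] = None

parser = PydanticOutputParser(pydantic_object=PersonalDetails)

# get_format_instructions() is what app.py injects as {response_template};
# parse() validates a model reply back into a PersonalDetails instance.
print(parser.get_format_instructions())
details = parser.parse(
    '{"full_name": "Jane Doe", '
    '"contact_info": {"linkedin": "linkedin.com/in/janedoe"}, '
    '"professional_summary": "Data scientist."}'
)
print(details.full_name, details.contact_info.linkedin)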