LeoWalker committed
Commit 5ee2f5f
1 Parent(s): 8065b35

Update app.py

Files changed (1)
app.py: +104 -1
app.py CHANGED
@@ -1,7 +1,110 @@
 from dotenv import load_dotenv
+import io
 import streamlit as st
+from langchain.prompts import PromptTemplate
+from langchain.output_parsers import PydanticOutputParser
+from langchain_community.chat_models import ChatAnthropic
+from langchain_openai import ChatOpenAI
+from pydantic import ValidationError
+from resume_template import Resume
+from json import JSONDecodeError
+import PyPDF2
+import json
 
 load_dotenv()
 
-st.title("Resume Parser via LLM")
+def pdf_to_string(file):
+    """
+    Convert a PDF file to a string.
 
+    Parameters:
+    file (io.BytesIO): A file-like object representing the PDF file.
+
+    Returns:
+    str: The extracted text from the PDF.
+    """
+    pdf_reader = PyPDF2.PdfReader(file)
+    num_pages = len(pdf_reader.pages)
+    text = ''
+    for i in range(num_pages):
+        page = pdf_reader.pages[i]
+        text += page.extract_text()
+    file.close()
+    return text
+
+def extract_resume_fields(full_text, model):
+    """
+    Analyze a resume text and extract structured information using a specified language model.
+
+    Parameters:
+    full_text (str): The text content of the resume.
+    model (str): The language model object to use for processing the text.
+
+    Returns:
+    dict: A dictionary containing structured information extracted from the resume.
+    """
+    # The Resume object is imported from the local resume_template file
+
+    with open("prompts/resume_extraction.prompt", "r") as f:
+        template = f.read()
+
+    parser = PydanticOutputParser(pydantic_object=Resume)
+
+    prompt_template = PromptTemplate(
+        template=template,
+        input_variables=["resume"],
+        partial_variables={"response_template": parser.get_format_instructions()},
+    )
+    # Invoke the language model and process the resume
+    formatted_input = prompt_template.format_prompt(resume=full_text)
+    llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
+    # print("llm", llm)
+    output = llm.invoke(formatted_input.to_string())
+
+    # print(output) # Print the output object for debugging
+
+    try:
+        parsed_output = parser.parse(output.content)
+        json_output = parsed_output.json()
+        print(json_output)
+        return json_output
+
+    except ValidationError as e:
+        print(f"Validation error: {e}")
+        print(output)
+        return output.content
+
+    except JSONDecodeError as e:
+        print(f"JSONDecodeError error: {e}")
+        print(output)
+        return output.content
+
+st.title("Resume Parser")
+
+# Set up the LLM dictionary
+llm_dict = {
+    "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
+    "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
+    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
+    "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
+    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000)
+}
+
+# Add a Streamlit dropdown menu for model selection
+selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
+
+# Add a file uploader
+uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+
+# Check if a file is uploaded
+if uploaded_file is not None:
+    # Add a button to trigger the conversion
+    if st.button("Convert PDF to Text"):
+        # Convert the uploaded file to a string
+        text = pdf_to_string(uploaded_file)
+
+        # Extract resume fields using the selected model
+        extracted_fields = extract_resume_fields(text, selected_model)
+
+        # Display the extracted fields on the Streamlit app
+        st.json(extracted_fields)
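Note on files referenced but not included in this commit: extract_resume_fields relies on resume_template.py (the Resume Pydantic model handed to PydanticOutputParser) and on prompts/resume_extraction.prompt, which must expose the {resume} and {response_template} placeholders used in the PromptTemplate above. The sketch below is a minimal illustration of how those pieces fit together; the Resume field names and the prompt wording are assumptions, not the repository's actual definitions.

# Illustrative sketch only: the real Resume model and prompt file are not part
# of this diff, so the field names and prompt wording below are assumptions.
from typing import List, Optional
from pydantic import BaseModel
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

class Resume(BaseModel):
    # Hypothetical fields; the actual model lives in resume_template.py.
    name: Optional[str] = None
    email: Optional[str] = None
    skills: List[str] = []

parser = PydanticOutputParser(pydantic_object=Resume)

# prompts/resume_extraction.prompt must contain the same two variables app.py uses:
# {resume} for the raw text and {response_template} for the parser's format instructions.
template = (
    "Extract the candidate's details from the resume below.\n"
    "{response_template}\n\n"
    "Resume:\n{resume}"
)

prompt = PromptTemplate(
    template=template,
    input_variables=["resume"],
    partial_variables={"response_template": parser.get_format_instructions()},
)

print(prompt.format_prompt(resume="Jane Doe, jane@example.com, Python, SQL").to_string())

To exercise the new UI end to end, the usual provider environment variables (OPENAI_API_KEY, plus ANTHROPIC_API_KEY for the Claude options) need to be available to load_dotenv(), after which the app runs with streamlit run app.py.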