Spaces:

srijaydeshpande
/

CVCraft

Running on Zero

App Files Files Community

CVCraft / app.py

srijaydeshpande

Update app.py

5c7d67b verified 5 days ago

raw

history blame contribute delete

7.16 kB

	# from pdfminer.high_level import extract_pages
	# from pdfminer.layout import LTTextContainer
	from tqdm import tqdm
	import re
	import gradio as gr
	import os
	import accelerate
	import spaces
	import subprocess
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	from docling.document_converter import DocumentConverter

	from huggingface_hub import login

	# Authenticate against the Hugging Face Hub only when a token is configured.
	# Calling login() with token=None falls back to an interactive prompt, which
	# cannot be answered when the app runs headless.
	hf_token = os.getenv('HF_TOKEN')
	if hf_token:
	    login(token=hf_token)

	# Hub repository and GGUF file name of the model loaded by pdf_to_text().
	repo_id = "srijaydeshpande/CVCRaft"
	model_id = "cvcraft2.gguf"

	# Fetch the model file into ./models when this module is executed;
	# pdf_to_text() later opens it from "models/" + model_id.
	hf_hub_download(
	    repo_id=repo_id,
	    filename=model_id,
	    local_dir="./models",
	)

	def process_document(pdf_path):
	    """Extract the normalised text of every page of a PDF.

	    Each page yielded by ``extract_pages`` is converted to text by
	    ``process_page``; ``tqdm`` reports progress while pages are consumed.

	    NOTE(review): ``extract_pages`` is only named in the commented-out
	    pdfminer import at the top of this file, so that import has to be
	    restored before this function can be called.

	    Args:
	        pdf_path: Passed unchanged to ``extract_pages``.

	    Returns:
	        dict mapping each page's ``pageid`` to the text of that page.
	    """
	    return {
	        page.pageid: process_page(page)
	        for page in tqdm(extract_pages(pdf_path))
	    }


	def process_page(extracted_page):
	    """Return the text of one extracted page as a single string.

	    The page's layout objects (``extracted_page._objs``) are visited in
	    order of descending ``y1`` (presumably top of the page first - confirm
	    against the PDF coordinate system). Only ``LTTextContainer`` instances
	    contribute text; runs of consecutive newlines in the joined result are
	    collapsed into one.

	    NOTE(review): ``LTTextContainer`` is only named in the commented-out
	    pdfminer import at the top of this file; a page with at least one
	    object raises NameError until that import is restored.

	    Args:
	        extracted_page: Object exposing ``_objs``, an iterable of layout
	            elements that each have a ``y1`` attribute.

	    Returns:
	        str: The concatenated, newline-collapsed text of the page.
	    """
	    # sorted() makes its own list, so no explicit copy of _objs is needed.
	    ordered = sorted(extracted_page._objs, key=lambda obj: obj.y1, reverse=True)
	    chunks = [
	        extract_text_and_normalize(obj)
	        for obj in ordered
	        if isinstance(obj, LTTextContainer)
	    ]
	    return re.sub('\n+', '\n', ''.join(chunks))


	# Compiled once at import time. Raw strings keep the regex escapes from
	# being read as (invalid) Python string escapes.
	_WHITESPACE_RUN = re.compile(r'\s+')
	_LINE_CONTINUES = re.compile(r'[\w\d\,\-]')


	def extract_text_and_normalize(element):
	    """Normalise the text of a layout element line by line.

	    The element's text is split on newlines and each line is stripped.
	    An empty line becomes a single newline. In a non-empty line every run
	    of whitespace is collapsed to one space; the line is then followed by
	    a space when its last character is a word character, digit, comma or
	    hyphen (the sentence is assumed to continue on the next line) and by
	    a newline otherwise.

	    Args:
	        element: Object whose ``get_text()`` returns a ``str``.

	    Returns:
	        str: The normalised lines concatenated in their original order.
	    """
	    pieces = []
	    for raw_line in element.get_text().split('\n'):
	        line = raw_line.strip()
	        if not line:
	            pieces.append('\n')
	            continue
	        line = _WHITESPACE_RUN.sub(' ', line)
	        # Only the final character decides whether the line is joined to
	        # the next one (space) or terminated (newline).
	        separator = ' ' if _LINE_CONTINUES.search(line[-1]) else '\n'
	        pieces.append(line + separator)
	    return ''.join(pieces)


	def txt_to_html(text):
	    """Wrap plain text in a minimal HTML document, one paragraph per line.

	    The text is split on newlines; every line (including empty ones) is
	    stripped and emitted as its own ``<p>`` element. The characters
	    ``&``, ``<`` and ``>`` are escaped so that the input is rendered as
	    text instead of being interpreted as markup.

	    Args:
	        text: Plain text to convert.

	    Returns:
	        str: ``<html><body>...</body></html>`` containing the paragraphs.
	    """
	    # Function-scope import so the block does not depend on a new
	    # top-of-file import.
	    from html import escape

	    paragraphs = (
	        "<p>{}</p>".format(escape(line.strip(), quote=False))
	        for line in text.split('\n')
	    )
	    return "<html><body>" + "".join(paragraphs) + "</body></html>"

	def craft_cv(llm, prompt, maxtokens, temperature, top_probability):
	    """Send ``prompt`` to the model as one user turn and return its reply.

	    Args:
	        llm: Object exposing ``create_chat_completion`` (a
	            ``llama_cpp.Llama`` instance in this file).
	        prompt: Full text of the user message.
	        maxtokens: Forwarded as ``max_tokens``.
	        temperature: Forwarded as ``temperature``.
	        top_probability: Forwarded as ``top_p`` (nucleus sampling
	            threshold).

	    Returns:
	        tuple[str, str]: ``('', reply)``. The first item is always the
	        empty string and is kept so callers can go on unpacking two
	        values; the second is the content of the first returned choice.
	    """
	    # NOTE(review): the message uses "from"/"value" keys instead of the
	    # more common "role"/"content" pair; presumably the model's chat
	    # template expects them - confirm before changing.
	    response = llm.create_chat_completion(
	        messages=[
	            {"from": "user", "value": prompt},
	        ],
	        max_tokens=maxtokens,
	        temperature=temperature,
	        top_p=top_probability,
	    )
	    return '', response['choices'][0]['message']['content']

	def convert_to_json(llm, cv_text, maxtokens, temperature, top_probability):
	    """Ask the model to restructure resume text into a fixed JSON schema.

	    A single user message is sent, made of the schema instructions below
	    followed by ``' CV text: '`` and ``cv_text``. The reply is returned
	    as-is; it is not parsed or validated as JSON here.

	    NOTE(review): the schema key ``achievment`` is spelled this way in the
	    prompt; it is left untouched because consumers of the output may rely
	    on it.

	    Args:
	        llm: Object exposing ``create_chat_completion`` (a
	            ``llama_cpp.Llama`` instance in this file).
	        cv_text: Resume text to convert.
	        maxtokens: Forwarded as ``max_tokens``.
	        temperature: Forwarded as ``temperature``.
	        top_probability: Forwarded as ``top_p`` (nucleus sampling
	            threshold).

	    Returns:
	        str: Content of the first choice returned by the model.
	    """
	    json_format = """
	You are an expert at structuring resumes in JSON format. Given a modified resume text, extract the relevant details and convert them into the following structured JSON format:

	{
	"profileDetail": {
	"name": "[Candidate's Name]",
	"email": "[Candidate's Email]",
	"phone": "[Candidate's Phone]",
	"linkedin": "[Candidate's LinkedIn]",
	"languages": "Hindi,English",
	"interests": "Cricket",
	"location": "[Candidate's Location]",
	"role": "[Candidate's Role]"
	},
	"professionalSummary": "[Candidate's Professional Summary]",
	"skills": ["skill1", "skill2"],
	"workExperience": [
	{
	"title": "[Job Title]",
	"company": "[Company Name]",
	"location": "[Location]",
	"startDate": "[Start Date]",
	"endDate": "[End Date]",
	"responsibilities": ["[Responsibility 1]", "[Responsibility 2]"],
	"projects": [
	{
	"title": "[Project Title]",
	"description": "[Project Description]"
	}
	]
	}
	],
	"education": [
	{
	"degree": "[Degree]",
	"institution": "[Institution]",
	"location": "[Location]",
	"graduationDate": "[Graduation Date]"
	}
	],
	"certifications": ["[Certification 1]", "[Certification 2]"] ,
	"extraCurricular": "[Extra Curricular Activities]",
	"achievment": "[Achievements]"
	}

	Instructions:
	- Extract details accurately from the given resume.
	- Ensure proper structuring of dates, responsibilities, and projects.
	- If a field is missing in the input, leave it as an empty string or an empty list where applicable.
	- Maintain proper formatting and avoid unnecessary additions.

	Provide the response in a valid JSON format with no additional explanations.
	"""
	    # NOTE(review): "from"/"value" message keys are kept as found;
	    # presumably the model's chat template expects them - confirm.
	    response = llm.create_chat_completion(
	        messages=[
	            {"from": "user", "value": json_format + ' CV text: ' + cv_text},
	        ],
	        max_tokens=maxtokens,
	        temperature=temperature,
	        top_p=top_probability,
	    )
	    return response['choices'][0]['message']['content']

	@spaces.GPU(duration=40)
	def pdf_to_text(prompt, maxtokens=2048, temperature=0, top_probability=0.95):
	    """Run the two-step CV pipeline on ``prompt`` and return the JSON text.

	    Despite its name, this function reads no PDF: the earlier PDF
	    extraction steps are disabled and the caller supplies the complete
	    prompt. The GGUF model is loaded from ``models/<model_id>`` on every
	    call, ``craft_cv`` produces the revised CV, and ``convert_to_json``
	    restructures that result.

	    Args:
	        prompt: Text sent to the model as the user message.
	        maxtokens: Generation limit passed to both model calls.
	        temperature: Sampling temperature passed to both model calls.
	        top_probability: Passed through to both helper functions.

	    Returns:
	        Whatever ``convert_to_json`` returns for the revised CV.
	    """
	    model = Llama(
	        model_path=f"models/{model_id}",
	        flash_attn=True,
	        n_gpu_layers=81,
	        n_batch=1024,
	        n_ctx=8192,
	    )
	    # craft_cv returns a pair; only the model's reply is needed here.
	    _, revised_cv = craft_cv(model, prompt, maxtokens, temperature, top_probability)
	    return convert_to_json(model, revised_cv, maxtokens, temperature, top_probability)

	# --- Gradio UI wiring ---------------------------------------------------
	# NOTE(review): the six components below are constructed but never handed
	# to gr.Interface, which uses the 'text' shortcuts instead; confirm whether
	# they are still meant to be wired in.
	temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
	prob_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Max Probability Value")
	max_tokens = gr.Number(value=600, label="Max Tokens")
	cv_file = gr.File(label='Upload the CV')
	prompt_text = gr.Textbox(label='Enter the job description')
	output_text = gr.Textbox()
	# One text input is mapped to pdf_to_text's `prompt`; its other parameters
	# (maxtokens, temperature, top_probability) keep their defaults. The return
	# value is shown as text.
	iface = gr.Interface(
	fn=pdf_to_text,
	# inputs=[cv_file, prompt_text],
	inputs=['text'],
	outputs=['text'],
	title='Craft CV',
	description="This application assists to customize CV based on input job description",
	theme=gr.themes.Soft(),
	)
	# Starts the app as a side effect of executing this module (no __main__ guard).
	iface.launch()