cvprofile / app.py
Nasma's picture
Create app.py
292ea38 verified
import functools

import docx
import gradio as gr
import pdfplumber
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# Function to extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the caller in this
            file passes the uploaded resume.

    Returns:
        The text of all pages in page order, joined with newlines so that
        the last word of one page is not fused with the first word of the
        next. A page with no extractable text contributes an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # NOTE(review): extract_text() may return None for a page without a
        # text layer (e.g. a scanned image) depending on the pdfplumber
        # release; `or ""` keeps such a page from raising TypeError.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
# Function to extract text from DOCX
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: Passed straight to ``docx.Document``.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``,
        joined with newline characters. Nothing other than ``paragraphs``
        is read from the document.
    """
    document = docx.Document(docx_file)
    return '\n'.join(para.text for para in document.paragraphs)
# Function to generate roast based on resume text
# Checkpoint used for roasting. Loading it is expensive, so the loader below
# is cached and runs once per process.
_ROAST_MODEL_NAME = "EleutherAI/gpt-neo-1.3B"
_ROAST_PROMPT_PREFIX = "Roast this resume:\n\n"
_ROAST_PROMPT_SUFFIX = "\n\nRoast:"
# Used only when the model config does not expose max_position_embeddings.
_FALLBACK_CONTEXT_TOKENS = 2048
# Slack for tokens that may appear when the truncated resume is decoded to
# text and the assembled prompt is tokenized again inside the pipeline.
_TOKEN_SAFETY_MARGIN = 8


@functools.lru_cache(maxsize=1)
def _load_roast_generator():
    """Load tokenizer, generation pipeline and context size a single time."""
    tokenizer = AutoTokenizer.from_pretrained(_ROAST_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(_ROAST_MODEL_NAME)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    context_tokens = getattr(
        model.config, "max_position_embeddings", _FALLBACK_CONTEXT_TOKENS
    )
    return tokenizer, generator, context_tokens


def generate_roast(resume_text, max_new_tokens=50):
    """Generate a short roast for the given resume text.

    The resume is truncated so that the prompt prefix, the resume, the
    prompt suffix and the tokens to be generated all fit in the model's
    context window.

    Args:
        resume_text: Plain text of the resume.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        Only the newly generated text, stripped of surrounding whitespace.
        The prompt and the resume are not echoed back.

    Raises:
        ValueError: If ``max_new_tokens`` leaves no room for any resume
            text in the context window.
    """
    tokenizer, generator, context_tokens = _load_roast_generator()

    # Everything that shares the context window with the resume must be
    # budgeted for, not just the prefix.
    prefix_tokens = len(tokenizer(_ROAST_PROMPT_PREFIX)['input_ids'])
    suffix_tokens = len(tokenizer(_ROAST_PROMPT_SUFFIX)['input_ids'])
    max_resume_tokens = (
        context_tokens
        - prefix_tokens
        - suffix_tokens
        - max_new_tokens
        - _TOKEN_SAFETY_MARGIN
    )
    if max_resume_tokens < 1:
        raise ValueError(
            f"max_new_tokens={max_new_tokens} leaves no room for the resume "
            f"in a {context_tokens}-token context window"
        )

    # Truncate at token level, then go back to text for the prompt string.
    resume_ids = tokenizer(
        resume_text, truncation=True, max_length=max_resume_tokens
    )['input_ids']
    truncated_resume_text = tokenizer.decode(resume_ids, skip_special_tokens=True)

    final_prompt = (
        f"{_ROAST_PROMPT_PREFIX}{truncated_resume_text}{_ROAST_PROMPT_SUFFIX}"
    )

    # return_full_text=False drops the prompt from the output; otherwise the
    # caller would get the entire resume back in front of the roast.
    outputs = generator(
        final_prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()
# Function to handle file uploads and extract text from resume files
def roast_resume(file=None, resume_text=None):
    """Produce a roast from an uploaded resume file or pasted resume text.

    An uploaded file takes precedence over pasted text.

    Args:
        file: The uploaded resume, or None. Either an object exposing the
            file path as ``name`` or the path itself as a string is
            accepted (NOTE(review): which one Gradio passes depends on the
            installed version / ``gr.File`` configuration — confirm).
        resume_text: Resume text pasted by the user, or None.

    Returns:
        The generated roast, or a user-facing message when the input is
        missing, has an unsupported extension, or yields no text.
    """
    if file:
        # Fall back to the object itself when it has no `name`, so a plain
        # path string works as well as a file wrapper.
        path = str(getattr(file, "name", file)).lower()
        if path.endswith('.pdf'):
            resume_text = extract_text_from_pdf(file)
        elif path.endswith('.docx'):
            resume_text = extract_text_from_docx(file)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."
        if not (resume_text and resume_text.strip()):
            # Nothing usable came out of the file; running the model on an
            # empty resume would only produce noise.
            return "Could not extract any text from the uploaded file."
    elif not (resume_text and resume_text.strip()):
        return "No resume provided."

    return generate_roast(resume_text)
# Gradio interface with file upload or text input options
# The two alternative ways of supplying a resume, in the positional order
# roast_resume expects them (file first, pasted text second).
resume_file_input = gr.File(label="Upload Resume (PDF/DOCX)")
resume_text_input = gr.Textbox(label="Or Paste Your Resume")

# Wire the inputs to roast_resume and show its string result as plain text.
interface = gr.Interface(
    fn=roast_resume,
    inputs=[resume_file_input, resume_text_input],
    outputs="text",
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!",
)

# Start the Gradio app.
interface.launch()