|
import pdfplumber |
|
import docx |
|
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM |
|
import gradio as gr |
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the value is passed
            through unchanged.

    Returns:
        The text of all pages joined in page order with no separator.
        Pages that yield no text contribute an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # NOTE(review): on some pdfplumber versions extract_text() returns
        # None for a page with no text layer (e.g. a scanned image).
        # Coercing to "" keeps one such page from raising TypeError.
        return ''.join(page.extract_text() or '' for page in pdf.pages)
|
|
|
|
|
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: Value handed straight to ``docx.Document``.

    Returns:
        The ``text`` of each entry in ``Document.paragraphs``, joined with
        newline characters.
    """
    document = docx.Document(docx_file)
    return '\n'.join(para.text for para in document.paragraphs)
|
|
|
|
|
_ROAST_MODEL_NAME = "EleutherAI/gpt-neo-1.3B"
# Total token budget (prompt + generated text); same figure the original
# code used for this model.
_ROAST_CONTEXT_TOKENS = 2048
_ROAST_MAX_NEW_TOKENS = 50
# Slack for tokens that may appear when the truncated resume is decoded and
# re-tokenized together with the surrounding prompt text.
_ROAST_TOKEN_MARGIN = 8
_ROAST_PROMPT_PREFIX = "Roast this resume:\n\n"
_ROAST_PROMPT_SUFFIX = "\n\nRoast:"

# Holds the text-generation pipeline once it has been built, so the model
# weights are loaded a single time per process instead of once per request.
_roast_generator_cache = {}


def _get_roast_generator():
    """Build the text-generation pipeline on first use and reuse it afterwards."""
    if 'generator' not in _roast_generator_cache:
        tokenizer = AutoTokenizer.from_pretrained(_ROAST_MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(_ROAST_MODEL_NAME)
        _roast_generator_cache['generator'] = pipeline(
            'text-generation', model=model, tokenizer=tokenizer
        )
    return _roast_generator_cache['generator']


def generate_roast(resume_text):
    """Generate a short "roast" of the given resume text.

    The resume is truncated so that the full prompt (prefix + resume +
    suffix) plus the generated tokens stays inside the token budget.

    Args:
        resume_text: Plain-text resume content.

    Returns:
        Only the newly generated text. The prompt and the resume are not
        echoed back.
    """
    generator = _get_roast_generator()
    tokenizer = generator.tokenizer

    # Reserve room for everything that is not the resume: the fixed prompt
    # text, the tokens to be generated, and a small safety margin.
    fixed_tokens = len(
        tokenizer(_ROAST_PROMPT_PREFIX + _ROAST_PROMPT_SUFFIX)['input_ids']
    )
    max_resume_tokens = (
        _ROAST_CONTEXT_TOKENS
        - fixed_tokens
        - _ROAST_MAX_NEW_TOKENS
        - _ROAST_TOKEN_MARGIN
    )

    # Truncate at the token level, then decode back to text so the final
    # prompt can be assembled as a plain string.
    resume_ids = tokenizer(
        resume_text, truncation=True, max_length=max_resume_tokens
    )['input_ids']
    truncated_resume_text = tokenizer.decode(resume_ids, skip_special_tokens=True)

    final_prompt = (
        f"{_ROAST_PROMPT_PREFIX}{truncated_resume_text}{_ROAST_PROMPT_SUFFIX}"
    )

    # return_full_text=False drops the prompt from the pipeline output so the
    # caller receives only the roast.
    roast = generator(
        final_prompt,
        max_new_tokens=_ROAST_MAX_NEW_TOKENS,
        num_return_sequences=1,
        return_full_text=False,
    )
    return roast[0]['generated_text']
|
|
|
|
|
def roast_resume(file=None, resume_text=None):
    """Produce a roast from an uploaded resume file or from pasted text.

    An uploaded file takes precedence over pasted text.

    Args:
        file: The uploaded file, either a path string or an object exposing
            the path as ``.name``. NOTE(review): which of the two Gradio
            passes depends on the Gradio version/configuration; confirm.
        resume_text: Resume text pasted by the user, used when no file is
            given.

    Returns:
        The generated roast, or a human-readable message when no usable
        resume was supplied.
    """
    if file:
        # Accept both a plain path and a file wrapper with a .name attribute.
        file_name = file if isinstance(file, str) else file.name
        # Compare case-insensitively so "CV.PDF" is treated like "cv.pdf".
        lowered_name = file_name.lower()
        if lowered_name.endswith('.pdf'):
            resume_text = extract_text_from_pdf(file)
        elif lowered_name.endswith('.docx'):
            resume_text = extract_text_from_docx(file)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."

        # A file with no extractable text would otherwise send an empty
        # prompt to the model.
        if not resume_text or not resume_text.strip():
            return "Could not extract any text from the uploaded file."
    elif not resume_text or not resume_text.strip():
        # Covers None, "" and whitespace-only pasted text.
        return "No resume provided."

    return generate_roast(resume_text)
|
|
|
|
|
# Input widgets, in the positional order roast_resume expects them:
# the uploaded file first, the pasted text second.
_resume_upload = gr.File(label="Upload Resume (PDF/DOCX)")
_resume_textbox = gr.Textbox(label="Or Paste Your Resume")

# Web UI that wires the two inputs to roast_resume and shows its text result.
interface = gr.Interface(
    fn=roast_resume,
    inputs=[_resume_upload, _resume_textbox],
    outputs="text",
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!",
)

# Start the Gradio app.
interface.launch()