"""Gradio app that extracts text from a resume (PDF, DOCX or pasted) and asks
a GPT-Neo text-generation model to "roast" it."""

import functools
import os

import pdfplumber
import docx
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Hugging Face model used for generation.
MODEL_NAME = "EleutherAI/gpt-neo-1.3B"
# Total token budget (prompt + generated tokens) used when sizing the prompt.
MAX_CONTEXT_TOKENS = 2048
# Number of tokens the model is asked to generate for the roast.
MAX_NEW_TOKENS = 50
# Fixed text placed before and after the resume in the final prompt.
PROMPT_PREFIX = "Roast this resume:\n\n"
PROMPT_SUFFIX = "\n\nRoast:"
# Slack for the decode -> re-tokenize round trip, which is not guaranteed to
# reproduce exactly the same number of tokens at the prefix/suffix seams.
_TOKEN_SAFETY_MARGIN = 8


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The page texts joined with newlines. Pages for which no text could be
        extracted contribute an empty string instead of raising.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # ``extract_text()`` may return None for pages without a text layer;
        # ``or ''`` keeps the join from failing on such pages.
        return '\n'.join(page.extract_text() or '' for page in pdf.pages)


def extract_text_from_docx(docx_file):
    """Return the text of all paragraphs of a DOCX file, newline-separated.

    Args:
        docx_file: Path or file-like object accepted by ``docx.Document``.
    """
    document = docx.Document(docx_file)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)


@functools.lru_cache(maxsize=1)
def _load_generator():
    """Load the tokenizer and text-generation pipeline once and reuse them.

    Loading the model is expensive, so the result is cached for the lifetime
    of the process instead of being rebuilt on every request.

    Returns:
        A ``(tokenizer, generator)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    return tokenizer, generator


def generate_roast(resume_text):
    """Generate a short roast for the given resume text.

    The resume is truncated so that prefix + resume + suffix + generated
    tokens stay within ``MAX_CONTEXT_TOKENS``.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        The generated roast only (the prompt and resume are not echoed back).
    """
    tokenizer, generator = _load_generator()

    # Reserve room for everything that is not the resume itself: the fixed
    # prompt text on both sides, the tokens to be generated, and a margin.
    prefix_tokens = len(tokenizer(PROMPT_PREFIX)['input_ids'])
    suffix_tokens = len(tokenizer(PROMPT_SUFFIX)['input_ids'])
    max_resume_tokens = (
        MAX_CONTEXT_TOKENS
        - MAX_NEW_TOKENS
        - prefix_tokens
        - suffix_tokens
        - _TOKEN_SAFETY_MARGIN
    )

    # Tokenize with truncation, then decode back to a string so the resume
    # can be embedded in the textual prompt.
    resume_ids = tokenizer(
        resume_text,
        truncation=True,
        max_length=max_resume_tokens,
    )['input_ids']
    truncated_resume_text = tokenizer.decode(resume_ids, skip_special_tokens=True)

    final_prompt = f"{PROMPT_PREFIX}{truncated_resume_text}{PROMPT_SUFFIX}"

    # return_full_text=False makes the pipeline return only the continuation,
    # so the user sees the roast rather than their own resume repeated.
    outputs = generator(
        final_prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()


def roast_resume(file=None, resume_text=None):
    """Gradio callback: roast an uploaded resume file or pasted resume text.

    An uploaded file takes precedence over pasted text.

    Args:
        file: Uploaded file, either a filesystem path or an object exposing
            the path as ``.name`` (which one arrives is presumably decided by
            the Gradio version/configuration - verify against deployment).
        resume_text: Resume text pasted by the user.

    Returns:
        The generated roast, or a human-readable message when no usable
        resume was supplied.
    """
    if file:
        # Normalise the upload to a path so both shapes are handled alike.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        # Compare the extension case-insensitively so ".PDF" is accepted.
        extension = os.path.splitext(path)[1].lower()
        if extension == '.pdf':
            resume_text = extract_text_from_pdf(path)
        elif extension == '.docx':
            resume_text = extract_text_from_docx(path)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."

        # E.g. a scanned PDF with no text layer: nothing to roast.
        if not resume_text.strip():
            return "Could not extract any text from the uploaded file."
    elif not resume_text or not resume_text.strip():
        return "No resume provided."

    return generate_roast(resume_text)


# Gradio interface with file upload or text input options
interface = gr.Interface(
    fn=roast_resume,
    inputs=[
        gr.File(label="Upload Resume (PDF/DOCX)"),
        gr.Textbox(label="Or Paste Your Resume"),
    ],
    outputs="text",
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!",
)

# Launch Gradio app
interface.launch()