import os
from pathlib import Path

import faiss
import fitz  # PyMuPDF
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

st.title("Évaluation Stagiaire Data Scientist")

uploaded_file = st.file_uploader("Choisissez un fichier PDF", type="pdf")


# Streamlit re-executes this script on every widget interaction, so the heavy
# models are loaded through cached factories instead of being rebuilt on each
# call.
@st.cache_resource
def _load_embedding_model():
    """Return the shared sentence-embedding model."""
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


@st.cache_resource
def _load_qa_pipeline():
    """Return the shared extractive question-answering pipeline."""
    return pipeline('question-answering', model='deepset/roberta-base-squad2')


@st.cache_resource
def _load_question_generator():
    """Return the shared question-generation pipeline."""
    return pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")


def save_uploaded_file(uploaded_file, directory):
    """Write an uploaded file into *directory* and return the saved path.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()`` (as
            returned by ``st.file_uploader``).
        directory: Destination directory; created if it does not exist.

    Returns:
        ``pathlib.Path`` of the written file.
    """
    directory = Path(directory)
    directory.mkdir(parents=True, exist_ok=True)
    # Keep only the final path component so a crafted upload name such as
    # "../../x.pdf" cannot escape the destination directory.
    file_path = directory / Path(uploaded_file.name).name
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as pdf_document:
        return "".join(
            pdf_document.load_page(page_num).get_text()
            for page_num in range(pdf_document.page_count)
        )


def index_document(text):
    """Embed *text* as one vector and write a FAISS index to disk.

    The index is saved as ``document_index.faiss`` in the current working
    directory; nothing is returned.
    """
    # NOTE(review): the whole document is encoded as a single embedding;
    # the model presumably truncates long inputs -- confirm whether
    # per-chunk indexing is wanted.
    model = _load_embedding_model()
    document_embeddings = model.encode([text], convert_to_tensor=True)
    index = faiss.IndexFlatL2(document_embeddings.shape[1])
    index.add(document_embeddings.cpu().detach().numpy())
    faiss.write_index(index, 'document_index.faiss')


def get_answer_from_document(question, context):
    """Run extractive QA for *question* over *context*.

    Returns:
        The raw result of the question-answering pipeline.
    """
    return _load_qa_pipeline()(question=question, context=context)


@st.cache_data
def generate_questions(text, num_questions=5, num_beams=5):
    """Generate *num_questions* questions from *text* with beam search.

    Results are cached per argument set so that Streamlit reruns do not
    repeat the generation.

    Args:
        text: Source text the questions are generated from.
        num_questions: Number of sequences to return.
        num_beams: Requested beam width; raised to ``num_questions`` when
            smaller, because generation rejects more returned sequences
            than beams.

    Returns:
        List of generated question strings.
    """
    generator = _load_question_generator()
    input_text = "generate questions: " + text
    questions = generator(
        input_text,
        max_length=512,
        num_beams=max(num_beams, num_questions),
        num_return_sequences=num_questions,
        # Bound the input to the model's maximum length instead of feeding
        # it the entire document.
        truncation=True,
    )
    return [q['generated_text'] for q in questions]


def evaluate_responses(user_responses, correct_answers):
    """Score each user response against its reference answer.

    Args:
        user_responses: List of answer strings typed by the user.
        correct_answers: List of reference answer strings, in the same order.

    Returns:
        List of cosine similarities (floats), one per aligned pair. Pairs
        are formed with ``zip``, so extra items in the longer list are
        ignored.
    """
    if not user_responses or not correct_answers:
        return []
    model = _load_embedding_model()
    user_embeddings = model.encode(user_responses, convert_to_tensor=True)
    correct_embeddings = model.encode(correct_answers, convert_to_tensor=True)
    return [
        util.pytorch_cos_sim(user_emb, correct_emb).item()
        for user_emb, correct_emb in zip(user_embeddings, correct_embeddings)
    ]


def generate_training_plan(scores, threshold=0.7):
    """Build one recommendation per score.

    A score strictly below *threshold* yields a "review this section"
    step; any other score yields a "move on" step.
    """
    plan = []
    for idx, score in enumerate(scores):
        if score < threshold:
            plan.append(f"Revoir la section correspondant à la question {idx+1}")
        else:
            plan.append(f"Passer à l'étape suivante après la question {idx+1}")
    return plan


if uploaded_file is not None:
    file_path = save_uploaded_file(uploaded_file, "uploaded_documents")
    st.write(f"Fichier téléchargé et sauvegardé sous : {file_path}")

    document_text = extract_text_from_pdf(file_path)
    st.write("Texte extrait du document PDF:")
    st.write(document_text[:1000])  # Show only the first 1000 characters

    index_document(document_text)

    # Question answering needs a non-empty context, so stop here when the
    # PDF yielded no text (e.g. a scanned document without a text layer).
    if not document_text.strip():
        st.warning("Aucun texte n'a pu être extrait de ce PDF.")
        st.stop()

    st.subheader("Questions générées")
    questions = generate_questions(document_text, num_questions=5)
    for idx, question in enumerate(questions):
        st.write(f"Question {idx+1}: {question}")

    st.subheader("Évaluer les réponses de l'utilisateur")
    # One input per generated question rather than a hard-coded count.
    user_responses = [
        st.text_input(f"Réponse de l'utilisateur {idx+1}")
        for idx, _ in enumerate(questions)
    ]

    if st.button("Évaluer"):
        # Reference answers are extracted from the document itself; the
        # previous placeholder strings made the similarity scores
        # meaningless.
        correct_answers = [
            get_answer_from_document(question, document_text)["answer"]
            for question in questions
        ]
        scores = evaluate_responses(user_responses, correct_answers)
        for idx, score in enumerate(scores):
            st.write(f"Question {idx+1}: Score {score:.2f}")

        st.subheader("Plan de formation personnalisé")
        training_plan = generate_training_plan(scores)
        for step in training_plan:
            st.write(step)