import streamlit as st
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer, util
import faiss
from transformers import pipeline

st.title("Évaluation Stagiaire Data Scientist")

uploaded_file = st.file_uploader("Choisissez un fichier PDF", type="pdf")


# Streamlit re-executes this whole script on every widget interaction, so the
# heavyweight models are loaded through cached factories instead of being
# re-instantiated inside each function call.
@st.cache_resource
def _load_sentence_encoder():
    """Return the shared sentence-embedding model."""
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


@st.cache_resource
def _load_qa_pipeline():
    """Return the shared extractive question-answering pipeline."""
    return pipeline('question-answering', model='deepset/roberta-base-squad2')


@st.cache_resource
def _load_question_generation_pipeline():
    """Return the shared text2text pipeline used to generate questions."""
    return pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    The document is opened with a context manager so the underlying file
    handle is released even if text extraction fails part-way through.
    """
    with fitz.open(pdf_path) as pdf_document:
        return "".join(page.get_text() for page in pdf_document)


def index_document(text):
    """Embed *text* as a single document and persist a FAISS L2 index.

    The index holds exactly one vector (the embedding of the whole text) and
    is written to ``document_index.faiss`` in the current working directory.
    """
    model = _load_sentence_encoder()
    documents = [text]
    document_embeddings = model.encode(documents, convert_to_tensor=True)
    index = faiss.IndexFlatL2(document_embeddings.shape[1])
    index.add(document_embeddings.cpu().detach().numpy())
    faiss.write_index(index, 'document_index.faiss')


def get_answer_from_document(question, context):
    """Run extractive QA for *question* over *context*.

    Returns whatever the question-answering pipeline returns for the pair.
    """
    qa_pipeline = _load_qa_pipeline()
    return qa_pipeline(question=question, context=context)


@st.cache_data
def generate_questions(text, num_questions=5):
    """Generate up to *num_questions* questions from *text*.

    Returns a list of generated strings; the list is empty when *text* is
    blank or *num_questions* is not positive. Results are cached per
    ``(text, num_questions)`` so Streamlit reruns do not re-run the model.
    """
    if num_questions < 1 or not text.strip():
        return []

    question_generation_pipeline = _load_question_generation_pipeline()
    # NOTE(review): confirm this prompt prefix against the model card of
    # valhalla/t5-base-qg-hl; it is kept exactly as originally written.
    input_text = "generate questions: " + text
    questions = question_generation_pipeline(
        input_text,
        max_length=512,
        # Returning several sequences requires a beam at least as wide as the
        # number of sequences; default greedy decoding (num_beams=1) rejects
        # num_return_sequences > 1.
        num_beams=num_questions,
        num_return_sequences=num_questions,
        # A whole PDF normally exceeds the model's input window.
        truncation=True,
    )
    return [q['generated_text'] for q in questions]


def evaluate_responses(user_responses, correct_answers):
    """Score each user response against its reference answer.

    Responses and references are paired positionally; extra items on either
    side are ignored. Returns one cosine-similarity float per pair, or an
    empty list when either input is empty.
    """
    if not user_responses or not correct_answers:
        return []

    model = _load_sentence_encoder()
    user_embeddings = model.encode(user_responses, convert_to_tensor=True)
    correct_embeddings = model.encode(correct_answers, convert_to_tensor=True)
    return [
        util.pytorch_cos_sim(user_emb, correct_emb).item()
        for user_emb, correct_emb in zip(user_embeddings, correct_embeddings)
    ]


def generate_training_plan(scores, threshold=0.7):
    """Turn per-question scores into one recommendation line per question.

    A score strictly below *threshold* yields a "review" recommendation;
    any other score yields a "move on" recommendation. Question numbers in
    the messages are 1-based.
    """
    plan = []
    for number, score in enumerate(scores, start=1):
        if score < threshold:
            plan.append(f"Revoir la section correspondant à la question {number}")
        else:
            plan.append(f"Passer à l'étape suivante après la question {number}")
    return plan


if uploaded_file is not None:
    # Persist the upload to disk because extract_text_from_pdf takes a path.
    with open("uploaded_document.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())

    document_text = extract_text_from_pdf("uploaded_document.pdf")
    st.write("Texte extrait du document PDF:")
    st.write(document_text[:1000])  # Show the first 1000 extracted characters

    index_document(document_text)

    st.subheader("Questions générées")
    questions = generate_questions(document_text, num_questions=5)
    for number, question in enumerate(questions, start=1):
        st.write(f"Question {number}: {question}")

    st.subheader("Évaluer les réponses de l'utilisateur")
    # One answer field per generated question, so the inputs always line up
    # with the questions actually shown above.
    question_numbers = range(1, len(questions) + 1)
    user_responses = [
        st.text_input(f"Réponse de l'utilisateur {number}")
        for number in question_numbers
    ]

    if st.button("Évaluer"):
        # TODO(review): these reference answers are placeholders, so the
        # similarity scores below are not meaningful yet. Real references
        # could come from get_answer_from_document(question, document_text).
        correct_answers = [
            f"La réponse correcte {number}" for number in question_numbers
        ]
        scores = evaluate_responses(user_responses, correct_answers)
        for number, score in enumerate(scores, start=1):
            st.write(f"Question {number}: Score {score:.2f}")

        st.subheader("Plan de formation personnalisé")
        training_plan = generate_training_plan(scores)
        for step in training_plan:
            st.write(step)