|
import functools
import os
from pathlib import Path

import faiss
import fitz
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
|
|
|
# Page header for the intern (data scientist) evaluation tool.
st.title("Évaluation Stagiaire Data Scientist")



# Returns None until the user uploads a file; only PDFs are accepted.
uploaded_file = st.file_uploader("Choisissez un fichier PDF", type="pdf")
|
|
|
def save_uploaded_file(uploaded_file, directory):
    """Persist an uploaded file to *directory* and return the written path.

    Parameters
    ----------
    uploaded_file : object exposing ``.name`` and ``.getbuffer()``
        (e.g. a Streamlit ``UploadedFile``).
    directory : str | Path
        Target directory; created (including parents) if missing.

    Returns
    -------
    Path
        Full path of the file that was written.
    """
    directory = Path(directory)
    directory.mkdir(parents=True, exist_ok=True)
    file_path = directory / uploaded_file.name
    # write_bytes opens, writes, and closes the file in one call.
    file_path.write_bytes(uploaded_file.getbuffer())
    return file_path
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page of the PDF.

    Parameters
    ----------
    pdf_path : str | Path
        Path of the PDF file to read.

    Returns
    -------
    str
        Text of all pages in document order ("" for an empty document).
    """
    # Context manager closes the document handle even on error
    # (the original version leaked the open file).
    with fitz.open(pdf_path) as pdf_document:
        return "".join(
            pdf_document.load_page(page_num).get_text()
            for page_num in range(pdf_document.page_count)
        )
|
|
|
def index_document(text, index_path='document_index.faiss'):
    """Embed *text* and persist a FAISS flat-L2 index on disk.

    Parameters
    ----------
    text : str
        Document text to embed (indexed as a single entry).
    index_path : str, optional
        Destination file for the serialized index
        (default kept for backward compatibility).
    """
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    # Single-document corpus; encode returns a (1, dim) tensor.
    document_embeddings = model.encode([text], convert_to_tensor=True)
    index = faiss.IndexFlatL2(document_embeddings.shape[1])
    # FAISS requires a CPU numpy array, not a torch tensor.
    index.add(document_embeddings.cpu().detach().numpy())
    faiss.write_index(index, index_path)
|
|
|
@functools.lru_cache(maxsize=1)
def _get_qa_pipeline():
    """Build the extractive-QA pipeline once; the model load is expensive."""
    return pipeline('question-answering', model='deepset/roberta-base-squad2')


def get_answer_from_document(question, context):
    """Answer *question* using *context* via an extractive QA model.

    Parameters
    ----------
    question : str
    context : str
        Text the answer is extracted from.

    Returns
    -------
    dict
        The transformers QA pipeline result (answer span, score, offsets).
    """
    # Reuse the cached pipeline instead of reloading the model per call.
    result = _get_qa_pipeline()(question=question, context=context)
    return result
|
|
|
def generate_questions(text, num_questions=5, num_beams=5):
    """Generate comprehension questions about *text* with a T5 QG model.

    Parameters
    ----------
    text : str
        Source text for question generation.
    num_questions : int, optional
        Number of questions to return.
    num_beams : int, optional
        Beam width for beam search.

    Returns
    -------
    list[str]
        Generated question strings.
    """
    # transformers raises if num_return_sequences > num_beams; clamp up.
    num_beams = max(num_beams, num_questions)
    question_generation_pipeline = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
    input_text = "generate questions: " + text
    questions = question_generation_pipeline(
        input_text,
        max_length=512,
        num_beams=num_beams,
        num_return_sequences=num_questions,
    )
    return [q['generated_text'] for q in questions]
|
|
|
def evaluate_responses(user_responses, correct_answers):
    """Score each user answer against its reference answer.

    Both lists are embedded with the same sentence encoder and compared
    pairwise by cosine similarity.

    Parameters
    ----------
    user_responses : list[str]
    correct_answers : list[str]

    Returns
    -------
    list[float]
        One similarity score per (user, reference) pair; higher is closer.
    """
    encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    response_vecs = encoder.encode(user_responses, convert_to_tensor=True)
    reference_vecs = encoder.encode(correct_answers, convert_to_tensor=True)
    return [
        util.pytorch_cos_sim(resp, ref).item()
        for resp, ref in zip(response_vecs, reference_vecs)
    ]
|
|
|
def generate_training_plan(scores, threshold=0.7):
    """Turn per-question similarity scores into a remediation plan.

    Parameters
    ----------
    scores : list[float]
        One similarity score per question (order preserved).
    threshold : float, optional
        Scores strictly below this get a "review" step; others a
        "move on" step.

    Returns
    -------
    list[str]
        One plan entry per score, numbered from 1.
    """
    return [
        (
            f"Revoir la section correspondant à la question {idx+1}"
            if score < threshold
            else f"Passer à l'étape suivante après la question {idx+1}"
        )
        for idx, score in enumerate(scores)
    ]
|
|
|
# Main flow: runs once a PDF has been uploaded.
if uploaded_file is not None:
    file_path = save_uploaded_file(uploaded_file, "uploaded_documents")
    st.write(f"Fichier téléchargé et sauvegardé sous : {file_path}")

    document_text = extract_text_from_pdf(file_path)
    st.write("Texte extrait du document PDF:")
    # Preview only the first 1000 characters to keep the page readable.
    st.write(document_text[:1000])

    index_document(document_text)

    st.subheader("Questions générées")
    questions = generate_questions(document_text, num_questions=5)
    for idx, question in enumerate(questions):
        st.write(f"Question {idx+1}: {question}")

    st.subheader("Évaluer les réponses de l'utilisateur")
    # One input per generated question (was hard-coded to 5).
    user_responses = [
        st.text_input(f"Réponse de l'utilisateur {idx+1}")
        for idx in range(len(questions))
    ]

    if st.button("Évaluer"):
        # NOTE(review): placeholder reference answers — replace with real
        # answers extracted from the document (e.g. via
        # get_answer_from_document).
        correct_answers = ["La réponse correcte 1", "La réponse correcte 2", "La réponse correcte 3", "La réponse correcte 4", "La réponse correcte 5"]
        scores = evaluate_responses(user_responses, correct_answers)
        for idx, score in enumerate(scores):
            st.write(f"Question {idx+1}: Score {score:.2f}")

        # Plan is derived from the scores, so it must stay inside the
        # button branch where `scores` is defined.
        st.subheader("Plan de formation personnalisé")
        training_plan = generate_training_plan(scores)
        for step in training_plan:
            st.write(step)
|
|
|
|