kawadou committed on
Commit
79a7b9c
1 Parent(s): b74ef0e

Add application file

Browse files
Files changed (2) hide show
  1. app.py +14 -4
  2. requirements.txt +1 -0
app.py CHANGED
@@ -3,11 +3,21 @@ import fitz # PyMuPDF
3
  from sentence_transformers import SentenceTransformer, util
4
  import faiss
5
  from transformers import pipeline
 
 
6
 
7
  st.title("Évaluation Stagiaire Data Scientist")
8
 
9
  uploaded_file = st.file_uploader("Choisissez un fichier PDF", type="pdf")
10
 
 
 
 
 
 
 
 
 
11
  def extract_text_from_pdf(pdf_path):
12
  text = ""
13
  pdf_document = fitz.open(pdf_path)
@@ -55,10 +65,10 @@ def generate_training_plan(scores, threshold=0.7):
55
  return plan
56
 
57
  if uploaded_file is not None:
58
- with open("uploaded_document.pdf", "wb") as f:
59
- f.write(uploaded_file.getbuffer())
60
- document_text = extract_text_from_pdf("uploaded_document.pdf")
61
-
62
  st.write("Texte extrait du document PDF:")
63
  st.write(document_text[:1000]) # Affiche les 1000 premiers caractères du texte extrait
64
 
 
3
  from sentence_transformers import SentenceTransformer, util
4
  import faiss
5
  from transformers import pipeline
6
+ import os
7
+ from pathlib import Path
8
 
9
  st.title("Évaluation Stagiaire Data Scientist")
10
 
11
  uploaded_file = st.file_uploader("Choisissez un fichier PDF", type="pdf")
12
 
13
def save_uploaded_file(uploaded_file, directory):
    """Write an uploaded file into *directory* and return the saved path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (the file name
            reported by the client) and a ``getbuffer()`` method returning
            the file's bytes.
        directory: Destination directory; created (with parents) if missing.

    Returns:
        ``pathlib.Path`` of the file written inside *directory*.

    Raises:
        ValueError: If the uploaded name has no usable file-name component
            (empty, ``"."`` or ``".."``).
    """
    directory = Path(directory)
    directory.mkdir(parents=True, exist_ok=True)

    # The name comes from the client, so it is untrusted: keep only the last
    # path component so that "../x.pdf" or "/etc/x.pdf" cannot escape
    # *directory*. Backslashes are normalised first so Windows-style
    # separators are stripped on POSIX hosts as well.
    safe_name = Path(str(uploaded_file.name).replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid uploaded file name: {uploaded_file.name!r}")

    file_path = directory / safe_name
    with file_path.open("wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
20
+
21
  def extract_text_from_pdf(pdf_path):
22
  text = ""
23
  pdf_document = fitz.open(pdf_path)
 
65
  return plan
66
 
67
  if uploaded_file is not None:
68
+ file_path = save_uploaded_file(uploaded_file, "uploaded_documents")
69
+ st.write(f"Fichier téléchargé et sauvegardé sous : {file_path}")
70
+
71
+ document_text = extract_text_from_pdf(file_path)
72
  st.write("Texte extrait du document PDF:")
73
  st.write(document_text[:1000]) # Affiche les 1000 premiers caractères du texte extrait
74
 
requirements.txt CHANGED
@@ -4,3 +4,4 @@ sentence-transformers
4
  faiss-cpu
5
  transformers
6
  torch
 
 
4
  faiss-cpu
5
  transformers
6
  torch
7
+ sentencepiece