ariansyahdedy committed on
Commit
4c47563
·
1 Parent(s): 4e0e782

Add NLTK package

Browse files
Files changed (1) hide show
  1. app/search/bm25_search.py +8 -2
app/search/bm25_search.py CHANGED
@@ -6,16 +6,21 @@ import string
6
  from typing import List, Set, Optional
7
  from nltk.corpus import stopwords
8
  from nltk.stem import WordNetLemmatizer
9
-
10
 
11
  def download_nltk_resources():
12
  """
13
  Downloads required NLTK resources synchronously.
14
  """
15
  resources = ['punkt', 'stopwords', 'wordnet', 'omw-1.4']
 
 
 
 
 
16
  for resource in resources:
17
  try:
18
- nltk.download(resource, quiet=True)
19
  except Exception as e:
20
  print(f"Error downloading {resource}: {str(e)}")
21
 
@@ -33,6 +38,7 @@ class BM25_search:
33
  """
34
  # Ensure NLTK resources are downloaded only once
35
  if not BM25_search.nltk_resources_downloaded:
 
36
  download_nltk_resources()
37
  BM25_search.nltk_resources_downloaded = True # Mark as downloaded
38
 
 
6
  from typing import List, Set, Optional
7
  from nltk.corpus import stopwords
8
  from nltk.stem import WordNetLemmatizer
9
+ import os
10
 
11
  def download_nltk_resources():
12
  """
13
  Downloads required NLTK resources synchronously.
14
  """
15
  resources = ['punkt', 'stopwords', 'wordnet', 'omw-1.4']
16
+
17
+ nltk_data_path = "/tmp/nltk_data" # Temporary directory for Hugging Face Spaces
18
+ os.makedirs(nltk_data_path, exist_ok=True)
19
+ nltk.data.path.append(nltk_data_path)
20
+
21
  for resource in resources:
22
  try:
23
+ nltk.download(resource, download_dir=nltk_data_path, quiet=True)
24
  except Exception as e:
25
  print(f"Error downloading {resource}: {str(e)}")
26
 
 
38
  """
39
  # Ensure NLTK resources are downloaded only once
40
  if not BM25_search.nltk_resources_downloaded:
41
+
42
  download_nltk_resources()
43
  BM25_search.nltk_resources_downloaded = True # Mark as downloaded
44