devve1 committed on
Commit
424863e
1 Parent(s): f81ff19

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +6 -4
ppt_chunker.py CHANGED
@@ -125,6 +125,7 @@ def ppt_chunk(file_like, nlp):
125
  nb_titles = 0
126
  nb_sub_titles = 0
127
  metadata_sub_title = ''
 
128
 
129
  for i, sub_chunk in enumerate(chunk[1]):
130
  if type(sub_chunk) is list:
@@ -144,12 +145,13 @@ def ppt_chunk(file_like, nlp):
144
  clean_trailing_punctuation(metadata_main_title),
145
  clean_trailing_punctuation(metadata_sub_title)
146
  )
 
147
  break
148
-
149
- #if i == len(chunk) - 1:
150
- # l = "\n".join([c[1].lower() for c in chunk[1] if type(c) is list])
151
- # weakDict.insert(chunk[0], l, metadata_main_title, metadata_sub_title)
152
 
 
 
 
 
153
  for w in weakDict.all_texts():
154
  print(f'AFTER : {type(w)}, {w}')
155
 
 
125
  nb_titles = 0
126
  nb_sub_titles = 0
127
  metadata_sub_title = ''
128
+ condition_met = False
129
 
130
  for i, sub_chunk in enumerate(chunk[1]):
131
  if type(sub_chunk) is list:
 
145
  clean_trailing_punctuation(metadata_main_title),
146
  clean_trailing_punctuation(metadata_sub_title)
147
  )
148
+ condition_met = True
149
  break
 
 
 
 
150
 
151
+ if not condition_met:
152
+ cleaned_titles_chunk = "\n".join([c[1].lower() for c in chunk[1] if type(c) is list])
153
+ weakDict.insert(chunk[0], cleaned_titles_chunk, metadata_main_title, metadata_sub_title)
154
+
155
  for w in weakDict.all_texts():
156
  print(f'AFTER : {type(w)}, {w}')
157