Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Running on T4

devve1 committed on Aug 3

Commit

952b8a3

•

1 Parent(s): 3b7e1ca

Update ppt_chunker.py

Files changed (1) hide show

ppt_chunker.py CHANGED Viewed

@@ -21,15 +21,15 @@ def process_chunk(chunk, nlp):
             tokens = nlp(current_text)
             try:
-                next = chunk[1][i+1]
             except IndexError:
                 continue
-            if (type(next) is list) and next[1].isupper() and (next[0] == ('Title' or 'NarrativeText' or 'UncategorizedText')):
                 print(f'TOKEN: {current_text}, {tokens[-1]}, {tokens[-1].pos_}')
                 print(f'{str(tokens[-1])}')
                 if (tokens[-1].pos_ in {'SYM', "ADP", 'ADV', 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"}) or ((tokens[-1].pos_ in {'PROPN', 'NOUN', 'VERB'}) and (str(tokens[-1]) in WRONG_NOUNS)):
-                    chunk[1][i+1][1] = current_text + ' ' + next[1]
                     marked.append(i)
     for i in marked:
@@ -73,10 +73,10 @@ def ppt_chunk(file_like, model):
         else:
             if elem.text[-1] in NON_ENDING_PUNCT:
                 try:
-                    next = elements[i+1]
                 except:
                     pass
-                elements[i+1].text = elem.text + ' ' + next.text
                 marked.add(elem.text)
             if elem.category == "ListItem":

             tokens = nlp(current_text)
             try:
+                next_ = chunk[1][i+1]
             except IndexError:
                 continue
+            if (type(next_) is list) and next_[1].isupper() and (next_[0] == ('Title' or 'NarrativeText' or 'UncategorizedText')):
                 print(f'TOKEN: {current_text}, {tokens[-1]}, {tokens[-1].pos_}')
                 print(f'{str(tokens[-1])}')
                 if (tokens[-1].pos_ in {'SYM', "ADP", 'ADV', 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"}) or ((tokens[-1].pos_ in {'PROPN', 'NOUN', 'VERB'}) and (str(tokens[-1]) in WRONG_NOUNS)):
+                    chunk[1][i+1][1] = current_text + ' ' + next_[1]
                     marked.append(i)
     for i in marked:
         else:
             if elem.text[-1] in NON_ENDING_PUNCT:
                 try:
+                    next_ = elements[i+1]
                 except:
                     pass
+                elements[i+1].text = elem.text + ' ' + next_.text
                 marked.add(elem.text)
             if elem.category == "ListItem":