devve1 committed on
Commit
e1a3a4f
1 Parent(s): a705fd5

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +1 -0
ppt_chunker.py CHANGED
@@ -8,6 +8,7 @@ from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, cle
8
  from ordered_multimap import OrderedMultiIndexMapWeakRef
9
 
10
  WRONG_NOUNS = ["BY", "IN", "ON", "INTO", "A", "AT", "WITH", "FROM", "TO", "AND", "OR", "BUT", "OWN", 'BEHIND', 'THROUGH', 'FIERCE']
 
11
 
12
  def process_chunk(chunk, nlp):
13
  marked = []
 
8
  from ordered_multimap import OrderedMultiIndexMapWeakRef
9
 
10
  WRONG_NOUNS = ["BY", "IN", "ON", "INTO", "A", "AT", "WITH", "FROM", "TO", "AND", "OR", "BUT", "OWN", 'BEHIND', 'THROUGH', 'FIERCE']
11
+ NON_ENDING_PUNCT = [',', ':', ';', "'", '/', '-']
12
 
13
  def process_chunk(chunk, nlp):
14
  marked = []