Update ppt_chunker.py
Browse files
- ppt_chunker.py +1 -0
ppt_chunker.py
CHANGED
@@ -8,6 +8,7 @@ from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, cle
|
|
8 |
from ordered_multimap import OrderedMultiIndexMapWeakRef
|
9 |
|
10 |
WRONG_NOUNS = ["BY", "IN", "ON", "INTO", "A", "AT", "WITH", "FROM", "TO", "AND", "OR", "BUT", "OWN", 'BEHIND', 'THROUGH', 'FIERCE']
|
|
|
11 |
|
12 |
def process_chunk(chunk, nlp):
|
13 |
marked = []
|
|
|
8 |
from ordered_multimap import OrderedMultiIndexMapWeakRef
|
9 |
|
10 |
WRONG_NOUNS = ["BY", "IN", "ON", "INTO", "A", "AT", "WITH", "FROM", "TO", "AND", "OR", "BUT", "OWN", 'BEHIND', 'THROUGH', 'FIERCE']
|
11 |
+
NON_ENDING_PUNCT = [',', ':', ';', "'", '/', '-']
|
12 |
|
13 |
def process_chunk(chunk, nlp):
|
14 |
marked = []
|