File size: 1,228 Bytes
a7a38d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from spacy.tokens import Doc

class HealthseaPipe:
    """Helper that rebuilds the clauses of a document as standalone ``Doc`` objects."""

    def get_clauses(self, doc: "Doc") -> "list[Doc]":
        """Return one freshly built ``Doc`` per entry of ``doc._.clauses``.

        Every clause entry is read through the following keys:

        * ``"split_indices"`` -- pair of token offsets used to slice the
          clause out of ``doc`` (``doc[start:end]``).
        * ``"has_ent"`` -- truthy when the clause contains an entity that
          has to be masked.
        * ``"ent_indices"`` -- pair of token offsets of that entity (end
          exclusive); only read when ``"has_ent"`` is truthy.
        * ``"blinder"`` -- placeholder string for the entity; only read
          when the first entity token lies inside the clause.

        For a clause with an entity, the first entity token is replaced by
        the blinder text with every ``<`` and ``>`` removed, the remaining
        entity tokens are dropped, and all other tokens are copied as they
        are. A clause without an entity is copied token by token.

        NOTE(review): ``doc._.clauses`` is presumably populated by an
        upstream clause-segmentation component -- confirm against the
        pipeline that calls this method.

        Args:
            doc: Document exposing the ``clauses`` extension attribute.

        Returns:
            The rebuilt clauses, in the order of ``doc._.clauses``. Each one
            shares ``doc.vocab`` with the input document.
        """
        clauses = []
        for clause in doc._.clauses:
            clause_slice = doc[clause["split_indices"][0] : clause["split_indices"][1]]

            if clause["has_ent"]:
                words = []
                spaces = []
                # Look the entity bounds up once instead of once per token.
                ent_start = clause["ent_indices"][0]
                ent_end = clause["ent_indices"][1]
                for token in clause_slice:
                    if token.i == ent_start:
                        # The whole entity collapses into a single placeholder
                        # token that is always followed by a space.
                        words.append(
                            clause["blinder"].replace(">", "").replace("<", "")
                        )
                        spaces.append(True)
                    elif not ent_start <= token.i < ent_end:
                        words.append(token.text)
                        # `whitespace_` is a string; `Doc` expects booleans.
                        spaces.append(bool(token.whitespace_))
                    # Any other token is a later part of the entity: skip it.
            else:
                words = [token.text for token in clause_slice]
                spaces = [bool(token.whitespace_) for token in clause_slice]

            clauses.append(Doc(doc.vocab, words=words, spaces=spaces))

        return clauses