File size: 1,080 Bytes
8b513d0 d6504ae 8b513d0 d6504ae 8b513d0 d6504ae 8b513d0 d6504ae 8b513d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
from spacy.tokens import Doc, Span
from spacy.language import Language
from .EntityClassifier import EntityClassifier
from .EntityCollection import EntityCollection
from .TermCandidateExtractor import TermCandidateExtractor
@Language.factory('entityLinker')
class EntityLinker:
    """Pipeline component that stores linked entities on a document.

    Registered as a spaCy component factory under the name
    ``entityLinker``. Calling the component fills the ``linkedEntities``
    extension attribute on the document and on each of its sentence spans.
    """

    def __init__(self, nlp, name):
        """Register the ``linkedEntities`` extension on ``Doc`` and ``Span``.

        Args:
            nlp: Pipeline object supplied by the factory; not used here.
            name: Component name supplied by the factory; not used here.
        """
        # force=True lets the registration be repeated (e.g. when the
        # component is created more than once) without an "already exists"
        # error.
        # NOTE(review): the Doc default is one EntityCollection instance built
        # here; presumably every Doc that never passes through __call__ sees
        # that same object -- confirm whether that sharing is intended.
        Doc.set_extension(
            "linkedEntities",
            default=EntityCollection(),
            force=True,
        )
        Span.set_extension("linkedEntities", default=None, force=True)

    def __call__(self, doc):
        """Link entities in ``doc`` and record them per sentence and per doc.

        Args:
            doc: The document to process; it must expose ``doc.sents``.

        Returns:
            The same ``doc`` object, with ``doc._.linkedEntities`` and every
            sentence's ``_.linkedEntities`` populated.
        """
        candidate_extractor = TermCandidateExtractor(doc)
        entity_classifier = EntityClassifier()

        # Give every sentence its own empty collection up front so the
        # append in the loop below never hits the Span default of None.
        for sentence in doc.sents:
            sentence._.linkedEntities = EntityCollection([])

        linked = []
        for term_candidates in candidate_extractor:
            candidates = term_candidates.get_entity_candidates()
            if len(candidates) == 0:
                # Nothing to choose from for this term; move on.
                continue

            chosen = entity_classifier(candidates)
            # File the entity under the sentence that contains its span ...
            sentence_entities = chosen.span.sent._.linkedEntities
            sentence_entities.append(chosen)
            # ... and remember it for the document-level collection.
            linked.append(chosen)

        doc._.linkedEntities = EntityCollection(linked)
        return doc
|