egerber1 committed on
Commit
f51d9c9
·
1 Parent(s): 1fba1a5

implement preview methods

Browse files
.gitignore CHANGED
@@ -141,4 +141,7 @@ dmypy.json
141
  .pytype/
142
 
143
  # Cython debug symbols
144
- cython_debug/
 
 
 
 
141
  .pytype/
142
 
143
  # Cython debug symbols
144
+ cython_debug/
145
+
146
+ experimental_notebooks
147
+ settings.json
README.md CHANGED
@@ -42,8 +42,8 @@ for sent in doc.sents:
42
  sent._.linkedEntities.pretty_print()
43
 
44
  # OUTPUT:
45
- # https://www.wikidata.org/wiki/Q194318 194318 Pirates of the Caribbean Series of fantasy adventure films
46
- # https://www.wikidata.org/wiki/Q12525597 12525597 Silvester the day celebrated on 31 December (Roman Catholic Church) or 2 January (Eastern Orthodox Churches)
47
 
48
  ```
49
 
@@ -94,16 +94,14 @@ doc = nlp("I follow the New England Patriots")
94
  patriots_entity = doc._.linkedEntities[0]
95
  patriots_entity.pretty_print()
96
  # OUTPUT:
97
- # https://www.wikidata.org/wiki/Q193390
98
- # 193390
99
  # New England Patriots
100
  # National Football League franchise in Foxborough, Massachusetts
101
 
102
  football_team_entity = patriots_entity.get_super_entities()[0]
103
  football_team_entity.pretty_print()
104
  # OUTPUT:
105
- # https://www.wikidata.org/wiki/Q17156793
106
- # 17156793
107
  # American football team
108
  # organization, in which a group of players are organized to compete as a team in American football
109
 
 
42
  sent._.linkedEntities.pretty_print()
43
 
44
  # OUTPUT:
45
+ # https://www.wikidata.org/wiki/Q194318 Pirates of the Caribbean Series of fantasy adventure films
46
+ # https://www.wikidata.org/wiki/Q12525597 Silvester the day celebrated on 31 December (Roman Catholic Church) or 2 January (Eastern Orthodox Churches)
47
 
48
  ```
49
 
 
94
  patriots_entity = doc._.linkedEntities[0]
95
  patriots_entity.pretty_print()
96
  # OUTPUT:
97
+ # https://www.wikidata.org/wiki/Q193390
 
98
  # New England Patriots
99
  # National Football League franchise in Foxborough, Massachusetts
100
 
101
  football_team_entity = patriots_entity.get_super_entities()[0]
102
  football_team_entity.pretty_print()
103
  # OUTPUT:
104
+ # https://www.wikidata.org/wiki/Q17156793
 
105
  # American football team
106
  # organization, in which a group of players are organized to compete as a team in American football
107
 
setup.py CHANGED
@@ -1,9 +1,6 @@
1
  #!/usr/bin/env python
2
  # -*- coding: utf-8 -*-
3
 
4
- # Copyright (c) 2014 SeatGeek
5
-
6
- # This file is part of fuzzywuzzy.
7
 
8
  import os
9
 
@@ -22,7 +19,7 @@ with open("README.md", "r") as fh:
22
 
23
  setup(
24
  name='spacy-entity-linker',
25
- version='1.0.0',
26
  author='Emanuel Gerber',
27
  author_email='[email protected]',
28
  packages=['spacy_entity_linker'],
 
1
  #!/usr/bin/env python
2
  # -*- coding: utf-8 -*-
3
 
 
 
 
4
 
5
  import os
6
 
 
19
 
20
  setup(
21
  name='spacy-entity-linker',
22
+ version='1.0.1',
23
  author='Emanuel Gerber',
24
  author_email='[email protected]',
25
  packages=['spacy_entity_linker'],
spacy_entity_linker/EntityCandidates.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  class EntityCandidates:
2
 
3
  def __init__(self, entity_elements):
@@ -17,6 +19,15 @@ class EntityCandidates:
17
  for entity in self.entity_elements:
18
  entity.pretty_print()
19
 
 
 
 
 
 
 
 
 
 
20
  def __str__(self):
21
  return str(["entity {}: {} (<{}>)".format(i, entity.get_label(), entity.get_description()) for i, entity in
22
  enumerate(self.entity_elements)])
 
1
+ MAX_ITEMS_PREVIEW=20
2
+
3
  class EntityCandidates:
4
 
5
  def __init__(self, entity_elements):
 
19
  for entity in self.entity_elements:
20
  entity.pretty_print()
21
 
22
def __repr__(self) -> str:
    """Return a preview with one line per candidate.

    Each listed candidate contributes the text returned by its
    ``get_preview_string()`` followed by a newline. At most
    ``MAX_ITEMS_PREVIEW`` candidates are listed; an empty container yields
    an empty string.

    NOTE(review): iterates ``self`` directly, so this assumes the class
    defines ``__iter__`` (or ``__getitem__``) elsewhere -- confirm.
    """
    preview_lines = []
    for index, entity_element in enumerate(self):
        # ``>=`` (not ``>``): index is zero-based, so stopping at
        # index == MAX_ITEMS_PREVIEW lists exactly MAX_ITEMS_PREVIEW items.
        if index >= MAX_ITEMS_PREVIEW:
            break
        preview_lines.append("{}\n".format(entity_element.get_preview_string()))

    return "".join(preview_lines)
30
+
31
  def __str__(self):
32
  return str(["entity {}: {} (<{}>)".format(i, entity.get_label(), entity.get_description()) for i, entity in
33
  enumerate(self.entity_elements)])
spacy_entity_linker/EntityCollection.py CHANGED
@@ -1,6 +1,8 @@
1
  from collections import Counter, defaultdict
2
  from .DatabaseConnection import get_wikidata_instance
3
 
 
 
4
 
5
  class EntityCollection:
6
 
@@ -45,6 +47,17 @@ class EntityCollection:
45
  print("{} ({}) : {}".format(wikidataInstance.get_entity_name(category), frequency,
46
  ','.join([str(e) for e in category_to_entites[category]])))
47
 
 
 
 
 
 
 
 
 
 
 
 
48
  def pretty_print(self):
49
  for entity in self.entities:
50
  entity.pretty_print()
 
1
  from collections import Counter, defaultdict
2
  from .DatabaseConnection import get_wikidata_instance
3
 
4
+ MAX_ITEMS_PREVIEW=20
5
+
6
 
7
  class EntityCollection:
8
 
 
47
  print("{} ({}) : {}".format(wikidataInstance.get_entity_name(category), frequency,
48
  ','.join([str(e) for e in category_to_entites[category]])))
49
 
50
def __repr__(self) -> str:
    """Return a multi-line preview of the collection.

    The first line states the total number of entities. Up to
    ``MAX_ITEMS_PREVIEW`` entities follow, one per line, each rendered with
    its ``get_preview_string()``. When the collection holds more than that,
    a final ``...N more`` line reports how many entities were not listed.

    NOTE(review): relies on ``len(self)`` and iteration over ``self``, so
    the class is assumed to define ``__len__`` and ``__iter__`` elsewhere
    -- confirm.
    """
    total = len(self)
    preview_parts = ["<EntityCollection ({} entities):".format(total)]
    for index, entity_element in enumerate(self):
        # ``>=`` (not ``>``): index is zero-based, so this lists exactly
        # MAX_ITEMS_PREVIEW entities and keeps the remainder count below
        # equal to the number of entities actually omitted.
        if index >= MAX_ITEMS_PREVIEW:
            preview_parts.append("\n...{} more".format(total - MAX_ITEMS_PREVIEW))
            break
        preview_parts.append("\n-{}".format(entity_element.get_preview_string()))

    preview_parts.append(">")
    return "".join(preview_parts)
60
+
61
  def pretty_print(self):
62
  for entity in self.entities:
63
  entity.pretty_print()
spacy_entity_linker/EntityElement.py CHANGED
@@ -20,6 +20,7 @@ class EntityElement:
20
  if len(row) > 5 and row[5]:
21
  self.original_alias = row[5]
22
 
 
23
  self.span = span
24
 
25
  self.chain = None
@@ -101,11 +102,16 @@ class EntityElement:
101
  }
102
 
103
  def pretty_print(self):
104
- print(
105
- "https://www.wikidata.org/wiki/Q{0:<10} {1:<10} {2:<30} {3:<100}".format(self.get_id(),
106
- self.get_id(),
107
- self.get_label(),
108
- self.get_description()[:100]))
 
 
 
 
 
109
 
110
  def pretty_string(self, description=False):
111
  if description:
 
20
  if len(row) > 5 and row[5]:
21
  self.original_alias = row[5]
22
 
23
+ self.url="https://www.wikidata.org/wiki/Q{}".format(self.get_id())
24
  self.span = span
25
 
26
  self.chain = None
 
102
  }
103
 
104
  def pretty_print(self):
105
+ print(self.__repr__())
106
+
107
def get_url(self):
    """Return the value stored in ``self.url``."""
    url = self.url
    return url
109
+
110
def __repr__(self):
    """Return the preview string wrapped as ``<EntityElement: ...>``."""
    preview = self.get_preview_string()
    return "<EntityElement: {}>".format(preview)
112
+
113
def get_preview_string(self):
    """Return a one-line summary: URL, label and description.

    The three fields are separated by single spaces and left-aligned to
    minimum widths of 10, 25 and 50 characters respectively. The
    description is cut to its first 100 characters before formatting.
    """
    url = self.get_url()
    label = self.get_label()
    short_description = self.get_description()[:100]
    return "{0:<10} {1:<25} {2:<50}".format(url, label, short_description)
115
 
116
  def pretty_string(self, description=False):
117
  if description: