davanstrien HF staff committed on
Commit
71b3cf3
·
1 Parent(s): f1cf4ce

add more model information

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -16,7 +16,6 @@ def _value2rgba(x, cmap=cm.RdYlGn, alpha_mult=1.0):
16
  return tuple(rgb.tolist() + [a])
17
 
18
 
19
-
20
  def _eval_dropouts(mod):
21
  module_name = mod.__class__.__name__
22
  if "Dropout" in module_name or "BatchNorm" in module_name:
@@ -25,7 +24,6 @@ def _eval_dropouts(mod):
25
  _eval_dropouts(module)
26
 
27
 
28
-
29
  def _piece_attn_html(pieces, attns, sep=" ", **kwargs):
30
  html_code, spans = ['<span style="font-family: monospace;">'], []
31
  for p, a in zip(pieces, attns):
@@ -45,8 +43,7 @@ def _show_piece_attn(*args, **kwargs):
45
  display(HTML(_piece_attn_html(*args, **kwargs)))
46
 
47
 
48
-
49
- @lru_cache(maxsize=1024*2)
50
  def _intrinsic_attention(learn, text, class_id=None):
51
  "Calculate the intrinsic attention of the input w.r.t to an output `class_id`, or the classification given by the model if `None`."
52
  learn.model.train()
@@ -80,12 +77,10 @@ def intrinsic_attention(x: TextLearner, text: str, class_id: int = None, **kwarg
80
  return _piece_attn_html(text.split(), to_np(attn), **kwargs)
81
 
82
 
83
-
84
-
85
  labels = learn_inf.dls.vocab[1]
86
 
87
 
88
- @lru_cache(maxsize=1024*2)
89
  def predict_label(title):
90
  *_, probs = learn_inf.predict(title)
91
  return probs
@@ -131,11 +126,14 @@ British Library Books genre detection model
131
  article = """
132
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5245175.svg)](https://doi.org/10.5281/zenodo.5245175)
133
 
134
-
135
  # British Library Books genre detection demo
136
 
137
  This demo allows you to play with a 'genre' detection model which has been trained to predict, from the title of a book, whether it is 'fiction' or 'non-fiction'.
138
- The demo also shows you which parts of the input the model is using most to make its prediction.
 
 
 
 
139
 
140
  ## Model description
141
 
@@ -145,12 +143,14 @@ This dataset is dominated by English language books though it includes books in
145
 
146
  ## Training data
147
 
148
- [[More information needed]]
 
149
 
150
  ## Model performance
151
 
152
  The model's performance on a held-out test set is as follows:
153
 
 
154
  ```
155
  precision recall f1-score support
156
 
 
16
  return tuple(rgb.tolist() + [a])
17
 
18
 
 
19
  def _eval_dropouts(mod):
20
  module_name = mod.__class__.__name__
21
  if "Dropout" in module_name or "BatchNorm" in module_name:
 
24
  _eval_dropouts(module)
25
 
26
 
 
27
  def _piece_attn_html(pieces, attns, sep=" ", **kwargs):
28
  html_code, spans = ['<span style="font-family: monospace;">'], []
29
  for p, a in zip(pieces, attns):
 
43
  display(HTML(_piece_attn_html(*args, **kwargs)))
44
 
45
 
46
+ @lru_cache(maxsize=1024 * 2)
 
47
  def _intrinsic_attention(learn, text, class_id=None):
48
  "Calculate the intrinsic attention of the input w.r.t to an output `class_id`, or the classification given by the model if `None`."
49
  learn.model.train()
 
77
  return _piece_attn_html(text.split(), to_np(attn), **kwargs)
78
 
79
 
 
 
80
  labels = learn_inf.dls.vocab[1]
81
 
82
 
83
+ @lru_cache(maxsize=1024 * 2)
84
  def predict_label(title):
85
  *_, probs = learn_inf.predict(title)
86
  return probs
 
126
  article = """
127
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5245175.svg)](https://doi.org/10.5281/zenodo.5245175)
128
 
 
129
  # British Library Books genre detection demo
130
 
131
  This demo allows you to play with a 'genre' detection model which has been trained to predict, from the title of a book, whether it is 'fiction' or 'non-fiction'.
132
+ The demo also shows you which parts of the input the model is using most to make its prediction. You can hover over the words to see the attention score assigned to each word. This gives you some sense of which words are important to the model in making a prediction.
133
+
134
+ ## Background
135
+
136
+ This model was developed as part of work by the [Living with Machines](https://livingwithmachines.ac.uk/) project. The process of training the model and working with the data is documented in a tutorial which will be released soon.
137
 
138
  ## Model description
139
 
 
143
 
144
  ## Training data
145
 
146
+ The model is trained on a particular collection of books digitised by the British Library. As a result the model may do less well on titles that look different to this data.
147
+ In particular, the training data was mostly English and mostly from the 19th century. You can find more information about the model [here](https://doi.org/10.5281/zenodo.5245175).
148
 
149
  ## Model performance
150
 
151
  The model's performance on a held-out test set is as follows:
152
 
153
+
154
  ```
155
  precision recall f1-score support
156