Jiahuita committed
Commit d6be1a6 · 1 Parent(s): 1d3834b

Add inference widget and pipeline configuration

Files changed (3)
  1. README.md +6 -14
  2. pipeline.py +36 -0
  3. requirements.txt +7 -2
README.md CHANGED
@@ -1,24 +1,16 @@
 ---
+language: en
 license: mit
 tags:
 - text-classification
-- tensorflow
 - news-classification
 pipeline_tag: text-classification
+inference: true
 widget:
-- text: "Enter your news headline here"
-datasets:
-- custom_news_dataset
-model-index:
-- name: news-source-classifier
-  results:
-  - task:
-      type: text-classification
-      name: News Source Classification
-    metrics:
-    - type: accuracy
-      value: 0.82
-      name: Test Accuracy
+- example_title: "Write a news headline to classify"
+  text: "Wife of murdered Minnesota pastor hired 3 men to kill husband after affair: police"
+- example_title: "Another example"
+  text: "Scientists discover breakthrough in renewable energy research"
 ---
 
 # News Source Classifier
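
With `inference: true` and the two `widget` examples in the front matter, the Hub renders a hosted inference widget on the model page. The same endpoint can also be queried over HTTP; below is a minimal sketch, assuming a hypothetical repo id `Jiahuita/news-source-classifier` and an access token in the `HF_TOKEN` environment variable.

```python
# Minimal sketch of calling the hosted Inference API for this model.
# The repo id below and the HF_TOKEN environment variable are assumptions;
# substitute the actual model id and your own token.
import os
import requests

API_URL = "https://api-inference.huggingface.co/models/Jiahuita/news-source-classifier"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

payload = {"inputs": "Scientists discover breakthrough in renewable energy research"}
response = requests.post(API_URL, headers=headers, json=payload)
print(response.json())  # expected shape: [{'label': ..., 'score': ...}, ...]
```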
pipeline.py ADDED
@@ -0,0 +1,36 @@
+# pipeline.py
+from transformers import Pipeline
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import json
+import numpy as np
+
+class NewsClassifierPipeline(Pipeline):
+    def __init__(self):
+        super().__init__()
+        self.model = load_model('news_classifier.h5')
+        with open('tokenizer.json') as f:
+            tokenizer_data = json.load(f)
+        self.tokenizer = tokenizer_from_json(tokenizer_data)
+
+    def preprocess(self, text):
+        sequence = self.tokenizer.texts_to_sequences([text])
+        padded = pad_sequences(sequence)
+        return padded
+
+    def _forward(self, texts):
+        processed = self.preprocess(texts)
+        predictions = self.model.predict(processed)
+        scores = tf.nn.softmax(predictions, axis=1)
+
+        predicted_class = np.argmax(predictions)
+        score = float(np.max(scores))
+
+        label = 'foxnews' if predicted_class == 0 else 'nbc'
+
+        return [{'label': label, 'score': score}]
+
+    def postprocess(self, model_outputs):
+        return model_outputs
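
The committed class subclasses transformers' `Pipeline`, but its `__init__` calls `super().__init__()` without the model and tokenizer arguments the base class expects, so constructing it directly may fail. A minimal standalone sketch of the same inference steps in plain Keras is shown below; the file names, the two-class output layout (index 0 = foxnews, index 1 = nbc), and the example headline are taken from the committed code, everything else is an assumption.

```python
# Standalone sketch of the same inference steps, independent of the
# transformers Pipeline base class. Assumes news_classifier.h5 and
# tokenizer.json sit in the working directory.
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

model = load_model("news_classifier.h5")
with open("tokenizer.json") as f:
    # tokenizer_from_json expects the raw JSON string produced by
    # Tokenizer.to_json(); pass the file contents rather than a parsed dict.
    tokenizer = tokenizer_from_json(f.read())

def classify(headline: str) -> dict:
    # Tokenize and pad a single headline (a fixed maxlen matching the model's
    # training configuration would normally be supplied here).
    seq = pad_sequences(tokenizer.texts_to_sequences([headline]))
    logits = model.predict(seq)
    scores = tf.nn.softmax(logits, axis=1).numpy()
    idx = int(np.argmax(scores[0]))
    return {"label": "foxnews" if idx == 0 else "nbc", "score": float(scores[0, idx])}

print(classify("Scientists discover breakthrough in renewable energy research"))
```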
requirements.txt CHANGED
@@ -1,8 +1,13 @@
-tensorflow==2.13.0
+tensorflow-macos>=2.10.0
+#tensorflow==2.10.0
+transformers>=4.30.0
+torch>=2.0.0
 fastapi==0.68.1
 uvicorn==0.15.0
 numpy>=1.19.2
 pydantic==1.8.2
 python-multipart==0.0.5
 scikit-learn>=0.24.2
-joblib>=1.1.0
+joblib>=1.1.0
+sentencepiece
+protobuf