Spaces:
Runtime error
Runtime error
"pull and shock"
Browse files
- README.md +10 -0
- __pycache__/seafoam.cpython-39.pyc +0 -0
- cache/intent/embeddings.pt +0 -3
- cache/slot/embeddings.pt +0 -3
- cache/slot/tag2idx.json +0 -11
- cache/slot/vocab.pkl +0 -3
- ckpt/intent/model_checkpoint.pth +2 -2
- data/intent/eval.json +0 -0
- data/intent/test.json +0 -0
- data/intent/train.json +0 -0
- data/slot/eval.json +0 -0
- data/slot/test.json +0 -0
- data/slot/train.json +0 -0
- dataset.py +0 -74
- model.py +1 -8
- requirements.in → requirements.txt +2 -1
- seafoam.py +58 -0
README.md
CHANGED
@@ -11,3 +11,13 @@ license: apache-2.0
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
## Environment
|
16 |
+
```shell
|
17 |
+
# If you have conda, we recommend building a conda environment called "adl-hw1"
|
18 |
+
make
|
19 |
+
conda activate adl-hw1
|
20 |
+
pip install -r requirements.txt
|
21 |
+
# Otherwise
|
22 |
+
pip install -r requirements.in
|
23 |
+
```
|
__pycache__/seafoam.cpython-39.pyc
ADDED
Binary file (2.07 kB). View file
|
|
cache/intent/embeddings.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f48c2a4bb711ddd28a95f849b676ab6c76a4aeff3ba01976ccea97a4808ce790
|
3 |
-
size 7789931
|
|
|
|
|
|
|
|
cache/slot/embeddings.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:faba49b73dfdd2a98dbbfe7b53eed50b8edd9df716169e8f837558c5e24c42bf
|
3 |
-
size 4941099
|
|
|
|
|
|
|
|
cache/slot/tag2idx.json
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"O": 0,
|
3 |
-
"B-date": 1,
|
4 |
-
"I-time": 2,
|
5 |
-
"B-time": 3,
|
6 |
-
"B-last_name": 4,
|
7 |
-
"I-people": 5,
|
8 |
-
"B-people": 6,
|
9 |
-
"I-date": 7,
|
10 |
-
"B-first_name": 8
|
11 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cache/slot/vocab.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c711af8ba9cba928df00a20913b2bcdd0738ab3b9210b4b9f10d0ff9dcf27f16
|
3 |
-
size 49861
|
|
|
|
|
|
|
|
ckpt/intent/model_checkpoint.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c69530b46831942e17a75192a402d6a699d7de48340fbf336f964277742af95
|
3 |
+
size 74048714
|
data/intent/eval.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/intent/test.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/intent/train.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/slot/eval.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/slot/test.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/slot/train.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
dataset.py
DELETED
@@ -1,74 +0,0 @@
|
|
1 |
-
from typing import List, Dict
|
2 |
-
|
3 |
-
import torch
|
4 |
-
|
5 |
-
from torch.utils.data import Dataset
|
6 |
-
|
7 |
-
from utils import Vocab
|
8 |
-
|
9 |
-
|
10 |
-
class SeqClsDataset(Dataset):
|
11 |
-
def __init__(
|
12 |
-
self,
|
13 |
-
data: List[Dict],
|
14 |
-
vocab: Vocab,
|
15 |
-
label_mapping: Dict[str, int],
|
16 |
-
max_len: int,
|
17 |
-
):
|
18 |
-
self.data = data
|
19 |
-
self.vocab = vocab
|
20 |
-
self.label_mapping = label_mapping
|
21 |
-
self._idx2label = {idx: intent for intent, idx in self.label_mapping.items()}
|
22 |
-
self.max_len = max_len
|
23 |
-
|
24 |
-
def __len__(self) -> int:
|
25 |
-
return len(self.data)
|
26 |
-
|
27 |
-
def __getitem__(self, index) -> Dict:
|
28 |
-
instance = self.data[index]
|
29 |
-
return instance
|
30 |
-
|
31 |
-
@property
|
32 |
-
def num_classes(self) -> int:
|
33 |
-
return len(self.label_mapping)
|
34 |
-
|
35 |
-
def collate_fn(self, samples: List[Dict]) -> Dict:
|
36 |
-
# sample就是batch data
|
37 |
-
# collate_fn幫你把batch data編碼成詞彙的索引
|
38 |
-
# batch[0] = {'text': '~', 'intent': '~', 'id': 'train-0'}
|
39 |
-
|
40 |
-
# 提取所有樣本的文本數據和標籤數據
|
41 |
-
texts = samples["text"]
|
42 |
-
labels = samples["intent"]
|
43 |
-
|
44 |
-
# 使用 vocab 將文本數據轉換為整數索引序列,並指定最大長度
|
45 |
-
encoded_texts = self.vocab.encode_batch([text.split() for text in texts], to_len=self.max_len)
|
46 |
-
|
47 |
-
# 將標籤數據轉換為整數索引序列
|
48 |
-
encoded_labels = [self.label_mapping[label] for label in labels]
|
49 |
-
|
50 |
-
# 將整數索引序列轉換為 PyTorch 張量
|
51 |
-
encoded_text = torch.tensor(encoded_texts)
|
52 |
-
encoded_label = torch.tensor(encoded_labels)
|
53 |
-
|
54 |
-
# 創建批次數據字典
|
55 |
-
batch_data = {
|
56 |
-
"encoded_text": encoded_text,
|
57 |
-
"encoded_label": encoded_label
|
58 |
-
}
|
59 |
-
|
60 |
-
return batch_data
|
61 |
-
|
62 |
-
def label2idx(self, label: str):
|
63 |
-
return self.label_mapping[label]
|
64 |
-
|
65 |
-
def idx2label(self, idx: int):
|
66 |
-
return self._idx2label[idx]
|
67 |
-
|
68 |
-
|
69 |
-
class SeqTaggingClsDataset(SeqClsDataset):
|
70 |
-
ignore_idx = -100
|
71 |
-
|
72 |
-
def collate_fn(self, samples):
|
73 |
-
# TODO: implement collate_fn
|
74 |
-
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.py
CHANGED
@@ -3,8 +3,7 @@ from typing import Dict
|
|
3 |
import torch
|
4 |
import torch.nn as nn
|
5 |
|
6 |
-
|
7 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
8 |
|
9 |
class SeqClassifier(nn.Module):
|
10 |
def __init__(
|
@@ -68,9 +67,3 @@ class SeqClassifier(nn.Module):
|
|
68 |
# 通過全連接層
|
69 |
logits = self.fc(combined_hidden_state)
|
70 |
return logits # 返回預測結果
|
71 |
-
|
72 |
-
|
73 |
-
class SeqTagger(SeqClassifier):
|
74 |
-
def forward(self, batch) -> Dict[str, torch.Tensor]:
|
75 |
-
# TODO: implement model forward
|
76 |
-
raise NotImplementedError
|
|
|
3 |
import torch
|
4 |
import torch.nn as nn
|
5 |
|
6 |
+
device = "cpu"
|
|
|
7 |
|
8 |
class SeqClassifier(nn.Module):
|
9 |
def __init__(
|
|
|
67 |
# 通過全連接層
|
68 |
logits = self.fc(combined_hidden_state)
|
69 |
return logits # 返回預測結果
|
|
|
|
|
|
|
|
|
|
|
|
requirements.in → requirements.txt
RENAMED
@@ -7,4 +7,5 @@ numpy
|
|
7 |
pandas
|
8 |
scikit-learn==1.1.2
|
9 |
transformers[torch]
|
10 |
-
datasets
|
|
|
|
7 |
pandas
|
8 |
scikit-learn==1.1.2
|
9 |
transformers[torch]
|
10 |
+
datasets
|
11 |
+
huggingface_hub
|
seafoam.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
from typing import Iterable
|
3 |
+
import gradio as gr
|
4 |
+
from gradio.themes.base import Base
|
5 |
+
from gradio.themes.utils import colors, fonts, sizes
|
6 |
+
import time
|
7 |
+
|
8 |
+
class Seafoam(Base):
|
9 |
+
def __init__(
|
10 |
+
self,
|
11 |
+
*,
|
12 |
+
primary_hue: colors.Color | str = colors.emerald,
|
13 |
+
secondary_hue: colors.Color | str = colors.blue,
|
14 |
+
neutral_hue: colors.Color | str = colors.blue,
|
15 |
+
spacing_size: sizes.Size | str = sizes.spacing_md,
|
16 |
+
radius_size: sizes.Size | str = sizes.radius_md,
|
17 |
+
text_size: sizes.Size | str = sizes.text_lg,
|
18 |
+
font: fonts.Font
|
19 |
+
| str
|
20 |
+
| Iterable[fonts.Font | str] = (
|
21 |
+
fonts.GoogleFont("Quicksand"),
|
22 |
+
"ui-sans-serif",
|
23 |
+
"sans-serif",
|
24 |
+
),
|
25 |
+
font_mono: fonts.Font
|
26 |
+
| str
|
27 |
+
| Iterable[fonts.Font | str] = (
|
28 |
+
fonts.GoogleFont("IBM Plex Mono"),
|
29 |
+
"ui-monospace",
|
30 |
+
"monospace",
|
31 |
+
),
|
32 |
+
):
|
33 |
+
super().__init__(
|
34 |
+
primary_hue=primary_hue,
|
35 |
+
secondary_hue=secondary_hue,
|
36 |
+
neutral_hue=neutral_hue,
|
37 |
+
spacing_size=spacing_size,
|
38 |
+
radius_size=radius_size,
|
39 |
+
text_size=text_size,
|
40 |
+
font=font,
|
41 |
+
font_mono=font_mono,
|
42 |
+
)
|
43 |
+
super().set(
|
44 |
+
body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
|
45 |
+
body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
|
46 |
+
button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
|
47 |
+
button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
|
48 |
+
button_primary_text_color="white",
|
49 |
+
button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
|
50 |
+
slider_color="*secondary_300",
|
51 |
+
slider_color_dark="*secondary_600",
|
52 |
+
block_title_text_weight="600",
|
53 |
+
block_border_width="3px",
|
54 |
+
block_shadow="*shadow_drop_lg",
|
55 |
+
button_shadow="*shadow_drop_lg",
|
56 |
+
button_large_padding="32px",
|
57 |
+
)
|
58 |
+
|