Norgan97 committed on
Commit
4bfdba0
1 Parent(s): 096aad7
app.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.title('Проект по NLP')
4
+
pages/__init__.py ADDED
File without changes
pages/gpt.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c9012fd11ba32a1145f6b167d8cafea4a7208585e102aa283a96c335c0908f
3
+ size 1321
pages/models/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.bin filter=lfs diff=lfs merge=lfs -text
pages/models/gpt/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "gradient_checkpointing": false,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "gpt2",
15
+ "n_ctx": 2048,
16
+ "n_embd": 768,
17
+ "n_head": 12,
18
+ "n_inner": null,
19
+ "n_layer": 12,
20
+ "n_positions": 2048,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.35.0",
32
+ "use_cache": true,
33
+ "vocab_size": 50264
34
+ }
pages/models/gpt/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.35.0"
6
+ }
pages/models/gpt/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6838ff75c456e4cbc50676a4c6d1f3574a512c555d62d8b753afb4c449a44253
3
+ size 500941440
pages/models/gptmodel.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import transformers
2
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
3
+ import torch
4
+
5
+
6
+ model_name_or_path = 'pages/models/gpt'
7
+ tokenizer = GPT2Tokenizer.from_pretrained('sberbank-ai/rugpt3small_based_on_gpt2')
8
+ model = GPT2LMHeadModel.from_pretrained(model_name_or_path).to('cpu')
pages/toxicity.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c47ac7306a6ccb6bba3ad38b72b82cb9a253054dd2160a646414cf6ed41a742c
3
+ size 1880
requirements.txt ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ accelerate==0.24.1
3
+ altair==5.1.2
4
+ astunparse==1.6.3
5
+ attrs==23.1.0
6
+ blinker==1.7.0
7
+ cachetools==5.3.2
8
+ certifi==2023.7.22
9
+ charset-normalizer==3.3.2
10
+ click==8.1.7
11
+ contourpy==1.1.1
12
+ cycler==0.12.1
13
+ filelock==3.13.1
14
+ flatbuffers==23.5.26
15
+ fonttools==4.43.1
16
+ fsspec==2023.10.0
17
+ gast==0.5.4
18
+ gitdb==4.0.11
19
+ GitPython==3.1.40
20
+ google-auth==2.23.4
21
+ google-auth-oauthlib==1.0.0
22
+ google-pasta==0.2.0
23
+ grpcio==1.59.2
24
+ h5py==3.10.0
25
+ huggingface-hub==0.17.3
26
+ idna==3.4
27
+ importlib-metadata==6.8.0
28
+ Jinja2==3.1.2
29
+ joblib==1.3.2
30
+ jsonschema==4.19.2
31
+ jsonschema-specifications==2023.7.1
32
+ keras==2.14.0
33
+ kiwisolver==1.4.5
34
+ libclang==16.0.6
35
+ Markdown==3.5.1
36
+ markdown-it-py==3.0.0
37
+ MarkupSafe==2.1.3
38
+ matplotlib==3.8.1
39
+ mdurl==0.1.2
40
+ ml-dtypes==0.2.0
41
+ mpmath==1.3.0
42
+ networkx==3.2.1
43
+ nltk==3.8.1
44
+ numpy==1.26.1
45
+ nvidia-cublas-cu12==12.1.3.1
46
+ nvidia-cuda-cupti-cu12==12.1.105
47
+ nvidia-cuda-nvrtc-cu12==12.1.105
48
+ nvidia-cuda-runtime-cu12==12.1.105
49
+ nvidia-cudnn-cu12==8.9.2.26
50
+ nvidia-cufft-cu12==11.0.2.54
51
+ nvidia-curand-cu12==10.3.2.106
52
+ nvidia-cusolver-cu12==11.4.5.107
53
+ nvidia-cusparse-cu12==12.1.0.106
54
+ nvidia-nccl-cu12==2.18.1
55
+ nvidia-nvjitlink-cu12==12.3.52
56
+ nvidia-nvtx-cu12==12.1.105
57
+ oauthlib==3.2.2
58
+ opt-einsum==3.3.0
59
+ packaging==23.2
60
+ pandas==2.1.2
61
+ Pillow==10.1.0
62
+ protobuf==4.25.0
63
+ psutil==5.9.6
64
+ pyarrow==14.0.0
65
+ pyasn1==0.5.0
66
+ pyasn1-modules==0.3.0
67
+ pydeck==0.8.1b0
68
+ Pygments==2.16.1
69
+ pyparsing==3.1.1
70
+ python-dateutil==2.8.2
71
+ pytz==2023.3.post1
72
+ PyYAML==6.0.1
73
+ referencing==0.30.2
74
+ regex==2023.10.3
75
+ requests==2.31.0
76
+ requests-oauthlib==1.3.1
77
+ rich==13.6.0
78
+ rpds-py==0.10.6
79
+ rsa==4.9
80
+ safetensors==0.4.0
81
+ scikit-learn==1.3.2
82
+ scipy==1.11.3
83
+ six==1.16.0
84
+ smmap==5.0.1
85
+ streamlit==1.28.0
86
+ sympy==1.12
87
+ tenacity==8.2.3
88
+ tensorboard==2.14.1
89
+ tensorboard-data-server==0.7.2
90
+ tensorflow==2.14.0
91
+ tensorflow-estimator==2.14.0
92
+ tensorflow-io-gcs-filesystem==0.34.0
93
+ termcolor==2.3.0
94
+ threadpoolctl==3.2.0
95
+ tokenizers==0.14.1
96
+ toml==0.10.2
97
+ toolz==0.12.0
98
+ torch==2.1.0
99
+ torchaudio==2.1.0
100
+ torchutils==0.0.4
101
+ torchvision==0.16.0
102
+ tornado==6.3.3
103
+ tqdm==4.66.1
104
+ transformers==4.34.1
105
+ triton==2.1.0
106
+ typing_extensions==4.8.0
107
+ tzdata==2023.3
108
+ tzlocal==5.2
109
+ urllib3==2.0.7
110
+ validators==0.22.0
111
+ watchdog==3.0.0
112
+ Werkzeug==3.0.1
113
+ wrapt==1.14.1
114
+ zipp==3.17.0