franz96521 commited on
Commit
abe5e7f
·
1 Parent(s): e6e34c7
.gitattributes CHANGED
@@ -17,7 +17,7 @@
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
20
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
@@ -25,3 +25,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ AbstractGenerator/ filter=lfs diff=lfs merge=lfs -text
29
+ AbstractGenerator/** filter=lfs diff=lfs merge=lfs -text
30
+ models/** filter=lfs diff=lfs merge=lfs -text
31
+ models/ filter=lfs diff=lfs merge=lfs -text
AbstractGenerator.ipynb ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "ModuleNotFoundError",
10
+ "evalue": "No module named 'gpt_2_simple'",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
14
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
15
+ "\u001b[1;32mc:\\Users\\franz\\AbstractGenerator\\AbstractGenerator.ipynb Cell 1'\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/franz/AbstractGenerator/AbstractGenerator.ipynb#ch0000000?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mgpt_2_simple\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mgpt2\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/AbstractGenerator/AbstractGenerator.ipynb#ch0000000?line=1'>2</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/franz/AbstractGenerator/AbstractGenerator.ipynb#ch0000000?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtensorflow\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mtf\u001b[39;00m\n",
16
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'gpt_2_simple'"
17
+ ]
18
+ }
19
+ ],
20
+ "source": [
21
+ "\n",
22
+ "import gpt_2_simple as gpt2\n",
23
+ "import os\n",
24
+ "import tensorflow as tf\n",
25
+ "import pandas as pd\n",
26
+ "import re\n",
27
+ "print(\"GPU is\", \"available\" if tf.test.is_gpu_available() else \"NOT AVAILABLE\")"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "model_name = \"124M\"\n",
37
+ "if not os.path.isdir(os.path.join(\"models\", model_name)):\n",
38
+ "\tprint(f\"Downloading {model_name} model...\")\n",
39
+ "\tgpt2.download_gpt2(model_name=model_name) "
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 3,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "path = 'AbstractGenerator/'\n",
49
+ "checkpoint_dir =path+'weights/'\n",
50
+ "data_path = path+'Tokenized_data/'\n",
51
+ "\n",
52
+ "\n",
53
+ "file_name = 'resumen'\n",
54
+ "file_path = data_path+file_name\n",
55
+ "\n",
56
+ "prefix= '<|startoftext|>'\n",
57
+ "sufix ='<|endoftext|>'"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "markdown",
62
+ "metadata": {},
63
+ "source": [
64
+ "# pretrained"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "sess = gpt2.start_tf_sess()\n",
74
+ "gpt2.load_gpt2(sess,checkpoint_dir=checkpoint_dir,run_name='run1')"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "markdown",
79
+ "metadata": {},
80
+ "source": [
81
+ "# train "
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "tf.compat.v1.reset_default_graph()\n",
91
+ "sess = gpt2.start_tf_sess()\n",
92
+ "\n"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "gpt2.finetune(sess,\n",
102
+ " file_path+'.txt',\n",
103
+ " model_name=model_name,\n",
104
+ " checkpoint_dir=checkpoint_dir, \n",
105
+ " steps=1000\n",
106
+ " ) "
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "metadata": {},
112
+ "source": [
113
+ "# test"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "text = \"\"\"Introduction and preliminaries\n",
123
+ "The focus of this paper is decompositions of (k, `)-sparse graphs into edge-disjoint subgraphs\n",
124
+ "that certify sparsity. We use graph to mean a multigraph, possibly with loops. We say that a\n",
125
+ "graph is (k, `)-sparse if no subset of n′ vertices spans more than kn′− ` edges in the graph; a\n",
126
+ "(k, `)-sparse graph with kn′− ` edges is (k, `)-tight. We call the range k ≤ `≤ 2k−1 the upper\n",
127
+ "range of sparse graphs and 0≤ `≤ k the lower range.\n",
128
+ "In this paper, we present efficient algorithms for finding decompositions that certify sparsity\n",
129
+ "in the upper range of `. Our algorithms also apply in the lower range, which was already ad-\n",
130
+ "dressed by [3, 4, 5, 6, 19]. A decomposition certifies the sparsity of a graph if the sparse graphs\n",
131
+ "and graphs admitting the decomposition coincide.\n",
132
+ "Our algorithms are based on a new characterization of sparse graphs, which we call the\n",
133
+ "pebble game with colors. The pebble game with colors is a simple graph construction rule that\n",
134
+ "produces a sparse graph along with a sparsity-certifying decomposition.\n",
135
+ "We define and study a canonical class of pebble game constructions, which correspond to\n",
136
+ "previously studied decompositions of sparse graphs into edge disjoint trees. Our results provide\n",
137
+ "a unifying framework for all the previously known special cases, including Nash-Williams-\n",
138
+ "Tutte and [7, 24]. Indeed, in the lower range, canonical pebble game constructions capture the\n",
139
+ "properties of the augmenting paths used in matroid union and intersection algorithms[5, 6].\n",
140
+ "Since the sparse graphs in the upper range are not known to be unions or intersections of the\n",
141
+ "matroids for which there are efficient augmenting path algorithms, these do not easily apply in\n",
142
+ "∗ Research of both authors funded by the NSF under grants NSF CCF-0430990 and NSF-DARPA CARGO\n",
143
+ "CCR-0310661 to the first author.\n",
144
+ "2 Ileana Streinu, Louis Theran\n",
145
+ "Term Meaning\n",
146
+ "Sparse graph G Every non-empty subgraph on n′ vertices has ≤ kn′− ` edges\n",
147
+ "Tight graph G G = (V,E) is sparse and |V |= n, |E|= kn− `\n",
148
+ "Block H in G G is sparse, and H is a tight subgraph\n",
149
+ "Component H of G G is sparse and H is a maximal block\n",
150
+ "Map-graph Graph that admits an out-degree-exactly-one orientation\n",
151
+ "(k, `)-maps-and-trees Edge-disjoint union of ` trees and (k− `) map-grpahs\n",
152
+ "`Tk Union of ` trees, each vertex is in exactly k of them\n",
153
+ "Set of tree-pieces of an `Tk induced on V ′ ⊂V Pieces of trees in the `Tk spanned by E(V ′)\n",
154
+ "Proper `Tk Every V ′ ⊂V contains ≥ ` pieces of trees from the `Tk\n",
155
+ "Table 1. Sparse graph and decomposition terminology used in this paper.\n",
156
+ "the upper range. Pebble game with colors constructions may thus be considered a strengthening\n",
157
+ "of augmenting paths to the upper range of matroidal sparse graphs.\n",
158
+ "1.1. Sparse graphs\n",
159
+ "\n",
160
+ "ABSTRACT\n",
161
+ "\"\"\""
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "gpt2.generate(sess,prefix=text,truncate=sufix,checkpoint_dir=checkpoint_dir,nsamples=1)"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "markdown",
175
+ "metadata": {},
176
+ "source": [
177
+ "# Data Tokeniser"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "ds = pd.read_csv(path+'data/scientific_paper_full_text_translated.csv')  # dataset lives under AbstractGenerator/data/; forward slashes keep the JSON valid and the path portable\n"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": [
195
+ "import codecs\n",
196
+ "with codecs.open(file_path+'.txt','w','utf-8') as f:  # same file gpt2.finetune reads; 'w' so a re-run does not append a duplicate corpus\n",
197
+ "    for _, row in ds.iterrows():\n",
198
+ "        f.write(prefix+\"\\n\")\n",
199
+ "        f.write(row['text_no_abstract'])\n",
200
+ "        f.write(\"ABSTRACT\\n\")\n",
201
+ "        f.write(row['abstract']+\"\\n\")\n",
202
+ "        f.write(sufix+\"\\n\")  # newline so the next <|startoftext|> starts on its own line\n",
203
+ "    "
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": null,
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "import gradio as gr\n",
213
+ "# gpt2.generate() only prints (and returns None) unless return_as_list=True, so request the list and unwrap the single sample\n",
214
+ "def greet(text):\n",
215
+ "    return gpt2.generate(sess,prefix=str(text),truncate=sufix,checkpoint_dir=checkpoint_dir,nsamples=1,return_as_list=True)[0]\n",
216
+ "\n",
217
+ "iface = gr.Interface(fn=greet, inputs=\"text\", outputs=\"text\")\n",
218
+ "iface.launch(share=True,debug=True)"
219
+ ]
220
+ }
221
+ ],
222
+ "metadata": {
223
+ "interpreter": {
224
+ "hash": "53fbdc69e3e12c371950068c144423682c30d04ec68c2bd46937202e33e0058d"
225
+ },
226
+ "kernelspec": {
227
+ "display_name": "Python 3.7.11 ('receta')",
228
+ "language": "python",
229
+ "name": "python3"
230
+ },
231
+ "language_info": {
232
+ "codemirror_mode": {
233
+ "name": "ipython",
234
+ "version": 3
235
+ },
236
+ "file_extension": ".py",
237
+ "mimetype": "text/x-python",
238
+ "name": "python",
239
+ "nbconvert_exporter": "python",
240
+ "pygments_lexer": "ipython3",
241
+ "version": "3.9.7"
242
+ },
243
+ "orig_nbformat": 4
244
+ },
245
+ "nbformat": 4,
246
+ "nbformat_minor": 2
247
+ }
AbstractGenerator.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Recipe-Creator
2
+
3
+ dependencies:
4
+   - python=3.7  # TensorFlow 1.15 wheels are only published up to Python 3.7
5
+   - pip>=19.0
6
+   - jupyter
7
+   - pandas
8
+   - pip:
9
+     - gpt-2-simple
10
+     - tensorflow-estimator==1.15.1
11
+     - tensorflow-gpu==1.15
12
+     - gradio  # imported by AbstractGenerator.ipynb and App.py
13
+
14
+
AbstractGenerator/Tokenized_data/reduced_dataset_47MB.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ddebcb25fcc12a0029f83374ac9ea5e010e5dcbc58162ad6cac42360391f4f5
3
+ size 48623670
AbstractGenerator/Tokenized_data/resumen.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee5de5835a959b24bb1fba6bed5b14a6c5e7682f04483850109123d448c2853
3
+ size 62458342
AbstractGenerator/data/scientific_paper_full_text_translated.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58700a294dbdc6a78ef056f207cbee864e47b81c59e0209d0f5b622276e373f0
3
+ size 227783472
AbstractGenerator/weights/run1/checkpoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdce1a7de49e734203b3af24a79c6e7f92b12c50462e0a6ee9e5ce254e8c5a7
3
+ size 77
AbstractGenerator/weights/run1/counter ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7b041d37d1d693c3afe25d2af7d56ad2ee7b98eddb2cc1a055c1117e55542e
3
+ size 6
AbstractGenerator/weights/run1/encoder.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783
3
+ size 1042301
AbstractGenerator/weights/run1/events.out.tfevents.1648099088.FRANZ96521-W11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce17a8b02dce07c39e62333105e012aad5f62825b207ccb692becbe31bdc2b2
3
+ size 42913
AbstractGenerator/weights/run1/hparams.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d56e4121c427164e0c55c6f03c08e1daf9002b9b672825112d19097b680318
3
+ size 90
AbstractGenerator/weights/run1/model-1000.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:904922cd8b9620aec00f8072edc3717b03dfb4a700b585fe88012da8af0ce67a
3
+ size 497759232
AbstractGenerator/weights/run1/model-1000.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75db8ca12b433be77ffdf420d041d20837f2698ee5ab7132773f6d25cf841637
3
+ size 5215
AbstractGenerator/weights/run1/model-1000.meta ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e2968f20985aa601ae18b0ddf2b29bb5625822a4e11e3a45c0c5406f032e7b7
3
+ size 3884257
AbstractGenerator/weights/run1/vocab.bpe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5
3
+ size 456318
App.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.system('pip install gpt-2-simple')
3
+ os.system('pip install tensorflow-estimator==1.15.1')
4
+
5
+ import gpt_2_simple as gpt2
6
+ import tensorflow as tf
7
+ import pandas as pd
8
+ import re
9
+ import gradio as gr
10
+
11
+ model_name = "124M"
12
+
13
+ path = 'AbstractGenerator/'
14
+ checkpoint_dir =path+'weights/'
15
+ data_path = path+'Tokenized_data/'
16
+
17
+
18
+ file_name = 'resumen'
19
+ file_path = data_path+file_name
20
+
21
+ prefix= '<|startoftext|>'
22
+ sufix ='<|endoftext|>'
23
+
24
+ print("GPU is", "available" if tf.test.is_gpu_available() else "NOT AVAILABLE")
25
+
26
+ sess = gpt2.start_tf_sess()
27
+ gpt2.load_gpt2(sess,checkpoint_dir=checkpoint_dir,run_name='run1')
28
+
29
+
30
+
31
+
32
+
33
+ def greet(text):
34
+ return gpt2.generate(sess,prefix=text,truncate=sufix,checkpoint_dir=checkpoint_dir,nsamples=1)
35
+
36
+
37
+
38
+
39
+ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
40
+ iface.launch(share=True)
models/124M/checkpoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1b025d2e155283f5e300ce95bf6d5b6bc0f7fe010db73daa6975eb896ab9cb
3
+ size 77
models/124M/encoder.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783
3
+ size 1042301
models/124M/hparams.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d56e4121c427164e0c55c6f03c08e1daf9002b9b672825112d19097b680318
3
+ size 90
models/124M/model.ckpt.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2060c885360cc0cf41d7a6dbc4d24b5127aae20260c8b5ae521b5a6578407118
3
+ size 497759232
models/124M/model.ckpt.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71916f763f9746f9b2a06b12d91996cf1084ae008d0424543d39391c5f2dc687
3
+ size 5215
models/124M/model.ckpt.meta ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4668c448fa11531fd6700460487f73e82d3272960cea942252f8744bf225c77b
3
+ size 471155
models/124M/vocab.bpe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5
3
+ size 456318