pszemraj committed on
Commit
fc448c2
1 Parent(s): 60811bf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +115 -114
README.md CHANGED
@@ -4,149 +4,150 @@ license:
4
  - cc-by-nc-4.0
5
  datasets: pszemraj/fleece2instructions-codealpaca
6
  tags:
7
- - generated_from_trainer
8
- - instruct
9
- - instructions
10
- - code
 
11
  metrics:
12
- - rouge
13
  language:
14
- - en
15
  widget:
16
- - text: |
17
- git lfs install
18
- huggingface-cli lfs-enable-largefiles .
19
- git lfs track "*.bin"
20
- git add .
21
- git commit -a -m "add fp32 chkpt"
22
- git push
23
- example_title: bash
24
- - text: |
25
- export interface DocumentParams {
26
- pageContent: string;
27
-
28
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
29
- metadata: Record<string, any>;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
 
 
 
 
 
 
31
 
32
- /**
33
- * Interface for interacting with a document.
34
- */
35
- export class Document implements DocumentParams {
36
- pageContent: string;
37
 
38
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
39
- metadata: Record<string, any>;
40
 
41
- constructor(fields?: Partial<DocumentParams>) {
42
- this.pageContent = fields?.pageContent ?? this.pageContent;
43
- this.metadata = fields?.metadata ?? {};
44
- }
45
- }
46
- example_title: js
47
- - text: |
48
- def merge(left, right):
49
- if len(left) == 0:
50
- return right
51
-
52
- if len(right) == 0:
53
- return left
54
-
55
- result = []
56
- index_left = index_right = 0
57
-
58
- while len(result) < len(left) + len(right):
59
- if left[index_left] <= right[index_right]:
60
- result.append(left[index_left])
61
- index_left += 1
62
- else:
63
- result.append(right[index_right])
64
- index_right += 1
65
 
66
- if index_right == len(right):
67
- result += left[index_left:]
68
- break
69
 
70
- if index_left == len(left):
71
- result += right[index_right:]
72
- break
73
 
74
- return result
75
- example_title: merge
76
- - text: >
77
- import pandas as pd
78
 
79
- import plotly.graph_objects as go
80
 
81
 
82
- df =
83
- pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_apple_stock.csv')
84
 
85
 
86
- fig = go.Figure(go.Scatter(x = df['AAPL_x'], y = df['AAPL_y'],
87
- name='Share Prices (in USD)'))
88
 
89
- fig.update_layout(title='Apple Share Prices over time (2014)',
90
- plot_bgcolor='rgb(230, 230,230)',
91
- showlegend=True)
92
 
93
- fig.show()
94
- example_title: plot
95
- - text: |
96
- from spellchecker import SpellChecker
97
 
98
- spell = SpellChecker()
99
 
100
- def check_word_spelling(word: str):
101
- misspelled = spell.unknown([word])
102
- return len(misspelled) == 0
103
 
104
- def eval_and_replace(text: str, match_token: str = "- "):
105
- if match_token not in text:
106
- return text
107
- else:
108
- while True:
109
- full_before_text = text.split(match_token, maxsplit=1)[0]
110
- before_text = [
111
- char for char in full_before_text.split()[-1] if char.isalpha()
112
- ]
113
- before_text = "".join(before_text)
114
- full_after_text = text.split(match_token, maxsplit=1)[-1]
115
- after_text = [char for char in full_after_text.split()[0] if char.isalpha()]
116
- after_text = "".join(after_text)
117
- full_text = before_text + after_text
118
- if check_word_spelling(full_text):
119
- text = full_before_text + full_after_text
120
- else:
121
- text = full_before_text + " " + full_after_text
122
- if match_token not in text:
123
- break
124
- return text
125
 
126
- text = "I- am- a go- od- boy"
127
- eval_and_replace(text)
128
- example_title: spell check
129
- - text: >
130
- import torch
131
 
132
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
133
 
134
 
135
- checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
136
 
137
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
138
 
139
- model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
140
 
141
- sequences = ["I've been waiting for a HuggingFace course my whole life.",
142
- "So have I!"]
143
 
144
 
145
- tokens = tokenizer(sequences, padding=True, truncation=True,
146
- return_tensors="pt")
147
 
148
- output = model(**tokens)
149
- example_title: model inference
150
  inference:
151
  parameters:
152
  max_length: 96
@@ -202,4 +203,4 @@ The following hyperparameters were used during training:
202
  |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
203
  | 1.1165 | 1.0 | 281 | 1.1090 | 57.9239 | 31.9259 | 53.8737 | 54.9811 | 28.2924 |
204
  | 1.0763 | 2.0 | 563 | 1.0267 | 59.9605 | 34.0298 | 55.7523 | 56.8021 | 29.6966 |
205
- | 0.9595 | 2.99 | 843 | 1.0136 | 59.9513 | 33.9118 | 55.7815 | 56.9064 | 29.7146 |
 
4
  - cc-by-nc-4.0
5
  datasets: pszemraj/fleece2instructions-codealpaca
6
  tags:
7
+ - generated_from_trainer
8
+ - instruct
9
+ - instructions
10
+ - code
11
+ - instructiongen
12
  metrics:
13
+ - rouge
14
  language:
15
+ - en
16
  widget:
17
+ - text: |
18
+ git lfs install
19
+ huggingface-cli lfs-enable-largefiles .
20
+ git lfs track "*.bin"
21
+ git add .
22
+ git commit -a -m "add fp32 chkpt"
23
+ git push
24
+ example_title: bash
25
+ - text: |
26
+ export interface DocumentParams {
27
+ pageContent: string;
28
+
29
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
30
+ metadata: Record<string, any>;
31
+ }
32
+
33
+ /**
34
+ * Interface for interacting with a document.
35
+ */
36
+ export class Document implements DocumentParams {
37
+ pageContent: string;
38
+
39
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
40
+ metadata: Record<string, any>;
41
+
42
+ constructor(fields?: Partial<DocumentParams>) {
43
+ this.pageContent = fields?.pageContent ?? this.pageContent;
44
+ this.metadata = fields?.metadata ?? {};
45
  }
46
+ }
47
+ example_title: js
48
+ - text: |
49
+ def merge(left, right):
50
+ if len(left) == 0:
51
+ return right
52
 
53
+ if len(right) == 0:
54
+ return left
 
 
 
55
 
56
+ result = []
57
+ index_left = index_right = 0
58
 
59
+ while len(result) < len(left) + len(right):
60
+ if left[index_left] <= right[index_right]:
61
+ result.append(left[index_left])
62
+ index_left += 1
63
+ else:
64
+ result.append(right[index_right])
65
+ index_right += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ if index_right == len(right):
68
+ result += left[index_left:]
69
+ break
70
 
71
+ if index_left == len(left):
72
+ result += right[index_right:]
73
+ break
74
 
75
+ return result
76
+ example_title: merge
77
+ - text: >
78
+ import pandas as pd
79
 
80
+ import plotly.graph_objects as go
81
 
82
 
83
+ df =
84
+ pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_apple_stock.csv')
85
 
86
 
87
+ fig = go.Figure(go.Scatter(x = df['AAPL_x'], y = df['AAPL_y'],
88
+ name='Share Prices (in USD)'))
89
 
90
+ fig.update_layout(title='Apple Share Prices over time (2014)',
91
+ plot_bgcolor='rgb(230, 230,230)',
92
+ showlegend=True)
93
 
94
+ fig.show()
95
+ example_title: plot
96
+ - text: |
97
+ from spellchecker import SpellChecker
98
 
99
+ spell = SpellChecker()
100
 
101
+ def check_word_spelling(word: str):
102
+ misspelled = spell.unknown([word])
103
+ return len(misspelled) == 0
104
 
105
+ def eval_and_replace(text: str, match_token: str = "- "):
106
+ if match_token not in text:
107
+ return text
108
+ else:
109
+ while True:
110
+ full_before_text = text.split(match_token, maxsplit=1)[0]
111
+ before_text = [
112
+ char for char in full_before_text.split()[-1] if char.isalpha()
113
+ ]
114
+ before_text = "".join(before_text)
115
+ full_after_text = text.split(match_token, maxsplit=1)[-1]
116
+ after_text = [char for char in full_after_text.split()[0] if char.isalpha()]
117
+ after_text = "".join(after_text)
118
+ full_text = before_text + after_text
119
+ if check_word_spelling(full_text):
120
+ text = full_before_text + full_after_text
121
+ else:
122
+ text = full_before_text + " " + full_after_text
123
+ if match_token not in text:
124
+ break
125
+ return text
126
 
127
+ text = "I- am- a go- od- boy"
128
+ eval_and_replace(text)
129
+ example_title: spell check
130
+ - text: >
131
+ import torch
132
 
133
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
134
 
135
 
136
+ checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
137
 
138
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
139
 
140
+ model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
141
 
142
+ sequences = ["I've been waiting for a HuggingFace course my whole life.",
143
+ "So have I!"]
144
 
145
 
146
+ tokens = tokenizer(sequences, padding=True, truncation=True,
147
+ return_tensors="pt")
148
 
149
+ output = model(**tokens)
150
+ example_title: model inference
151
  inference:
152
  parameters:
153
  max_length: 96
 
203
  |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
204
  | 1.1165 | 1.0 | 281 | 1.1090 | 57.9239 | 31.9259 | 53.8737 | 54.9811 | 28.2924 |
205
  | 1.0763 | 2.0 | 563 | 1.0267 | 59.9605 | 34.0298 | 55.7523 | 56.8021 | 29.6966 |
206
+ | 0.9595 | 2.99 | 843 | 1.0136 | 59.9513 | 33.9118 | 55.7815 | 56.9064 | 29.7146 |