Sync widgets demo

machineuser committed · Commit a6b2d88 · 1 Parent(s): 27a1c11
This view is limited to 50 files because it contains too many changes. See raw diff.
- packages/tasks/scripts/inference-codegen.ts +1 -1
- packages/tasks/src/tasks/audio-classification/inference.ts +3 -3
- packages/tasks/src/tasks/audio-classification/spec/input.json +2 -2
- packages/tasks/src/tasks/audio-classification/spec/output.json +1 -12
- packages/tasks/src/tasks/automatic-speech-recognition/inference.ts +34 -29
- packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json +1 -1
- packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json +30 -28
- packages/tasks/src/tasks/common-definitions.json +25 -17
- packages/tasks/src/tasks/depth-estimation/inference.ts +1 -1
- packages/tasks/src/tasks/document-question-answering/inference.ts +15 -7
- packages/tasks/src/tasks/document-question-answering/spec/input.json +7 -7
- packages/tasks/src/tasks/document-question-answering/spec/output.json +2 -2
- packages/tasks/src/tasks/fill-mask/inference.ts +3 -2
- packages/tasks/src/tasks/fill-mask/spec/input.json +1 -1
- packages/tasks/src/tasks/fill-mask/spec/output.json +1 -1
- packages/tasks/src/tasks/image-classification/inference.ts +2 -2
- packages/tasks/src/tasks/image-classification/spec/input.json +2 -2
- packages/tasks/src/tasks/image-segmentation/inference.ts +2 -2
- packages/tasks/src/tasks/image-segmentation/spec/input.json +2 -2
- packages/tasks/src/tasks/image-to-image/inference.ts +4 -4
- packages/tasks/src/tasks/image-to-image/spec/input.json +4 -4
- packages/tasks/src/tasks/image-to-text/inference.ts +24 -19
- packages/tasks/src/tasks/image-to-text/spec/input.json +1 -1
- packages/tasks/src/tasks/image-to-text/spec/output.json +8 -11
- packages/tasks/src/tasks/placeholder/spec/input.json +2 -2
- packages/tasks/src/tasks/placeholder/spec/output.json +1 -1
- packages/tasks/src/tasks/question-answering/inference.ts +7 -7
- packages/tasks/src/tasks/question-answering/spec/input.json +7 -7
- packages/tasks/src/tasks/summarization/inference.ts +4 -3
- packages/tasks/src/tasks/text-classification/inference.ts +2 -2
- packages/tasks/src/tasks/text-classification/spec/input.json +2 -2
- packages/tasks/src/tasks/text-generation/inference.ts +14 -11
- packages/tasks/src/tasks/text-generation/spec/input.json +8 -8
- packages/tasks/src/tasks/text-generation/spec/output.json +8 -11
- packages/tasks/src/tasks/text-to-audio/inference.ts +23 -18
- packages/tasks/src/tasks/text-to-audio/spec/output.json +10 -13
- packages/tasks/src/tasks/text-to-image/inference.ts +5 -7
- packages/tasks/src/tasks/text-to-image/spec/input.json +4 -4
- packages/tasks/src/tasks/text-to-image/spec/output.json +7 -9
- packages/tasks/src/tasks/text-to-speech/inference.ts +17 -16
- packages/tasks/src/tasks/text2text-generation/inference.ts +9 -7
- packages/tasks/src/tasks/text2text-generation/spec/input.json +2 -2
- packages/tasks/src/tasks/text2text-generation/spec/output.json +8 -11
- packages/tasks/src/tasks/token-classification/inference.ts +3 -3
- packages/tasks/src/tasks/token-classification/spec/input.json +2 -2
- packages/tasks/src/tasks/token-classification/spec/output.json +1 -1
- packages/tasks/src/tasks/translation/inference.ts +4 -3
- packages/tasks/src/tasks/video-classification/inference.ts +4 -4
- packages/tasks/src/tasks/video-classification/spec/input.json +4 -4
- packages/tasks/src/tasks/visual-question-answering/inference.ts +1 -1
packages/tasks/scripts/inference-codegen.ts
CHANGED
@@ -57,7 +57,7 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRende
 		indentation: "\t",
 		rendererOptions: {
 			"just-types": true,
-			"nice-property-names": true,
+			"nice-property-names": false,
 			"prefer-unions": true,
 			"prefer-const-values": true,
 			"prefer-unknown": true,
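For reference, `nice-property-names` is the quicktype renderer flag that rewrites JSON keys such as `top_k` into camelCase identifiers like `topK`; setting it to `false` makes the generated TypeScript keep the exact snake_case names used in the JSON specs, which is what every property rename below depends on. A minimal sketch of a comparable quicktype-core invocation (the schema and type name are made up for illustration; only the renderer options mirror this diff):

import { quicktype, InputData, JSONSchemaInput, FetchingJSONSchemaStore } from "quicktype-core";

// Hypothetical one-property schema, shaped like the task specs below.
const schema = JSON.stringify({
	title: "ExampleParameters",
	type: "object",
	properties: {
		top_k: { type: "integer", description: "When specified, limits the output to the top K most probable classes." },
	},
});

async function generateTypes(): Promise<string> {
	const schemaInput = new JSONSchemaInput(new FetchingJSONSchemaStore());
	await schemaInput.addSource({ name: "ExampleParameters", schema });
	const inputData = new InputData();
	inputData.addInput(schemaInput);
	const result = await quicktype({
		inputData,
		lang: "typescript",
		rendererOptions: {
			"just-types": "true",
			// false => the emitted field stays `top_k` instead of being renamed to `topK`
			"nice-property-names": "false",
		},
	});
	return result.lines.join("\n");
}

Run against this schema, the emitted interface declares `top_k?: number;` verbatim, so the spec and client payloads no longer disagree on casing.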
packages/tasks/src/tasks/audio-classification/inference.ts
CHANGED
@@ -23,11 +23,11 @@ export interface AudioClassificationInput {
  * Additional inference parameters for Audio Classification
  */
 export interface AudioClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
@@ -40,7 +40,7 @@ export type AudioClassificationOutput = AudioClassificationOutputElement[];
  */
 export interface AudioClassificationOutputElement {
 	/**
-	 * The predicted class label
+	 * The predicted class label.
 	 */
 	label: string;
 	/**
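In practice the rename means callers now pass the same snake_case keys that the underlying transformers pipelines accept. A small illustrative value typed against the new interface (the import path and the "softmax" transform member are assumptions, not shown in this diff):

import type { AudioClassificationParameters } from "./inference"; // path assumed

const parameters: AudioClassificationParameters = {
	function_to_apply: "softmax", // assumed to be a valid ClassificationOutputTransform value
	top_k: 3, // return only the 3 most probable labels
};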
packages/tasks/src/tasks/audio-classification/spec/input.json
CHANGED
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Audio Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "AudioClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
packages/tasks/src/tasks/audio-classification/spec/output.json
CHANGED
@@ -5,17 +5,6 @@
 	"description": "Outputs for Audio Classification inference",
 	"type": "array",
 	"items": {
-		"type": "object",
-		"properties": {
-			"label": {
-				"type": "string",
-				"description": "The predicted class label (model specific)."
-			},
-			"score": {
-				"type": "number",
-				"description": "The corresponding probability."
-			}
-		},
-		"required": ["label", "score"]
+		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }
packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
CHANGED
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Automatic Speech Recognition inference
  */
@@ -17,6 +18,7 @@ export interface AutomaticSpeechRecognitionInput {
 	parameters?: AutomaticSpeechRecognitionParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -30,9 +32,10 @@ export interface AutomaticSpeechRecognitionParameters {
 	/**
 	 * Whether to output corresponding timestamps with the generated text
 	 */
-	returnTimestamps?: boolean;
+	return_timestamps?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,33 +117,23 @@ export interface GenerationParameters {
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export interface AutomaticSpeechRecognitionOutputChunk {
-	/**
-	 * A chunk of text identified by the model
-	 */
-	text: string;
-	/**
-	 * The start and end timestamps corresponding with the text
-	 */
-	timestamps: number[];
-	[property: string]: unknown;
-}
-export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[];
+
 /**
  * Outputs of inference for the Automatic Speech Recognition task
  */
-export interface AutomaticSpeechRecognitionOutputElement {
+export interface AutomaticSpeechRecognitionOutput {
 	/**
 	 * When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
 	 * the model.
@@ -152,3 +145,15 @@ export interface AutomaticSpeechRecognitionOutputElement {
 	text: string;
 	[property: string]: unknown;
 }
+
+export interface AutomaticSpeechRecognitionOutputChunk {
+	/**
+	 * A chunk of text identified by the model
+	 */
+	text: string;
+	/**
+	 * The start and end timestamps corresponding with the text
+	 */
+	timestamps: number[];
+	[property: string]: unknown;
+}
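The net effect of this file's changes: the output is now a single object instead of an array of elements, and the chunk type is hoisted into its own exported interface. A literal that type-checks against the new shape (values invented; timestamp units assumed to be seconds):

import type { AutomaticSpeechRecognitionOutput } from "./inference"; // path assumed

const output: AutomaticSpeechRecognitionOutput = {
	text: "hello world",
	// present only when return_timestamps was requested
	chunks: [
		{ text: "hello", timestamps: [0.0, 0.42] },
		{ text: "world", timestamps: [0.48, 0.9] },
	],
};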
packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
CHANGED
@@ -19,7 +19,7 @@
 		"description": "Additional inference parameters for Automatic Speech Recognition",
 		"type": "object",
 		"properties": {
-			"returnTimestamps": {
+			"return_timestamps": {
 				"type": "boolean",
 				"description": "Whether to output corresponding timestamps with the generated text"
 			},
packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
CHANGED
@@ -3,34 +3,36 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Automatic Speech Recognition task",
 	"title": "AutomaticSpeechRecognitionOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"title": "AutomaticSpeechRecognitionOutputElement",
-		"properties": {
-			"text": {
-				"type": "string",
-				"description": "The recognized text."
-			},
-			"chunks": {
-				"type": "array",
-				"description": "When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model.",
-				"items": {
-					"type": "object",
-					"properties": {
-						"text": { "type": "string", "description": "A chunk of text identified by the model" },
-						"timestamps": {
-							"type": "array",
-							"description": "The start and end timestamps corresponding with the text",
-							"items": { "type": "number" },
-							"minLength": 2,
-							"maxLength": 2
-						}
-					},
-					"required": ["text", "timestamps"]
-				}
-			}
-		},
-		"required": ["text"]
-	}
+	"type": "object",
+	"properties": {
+		"text": {
+			"type": "string",
+			"description": "The recognized text."
+		},
+		"chunks": {
+			"type": "array",
+			"description": "When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model.",
+			"items": {
+				"type": "object",
+				"title": "AutomaticSpeechRecognitionOutputChunk",
+				"properties": {
+					"text": {
+						"type": "string",
+						"description": "A chunk of text identified by the model"
+					},
+					"timestamps": {
+						"type": "array",
+						"description": "The start and end timestamps corresponding with the text",
+						"items": {
+							"type": "number"
+						},
+						"minLength": 2,
+						"maxLength": 2
+					}
+				},
+				"required": ["text", "timestamps"]
+			}
+		}
+	},
+	"required": ["text"]
 }
packages/tasks/src/tasks/common-definitions.json
CHANGED
@@ -43,63 +43,71 @@
 				"type": "number",
 				"description": "The value used to modulate the next token probabilities."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 			},
-			"topP": {
+			"top_p": {
 				"type": "number",
 				"description": "If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
 			},
-			"typicalP": {
+			"typical_p": {
 				"type": "number",
 				"description": " Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
 			},
-			"epsilonCutoff": {
+			"epsilon_cutoff": {
 				"type": "number",
 				"description": "If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
 			},
-			"etaCutoff": {
+			"eta_cutoff": {
 				"type": "number",
 				"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
 			},
-			"maxLength": {
+			"max_length": {
 				"type": "integer",
 				"description": "The maximum length (in tokens) of the generated text, including the input."
 			},
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "The maximum number of tokens to generate. Takes precedence over maxLength."
 			},
-			"minLength": {
+			"min_length": {
 				"type": "integer",
 				"description": "The minimum length (in tokens) of the generated text, including the input."
 			},
-			"minNewTokens": {
+			"min_new_tokens": {
 				"type": "integer",
 				"description": "The minimum number of tokens to generate. Takes precedence over maxLength."
 			},
-			"doSample": {
+			"do_sample": {
 				"type": "boolean",
 				"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
 			},
-			"earlyStopping": {
+			"early_stopping": {
 				"description": "Controls the stopping condition for beam-based methods.",
-				"oneOf": [{ "type": "boolean" }, { "const": "never", "type": "string" }]
-			},
-			"numBeams": {
+				"oneOf": [
+					{
+						"type": "boolean"
+					},
+					{
+						"const": "never",
+						"type": "string"
+					}
+				]
+			},
+			"num_beams": {
 				"type": "integer",
 				"description": "Number of beams to use for beam search."
 			},
-			"numBeamGroups": {
+			"num_beam_groups": {
 				"type": "integer",
 				"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
 			},
-			"penaltyAlpha": {
+			"penalty_alpha": {
 				"type": "number",
 				"description": "The value balances the model confidence and the degeneration penalty in contrastive search decoding."
 			},
-			"useCache": {
+			"use_cache": {
 				"type": "boolean",
 				"description": "Whether the model should use the past last key/values attentions to speed up decoding"
 			}
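Because these definitions are shared via $ref by every generation-capable task in this commit, one parameters object now works across ASR, image-to-text, and the text generation tasks. A sketch with arbitrary example values (not recommended defaults; the `temperature` key is an assumption, since only its description is visible in the context lines above):

// Matches the shared GenerationParameters shape defined by these schemas.
const generation = {
	do_sample: true,
	temperature: 0.7, // assumed key; only its description appears in this hunk
	top_k: 50,
	top_p: 0.95,
	max_new_tokens: 256,
	num_beams: 1,
	early_stopping: "never" as const, // boolean | "never"
	use_cache: true,
};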
packages/tasks/src/tasks/depth-estimation/inference.ts
CHANGED
@@ -30,6 +30,6 @@ export interface DepthEstimationOutput {
 	/**
 	 * The predicted depth as a tensor
 	 */
-	predictedDepth?: unknown;
+	predicted_depth?: unknown;
 	[property: string]: unknown;
 }
packages/tasks/src/tasks/document-question-answering/inference.ts
CHANGED
@@ -42,11 +42,11 @@ export interface DocumentQuestionAnsweringParameters {
 	 * be split in several chunks with some overlap. This argument controls the size of that
 	 * overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * Language to use while running OCR. Defaults to english.
 	 */
@@ -55,27 +55,27 @@ export interface DocumentQuestionAnsweringParameters {
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using doc_stride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Can return less
 	 * than top_k answers if there are not enough options available within the context.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
 	 * skip the OCR step and use the provided bounding boxes instead.
 	 */
-	wordBoxes?: WordBox[];
+	word_boxes?: WordBox[];
 	[property: string]: unknown;
 }
 export type WordBox = number[] | string;
@@ -88,11 +88,19 @@ export interface DocumentQuestionAnsweringOutputElement {
 	 * The answer to the question.
 	 */
 	answer: string;
+	/**
+	 * The end word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	end: number;
 	/**
 	 * The probability associated to the answer.
 	 */
 	score: number;
+	/**
+	 * The start word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	start: number;
 	/**
 	 * The index of each word/box pair that is in the answer
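One subtlety worth noting: `WordBox` is typed as `number[] | string`, so `word_boxes` is a heterogeneous list rather than an array of (word, box) tuples. A hedged sketch under one plausible reading, interleaving each word with its 0-1000-normalized box; the diff itself does not pin down the pairing convention:

import type { WordBox } from "./inference"; // path assumed

// Skip the OCR step by supplying pre-extracted words and boxes.
const word_boxes: WordBox[] = [
	"Invoice",
	[120, 40, 320, 80], // assumed [x0, y0, x1, y1], normalized 0 -> 1000
	"Total",
	[120, 900, 220, 940],
];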
packages/tasks/src/tasks/document-question-answering/spec/input.json
CHANGED
@@ -31,11 +31,11 @@
 		"description": "Additional inference parameters for Document Question Answering",
 		"type": "object",
 		"properties": {
-			"docStride": {
+			"doc_stride": {
 				"type": "integer",
 				"description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 			},
-			"handleImpossibleAnswer": {
+			"handle_impossible_answer": {
 				"type": "boolean",
 				"description": "Whether to accept impossible as an answer"
 			},
@@ -43,23 +43,23 @@
 				"type": "string",
 				"description": "Language to use while running OCR. Defaults to english."
 			},
-			"maxAnswerLen": {
+			"max_answer_len": {
 				"type": "integer",
 				"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 			},
-			"maxSeqLen": {
+			"max_seq_len": {
 				"type": "integer",
 				"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
 			},
-			"maxQuestionLen": {
+			"max_question_len": {
 				"type": "integer",
 				"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
 			},
-			"wordBoxes": {
+			"word_boxes": {
 				"type": "array",
 				"description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
 				"items": {
packages/tasks/src/tasks/document-question-answering/spec/output.json
CHANGED
@@ -17,11 +17,11 @@
 			},
 			"start": {
 				"type": "integer",
-				"description": "The start word index of the answer (in the OCR’d version of the input or provided word boxes)."
+				"description": "The start word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 			},
 			"end": {
 				"type": "integer",
-				"description": "The end word index of the answer (in the OCR’d version of the input or provided word boxes)."
+				"description": "The end word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 			},
 			"words": {
 				"type": "array",
packages/tasks/src/tasks/fill-mask/inference.ts
CHANGED
@@ -33,7 +33,7 @@ export interface FillMaskParameters {
 	/**
 	 * When passed, overrides the number of predictions to return.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type FillMaskOutput = FillMaskOutputElement[];
@@ -53,9 +53,10 @@ export interface FillMaskOutputElement {
 	 * The predicted token id (to replace the masked one).
 	 */
 	token: number;
+	tokenStr: unknown;
 	/**
 	 * The predicted token (to replace the masked one).
 	 */
-	tokenStr?: string;
+	token_str?: string;
 	[property: string]: unknown;
 }
packages/tasks/src/tasks/fill-mask/spec/input.json
CHANGED
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Fill Mask",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When passed, overrides the number of predictions to return."
 			},
packages/tasks/src/tasks/fill-mask/spec/output.json
CHANGED
@@ -19,7 +19,7 @@
 				"type": "integer",
 				"description": "The predicted token id (to replace the masked one)."
 			},
-			"tokenStr": {
+			"token_str": {
 				"type": "string",
 				"description": "The predicted token (to replace the masked one)."
 			}
packages/tasks/src/tasks/image-classification/inference.ts
CHANGED
@@ -23,11 +23,11 @@ export interface ImageClassificationInput {
  * Additional inference parameters for Image Classification
  */
 export interface ImageClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
packages/tasks/src/tasks/image-classification/spec/input.json
CHANGED
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "ImageClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
packages/tasks/src/tasks/image-segmentation/inference.ts
CHANGED
@@ -26,11 +26,11 @@ export interface ImageSegmentationParameters {
 	/**
 	 * Threshold to use when turning the predicted masks into binary values.
 	 */
-	maskThreshold?: number;
+	mask_threshold?: number;
 	/**
 	 * Mask overlap threshold to eliminate small, disconnected segments.
 	 */
-	overlapMaskAreaThreshold?: number;
+	overlap_mask_area_threshold?: number;
 	/**
 	 * Segmentation task to be performed, depending on model capabilities.
 	 */
packages/tasks/src/tasks/image-segmentation/spec/input.json
CHANGED
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Segmentation",
 		"type": "object",
 		"properties": {
-			"maskThreshold": {
+			"mask_threshold": {
 				"type": "number",
 				"description": "Threshold to use when turning the predicted masks into binary values."
 			},
-			"overlapMaskAreaThreshold": {
+			"overlap_mask_area_threshold": {
 				"type": "number",
 				"description": "Mask overlap threshold to eliminate small, disconnected segments."
 			},
packages/tasks/src/tasks/image-to-image/inference.ts
CHANGED
@@ -29,20 +29,20 @@ export interface ImageToImageParameters {
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
 
packages/tasks/src/tasks/image-to-image/spec/input.json
CHANGED
@@ -19,22 +19,22 @@
 		"description": "Additional inference parameters for Image To Image",
 		"type": "object",
 		"properties": {
-			"guidanceScale": {
+			"guidance_scale": {
 				"type": "number",
 				"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 			},
-			"negativePrompt": {
+			"negative_prompt": {
 				"type": "array",
 				"items": {
 					"type": "string"
 				},
 				"description": "One or several prompt to guide what NOT to include in image generation."
 			},
-			"numInferenceSteps": {
+			"num_inference_steps": {
 				"type": "integer",
 				"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 			},
-			"targetSize": {
+			"target_size": {
 				"type": "object",
 				"description": "The size in pixel of the output image",
 				"properties": {
packages/tasks/src/tasks/image-to-text/inference.ts
CHANGED
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Image To Text inference
  */
@@ -17,6 +18,7 @@ export interface ImageToTextInput {
 	parameters?: ImageToTextParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -30,9 +32,10 @@ export interface ImageToTextParameters {
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,25 +117,27 @@ export interface GenerationParameters {
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export type ImageToTextOutput = ImageToTextOutputElement[];
+
 /**
  * Outputs of inference for the Image To Text task
  */
-export interface ImageToTextOutputElement {
+export interface ImageToTextOutput {
+	generatedText: unknown;
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text?: string;
 	[property: string]: unknown;
 }
packages/tasks/src/tasks/image-to-text/spec/input.json
CHANGED
@@ -19,7 +19,7 @@
 		"description": "Additional inference parameters for Image To Text",
 		"type": "object",
 		"properties": {
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "The amount of maximum tokens to generate."
 			},
packages/tasks/src/tasks/image-to-text/spec/output.json
CHANGED
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Image To Text task",
 	"title": "ImageToTextOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generatedText"]
 }
packages/tasks/src/tasks/placeholder/spec/input.json
CHANGED
@@ -20,11 +20,11 @@
 		"description": "TODO: describe additional parameters here.",
 		"type": "object",
 		"properties": {
-			"dummyParameterName": {
+			"dummy_parameter_name": {
 				"type": "boolean",
 				"description": "TODO: describe the parameter here"
 			},
-			"dummyParameterName2": {
+			"dummy_parameter_name2": {
 				"type": "integer",
 				"description": "TODO: describe the parameter here"
 			}
packages/tasks/src/tasks/placeholder/spec/output.json
CHANGED
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"meaningfulOutputName": {
+			"meaningful_output_name": {
 				"type": "string",
 				"description": "TODO: Describe what is outputed by the inference here"
 			}
packages/tasks/src/tasks/question-answering/inference.ts
CHANGED
@@ -41,37 +41,37 @@ export interface QuestionAnsweringParameters {
 	 * Attempts to align the answer to real words. Improves quality on space separated
 	 * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
 	 */
-	alignToWords?: boolean;
+	align_to_words?: boolean;
 	/**
 	 * If the context is too long to fit with the question for the model, it will be split in
 	 * several chunks with some overlap. This argument controls the size of that overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer.
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using docStride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Note that we
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
packages/tasks/src/tasks/question-answering/spec/input.json
CHANGED
@@ -32,31 +32,31 @@
       "description": "Additional inference parameters for Question Answering",
       "type": "object",
       "properties": {
-        "
+        "top_k": {
           "type": "integer",
           "description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
         },
-        "
+        "doc_stride": {
           "type": "integer",
           "description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
         },
-        "
+        "max_answer_len": {
           "type": "integer",
           "description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
         },
-        "
+        "max_seq_len": {
           "type": "integer",
           "description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
         },
-        "
+        "max_question_len": {
           "type": "integer",
           "description": "The maximum length of the question after tokenization. It will be truncated if needed."
         },
-        "
+        "handle_impossible_answer": {
           "type": "boolean",
           "description": "Whether to accept impossible as an answer."
         },
-        "
+        "align_to_words": {
           "type": "boolean",
           "description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
         }
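Illustrative sketch (not part of this commit) of the renamed snake_case parameters, matching both the TypeScript interface and the JSON spec above; all values are hypothetical:

import type { QuestionAnsweringParameters } from "./inference";

const params: QuestionAnsweringParameters = {
    top_k: 3,                         // return up to 3 answers, best first
    doc_stride: 128,                  // token overlap between context chunks
    max_seq_len: 384,                 // chunk size in tokens (context + question)
    max_answer_len: 30,
    handle_impossible_answer: false,
    align_to_words: true,
};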
packages/tasks/src/tasks/summarization/inference.ts
CHANGED
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
     /**
      * Whether to clean up the potential extra spaces in the text output.
      */
-
+    clean_up_tokenization_spaces?: boolean;
     /**
      * Additional parametrization of the text generation algorithm
      */
-
+    generate_parameters?: { [key: string]: unknown };
     /**
      * The truncation strategy to use
      */
@@ -50,9 +50,10 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
      * Outputs of inference for the Text2text Generation task
      */
 export interface SummarizationOutput {
+    generatedText: unknown;
     /**
      * The generated text.
      */
-
+    generated_text?: string;
     [property: string]: unknown;
 }
packages/tasks/src/tasks/text-classification/inference.ts
CHANGED
@@ -23,11 +23,11 @@ export interface TextClassificationInput {
  * Additional inference parameters for Text Classification
  */
 export interface TextClassificationParameters {
-
+    function_to_apply?: ClassificationOutputTransform;
     /**
      * When specified, limits the output to the top K most probable classes.
      */
-
+    top_k?: number;
     [property: string]: unknown;
 }
 /**
packages/tasks/src/tasks/text-classification/spec/input.json
CHANGED
@@ -20,11 +20,11 @@
       "description": "Additional inference parameters for Text Classification",
       "type": "object",
       "properties": {
-        "
+        "function_to_apply": {
           "title": "TextClassificationOutputTransform",
           "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
         },
-        "
+        "top_k": {
           "type": "integer",
           "description": "When specified, limits the output to the top K most probable classes."
         }
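For reference, a minimal sketch (not from the commit) of the renamed parameters; "softmax" is assumed here to be a valid ClassificationOutputTransform value from common-definitions.json:

import type { TextClassificationParameters } from "./inference";

const params: TextClassificationParameters = {
    function_to_apply: "softmax", // assumed member of ClassificationOutputTransform
    top_k: 5,                     // keep only the 5 most probable labels
};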
packages/tasks/src/tasks/text-generation/inference.ts
CHANGED
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text Generation inference
  */
@@ -17,6 +18,7 @@ export interface TextGenerationInput {
     parameters?: TextGenerationParameters;
     [property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -26,24 +28,24 @@ export interface TextGenerationParameters {
     /**
      * Whether to use logit sampling (true) or greedy search (false).
      */
-
+    do_sample?: boolean;
     /**
      * Maximum number of generated tokens.
      */
-
+    max_new_tokens?: number;
     /**
      * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
      * paper](https://hf.co/papers/1909.05858) for more details.
      */
-
+    repetition_penalty?: number;
     /**
      * Whether to prepend the prompt to the generated text.
      */
-
+    return_full_text?: boolean;
     /**
      * Stop generating tokens if a member of `stop_sequences` is generated.
      */
-
+    stop_sequences?: string[];
     /**
      * The value used to modulate the logits distribution.
      */
@@ -51,12 +53,12 @@ export interface TextGenerationParameters {
     /**
      * The number of highest probability vocabulary tokens to keep for top-k-filtering.
      */
-
+    top_k?: number;
     /**
      * If set to < 1, only the smallest set of most probable tokens with probabilities that add
      * up to `top_p` or higher are kept for generation.
      */
-
+    top_p?: number;
     /**
      * Truncate input tokens to the given size.
      */
@@ -65,21 +67,22 @@ export interface TextGenerationParameters {
      * Typical Decoding mass. See [Typical Decoding for Natural Language
      * Generation](https://hf.co/papers/2202.00666) for more information
      */
-
+    typical_p?: number;
     /**
      * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
      */
     watermark?: boolean;
     [property: string]: unknown;
 }
-
+
 /**
  * Outputs for Text Generation inference
  */
-export interface
+export interface TextGenerationOutput {
+    generatedText: unknown;
     /**
      * The generated text
      */
-
+    generated_text?: string;
     [property: string]: unknown;
 }
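A hedged usage sketch (illustrative only) of a full request after the rename. The `inputs` field and the `temperature` property are unchanged context lines in this diff and are assumed to exist on the types; all values are hypothetical:

import type { TextGenerationInput } from "./inference";

const request: TextGenerationInput = {
    inputs: "Once upon a time",   // assumed prompt field from the input spec
    parameters: {
        do_sample: true,
        max_new_tokens: 64,
        temperature: 0.8,         // assumed unchanged context property
        top_k: 50,
        top_p: 0.95,
        repetition_penalty: 1.1,
        stop_sequences: ["\n\n"],
        return_full_text: false,
    },
};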
packages/tasks/src/tasks/text-generation/spec/input.json
CHANGED
@@ -20,23 +20,23 @@
       "description": "Additional inference parameters for Text Generation",
       "type": "object",
       "properties": {
-        "
+        "do_sample": {
           "type": "boolean",
           "description": "Whether to use logit sampling (true) or greedy search (false)."
         },
-        "
+        "max_new_tokens": {
           "type": "integer",
           "description": "Maximum number of generated tokens."
         },
-        "
+        "repetition_penalty": {
           "type": "number",
           "description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
         },
-        "
+        "return_full_text": {
           "type": "boolean",
           "description": "Whether to prepend the prompt to the generated text."
         },
-        "
+        "stop_sequences": {
           "type": "array",
           "items": {
             "type": "string"
@@ -47,11 +47,11 @@
           "type": "number",
           "description": "The value used to modulate the logits distribution."
         },
-        "
+        "top_k": {
           "type": "integer",
           "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
         },
-        "
+        "top_p": {
           "type": "number",
           "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
         },
@@ -59,7 +59,7 @@
           "type": "integer",
           "description": "Truncate input tokens to the given size."
         },
-        "
+        "typical_p": {
           "type": "number",
           "description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
         },
packages/tasks/src/tasks/text-generation/spec/output.json
CHANGED
@@ -3,15 +3,12 @@
   "$schema": "http://json-schema.org/draft-06/schema#",
   "description": "Outputs for Text Generation inference",
   "title": "TextGenerationOutput",
-  "type": "
-  "
-  "
-
-  "
-
-
-
-  },
-  "required": ["generatedText"]
-  }
+  "type": "object",
+  "properties": {
+    "generated_text": {
+      "type": "string",
+      "description": "The generated text"
+    }
+  },
+  "required": ["generatedText"]
 }
packages/tasks/src/tasks/text-to-audio/inference.ts
CHANGED
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text To Audio inference
  */
@@ -17,6 +18,7 @@ export interface TextToAudioInput {
     parameters?: TextToAudioParameters;
     [property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -29,6 +31,7 @@ export interface TextToAudioParameters {
     generate?: GenerationParameters;
     [property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -38,18 +41,18 @@ export interface GenerationParameters {
     /**
      * Whether to use sampling instead of greedy decoding when generating new tokens.
      */
-
+    do_sample?: boolean;
     /**
      * Controls the stopping condition for beam-based methods.
      */
-
+    early_stopping?: EarlyStoppingUnion;
    /**
      * If set to float strictly between 0 and 1, only tokens with a conditional probability
      * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
      * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
      * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
      */
-
+    epsilon_cutoff?: number;
     /**
      * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
      * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -59,37 +62,37 @@ export interface GenerationParameters {
      * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
      * for more details.
      */
-
+    eta_cutoff?: number;
     /**
      * The maximum length (in tokens) of the generated text, including the input.
      */
-
+    max_length?: number;
     /**
      * The maximum number of tokens to generate. Takes precedence over maxLength.
      */
-
+    max_new_tokens?: number;
     /**
      * The minimum length (in tokens) of the generated text, including the input.
      */
-
+    min_length?: number;
     /**
      * The minimum number of tokens to generate. Takes precedence over maxLength.
      */
-
+    min_new_tokens?: number;
     /**
      * Number of groups to divide num_beams into in order to ensure diversity among different
      * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
      */
-
+    num_beam_groups?: number;
     /**
      * Number of beams to use for beam search.
      */
-
+    num_beams?: number;
     /**
      * The value balances the model confidence and the degeneration penalty in contrastive
      * search decoding.
      */
-
+    penalty_alpha?: number;
     /**
      * The value used to modulate the next token probabilities.
      */
@@ -97,12 +100,12 @@ export interface GenerationParameters {
     /**
      * The number of highest probability vocabulary tokens to keep for top-k-filtering.
      */
-
+    top_k?: number;
     /**
      * If set to float < 1, only the smallest set of most probable tokens with probabilities
      * that add up to top_p or higher are kept for generation.
      */
-
+    top_p?: number;
     /**
      * Local typicality measures how similar the conditional probability of predicting a target
      * token next is to the expected conditional probability of predicting a random token next,
@@ -110,29 +113,31 @@ export interface GenerationParameters {
      * most locally typical tokens with probabilities that add up to typical_p or higher are
      * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
      */
-
+    typical_p?: number;
     /**
      * Whether the model should use the past last key/values attentions to speed up decoding
      */
-
+    use_cache?: boolean;
     [property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
-
+
 /**
  * Outputs of inference for the Text To Audio task
  */
-export interface
+export interface TextToAudioOutput {
     /**
      * The generated audio waveform.
      */
     audio: unknown;
+    samplingRate: unknown;
     /**
      * The sampling rate of the generated audio waveform.
      */
+    sampling_rate?: number;
     [property: string]: unknown;
 }
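Since the same GenerationParameters shape recurs below for text-to-speech, one illustrative instance (not from the commit; the `inputs` field is assumed from the input spec, values hypothetical):

import type { TextToAudioInput } from "./inference";

const request: TextToAudioInput = {
    inputs: "Hello world",         // assumed text field from the input spec
    parameters: {
        generate: {
            do_sample: true,
            early_stopping: "never", // EarlyStoppingUnion: boolean | "never"
            max_new_tokens: 256,
            num_beams: 4,
            top_k: 50,
            top_p: 0.9,
            use_cache: true,
        },
    },
};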
packages/tasks/src/tasks/text-to-audio/spec/output.json
CHANGED
@@ -3,18 +3,15 @@
   "$schema": "http://json-schema.org/draft-06/schema#",
   "description": "Outputs of inference for the Text To Audio task",
   "title": "TextToAudioOutput",
-  "type": "
-  "
-  "
-
-    "audio": {
-      "description": "The generated audio waveform."
-    },
-    "samplingRate": {
-      "type": "number",
-      "description": "The sampling rate of the generated audio waveform."
-    }
-  },
-  "
-
+  "type": "object",
+  "properties": {
+    "audio": {
+      "description": "The generated audio waveform."
+    },
+    "sampling_rate": {
+      "type": "number",
+      "description": "The sampling rate of the generated audio waveform."
+    }
+  },
+  "required": ["audio", "samplingRate"]
 }
packages/tasks/src/tasks/text-to-image/inference.ts
CHANGED
@@ -29,16 +29,16 @@ export interface TextToImageParameters {
      * For diffusion models. A higher guidance scale value encourages the model to generate
      * images closely linked to the text prompt at the expense of lower image quality.
      */
-
+    guidance_scale?: number;
     /**
      * One or several prompt to guide what NOT to include in image generation.
      */
-
+    negative_prompt?: string[];
     /**
      * For diffusion models. The number of denoising steps. More denoising steps usually lead to
      * a higher quality image at the expense of slower inference.
      */
-
+    num_inference_steps?: number;
     /**
      * For diffusion models. Override the scheduler with a compatible one
      */
@@ -46,7 +46,7 @@ export interface TextToImageParameters {
     /**
      * The size in pixel of the output image
      */
-
+    target_size?: TargetSize;
     [property: string]: unknown;
 }
@@ -62,9 +62,7 @@ export interface TargetSize {
     /**
      * Outputs of inference for the Text To Image task
      */
-export
-
-export interface TextToImageOutputObject {
+export interface TextToImageOutput {
     /**
      * The generated image
      */
packages/tasks/src/tasks/text-to-image/spec/input.json
CHANGED
@@ -20,22 +20,22 @@
       "description": "Additional inference parameters for Text To Image",
       "type": "object",
       "properties": {
-        "
+        "guidance_scale": {
           "type": "number",
           "description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
         },
-        "
+        "negative_prompt": {
           "type": "array",
           "items": {
             "type": "string"
           },
           "description": "One or several prompt to guide what NOT to include in image generation."
         },
-        "
+        "num_inference_steps": {
           "type": "integer",
           "description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
         },
-        "
+        "target_size": {
           "type": "object",
           "description": "The size in pixel of the output image",
           "properties": {
packages/tasks/src/tasks/text-to-image/spec/output.json
CHANGED
@@ -3,13 +3,11 @@
   "$schema": "http://json-schema.org/draft-06/schema#",
   "description": "Outputs of inference for the Text To Image task",
   "title": "TextToImageOutput",
-  "type": "
-  "
-  "
-  "
-
-
-
-  "required": ["image"]
-  }
+  "type": "object",
+  "properties": {
+    "image": {
+      "description": "The generated image"
+    }
+  },
+  "required": ["image"]
 }
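An illustrative sketch (not from the commit) of the renamed text-to-image parameters; the width/height fields of TargetSize are assumed from the unchanged part of the spec, and all values are hypothetical:

import type { TextToImageParameters } from "./inference";

const params: TextToImageParameters = {
    guidance_scale: 7.5,
    negative_prompt: ["blurry", "low quality"],
    num_inference_steps: 30,
    target_size: { width: 768, height: 768 }, // assumed TargetSize fields
};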
packages/tasks/src/tasks/text-to-speech/inference.ts
CHANGED
@@ -43,18 +43,18 @@ export interface GenerationParameters {
     /**
      * Whether to use sampling instead of greedy decoding when generating new tokens.
      */
-
+    do_sample?: boolean;
     /**
      * Controls the stopping condition for beam-based methods.
      */
-
+    early_stopping?: EarlyStoppingUnion;
     /**
      * If set to float strictly between 0 and 1, only tokens with a conditional probability
      * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
      * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
      * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
      */
-
+    epsilon_cutoff?: number;
     /**
      * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
      * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -64,37 +64,37 @@ export interface GenerationParameters {
      * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
      * for more details.
      */
-
+    eta_cutoff?: number;
     /**
      * The maximum length (in tokens) of the generated text, including the input.
      */
-
+    max_length?: number;
     /**
      * The maximum number of tokens to generate. Takes precedence over maxLength.
      */
-
+    max_new_tokens?: number;
     /**
      * The minimum length (in tokens) of the generated text, including the input.
      */
-
+    min_length?: number;
     /**
      * The minimum number of tokens to generate. Takes precedence over maxLength.
      */
-
+    min_new_tokens?: number;
     /**
      * Number of groups to divide num_beams into in order to ensure diversity among different
      * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
      */
-
+    num_beam_groups?: number;
     /**
      * Number of beams to use for beam search.
      */
-
+    num_beams?: number;
     /**
      * The value balances the model confidence and the degeneration penalty in contrastive
      * search decoding.
      */
-
+    penalty_alpha?: number;
     /**
      * The value used to modulate the next token probabilities.
      */
@@ -102,12 +102,12 @@ export interface GenerationParameters {
     /**
      * The number of highest probability vocabulary tokens to keep for top-k-filtering.
      */
-
+    top_k?: number;
     /**
      * If set to float < 1, only the smallest set of most probable tokens with probabilities
      * that add up to top_p or higher are kept for generation.
      */
-
+    top_p?: number;
     /**
      * Local typicality measures how similar the conditional probability of predicting a target
      * token next is to the expected conditional probability of predicting a random token next,
@@ -115,11 +115,11 @@ export interface GenerationParameters {
      * most locally typical tokens with probabilities that add up to typical_p or higher are
      * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
      */
-
+    typical_p?: number;
     /**
      * Whether the model should use the past last key/values attentions to speed up decoding
      */
-
+    use_cache?: boolean;
     [property: string]: unknown;
 }

@@ -138,9 +138,10 @@ export interface TextToSpeechOutput {
      * The generated audio waveform.
      */
     audio: unknown;
+    samplingRate: unknown;
     /**
      * The sampling rate of the generated audio waveform.
      */
-
+    sampling_rate?: number;
     [property: string]: unknown;
 }
packages/tasks/src/tasks/text2text-generation/inference.ts
CHANGED
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text2text Generation inference
  */
@@ -17,6 +18,7 @@ export interface Text2TextGenerationInput {
     parameters?: Text2TextGenerationParameters;
     [property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -26,28 +28,28 @@ export interface Text2TextGenerationParameters {
     /**
      * Whether to clean up the potential extra spaces in the text output.
      */
-
+    clean_up_tokenization_spaces?: boolean;
     /**
      * Additional parametrization of the text generation algorithm
      */
-
-        [key: string]: unknown;
-    };
+    generate_parameters?: { [key: string]: unknown };
     /**
      * The truncation strategy to use
      */
     truncation?: Text2TextGenerationTruncationStrategy;
     [property: string]: unknown;
 }
+
 export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
-
+
 /**
  * Outputs of inference for the Text2text Generation task
  */
-export interface
+export interface Text2TextGenerationOutput {
+    generatedText: unknown;
     /**
      * The generated text.
      */
-
+    generated_text?: string;
     [property: string]: unknown;
 }
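A minimal sketch (illustrative, not part of this commit) of the renamed text2text parameters; the bag passed through `generate_parameters` is free-form by design, so the key shown is hypothetical:

import type { Text2TextGenerationParameters } from "./inference";

const params: Text2TextGenerationParameters = {
    clean_up_tokenization_spaces: true,
    truncation: "longest_first",                  // one of the strategies above
    generate_parameters: { max_new_tokens: 64 },  // forwarded to the generation algorithm
};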
packages/tasks/src/tasks/text2text-generation/spec/input.json
CHANGED
@@ -20,7 +20,7 @@
       "description": "Additional inference parameters for Text2text Generation",
       "type": "object",
       "properties": {
-        "
+        "clean_up_tokenization_spaces": {
           "type": "boolean",
           "description": "Whether to clean up the potential extra spaces in the text output."
         },
@@ -43,7 +43,7 @@
         }
       ]
     },
-        "
+        "generate_parameters": {
           "title": "generateParameters",
           "type": "object",
           "description": "Additional parametrization of the text generation algorithm"
packages/tasks/src/tasks/text2text-generation/spec/output.json
CHANGED
@@ -3,15 +3,12 @@
   "$schema": "http://json-schema.org/draft-06/schema#",
   "description": "Outputs of inference for the Text2text Generation task",
   "title": "Text2TextGenerationOutput",
-  "type": "
-  "
-  "
-
-  "
-
-
-
-  },
-  "required": ["generatedText"]
-  }
+  "type": "object",
+  "properties": {
+    "generated_text": {
+      "type": "string",
+      "description": "The generated text."
+    }
+  },
+  "required": ["generatedText"]
 }
packages/tasks/src/tasks/token-classification/inference.ts
CHANGED
@@ -26,11 +26,11 @@ export interface TokenClassificationParameters {
     /**
      * The strategy used to fuse tokens based on model predictions
      */
-
+    aggregation_strategy?: TokenClassificationAggregationStrategy;
     /**
      * A list of labels to ignore
      */
-
+    ignore_labels?: string[];
     /**
      * The number of overlapping tokens between chunks when splitting the input text.
      */
@@ -64,7 +64,7 @@ export interface TokenClassificationOutputElement {
     /**
      * The predicted label for that group of tokens
      */
-
+    entity_group?: string;
     label: unknown;
     /**
      * The associated score / probability
packages/tasks/src/tasks/token-classification/spec/input.json
CHANGED
@@ -20,7 +20,7 @@
       "description": "Additional inference parameters for Token Classification",
       "type": "object",
      "properties": {
-        "
+        "ignore_labels": {
           "type": "array",
           "items": {
             "type": "string"
@@ -31,7 +31,7 @@
           "type": "integer",
           "description": "The number of overlapping tokens between chunks when splitting the input text."
         },
-        "
+        "aggregation_strategy": {
           "title": "TokenClassificationAggregationStrategy",
           "type": "string",
           "description": "The strategy used to fuse tokens based on model predictions",
packages/tasks/src/tasks/token-classification/spec/output.json
CHANGED
@@ -7,7 +7,7 @@
     "items": {
       "type": "object",
       "properties": {
-        "
+        "entity_group": {
           "type": "string",
           "description": "The predicted label for that group of tokens"
         },
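For orientation (illustrative only), a sketch of the renamed token-classification parameters; "simple" is assumed here to be one of the TokenClassificationAggregationStrategy values enumerated in the spec:

import type { TokenClassificationParameters } from "./inference";

const params: TokenClassificationParameters = {
    aggregation_strategy: "simple", // assumed enum member
    ignore_labels: ["O"],           // skip the "outside" tag
};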
packages/tasks/src/tasks/translation/inference.ts
CHANGED
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
     /**
      * Whether to clean up the potential extra spaces in the text output.
      */
-
+    clean_up_tokenization_spaces?: boolean;
     /**
      * Additional parametrization of the text generation algorithm
      */
-
+    generate_parameters?: { [key: string]: unknown };
     /**
      * The truncation strategy to use
      */
@@ -50,9 +50,10 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
      * Outputs of inference for the Text2text Generation task
      */
 export interface TranslationOutput {
+    generatedText: unknown;
     /**
      * The generated text.
      */
-
+    generated_text?: string;
     [property: string]: unknown;
 }
packages/tasks/src/tasks/video-classification/inference.ts
CHANGED
@@ -26,16 +26,16 @@ export interface VideoClassificationParameters {
     /**
      * The sampling rate used to select frames from the video.
      */
-
-
+    frame_sampling_rate?: number;
+    function_to_apply?: ClassificationOutputTransform;
     /**
      * The number of sampled frames to consider for classification.
      */
-
+    num_frames?: number;
     /**
      * When specified, limits the output to the top K most probable classes.
      */
-
+    top_k?: number;
     [property: string]: unknown;
 }
 /**
packages/tasks/src/tasks/video-classification/spec/input.json
CHANGED
@@ -19,19 +19,19 @@
       "description": "Additional inference parameters for Video Classification",
       "type": "object",
       "properties": {
-        "
+        "function_to_apply": {
           "title": "TextClassificationOutputTransform",
           "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
         },
-        "
+        "num_frames": {
           "type": "integer",
           "description": "The number of sampled frames to consider for classification."
         },
-        "
+        "frame_sampling_rate": {
           "type": "integer",
           "description": "The sampling rate used to select frames from the video."
         },
-        "
+        "top_k": {
           "type": "integer",
           "description": "When specified, limits the output to the top K most probable classes."
         }
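An illustrative sketch (not from the commit) of the renamed video-classification parameters; "softmax" is assumed to be a valid ClassificationOutputTransform value, and the numbers are hypothetical:

import type { VideoClassificationParameters } from "./inference";

const params: VideoClassificationParameters = {
    num_frames: 16,               // frames sampled from the clip
    frame_sampling_rate: 4,       // keep every 4th frame
    function_to_apply: "softmax", // assumed enum member
    top_k: 5,
};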
packages/tasks/src/tasks/visual-question-answering/inference.ts
CHANGED
@@ -42,7 +42,7 @@ export interface VisualQuestionAnsweringParameters {
      * return less than topk answers if there are not enough options available within the
      * context.
      */
-
+    top_k?: number;
     [property: string]: unknown;
 }
 export type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
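Finally, a minimal sketch (illustrative only) of the single renamed visual-question-answering parameter:

import type { VisualQuestionAnsweringParameters } from "./inference";

const params: VisualQuestionAnsweringParameters = {
    top_k: 3, // return the 3 most likely answers
};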