---
vocab_size: 50257
n_positions: 1024
n_embd: 768
n_layer: 12
n_head: 12
n_inner: null
activation_function: gelu_new
resid_pdrop: 0.1
embd_pdrop: 0.1
attn_pdrop: 0.1
layer_norm_epsilon: 0.00001
initializer_range: 0.02
summary_type: cls_index
summary_use_proj: true
summary_activation: null
summary_first_dropout: 0.1
summary_proj_to_labels: true
scale_attn_weights: true
use_cache: true
scale_attn_by_inverse_layer_idx: false
reorder_and_upcast_attn: false
bos_token_id: 50256
eos_token_id: 50256
return_dict: true
output_hidden_states: false
output_attentions: false
torchscript: false
torch_dtype: float32
use_bfloat16: false
tf_legacy_loss: false
pruned_heads: {}
tie_word_embeddings: true
chunk_size_feed_forward: 0
is_encoder_decoder: false
is_decoder: false
cross_attention_hidden_size: null
add_cross_attention: false
tie_encoder_decoder: false
max_length: 20
min_length: 0
do_sample: false
early_stopping: false
num_beams: 1
num_beam_groups: 1
diversity_penalty: 0
temperature: 1
top_k: 50
top_p: 1
typical_p: 1
repetition_penalty: 1
length_penalty: 1
no_repeat_ngram_size: 0
encoder_no_repeat_ngram_size: 0
bad_words_ids: null
num_return_sequences: 1
output_scores: false
return_dict_in_generate: false
forced_bos_token_id: null
forced_eos_token_id: null
remove_invalid_values: false
exponential_decay_length_penalty: null
suppress_tokens: null
begin_suppress_tokens: null
architectures:
- GPT2LMHeadModel
finetuning_task: null
id2label:
  '0': LABEL_0
  '1': LABEL_1
label2id:
  LABEL_0: 0
  LABEL_1: 1
tokenizer_class: null
prefix: null
pad_token_id: null
sep_token_id: null
decoder_start_token_id: null
task_specific_params:
  conversational:
    early_stopping: true
    length_penalty: 1.5
    max_length: 1024
    min_length: 20
    no_repeat_ngram_size: 3
    num_beams: 5
    temperature: 0.7
    top_k: 50
    top_p: 0.9
problem_type: null
_name_or_path: ayjays132/Phillnet2
transformers_version: 4.37.2
config:
  activation_function: gelu_new
  attn_pdrop: 0.1
  embd_pdrop: 0.1
  initializer_range: 0.02
  layer_norm_epsilon: 0.00001
  n_ctx: 2048
  n_embd: 2048
  n_head: 16
  n_layer: 24
  n_positions: 2048
  n_special: 0
  resid_pdrop: 0.1
  summary_activation: null
  summary_first_dropout: 0.1
  summary_proj_to_labels: true
  summary_type: cls_index
  summary_use_proj: true
  task_specific_params:
    conversational:
      max_length: 1024
      min_length: 20
      length_penalty: 1.5
      num_beams: 5
      early_stopping: true
      no_repeat_ngram_size: 3
      temperature: 0.7
      top_k: 50
      top_p: 0.9
license: apache-2.0
datasets:
- LDJnr/Capybara
language:
- en
tags:
- conversational
metrics:
- perplexity
- accuracy
- bleu
pipeline_tag: conversational
library_name: transformers
---

# Introducing CustomGPT2Conversational

Dive into the world of limitless conversations with our fine-tuned model!
## Unique Features

- Seamlessly generates engaging and contextually relevant responses.
- Fine-tuned for conversational tasks, ensuring natural and fluid interactions.
- Employs cutting-edge technologies to deliver state-of-the-art performance.
## Technical Specifications

- Architecture: GPT2LMHeadModel
- Parameters: 24 layers, a hidden size of 2048, and 16 attention heads (see the configuration sketch below)
- Dropout: attention dropout of 0.1, embedding dropout of 0.1, and residual dropout of 0.1
- Activation function: GELU (Gaussian Error Linear Unit), specifically the `gelu_new` variant
- Metrics: evaluated on perplexity, accuracy, and BLEU
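For concreteness, here is a minimal sketch, assuming the `transformers` library, of how the specifications above map onto a `GPT2Config`; the values mirror the nested `config` block in the front matter, and the variable names are illustrative.

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Mirror the nested `config` block from the front matter above.
config = GPT2Config(
    n_layer=24,                      # transformer blocks
    n_embd=2048,                     # hidden size
    n_head=16,                       # attention heads
    n_positions=2048,                # maximum sequence length
    activation_function="gelu_new",  # GPT-2's tanh-approximated GELU
    resid_pdrop=0.1,                 # residual dropout
    embd_pdrop=0.1,                  # embedding dropout
    attn_pdrop=0.1,                  # attention dropout
    layer_norm_epsilon=1e-5,
    initializer_range=0.02,
)

# Building from the config gives a randomly initialized model with this
# architecture; the fine-tuned weights live on the Hub (see below).
model = GPT2LMHeadModel(config)
print(f"~{model.num_parameters() / 1e6:.0f}M parameters")
```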
## Configuration Details

- Task-specific parameters: early stopping, a length penalty, beam search, and related settings that enhance conversational quality (applied in the generation sketch below).
- Dynamic adaptation: the `task_specific_params` block lets generation settings vary by task without retraining the model.
- Language support: primarily English, with potential for multilingual applications.
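The task-specific parameters can be passed straight to `generate`. Below is a hedged sketch of inference using the `conversational` settings from the front matter; the repo id comes from `_name_or_path`, and the prompt is an illustrative assumption.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "ayjays132/Phillnet2"  # `_name_or_path` from the front matter
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

prompt = "Hello! How are you today?"  # illustrative prompt (assumption)
inputs = tokenizer(prompt, return_tensors="pt")

# These settings mirror the `conversational` task_specific_params.
# Note: temperature/top_k/top_p only take effect when do_sample=True;
# under pure beam search they are ignored.
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_length=1024,
        min_length=20,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=3,
        length_penalty=1.5,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
    )

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```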
- Pipeline tag: conversational
- Dataset: fine-tuned on the LDJnr/Capybara dataset (see the loading sketch below)
- Library and version: Transformers v4.37.2
- License: Apache-2.0
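A minimal sketch for pulling that dataset with the `datasets` library; the `train` split name is an assumption to verify against the dataset card.

```python
from datasets import load_dataset

# Pull the Capybara conversation dataset from the Hugging Face Hub.
# NOTE: the "train" split name is an assumption; check the dataset card.
capybara = load_dataset("LDJnr/Capybara", split="train")
print(capybara[0])  # inspect a single conversation record
```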
Get ready to experience the next level of conversational AI with CustomGPT2Conversational!