metadata
base_model: shibing624/text2vec-base-multilingual
datasets: []
language: []
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:64000
  - loss:DenoisingAutoEncoderLoss
widget:
  - source_sentence: >-
      च बच 𑀱चपच𑀟 पच पच 𑀙णच𑀪 𑀱च𑀳च 𑀠च𑀢 𑀳𑀫𑁦𑀞च𑀪न𑀣च पच 𑀞𑀱चलल𑁣
      पच𑀪𑀢𑀫𑀢𑀟 ल𑁣𑀞चत𑀢𑀟 𑀱च𑀳च𑀟 𑀳च𑀠न 𑀟च𑀳च𑀪च 𑀱च𑀟𑀣च च ल𑁦खच𑀟प𑁦
      लच𑀳 धलच𑀟च𑀳 𑀣𑀢ख𑀢𑀳𑀢𑀨𑀟
    sentences:
      - ' च𑀟 पच𑀟पच𑀟त𑁦 पच च 𑀠चप𑀳चण𑀢𑀟 गणच𑀪 पच𑀞च𑀪च𑀪 𑁦च𑀳पल𑁦𑀢ब𑀫 च 𑀤चढ𑁦𑀟 𑀲𑀢𑀣𑀣च ब𑀱च𑀟𑀢 𑀟च 𑀳𑀫𑁦𑀞च𑀪च𑀪 𑀭थथर च𑀠𑀠च पच 𑀳𑀫च 𑀞चण𑁦 च 𑀤चढ𑁦𑀟𑀯'
      - ' च 𑀪च𑀟च𑀪 ठ𑀖 बच 𑀱चपच𑀟 𑀘च𑀟च𑀢𑀪न च 𑀳𑀫𑁦𑀞च𑀪च𑀪 ठ𑀧ठ𑀰 पच 𑀞च𑀲च पच𑀪𑀢𑀫𑀢 पच 𑀤च𑀠च 𑀠चपच𑀳𑀫𑀢णच𑀪 𑀙णच𑀪 𑀱च𑀳च 𑀠च𑀢 𑀞च𑀪च𑀟त𑀢𑀟 𑀳𑀫𑁦𑀞च𑀪न𑀣च पच त𑀢 𑀞𑀱चलल𑁣 च पच𑀪𑀢𑀫𑀢𑀟 ढच𑀪तच ल𑁣𑀞चत𑀢𑀟 𑀣च पच त𑀢 च 𑀱च𑀳च𑀟 𑀣च 𑀳न𑀞च 𑀳च𑀠न 𑀟च𑀳च𑀪च 𑀱च𑀟𑀣च 𑀞न𑀟ब𑀢णच𑀪 पच ढच𑀪त𑁦ल𑁣𑀟च 𑀬ष𑀧 च 𑀞च𑀟 ल𑁦खच𑀟प𑁦 लच𑀳 धलच𑀟च𑀳 च 𑀱च𑀳च𑀟 ध𑀪𑀢𑀠𑁦𑀪च 𑀣𑀢ख𑀢𑀳𑀢𑀨𑀟 𑀯'
      - '  च 𑀞च𑀞च𑀪 𑀱च𑀳च𑀟𑀳च 𑀟च ढ𑀢णन च त𑀢𑀞𑀢𑀟 ठ𑀧ठ𑀭𑀦 णच 𑀤च𑀠च 𑀣च𑀟 𑀱च𑀳च च 𑀞नल𑁣ढ 𑀣𑀢𑀟 𑀞न𑀠च णच पच𑀢𑀠च𑀞च 𑀠न𑀳न 𑀳न𑀟 त𑀢 𑁦पपच𑀟 ठ𑀧ठ𑀭𑀦 𑀞न𑀠च च𑀟 𑀟च𑀣च 𑀳𑀫𑀢 ब𑀱च𑀟𑀢𑀟 बच𑀳च𑀪 𑀞च𑀞च𑀪 𑀱च𑀳च𑀯'
  - source_sentence: 𑀣च 𑀟च प𑀳𑁦𑀪𑁦𑀟 
    sentences:
      - >-
        ल𑀢𑀳𑀳च𑀲𑀢𑀟 ल𑀢𑀳𑀳च𑀲𑀢𑀟 𑀫चझझ𑀢𑀟 𑀫चझझ𑀢𑀟
        𑀠चललच𑀞चबचढचञचणचपच𑀞च𑀣𑀣न𑀟 𑀣च च𑀞च ण𑀢 𑀟𑀢णणच 𑀟च 𑀠न𑀳च𑀠𑀠च𑀟 
        𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀠चढन𑀞चत𑀢 𑀣𑁣𑀞च𑀪 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟 𑀠च𑀪च ब𑀢𑀣च
        𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀠च𑀢 ढ𑀢णच𑀟 𑀫च𑀪च𑀘𑀢 𑀣𑁣𑀞च𑀪 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟
        𑀢ल𑀢𑀠𑀢 𑀦 𑀣𑁣𑀞च𑀪 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟 प𑀳𑁣𑀫𑁣𑀟 𑀳𑁣𑀘𑁣𑀘𑀢 ब𑀢
        ढ𑀢लल 𑁣𑀲 𑀪𑀢ब𑀫प𑀳𑀦 𑀱च𑀟𑀣च च𑀞च 𑀲𑀢 𑀳च𑀟𑀢 𑀣च ब𑀢 ढ𑀢लल
        𑀣𑁣𑀞च𑀪 𑀙णच𑀟 लन𑀱च𑀣𑀢𑀦 पच𑀪𑁣𑀟 झन𑀟ब𑀢ण𑁣ण𑀢𑀟 𑀙णच𑀟 लन𑀱च𑀣𑀢
        𑀟च च𑀪𑁦𑀱चत𑀢𑀟 च𑀠𑀢𑀪𑀞च 𑀟𑁦 𑀳न𑀞च प𑀳च𑀪च 𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟
        लचढन𑀪च𑀪𑁦𑀦 झन𑀟ब𑀢णच𑀪 लचढन𑀪च𑀪𑁦 पच च𑀠𑀢𑀪𑀞च पच ढनबच 𑀣𑁣𑀞च𑀪
        𑀫चझझ𑀢𑀟 𑀠न𑀫चलल𑀢 𑀞𑁣 च𑀘च𑀟𑀣च ठ𑀭 𑀞न𑀣𑀢𑀪𑀢𑀟 𑀫च𑀞𑀞𑀢 𑀟च
        𑀠च𑀫चल𑀢तत𑀢𑀦 𑀠च𑀪नढनपच𑀟 ढच𑀟 𑀣च𑀪𑀢णच 𑀣च 𑀠च𑀳न 𑀲च𑀳च𑀫च
        𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀠च𑀢 ढच 𑀣च बन𑀣न𑀠𑀠च𑀱च𑀦 𑀣𑁣𑀟 𑀠च𑀳न ढच 𑀣च
        चबच𑀘𑀢 𑀞न𑀣𑀢𑀪𑀢𑀟 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟 𑀘च𑀠𑀢𑀙च𑀟 𑀣𑁣𑀞च 𑀣𑁣𑀞च𑀪
        𑀫चझझ𑀢𑀟 𑀠च𑀳न 𑀤च𑁥𑁦 पच तचल𑀢𑀲𑁣𑀪𑀟𑀢च𑀦 𑀣च𑀢𑀣च𑀢पच𑀱च 𑀣च
        𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀤च𑁥𑁦 𑀣𑁣𑀞च𑀪 𑀠न𑀳नलन𑀟त𑀢 पच 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟
        𑀠चपच च 𑀠च𑀳चललचत𑀢𑀟 𑀟𑁦𑀱 𑀘𑁦𑀪𑀳𑁦ण 𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀫चझझ𑀢𑀟
        त𑀢𑀟 𑀫च𑀟त𑀢 𑀣च 𑀪च𑀳𑀫च𑀱च 𑀞न𑀣𑀢𑀪𑀢𑀟 𑀫चझझ𑀢𑀟 𑀠च𑀳न 𑀞चप𑀢𑀟
        𑀞𑀢𑀪𑁦𑀣𑀢प𑀦 𑀱च𑀟𑀣च 𑀞𑁦 झन𑀟𑀳𑀫𑁦 च त𑀢𑀞𑀢𑀟 𑀣𑁣𑀞च𑀪 तच𑀪𑀣
        𑀟च 𑀳𑀫𑁦𑀞च𑀪चपच ठ𑀧𑀧थ 𑀣𑁣𑀞𑁣𑀞𑀢𑀟 𑀫चझझ𑀢𑀟 𑀠च𑀳न त𑀢 बचढच𑀟
        त𑀢𑀟 𑀣न𑀪𑀢 𑀣च 𑀘𑀢𑀠च𑀙𑀢 𑀝𑀣𑁣𑀞च𑀪 𑀫चझझ𑀢𑀟 𑀠च𑀳न त𑀢 बचढच 𑀣च
        𑀘𑀢𑀠च𑀙𑀢 𑀮𑀣नढच 𑀱च𑀳न चढनढन𑀱च𑀟  झ𑀢𑀪च𑀪 𑀫चझझ𑀢𑀟
        ढ𑀢𑀪𑀢पच𑀟𑀢णच 𑀫चझझ𑀢𑀟 𑀣च ढच 𑀤च च 𑀢णच पचनण𑁦𑀱च ढच 𑀣𑁣𑀞च𑀪
        𑀞च𑀪𑁦 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟 𑀣च𑀟 च𑀣च𑀠 पच 𑀣न𑀟𑀢णच 𑀞च𑀙𑀢𑀣𑁣𑀘𑀢𑀟
        𑀞च𑀪𑁦 𑀫च𑀞𑀞𑀢𑀟 ढ𑀢ल𑀙च𑀣च𑀠च 𑀟च 𑀣न𑀟𑀢णच 𑀫च𑀞𑀞𑁣𑀞𑀢𑀟
        𑀫चल𑀢पपच प𑀳च𑀪𑀢𑀟 𑀣𑁣𑀞च 𑀣𑁣𑀞च𑀪 𑀫च𑀞𑀞𑀢 𑀟च
        ढ𑀢णन𑀠च𑀟च𑀤च𑀪पच𑀯
      - >-
        𑀭𑀰𑀮𑀦 𑀣च लच𑀠ढच𑀪 पचबनललच च त𑀢𑀞𑀢𑀟 𑀪न𑀞न𑀟𑀢𑀟 ढठ 𑀟च
        प𑀳𑁦𑀪𑁦𑀟 झच𑀳च च 𑀧𑀕𑀖र𑀯
      - >-
        द द द य𑀞न𑀠च 𑀞न ढचनपच 𑀱च चललच𑀫 𑀞न𑀠च 𑀞च 𑀣च 𑀞न 𑀫चञच𑀱च𑀟𑀢 𑀣च
        𑀳𑀫𑀢द 𑀞न𑀠च बच 𑀠च𑀫च𑀢𑀲च 𑀞न ण𑀢 𑀞णचनपचपच𑀱च𑀦 𑀞न𑀠च बच 𑀠चभ
        चढ𑁣पच 𑀤न𑀠न𑀟पच 𑀣च 𑀠च𑀪चणन 𑀣च 𑀠चपचलचनपच 𑀣च 𑀠चझ𑀱चढत𑀢
        𑀠चभचढनत𑀢𑀟 𑀞न𑀳च𑀟पच𑀦 𑀣च 𑀠चझ𑀱चढत𑀢 𑀠च𑀟𑀢𑀳च𑀟त𑀢𑀦 𑀣च
        चढ𑁣𑀞𑀢च ब𑁦𑀲𑁦 𑀣च 𑀩च𑀟 𑀫च𑀟णच 𑀣च चढ𑀢𑀟 𑀣च 𑀫च𑀟𑀟न𑀱च𑀟𑀞न 𑀟च
        𑀣च𑀠च 𑀳न𑀞च 𑀠चललच𑀞च𑀯
  - source_sentence: पच𑀞च 𑀪च𑀱च𑀪  𑀳च𑀪𑀞𑀢
    sentences:
      - ' णच पच𑀞च 𑀪च𑀱च𑀪 बच𑀟𑀢 च 𑀠चप𑀳चण𑀢𑀟𑀦 𑀳च𑀪𑀞𑀢 𑀣च𑀠ढच च त𑀢𑀞𑀢𑀟 𑀳𑀫𑀢𑀪𑀢𑀟𑀯'
      - थ𑀰𑀭𑀗𑀖ठ𑀰ठ𑁢थ𑁢𑀭      𑀦             𑀭𑀧𑀯
      - >-
        पचलचढ𑀢𑀘च𑀟 𑀣च 𑀪𑁦𑀣𑀢ण𑁣 च𑀟 बचढचपच𑀪 𑀣च पचलचढ𑀢𑀘𑀢𑀟 बच ब𑀫च𑀟च च
        𑀭थ𑁢𑀖 𑀞न𑀠च णच𑀟च 𑀞च𑀪𑀞च𑀳𑀫𑀢𑀟 𑀢𑀞𑁣𑀟 𑀘𑀢𑀫च𑀯
  - source_sentence: >-
      𑀱चप𑀳च 𑀣च 𑀟च𑀣च 𑀳न𑀦 𑀣𑀪𑀢ख𑁦 𑀞𑁣𑀱च𑀟𑁦 णच 𑀣च च ल𑁣𑀞चत𑀢𑀟
      𑀫च𑀳च𑀳𑀫𑁦𑀟𑀳च𑀯
    sentences:
      - ' ण𑀢𑀟 च𑀢𑀞𑀢 पच𑀪𑁦 𑀣च 𑀳चन𑀪च𑀟 𑀞न𑀟ब𑀢ण𑁣ण𑀢𑀟 णच𑀫न𑀣च𑀱च 𑀣𑁣𑀟 𑀞च𑀪च 𑀱चणच𑀪 𑀣च 𑀞च𑀟 णच𑀟 च𑀣च𑀠 𑀣च 𑀞च𑀪𑀲च𑀲च 𑀫𑀢𑀠𑀠च च प𑀳च𑀞च𑀟𑀢𑀟 चल𑀙न𑀠𑀠𑁣𑀠𑀢𑀟 णच𑀫न𑀣च𑀱च च 𑀠च𑀣च𑀣𑀢𑀟 𑀱च𑀣च𑀟𑀣च च𑀞च 𑀲चपचपपच𑀞च 𑀣च 𑀱च𑀣च𑀟𑀣च च𑀞𑁦 𑀤चलन𑀟पच च 𑀣न𑀟𑀢णच𑀯'
      - ' 𑀫𑁣पन𑀟च𑀟 च𑀟च 𑀱चप𑀳च 𑀳न पच 𑀫च𑀟णच𑀪 𑀟च𑀙न𑀪च𑀪 𑀣चन𑀞च𑀪 𑀫𑁣प𑁣 𑀣च𑀢𑀣च𑀢 𑀣च णच𑀣𑀣च च𑀞च 𑀟च𑀣च 𑀳न𑀦 पच𑀪𑁦 𑀣च  ब𑁦𑀟𑁦खच 𑀣𑀪𑀢ख𑁦 𑀣च 𑀞𑁦 पचढढचपच𑀪 𑀣च त𑁦𑀱च 𑀞𑁣𑀱च𑀟𑁦 𑀲𑀢𑀪च𑀠 णच त𑀢 बचढच 𑀣च 𑀞च𑀳च𑀟त𑁦𑀱च च त𑀢𑀞𑀢𑀟 बच𑀘𑁦𑀪𑁦𑀟 ल𑁣𑀞चत𑀢𑀟 𑀫च𑀳च𑀳𑀫𑁦𑀟𑀳च𑀯'
      - ' 𑀪च𑀠न𑀞च च त𑀢𑀞𑀢𑀟 झच𑀟च𑀟च𑀟 𑀪चढ𑁣 𑀣𑁣𑀟 𑀞च𑀪𑁦 प𑀳𑀢𑀪𑁦षप𑀳𑀢𑀪𑁦 𑀣चबच 𑀲𑁦𑀳च 𑀠चबच𑀟𑀢𑀟 𑀫𑁦𑀪ढ𑀢त𑀢𑀣𑁦𑀳 𑀣च लचलचपच 𑀪𑁣𑀣𑁦𑀟प𑀯'
  - source_sentence: 𑀠चपच𑀞𑀢𑀟 पच𑀟च ढनबच 𑀱च 𑀟च𑀠ध𑁣ल लच𑀣𑀢𑁦𑀳 𑀲त पच 𑀱च𑀳च𑀯
    sentences:
      - ' च 𑀠चपच𑀞𑀢𑀟 𑀞नल𑁣ढ पच𑀟च ढनबच 𑀱च 𑀞𑁣𑀠च𑀳 𑀟च𑀠ध𑁣ल लच𑀣𑀢𑁦𑀳 𑀲त पच 𑀟च𑀠𑀢ढ𑀢च 𑀱च𑀳च𑀯'
      - ' णच𑀟𑀞न𑀟च𑀟 बन𑀟𑀣न𑀠च𑀪 𑀘𑀣𑁦ण𑀣𑁦𑀫 ब𑀢𑀣च 𑀟𑁦 बच ब𑀢𑀣च𑀘𑁦 𑀠च𑀳न णच𑀱च 𑀟च झच𑀪𑀟𑀢 𑀟च 𑀭𑁢 𑀣च 𑀟च 𑀭𑀬 𑀟च चल𑁦धध𑀢𑀟 ढ𑁣न𑀪ब𑁦𑁣𑀢𑀳𑀢𑁦𑀦 𑀱चञच𑀟𑀣च 𑀞𑁦 ञचन𑀞𑁦 𑀣च 𑀤च𑀟𑁦𑀟 𑀣नप𑀳𑁦𑀯'
      - 𑀪च𑀪𑀪चढच 𑀳𑀫𑁦𑀞च𑀪न𑀟 णच 𑀞च𑀳च𑀟त𑁦 ठर𑀯

SentenceTransformer based on shibing624/text2vec-base-multilingual

This is a sentence-transformers model finetuned from shibing624/text2vec-base-multilingual. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: shibing624/text2vec-base-multilingual
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 384 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

  • Documentation: Sentence Transformers Documentation (https://www.sbert.net)
  • Repository: Sentence Transformers on GitHub (https://github.com/UKPLab/sentence-transformers)
  • Hugging Face: Sentence Transformers on Hugging Face (https://huggingface.co/models?library=sentence-transformers)

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
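
The same stack can be assembled by hand with the sentence_transformers.models helpers. The sketch below is for illustration only (loading the published checkpoint as shown under Usage already restores it) and assumes the standard models.Transformer / models.Pooling API.

from sentence_transformers import SentenceTransformer, models

# Minimal sketch: a BERT encoder followed by CLS-token pooling into
# 384-dimensional sentence embeddings, mirroring the printed config above.
word_embedding_model = models.Transformer(
    "shibing624/text2vec-base-multilingual",
    max_seq_length=512,      # 'max_seq_length': 512
    do_lower_case=False,     # 'do_lower_case': False
)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 384
    pooling_mode="cls",      # 'pooling_mode_cls_token': True
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

print(model.get_sentence_embedding_dimension())  # 384
print(model.max_seq_length)                      # 512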

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("T-Blue/tsdae_pro_text2vec")
# Run inference
sentences = [
    '𑀠चपच𑀞𑀢𑀟 पच𑀟च ढनबच 𑀱च 𑀟च𑀠ध𑁣ल लच𑀣𑀢𑁦𑀳 𑀲त पच 𑀱च𑀳च𑀯',
    ' च 𑀠चपच𑀞𑀢𑀟 𑀞नल𑁣ढ पच𑀟च ढनबच 𑀱च 𑀞𑁣𑀠च𑀳 𑀟च𑀠ध𑁣ल लच𑀣𑀢𑁦𑀳 𑀲त पच 𑀟च𑀠𑀢ढ𑀢च 𑀱च𑀳च𑀯',
    ' णच𑀟𑀞न𑀟च𑀟 बन𑀟𑀣न𑀠च𑀪 𑀘𑀣𑁦ण𑀣𑁦𑀫 ब𑀢𑀣च 𑀟𑁦 बच ब𑀢𑀣च𑀘𑁦 𑀠च𑀳न णच𑀱च 𑀟च झच𑀪𑀟𑀢 𑀟च 𑀭𑁢 𑀣च 𑀟च 𑀭𑀬 𑀟च चल𑁦धध𑀢𑀟 ढ𑁣न𑀪ब𑁦𑁣𑀢𑀳𑀢𑁦𑀦 𑀱चञच𑀟𑀣च 𑀞𑁦 ञचन𑀞𑁦 𑀣च 𑀤च𑀟𑁦𑀟 𑀣नप𑀳𑁦𑀯',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
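
Beyond pairwise similarity, the same encode and similarity calls can rank a small corpus against a query for semantic search. The sketch below reuses the model loaded above; the corpus and query strings are placeholders, not data from this model's training set.

import torch

corpus = ["first candidate sentence", "second candidate sentence", "third candidate sentence"]
query = "a query sentence"

# Embed the corpus once, then score the query against every corpus entry.
corpus_embeddings = model.encode(corpus)
query_embedding = model.encode([query])

scores = model.similarity(query_embedding, corpus_embeddings)[0]  # shape: [len(corpus)]
ranking = torch.argsort(scores, descending=True)
for idx in ranking:
    print(corpus[int(idx)], float(scores[idx]))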

Training Details

Training Dataset

Unnamed Dataset

  • Size: 64,000 training samples
  • Columns: sentence_0 and sentence_1
  • Approximate statistics based on the first 1000 samples:
    • sentence_0: string; min: 3 tokens, mean: 37.42 tokens, max: 342 tokens
    • sentence_1: string; min: 4 tokens, mean: 89.84 tokens, max: 512 tokens
  • Samples (each row shows sentence_0 followed by sentence_1, concatenated):
    • 𑀠नपच𑀟𑁦𑀫च𑀢𑀫न𑀱च𑀟 𑀭थथ𑀬𑀯 𑀞𑀢𑀣𑀢𑀣𑀣𑀢बच𑀪 𑀳च𑀟च𑀙च𑀞नल𑁣ढझच𑀳च𑀳𑀫𑁦𑀟 𑀣न𑀟𑀢णच𑀠च𑀟च𑀤च𑀪पच 𑀪चणचणणन𑀟 𑀠नपच𑀟𑁦𑀫च𑀢𑀫न𑀱च𑀟 𑀭थथ𑀬𑀯
    • च 𑀱च𑀘𑁦𑀟 𑀘च𑀠भ𑀢णणच 𑀠च𑀢 𑀞𑀢𑀳𑀫𑀢𑀟 पच बच𑀳𑀞𑀢णच𑀯 𑀘च𑀠भ𑀢णणच𑀪 च ल𑁣𑀞चत𑀢𑀟 𑀢पच त𑁦 पच ढ𑀢णन 𑀣च पच ण𑀢 𑀟च𑀠𑀢𑀘𑀢𑀟 𑀞𑁣𑀞च𑀪𑀢 𑀱च𑀘𑁦𑀟 𑀳च𑀠च𑀪 𑀣च 𑀘च𑀠भ𑀢णणच 𑀠च𑀢 𑀞𑀢𑀳𑀫𑀢𑀟 𑀞च𑀳च पच बच𑀳𑀞𑀢णच𑀯
    • 𑀯 𑀯
  • Loss: DenoisingAutoEncoderLoss
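
A hedged sketch of how a TSDAE run over such (sentence_0, sentence_1) pairs could be set up with the Sentence Transformers v3 trainer is shown below. It uses the batch size (16) and epoch count (3) listed under Training Hyperparameters; the data loading, placeholder sentences, output directory, and tied decoder are assumptions for illustration, not the author's exact script.

from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import DenoisingAutoEncoderLoss

# Hypothetical pairs: sentence_0 is the damaged (word-deleted) input,
# sentence_1 is the original sentence the decoder must reconstruct.
train_dataset = Dataset.from_dict({
    "sentence_0": ["damaged sentence"],
    "sentence_1": ["the full original sentence"],
})

model = SentenceTransformer("shibing624/text2vec-base-multilingual")

# TSDAE: the encoder compresses the damaged sentence into one embedding and a
# decoder (here tied to the encoder weights) reconstructs the original from it.
loss = DenoisingAutoEncoderLoss(model, tie_encoder_decoder=True)

args = SentenceTransformerTrainingArguments(
    output_dir="tsdae_pro_text2vec",   # assumed output directory
    num_train_epochs=3,
    per_device_train_batch_size=16,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()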

Training Hyperparameters

Non-Default Hyperparameters

  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • multi_dataset_batch_sampler: round_robin

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: no
  • prediction_loss_only: True
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1
  • num_train_epochs: 3
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.0
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: round_robin

Training Logs

Epoch Step Training Loss
0.125 500 4.0592
0.25 1000 1.6454
0.375 1500 1.4774
0.5 2000 1.4131
0.625 2500 1.3766
0.75 3000 1.3488
0.875 3500 1.3252
1.0 4000 1.3087
1.125 4500 1.2931
1.25 5000 1.2772
1.375 5500 1.2655
1.5 6000 1.2535
1.625 6500 1.243
1.75 7000 1.2305
1.875 7500 1.223
2.0 8000 1.216
2.125 8500 1.2073
2.25 9000 1.1999
2.375 9500 1.1935
2.5 10000 1.1872
2.625 10500 1.1804
2.75 11000 1.17
2.875 11500 1.167
3.0 12000 1.1623

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.1
  • Transformers: 4.42.4
  • PyTorch: 2.3.1+cu121
  • Accelerate: 0.33.0
  • Datasets: 2.18.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

DenoisingAutoEncoderLoss

@inproceedings{wang-2021-TSDAE,
    title = "TSDAE: Using Transformer-based Sequential Denoising Auto-Encoderfor Unsupervised Sentence Embedding Learning",
    author = "Wang, Kexin and Reimers, Nils and Gurevych, Iryna", 
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    pages = "671--688",
    url = "https://arxiv.org/abs/2104.06979",
}