diff --git "a/awesome-japanese-nlp-resources-search.json" "b/awesome-japanese-nlp-resources-search.json" --- "a/awesome-japanese-nlp-resources-search.json" +++ "b/awesome-japanese-nlp-resources-search.json" @@ -10,7 +10,8 @@ "latest_commit": "2022-12-14 01:58:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022", @@ -37,7 +38,8 @@ "latest_commit": "2024-02-22 00:57:00", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "xlm-roberta-ner-japanese (Japanese caption : 日本語の固有表現抽出のモデル)", @@ -50,7 +52,8 @@ "latest_commit": "2024-07-12 00:01:56", "languages": [], "model_or_dataset": "model", - "model_size": 0.277 + "model_size": 0.277, + "model_architectures": "RobertaForTokenClassification" }, { "description": "Neologism dictionary based on the language resources on the Web for mecab-ipadic", @@ -106,7 +109,8 @@ "latest_commit": "2024-02-22 00:57:37", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "Engineer Vocabulary List in Japanese/English", @@ -160,7 +164,8 @@ "latest_commit": "2023-08-25 02:53:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeModel" }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXの音声合成エンジン", @@ -264,7 +269,8 @@ "latest_commit": "2021-09-23 15:45:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "Japanese morphological analysis engine written in pure Python", @@ -309,7 +315,8 @@ "latest_commit": "2022-06-22 15:34:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VisionEncoderDecoderModel" }, { "description": "JavaScript implementation of Japanese morphological analyzer", @@ -485,7 +492,8 @@ "latest_commit": "2024-02-22 00:57:58", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "オープンソースの住所正規化ライブラリ。", @@ -513,7 +521,8 @@ "latest_commit": "2021-09-23 15:45:24", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "Automatically exported from code.google.com/p/mozc-morse", @@ -542,7 +551,8 @@ "latest_commit": "2024-06-28 15:18:20", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Code for producing Japanese pretrained models provided by rinna Co., Ltd.", @@ -569,7 +579,8 @@ "latest_commit": "2024-04-17 11:39:38", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertModel" }, { "description": "mecab-python. mecab-python. 
you can find original version here:http://taku910.github.io/mecab/", @@ -656,7 +667,8 @@ "latest_commit": "2023-08-29 03:46:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "BERT base Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", @@ -669,7 +681,8 @@ "latest_commit": "2023-05-19 00:31:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "This is a Japanese sentence-BERT model.", @@ -682,7 +695,8 @@ "latest_commit": "2024-04-17 11:40:03", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForMaskedLM" }, { "description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.", @@ -709,7 +723,8 @@ "latest_commit": "2024-06-29 08:56:26", "languages": [], "model_or_dataset": "model", - "model_size": 6.83 + "model_size": 6.83, + "model_architectures": "LlamaForCausalLM" }, { "description": "Sentence BERT base Japanese model This repository contains a Sentence BERT base model for Japanese.", @@ -722,7 +737,8 @@ "latest_commit": "2021-08-08 15:47:42", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "テキストを壱百満天原サロメお嬢様風の口調に変換します", @@ -808,7 +824,8 @@ "latest_commit": "2024-05-19 06:07:36", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "This is a Japanese sentence-LUKE model.", @@ -821,7 +838,8 @@ "latest_commit": "2023-03-20 01:32:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeModel" }, { "description": "NDLOCRのアプリケーション", @@ -1122,7 +1140,8 @@ "latest_commit": "2023-05-12 14:13:03", "languages": [], "model_or_dataset": "model", - "model_size": 0.137 + "model_size": 0.137, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Japanese text normalizer for mecab-neologd", @@ -1165,7 +1184,8 @@ "latest_commit": "2024-06-10 03:57:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.107 + "model_size": 0.107, + "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "azooKey: A Japanese Keyboard iOS Application Fully Developed in Swift", @@ -1221,7 +1241,8 @@ "latest_commit": "2024-07-20 08:42:32", "languages": [], "model_or_dataset": "model", - "model_size": 0.197 + "model_size": 0.197, + "model_architectures": "CLIPModel" }, { "description": "A tool for dividing the Japanese full name into a family name and a given name.", @@ -1248,7 +1269,8 @@ "latest_commit": "2023-08-15 17:45:04", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MarianMTModel" }, { "description": "JMMLU Japanese Massive Multitask Language Understanding Benchmark JMMLU is a four-choice question set consisting of Japanese-translated questions of a portion of MMLU (Paper, Github) (Translated questions) and questions based on unique Japanese cultural context (Japanese questions).", @@ -1261,7 +1283,8 @@ "latest_commit": "2024-02-27 05:22:30", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This repository is for 
building Windows 64-bit MeCab binary and improving MeCab Python binding.", @@ -1309,7 +1332,8 @@ "latest_commit": "2023-08-15 17:40:58", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MarianMTModel" }, { "description": "Using Vim as an input method for X11 apps", @@ -1378,7 +1402,8 @@ "latest_commit": "2023-10-01 08:58:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Vaporetto: Very Accelerated POintwise pREdicTion based TOkenizer", @@ -1419,7 +1444,8 @@ "latest_commit": "2024-06-26 02:56:23", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "日本語OCR", @@ -1518,7 +1544,8 @@ "latest_commit": "2024-05-21 11:23:51", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "BERT base Japanese (unidic-lite with whole word masking, jawiki-20200831)", @@ -1531,7 +1558,8 @@ "latest_commit": "2021-09-23 15:45:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "LaBSE Model description Language-agnostic BERT Sentence Encoder (LaBSE) is a BERT-based model trained for sentence embedding for 109 languages.", @@ -1544,7 +1572,8 @@ "latest_commit": "2023-10-18 23:23:16", "languages": [], "model_or_dataset": "model", - "model_size": 0.47100000000000003 + "model_size": 0.47100000000000003, + "model_architectures": "BertModel" }, { "description": "Python wrapper for OpenJTalk", @@ -1600,7 +1629,8 @@ "latest_commit": "2022-10-21 10:57:40", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "llm-book/bert-base-japanese-v3-ner-wikipedia-dataset 「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 ", @@ -1613,7 +1643,8 @@ "latest_commit": "2023-07-25 13:32:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "Java library for identifying Japanese characters from images", @@ -1725,7 +1756,8 @@ "latest_commit": "2024-07-06 15:02:39", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "国会議案データベース:衆議院", @@ -1807,7 +1839,8 @@ "latest_commit": "2023-12-01 09:50:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DistilBertForMaskedLM" }, { "description": "JapaneseEmbeddingEval", @@ -1849,7 +1882,8 @@ "latest_commit": "2024-01-25 08:05:12", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "このドキュメントの日本語版はまだ作成中です。", @@ -1862,7 +1896,8 @@ "latest_commit": "2024-01-27 15:30:00", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "HF_ColBERT" }, { "description": "Japanese negative positive classification.日本語文書のネガポジを判定。", @@ -1918,7 +1953,8 @@ "latest_commit": "2022-07-31 08:20:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5Model" }, { "description": "Phishing URL 
dataset from JPCERT/CC", @@ -1945,7 +1981,8 @@ "latest_commit": "2024-07-27 05:59:10", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "WRIME: 主観と客観の感情分析データセット", @@ -2038,7 +2075,8 @@ "latest_commit": "2024-05-12 06:06:51", "languages": [], "model_or_dataset": "model", - "model_size": 13.4 + "model_size": 13.4, + "model_architectures": null }, { "description": "A set of metrics for feature selection from text data", @@ -2122,7 +2160,8 @@ "latest_commit": "2024-01-20 14:45:14", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "luke-japanese-large luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", @@ -2135,7 +2174,8 @@ "latest_commit": "2022-11-09 11:18:56", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForMaskedLM" }, { "description": "Wikipediaを用いた日本語の固有表現抽出データセット", @@ -2178,7 +2218,8 @@ "latest_commit": "2024-07-19 03:08:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Node.js module for converting Japanese Hiragana and Katakana script to, and from, Romaji using Hepburn romanisation", @@ -2205,7 +2246,8 @@ "latest_commit": "2023-05-19 00:39:44", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "Kanji usage frequency data collected from various sources", @@ -2233,7 +2275,8 @@ "latest_commit": "2024-07-06 15:18:14", "languages": [], "model_or_dataset": "model", - "model_size": 6.83 + "model_size": 6.83, + "model_architectures": "LlamaForCausalLM" }, { "description": "deep-learning-with-pytorchの日本語版repositoryです。", @@ -2277,7 +2320,8 @@ "latest_commit": "2024-07-01 06:24:48", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow-MX-8x7b-NVE-v0.1 Our Swallow-MX-8x7b-NVE-v0.1 model has undergone continuous pre-training from the Mixtral-8x7B-Instruct-v0.1, primarily with the addition of Japanese language data.", @@ -2290,7 +2334,8 @@ "latest_commit": "2024-05-03 18:51:12", "languages": [], "model_or_dataset": "model", - "model_size": 46.7 + "model_size": 46.7, + "model_architectures": "MixtralForCausalLM" }, { "description": "CyberAgentLM2-7B (CALM2-7B)", @@ -2303,7 +2348,8 @@ "latest_commit": "2023-11-02 05:46:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings", @@ -2330,7 +2376,8 @@ "latest_commit": "2023-11-16 14:27:48", "languages": [], "model_or_dataset": "model", - "model_size": 6.85 + "model_size": 6.85, + "model_architectures": null }, { "description": "lists of text corpus and more (mainly Japanese)", @@ -2399,7 +2446,8 @@ "latest_commit": "2023-06-09 23:08:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "japanese-roberta-base This repository 
provides a base-sized Japanese RoBERTa model.", @@ -2412,7 +2460,8 @@ "latest_commit": "2024-07-20 07:44:40", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "RobertaForMaskedLM" }, { "description": "複数の前処理を構成して管理するテキスト前処理ツール", @@ -2453,7 +2502,8 @@ "latest_commit": "2024-07-20 07:55:19", "languages": [], "model_or_dataset": "model", - "model_size": 3.76 + "model_size": 3.76, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "自動生成のマルチターンデータセット オープンなデータソースから、Calm3-22bを使ってQ&Aを自動生成したものです。 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 データソース はじめの質問(q1)を、種々のデータソースから収集しました。その後のやりとりはすべて、Calmが生成しました。質問文については、元データのライセンスに準拠します。 oasst2-33k-ja apache 2.0 databricks-dolly-15k-ja cc-by-sa-3.0 minnade CC0 cyberagent/chatbot-arena-ja-calm2-7b-chat-experimental cc-by-4.0", @@ -2466,7 +2516,8 @@ "latest_commit": "2024-07-17 10:03:02", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "A fast LSTM Language Model for large vocabulary language like Japanese and Chinese", @@ -2521,7 +2572,8 @@ "latest_commit": "2021-12-05 21:12:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraModel" }, { "description": "Model Card for Japanese character-level DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -2534,7 +2586,8 @@ "latest_commit": "2023-09-15 03:48:28", "languages": [], "model_or_dataset": "model", - "model_size": 0.33 + "model_size": 0.33, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Yet Another Japanese Dependency Structure Analyzer", @@ -2580,7 +2633,8 @@ "latest_commit": "2023-04-07 17:27:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "Llama-3-ELYZA-JP-8B-GGUF Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", @@ -2593,7 +2647,8 @@ "latest_commit": "2024-06-26 02:56:52", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension (2022, Skelter Labs)", @@ -2634,7 +2689,8 @@ "latest_commit": "2024-07-20 07:50:47", "languages": [], "model_or_dataset": "model", - "model_size": 0.361 + "model_size": 0.361, + "model_architectures": "GPT2LMHeadModel" }, { "description": "BERT large Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", @@ -2647,7 +2703,8 @@ "latest_commit": "2023-05-19 00:47:40", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "Llama-3.1-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3.1-8B-Instructのggufフォーマット変換版です。 ", @@ -2660,7 +2717,8 @@ "latest_commit": "2024-07-24 21:04:40", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "OpenCALM-7B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -2673,7 +2731,8 @@ "latest_commit": "2023-05-18 01:12:08", "languages": [], "model_or_dataset": "model", - 
"model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "bert-large-japanese-upos Model Description", @@ -2686,7 +2745,8 @@ "latest_commit": "2022-09-18 19:43:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.", @@ -2747,7 +2807,8 @@ "latest_commit": "2024-05-14 06:42:38", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", @@ -2760,7 +2821,8 @@ "latest_commit": "2023-12-27 01:41:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF Original Model elyza/ELYZA-japanese-Llama-2-13b-fast-instruct Run with LlamaEdge LlamaEdge version: v0.2.8 and above Prompt template Prompt type: llama-2-chat Prompt string <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_msg_1 }}", @@ -2773,7 +2835,8 @@ "latest_commit": "2024-03-20 07:21:25", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3.1-70B-Japanese-Instruct-2407 Model Description This is a Japanese continually pre-trained model based on meta-llama/Meta-Llama-3.1-70B-Instruct.", @@ -2786,7 +2849,8 @@ "latest_commit": "2024-07-26 02:30:17", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "CJK computer science terms comparison / 中日韓電腦科學術語對照 / 日中韓のコンピュータ科学の用語対照 / 한·중·일 전산학 용어 대조", @@ -2840,7 +2904,8 @@ "latest_commit": "2024-07-01 06:24:32", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "reazonspeech-nemo-v2 reazonspeech-nemo-v2 is an automatic speech recognition model trained on ReazonSpeech v2.0 corpus.", @@ -2853,7 +2918,8 @@ "latest_commit": "2024-02-14 01:32:45", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -2866,7 +2932,8 @@ "latest_commit": "2023-08-29 03:45:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "RetrievaBERT Model The RetrievaBERT is the pre-trained Transformer Encoder using Megatron-LM.", @@ -2879,7 +2946,8 @@ "latest_commit": "2024-07-09 05:36:08", "languages": [], "model_or_dataset": "model", - "model_size": 1.3 + "model_size": 1.3, + "model_architectures": "RetrievaBertForMaskedLM" }, { "description": "PLaMo-13B Model Description PLaMo-13B is a LLaMA-based 13B model pre-trained on English and Japanese open datasets, developed by Preferred Networks, Inc. 
", @@ -2892,7 +2960,8 @@ "latest_commit": "2023-10-10 15:24:54", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "PlamoForCausalLM" }, { "description": "Asynchronous japanese morphological analyser using MeCab.", @@ -2935,7 +3004,8 @@ "latest_commit": "2023-09-11 01:10:36", "languages": [], "model_or_dataset": "model", - "model_size": 6.05 + "model_size": 6.05, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -2948,7 +3018,8 @@ "latest_commit": "2024-06-29 08:56:29", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "Kotoba-Whisper-v1.1 Kotoba-Whisper-v1.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v1.0, with additional postprocessing stacks integrated as pipeline.", @@ -2961,7 +3032,8 @@ "latest_commit": "2024-05-08 15:34:40", "languages": [], "model_or_dataset": "model", - "model_size": 0.756 + "model_size": 0.756, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Llama 3 Youko 8B (rinna/llama-3-youko-8b)", @@ -2974,7 +3046,8 @@ "latest_commit": "2024-07-25 05:14:42", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -2987,7 +3060,8 @@ "latest_commit": "2024-06-29 08:56:17", "languages": [], "model_or_dataset": "model", - "model_size": 6.83 + "model_size": 6.83, + "model_architectures": "LlamaForCausalLM" }, { "description": "Model Card for Model ID 実験モデルです /", @@ -3000,7 +3074,8 @@ "latest_commit": "2024-04-28 07:46:32", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese SimCSE (BERT-base)", @@ -3013,7 +3088,8 @@ "latest_commit": "2023-01-27 06:44:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "Model Card for Japanese DeBERTa V3 base Model description This is a Japanese DeBERTa V3 base model pre-trained on LLM-jp corpus v1.0.", @@ -3026,7 +3102,8 @@ "latest_commit": "2024-04-28 06:08:55", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "OpenCALM-Small Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -3039,7 +3116,8 @@ "latest_commit": "2023-05-18 01:10:33", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Llama-3-Swallow-70B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-70B-Instruct-v0.1のggufフォーマット変換版です。 ", @@ -3052,7 +3130,8 @@ "latest_commit": "2024-07-07 05:04:16", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "Japanese Company Lexicon (JCLdic)", @@ -3093,7 +3172,8 @@ "latest_commit": "2023-08-29 03:47:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + 
"model_architectures": "LlamaForCausalLM" }, { "description": "haqishen-Llama-3-8B-Japanese-Instruct-gguf haqishenさんが公開しているLlama-3-8B-Japanese-Instructのggufフォーマット変換版です。 ", @@ -3106,7 +3186,8 @@ "latest_commit": "2024-04-23 14:54:23", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Llama3 Swallow", @@ -3119,7 +3200,8 @@ "latest_commit": "2024-07-19 08:08:59", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-gpt2-xsmall", @@ -3132,7 +3214,8 @@ "latest_commit": "2024-07-20 07:48:11", "languages": [], "model_or_dataset": "model", - "model_size": 0.0437 + "model_size": 0.0437, + "model_architectures": "GPT2LMHeadModel" }, { "description": "rinna/japanese-hubert-base Overview This is a Japanese HuBERT Base model trained by rinna Co.", @@ -3145,7 +3228,8 @@ "latest_commit": "2024-07-20 08:55:38", "languages": [], "model_or_dataset": "model", - "model_size": 0.09440000000000001 + "model_size": 0.09440000000000001, + "model_architectures": "HubertModel" }, { "description": "Japanese Morphological Analyzer written in Rust", @@ -3170,7 +3254,8 @@ "latest_commit": "2023-05-15 12:58:08", "languages": [], "model_or_dataset": "model", - "model_size": 0.41400000000000003 + "model_size": 0.41400000000000003, + "model_architectures": "LukeForSequenceClassification" }, { "description": "50k English-Japanese Parallel Corpus for Machine Translation Benchmark.", @@ -3195,7 +3280,8 @@ "latest_commit": "2024-06-26 02:56:39", "languages": [], "model_or_dataset": "model", - "model_size": 1.98 + "model_size": 1.98, + "model_architectures": "LlamaForCausalLM" }, { "description": "bert-base-japanese-upos Model Description", @@ -3208,7 +3294,8 @@ "latest_commit": "2022-09-18 19:43:26", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "Gemma-Mling: Multilingual Gemma Update @ 2024.04.15: First release of Gemma-Mling 7B model Original Gemma Model Page:", @@ -3221,7 +3308,8 @@ "latest_commit": "2024-04-18 14:28:20", "languages": [], "model_or_dataset": "model", - "model_size": 8.54 + "model_size": 8.54, + "model_architectures": "GemmaForCausalLM" }, { "description": "Llama-3.1-8B-EZO-1.1-it-gguf HODACHIさんが公開しているLlama-3.1-8B-EZO-1.1-itのggufフォーマット変換版です。 ", @@ -3234,7 +3322,8 @@ "latest_commit": "2024-07-31 12:47:45", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", @@ -3247,7 +3336,8 @@ "latest_commit": "2024-08-02 08:13:06", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "HODACHI-EZO-Common-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Common-9B-gemma-2-itのggufフォーマット変換版です。 ", @@ -3260,7 +3350,8 @@ "latest_commit": "2024-07-15 16:20:33", "languages": [], "model_or_dataset": "model", - "model_size": 9.24 + "model_size": 9.24, + "model_architectures": null }, { "description": "Japanese-StableLM-Base-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-7b is a 7B-parameter decoder-only language model based 
on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", @@ -3273,7 +3364,8 @@ "latest_commit": "2023-12-19 06:43:01", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "BERTによる日本語固有表現抽出のモデル BertForTokenClassificationを用いて、日本語の文から固有表現を抽出します。 ", @@ -3286,7 +3378,8 @@ "latest_commit": "2022-09-26 12:13:44", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.", @@ -3344,7 +3437,8 @@ "latest_commit": "2023-03-23 07:31:19", "languages": [], "model_or_dataset": "model", - "model_size": 0.0101 + "model_size": 0.0101, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "FINGU-AI/FinguAI-Chat-v1 Overview The FINGU-AI/FinguAI-Chat-v1 model offers a specialized curriculum tailored to English, Korean, and Japanese speakers interested in finance, investment, and legal frameworks.", @@ -3357,7 +3451,8 @@ "latest_commit": "2024-03-22 09:36:44", "languages": [], "model_or_dataset": "model", - "model_size": 0.464 + "model_size": 0.464, + "model_architectures": "Qwen2ForCausalLM" }, { "description": "stockmark/stockmark-13b Stockmark-13b is a 13 billion parameter LLM pretrained from scratch based on Japanese corpus of about 220B tokens.", @@ -3370,7 +3465,8 @@ "latest_commit": "2024-05-17 06:15:56", "languages": [], "model_or_dataset": "model", - "model_size": 13.2 + "model_size": 13.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-gpt-neox-3.6b-instruction-ppo Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", @@ -3383,7 +3479,8 @@ "latest_commit": "2024-07-20 07:58:49", "languages": [], "model_or_dataset": "model", - "model_size": 3.76 + "model_size": 3.76, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Converted from clu-ling/whisper-large-v2-japanese-5k-steps using CTranslate2.", @@ -3396,7 +3493,8 @@ "latest_commit": "2023-07-03 18:42:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "gpt-neox-japanese-2.7b", @@ -3409,7 +3507,8 @@ "latest_commit": "2023-04-10 05:12:30", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXJapaneseForCausalLM" }, { "description": "Kotoba-Whisper Kotoba-Whisper is a collection of distilled Whisper models for Japanese ASR, developed through the collaboration bewteen Asahi Ushio and Kotoba Technologies.", @@ -3422,7 +3521,8 @@ "latest_commit": "2024-05-08 12:40:53", "languages": [], "model_or_dataset": "model", - "model_size": 0.756 + "model_size": 0.756, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "bert-base-japanese-v3-marc_ja 「大規模言語モデル入門」の第5章で紹介している(感情分析)のモデルです。 ", @@ -3435,7 +3535,8 @@ "latest_commit": "2023-07-24 06:49:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "Pytorch implementation and pre-trained Japanese model for CANINE, the efficient character-level transformer.", @@ -3476,7 +3577,8 @@ 
"latest_commit": "2023-11-16 14:27:23", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "sbert-jsnli-luke-japanese-base-lite This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.", @@ -3489,7 +3591,8 @@ "latest_commit": "2023-01-10 12:36:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeModel" }, { "description": "ElanMT ElanMT-BT-ja-en is a Japanese to English translation model developed by ELAN MITSUA Project / Abstract Engine.", @@ -3502,7 +3605,8 @@ "latest_commit": "2024-05-20 01:56:57", "languages": [], "model_or_dataset": "model", - "model_size": 0.0606 + "model_size": 0.0606, + "model_architectures": "MarianMTModel" }, { "description": "bert-base-japanese-v3-jsts 「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。 ", @@ -3515,7 +3619,8 @@ "latest_commit": "2023-07-29 11:27:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "BERT base Japanese (character tokenization, whole word masking enabled)", @@ -3528,7 +3633,8 @@ "latest_commit": "2024-02-22 00:58:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "Stockmark-13b-instruct Stockmark-13b-instruct is an instruction-tuned version of Stockmark-13b, a 13 billion parameter Japanese LLM.", @@ -3541,7 +3647,8 @@ "latest_commit": "2023-11-08 17:02:17", "languages": [], "model_or_dataset": "model", - "model_size": 13.2 + "model_size": 13.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/youri-7b Overview We conduct continual pre-training of llama2-7b on 40B tokens from a mixture of Japanese and English datasets.", @@ -3554,7 +3661,8 @@ "latest_commit": "2024-07-22 08:01:22", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-large-lm-3.6b", @@ -3567,7 +3675,8 @@ "latest_commit": "2023-08-17 01:06:17", "languages": [], "model_or_dataset": "model", - "model_size": 3.68 + "model_size": 3.68, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "japanese-gpt-1b This repository provides a 1.3B-parameter Japanese GPT model.", @@ -3580,7 +3689,8 @@ "latest_commit": "2024-07-20 07:52:31", "languages": [], "model_or_dataset": "model", - "model_size": 1.33 + "model_size": 1.33, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Phi-3-mini-128k-instruct-gguf microsoftさんが公開しているPhi-3-mini-128k-instructのggufフォーマット変換版です。 ", @@ -3593,7 +3703,8 @@ "latest_commit": "2024-04-24 14:24:09", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -3606,7 +3717,8 @@ "latest_commit": "2024-06-29 09:00:15", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", @@ -3619,7 +3731,8 @@ 
"latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 1.64 + "model_size": 1.64, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -3632,7 +3745,8 @@ "latest_commit": "2024-06-29 08:56:31", "languages": [], "model_or_dataset": "model", - "model_size": 69.2 + "model_size": 69.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -3645,7 +3759,8 @@ "latest_commit": "2024-06-29 08:56:21", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3-Swallow-8B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-8B-Instruct-v0.1のggufフォーマット変換版です。 ", @@ -3658,7 +3773,8 @@ "latest_commit": "2024-07-02 10:43:55", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Llama-3-ELYZA-JP-8B-gguf elyzaさんが公開しているLlama-3-ELYZA-JP-8Bのggufフォーマット変換版です。 ", @@ -3671,7 +3787,8 @@ "latest_commit": "2024-06-26 17:55:35", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "ELYZA-tasks-100: 日本語instructionモデル評価データセット Data Description 本データセットはinstruction-tuningを行ったモデルの評価用データセットです。", @@ -3684,7 +3801,8 @@ "latest_commit": "2023-12-27 18:17:36", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "A Python Module for JUMAN++/KNP", @@ -3737,7 +3855,8 @@ "latest_commit": "2023-11-16 14:28:24", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -3750,7 +3869,8 @@ "latest_commit": "2024-01-24 05:54:38", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "wav2vec2-base-asr", @@ -3763,7 +3883,8 @@ "latest_commit": "2024-04-14 14:00:30", "languages": [], "model_or_dataset": "model", - "model_size": 0.0945 + "model_size": 0.0945, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Llama-3.1-70B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3.1-70B-Instructのggufフォーマット変換版です。 ", @@ -3776,7 +3897,8 @@ "latest_commit": "2024-07-24 21:04:27", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "clip-japanese-base This is a Japanese CLIP (Contrastive Language-Image Pre-training) model developed by LY Corporation.", @@ -3789,7 +3911,8 @@ "latest_commit": "2024-05-10 03:07:04", "languages": [], "model_or_dataset": "model", - "model_size": 0.197 + "model_size": 0.197, + "model_architectures": "CLYPModel" }, { "description": "Leia-Swallow-7B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", @@ -3802,7 +3925,8 @@ "latest_commit": "2024-04-17 10:29:56", "languages": [], "model_or_dataset": "model", - "model_size": 6.83 + "model_size": 6.83, + 
"model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Base-Alpha-7B \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-base-alpha-7b is a 7B-parameter decoder-only language model pre-trained on a diverse collection of Japanese and English datasets which focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", @@ -3815,7 +3939,8 @@ "latest_commit": "2023-08-22 09:36:29", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "JapaneseStableLMAlphaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -3828,7 +3953,8 @@ "latest_commit": "2024-07-06 15:18:11", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "Wav2Vec2-Large-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, JSUT, TEDxJP and some other data.", @@ -3841,7 +3967,8 @@ "latest_commit": "2023-02-17 13:07:47", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Leia-Swallow-13B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", @@ -3854,7 +3981,8 @@ "latest_commit": "2024-04-18 05:21:10", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "Chatbot Arena Conversationsの質問文から、aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2を使用して応答文を作成しました 質問文は、以下のモデルのPrompt部分を使用しました Chatbot Arena Conversations JA (calm2) 以下引用です。 ", @@ -3867,7 +3995,8 @@ "latest_commit": "2024-03-31 08:16:43", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Ninja-v1-NSFW-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFW-128kのggufフォーマット��換版です。 ", @@ -3880,7 +4009,8 @@ "latest_commit": "2024-05-04 13:25:47", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", @@ -3893,7 +4023,8 @@ "latest_commit": "2023-05-12 00:36:17", "languages": [], "model_or_dataset": "model", - "model_size": 0.279 + "model_size": 0.279, + "model_architectures": "LukeForTokenClassification" }, { "description": "Model Card for Japanese DeBERTa V2 tiny Model description", @@ -3906,7 +4037,8 @@ "latest_commit": "2023-03-23 16:13:46", "languages": [], "model_or_dataset": "model", - "model_size": 0.013900000000000001 + "model_size": 0.013900000000000001, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Japanese E5 Mixtral 7B Slerp GGUF GGUF conversion of oshizo/japanese-e5-mistral-7b_slerp Avaiable formats: Q2_K.gguf Q3_K.gguf Q4_K.gguf Q5_K.gguf", @@ -3919,7 +4051,8 @@ "latest_commit": "2024-06-14 16:12:17", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "hubert-base-asr", @@ -3932,7 +4065,8 @@ "latest_commit": 
"2024-04-14 13:20:43", "languages": [], "model_or_dataset": "model", - "model_size": 0.0945 + "model_size": 0.0945, + "model_architectures": "HubertForCTC" }, { "description": "This dataset was created by automatically translating \"databricks-dolly-15k\" into Japanese.", @@ -3945,7 +4079,8 @@ "latest_commit": "2024-04-01 17:26:37", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "OpenCALM-Large Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -3958,7 +4093,8 @@ "latest_commit": "2023-05-18 01:11:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "nlp-waseda/roberta-base-japanese Model description This is a Japanese RoBERTa base model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", @@ -3971,7 +4107,8 @@ "latest_commit": "2022-10-21 14:46:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "HODACHI-EZO-Humanities-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Humanities-9B-gemma-2-itのggufフォーマット変換版です。 ", @@ -3984,7 +4121,8 @@ "latest_commit": "2024-07-15 17:01:09", "languages": [], "model_or_dataset": "model", - "model_size": 9.24 + "model_size": 9.24, + "model_architectures": null }, { "description": "🎈 FlexDreamHK FlexDreamHKはリークされたNovelAIモデルの入っていない、あるいはそのリスクを可能な限り低くしたモデルを目指して作成しました。 ", @@ -3997,7 +4135,8 @@ "latest_commit": "2023-07-29 04:21:29", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "alpacaデータセットを日本語化したものです", @@ -4022,7 +4161,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "bilingual-gpt-neox-4b Overview This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", @@ -4035,7 +4175,8 @@ "latest_commit": "2024-07-20 08:02:07", "languages": [], "model_or_dataset": "model", - "model_size": 3.95 + "model_size": 3.95, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "OpenCALM-1B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -4048,7 +4189,8 @@ "latest_commit": "2023-05-18 01:11:30", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", @@ -4061,7 +4203,8 @@ "latest_commit": "2023-12-27 01:40:43", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Dataset Summary RealPersonaChat は,話者本人のペルソナと性格特性を含む,約14,000件の日本語雑談対話からなるコーパスです.", @@ -4074,7 +4217,8 @@ "latest_commit": "2024-03-13 10:26:42", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -4087,7 +4231,8 @@ "latest_commit": "2024-06-29 08:56:19", 
"languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "roberta-small-japanese-luw-upos Model Description", @@ -4100,7 +4245,8 @@ "latest_commit": "2022-09-18 19:45:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForTokenClassification" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -4113,7 +4259,8 @@ "latest_commit": "2023-12-27 01:41:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -4126,7 +4273,8 @@ "latest_commit": "2023-08-29 03:46:37", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "luke-japanese-large-lite luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", @@ -4139,7 +4287,8 @@ "latest_commit": "2022-11-09 11:19:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForMaskedLM" }, { "description": "japanese-gpt2-small This repository provides a small-sized Japanese GPT-2 model.", @@ -4152,7 +4301,8 @@ "latest_commit": "2024-07-20 07:49:31", "languages": [], "model_or_dataset": "model", - "model_size": 0.123 + "model_size": 0.123, + "model_architectures": "GPT2LMHeadModel" }, { "description": "GUIで動作する文書校正ツール GUI tool for textlinting.", @@ -4179,7 +4329,8 @@ "latest_commit": "2024-07-31 21:47:25", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -4192,7 +4343,8 @@ "latest_commit": "2024-06-29 08:56:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Instruct-Beta-70B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-70b is a 70B-parameter decoder-only language model based on japanese-stablelm-base-beta-70b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", @@ -4205,7 +4357,8 @@ "latest_commit": "2023-12-19 06:45:10", "languages": [], "model_or_dataset": "model", - "model_size": 69.0 + "model_size": 69.0, + "model_architectures": "LlamaForCausalLM" }, { "description": "hubert-large-asr", @@ -4218,7 +4371,8 @@ "latest_commit": "2024-04-14 13:21:01", "languages": [], "model_or_dataset": "model", - "model_size": 0.316 + "model_size": 0.316, + "model_architectures": "HubertForCTC" }, { "description": "uniTKU-hubert-japanese-asr", @@ -4231,7 +4385,8 @@ "latest_commit": "2024-04-22 18:37:33", "languages": [], "model_or_dataset": "model", - "model_size": 0.0945 + "model_size": 0.0945, + "model_architectures": "HubertForCTC" }, { "description": "Japanese-StableLM-Base-Beta-70B A cute 
robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-70b is a 70B-parameter decoder-only language model based on Llama-2-70b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", @@ -4244,7 +4399,8 @@ "latest_commit": "2023-12-19 06:44:53", "languages": [], "model_or_dataset": "model", - "model_size": 69.0 + "model_size": 69.0, + "model_architectures": "LlamaForCausalLM" }, { "description": "rinna-llama-3-youko-70b-instruct-gguf rinnaさんが公開しているllama-3-youko-70b-instructのggufフォーマット変換版です。 ", @@ -4257,7 +4413,8 @@ "latest_commit": "2024-07-31 14:35:52", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "OpenCALM-3B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -4270,7 +4427,8 @@ "latest_commit": "2023-05-18 01:11:50", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "stockmark/gpt-neox-japanese-1.4b This repository provides a GPT-NeoX based model with 1.4B parameters pre-trained on Japanese corpus of about 20B tokens.", @@ -4283,7 +4441,8 @@ "latest_commit": "2023-09-07 03:44:19", "languages": [], "model_or_dataset": "model", - "model_size": 1.44 + "model_size": 1.44, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Vecteus-v1-gguf Local-Novel-LLM-projectさんが公開しているVecteus-v1のggufフォーマット変換版です。 ", @@ -4296,7 +4455,8 @@ "latest_commit": "2024-05-01 18:37:01", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Ninja-v1-NSFW-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFWのggufフォーマット変換版です。 ", @@ -4309,7 +4469,8 @@ "latest_commit": "2024-05-04 13:26:52", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -4322,7 +4483,8 @@ "latest_commit": "2024-06-29 09:00:17", "languages": [], "model_or_dataset": "model", - "model_size": 69.2 + "model_size": 69.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", @@ -4335,7 +4497,8 @@ "latest_commit": "2024-02-07 19:49:25", "languages": [], "model_or_dataset": "model", - "model_size": 12.9 + "model_size": 12.9, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Corpus of Annual Reports in Japan", @@ -4362,7 +4525,8 @@ "latest_commit": "2023-12-27 11:39:18", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "GitHub リポジトリ ids-cv/wrime で公開されているデータセットを利用しています。 ", @@ -4375,7 +4539,8 @@ "latest_commit": "2023-10-06 00:56:38", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "hotchpotch/japanese-reranker-cross-encoder-large-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", @@ -4388,7 +4553,8 @@ "latest_commit": "2024-04-01 02:39:45", "languages": [], "model_or_dataset": "model", - "model_size": 0.337 + 
"model_size": 0.337, + "model_architectures": "BertForSequenceClassification" }, { "description": "ELYZA-japanese-Llama-2-13b-fast Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -4401,7 +4567,8 @@ "latest_commit": "2023-12-27 01:41:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/nekomata-7b Overview We conduct continual pre-training of qwen-7b on 30B tokens from a mixture of Japanese and English datasets.", @@ -4414,7 +4581,8 @@ "latest_commit": "2024-07-20 08:35:21", "languages": [], "model_or_dataset": "model", - "model_size": 7.72 + "model_size": 7.72, + "model_architectures": "QWenLMHeadModel" }, { "description": "Model card for model ID", @@ -4427,7 +4595,8 @@ "latest_commit": "2023-05-10 09:55:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "japanese-gpt-neox-3.6b-instruction-sft Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", @@ -4440,7 +4609,8 @@ "latest_commit": "2024-07-20 07:56:34", "languages": [], "model_or_dataset": "model", - "model_size": 3.76 + "model_size": 3.76, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Model Description llava-calm2-siglip is an experimental Vision Language Model that can answer questions in Japanese about images.", @@ -4453,7 +4623,8 @@ "latest_commit": "2024-06-12 19:40:39", "languages": [], "model_or_dataset": "model", - "model_size": 7.46 + "model_size": 7.46, + "model_architectures": "LlavaForConditionalGeneration" }, { "description": "Orion-14B 🌐English | 🇨", @@ -4466,7 +4637,8 @@ "latest_commit": "2024-04-11 10:48:51", "languages": [], "model_or_dataset": "model", - "model_size": 14.5 + "model_size": 14.5, + "model_architectures": "OrionForCausalLM" }, { "description": "llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset 「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 ", @@ -4479,7 +4651,8 @@ "latest_commit": "2023-07-25 15:04:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertWithCrfForTokenClassification" }, { "description": "このツールは、複数のデータセットを横断して日本語の大規模言語モデルを自動評価するものです.", @@ -4506,7 +4679,8 @@ "latest_commit": "2024-07-20 08:05:14", "languages": [], "model_or_dataset": "model", - "model_size": 3.95 + "model_size": 3.95, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "ODEX is an Open-Domain EXecution-based NL-to-Code generation data benchmark.", @@ -4519,7 +4693,8 @@ "latest_commit": "2023-02-10 18:01:34", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese-StableLM-Instruct-Alpha-7B-v2 \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-instruct-alpha-7b-v2 is a 7B parameter decoder-only language models pre-trained built on top of the Japanese-StableLM-Base-Alpha-7B model and further fine-tuned on various instruction-following datasets.", @@ -4532,7 +4707,8 @@ "latest_commit": "2023-10-06 08:40:24", "languages": [], "model_or_dataset": "model", - "model_size": 7.01 + "model_size": 7.01, + "model_architectures": "JapaneseStableLMAlphaForCausalLM" }, { "description": "HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf 
HODACHIさんが公開しているEZO-Common-T2-2B-gemma-2-itのggufフォーマット変換版です。 ", @@ -4545,7 +4721,8 @@ "latest_commit": "2024-08-01 18:38:31", "languages": [], "model_or_dataset": "model", - "model_size": 2.61 + "model_size": 2.61, + "model_architectures": null }, { "description": "stockmark/stockmark-100b Stockmark-100b is a 100 billion parameter LLM pretrained from scratch based on Japanese and English corpus of about 910 billion tokens.", @@ -4558,7 +4735,8 @@ "latest_commit": "2024-05-15 06:18:10", "languages": [], "model_or_dataset": "model", - "model_size": 96.2 + "model_size": 96.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "recruit-jp/japanese-clip-vit-b-32-roberta-base Overview Developed by: Recruit Co.", @@ -4571,7 +4749,8 @@ "latest_commit": "2024-01-22 07:41:59", "languages": [], "model_or_dataset": "model", - "model_size": 0.198 + "model_size": 0.198, + "model_architectures": "JapaneseCLIPModel" }, { "description": "BERT Base Japanese for Irony", @@ -4584,7 +4763,8 @@ "latest_commit": "2022-11-08 04:23:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "Model Card for Japanese BART base Model description This is a Japanese BART base model pre-trained on Japanese Wikipedia.", @@ -4597,7 +4777,8 @@ "latest_commit": "2023-05-12 11:03:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MBartForConditionalGeneration" }, { "description": "japanese-large-lm-1.7b This repository provides a 1.7B parameters Japanese language model, trained by LINE Corporation.", @@ -4610,7 +4791,8 @@ "latest_commit": "2023-08-17 01:06:37", "languages": [], "model_or_dataset": "model", - "model_size": 1.75 + "model_size": 1.75, + "model_architectures": "GPT2LMHeadModel" }, { "description": "PLaMo-13B-Instruct Model Description PLaMo-13B-Instruct is an instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", @@ -4623,7 +4805,8 @@ "latest_commit": "2024-01-25 07:46:09", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "PlamoForCausalLM" }, { "description": "japanese-large-lm-3.6b-instruction-sft", @@ -4636,7 +4819,8 @@ "latest_commit": "2023-08-24 10:08:28", "languages": [], "model_or_dataset": "model", - "model_size": 3.68 + "model_size": 3.68, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "OpenCALM-Medium Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", @@ -4649,7 +4833,8 @@ "latest_commit": "2023-05-18 01:10:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "stockmark-gpt-neox-japanese-1.4b-gguf stockmarkさんが公開しているgpt-neox-japanese-1.4bのggufフォーマット変換版です。 ", @@ -4662,7 +4847,8 @@ "latest_commit": "2023-09-08 22:00:37", "languages": [], "model_or_dataset": "model", - "model_size": 1.41 + "model_size": 1.41, + "model_architectures": null }, { "description": "hotchpotch/japanese-reranker-cross-encoder-base-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", @@ -4675,7 +4861,8 @@ "latest_commit": "2024-04-01 02:39:31", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": 
"japanese-stablelm-2-instruct-1_6b-gguf stabilityaiさんが公開しているjapanese-stablelm-2-instruct-1_6bのggufフォーマット変換版です。 ", @@ -4688,7 +4875,8 @@ "latest_commit": "2024-05-11 09:56:19", "languages": [], "model_or_dataset": "model", - "model_size": 1.64 + "model_size": 1.64, + "model_architectures": null }, { "description": "alabnii/jmedroberta-base-sentencepiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", @@ -4701,7 +4889,8 @@ "latest_commit": "2023-03-21 23:57:37", "languages": [], "model_or_dataset": "model", - "model_size": 0.109 + "model_size": 0.109, + "model_architectures": "BertForMaskedLM" }, { "description": "rinna/japanese-wav2vec2-base Overview This is a Japanese wav2vec 2.0 Base model trained by rinna Co.", @@ -4714,7 +4903,8 @@ "latest_commit": "2024-07-22 08:11:46", "languages": [], "model_or_dataset": "model", - "model_size": 0.095 + "model_size": 0.095, + "model_architectures": "Wav2Vec2ForPreTraining" }, { "description": "Mistral-Nemo-Instruct-2407-gguf mistralaiさんが公開しているMistral-Nemo-Instruct-2407のggufフォーマット変換版です。 ", @@ -4727,7 +4917,8 @@ "latest_commit": "2024-07-22 17:25:48", "languages": [], "model_or_dataset": "model", - "model_size": 12.2 + "model_size": 12.2, + "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-7b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fastのggufフォーマット変換版です。 ", @@ -4740,7 +4931,8 @@ "latest_commit": "2023-11-16 14:27:36", "languages": [], "model_or_dataset": "model", - "model_size": 6.85 + "model_size": 6.85, + "model_architectures": null }, { "description": "Japanese StableLM-3B-4E1T Instruct Model Description", @@ -4753,7 +4945,8 @@ "latest_commit": "2024-04-26 03:20:42", "languages": [], "model_or_dataset": "model", - "model_size": 2.8 + "model_size": 2.8, + "model_architectures": "StableLMEpochForCausalLM" }, { "description": "hubert-base-jtube This repo provides model weights for the hubert-base model trained on the JTubeSpeech corpus. 
", @@ -4766,7 +4959,8 @@ "latest_commit": "2024-02-05 11:49:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "HubertModel" }, { "description": "llm-lora-classification", @@ -4807,7 +5001,8 @@ "latest_commit": "2024-08-21 13:26:35", "languages": [], "model_or_dataset": "model", - "model_size": 22.5 + "model_size": 22.5, + "model_architectures": null }, { "description": "shisa-base-7b-v1 shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", @@ -4820,7 +5015,8 @@ "latest_commit": "2023-12-09 10:34:29", "languages": [], "model_or_dataset": "model", - "model_size": 7.96 + "model_size": 7.96, + "model_architectures": "MistralForCausalLM" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -4833,7 +5029,8 @@ "latest_commit": "2023-12-20 18:11:13", "languages": [], "model_or_dataset": "model", - "model_size": 7.96 + "model_size": 7.96, + "model_architectures": "MistralForCausalLM" }, { "description": "albert-base-japanese-v1 日本語事前学習済みALBERTモデルです How to use ファインチューニング このモデルはPreTrainedモデルです基本的には各種タスク用にファインチューニングして使用されることを想定しています Fill-Mask このモデルではTokenizerにSentencepieceを利用していますそのままでは[MASK]トークンのあとに余計なトークンが混入する問題があるので、利用する際には以下のようにする必要があります for PyTorch from transformers import ( AlbertForMaskedLM, AlbertTokenizerFast ) import torch tokenizer = AlbertTokenizerFast.from_pretrained(\"ken11/albert-base-japanese-v1\")", @@ -4846,7 +5043,8 @@ "latest_commit": "2021-12-22 03:04:30", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "AlbertForMaskedLM" }, { "description": "bilingual-gpt-neox-4b-8k Overview Notice: This model requires transformers>=4.31.0 to work properly.", @@ -4859,7 +5057,8 @@ "latest_commit": "2024-07-20 08:03:16", "languages": [], "model_or_dataset": "model", - "model_size": 3.95 + "model_size": 3.95, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "rinna-llama-3-youko-8b-gguf rinnaさんが公開しているllama-3-youko-8bのggufフォーマット変換版です。 ", @@ -4872,7 +5071,8 @@ "latest_commit": "2024-05-01 15:11:21", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "This is the filtered Japanese subset of XL-Sum followed by PaLM 2 filters 15-gram overlap * code: https://gist.github.com/mkshing/d6371cbfdd50d4f352cee247fd4dd86a number of examples train: 4215 (before: 7113) validation: 758 (before: 889) test: 766 (before: 889)", @@ -4885,7 +5085,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", @@ -4898,7 +5099,8 @@ "latest_commit": "2024-03-26 09:21:52", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "OrionForCausalLM" }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-fast-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese, and thier original post-training and speed up tuning.", @@ -4911,7 +5113,8 @@ "latest_commit": "2023-11-14 00:10:58", "languages": [], "model_or_dataset": "model", - "model_size": 1.24 + "model_size": 1.24, + "model_architectures": "LlamaForCausalLM" }, { "description": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf aixsatoshiさんが公開しているLlama-3-8b-Cosmopedia-japaneseのggufフォーマット変換版です。 ", @@ -4924,7 +5127,8 @@ 
"latest_commit": "2024-05-19 08:27:21", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "weblab-10b-instruction-sft-GPTQ Original model weblab-10b-instruction-sft which is a Japanese-centric multilingual GPT-NeoX model of 10 billion parameters created by matsuo-lab Takeshi Kojima.", @@ -4937,7 +5141,8 @@ "latest_commit": "2023-11-14 00:24:22", "languages": [], "model_or_dataset": "model", - "model_size": 1.86 + "model_size": 1.86, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Please feel free to open an issue or pull request. ", @@ -4950,7 +5155,8 @@ "latest_commit": "2023-10-09 06:44:28", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "In this study, we introduce a new dataset, WRIME, for emotional intensity estimation.", @@ -4963,7 +5169,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Jpのggufフォーマット変換版です。 ", @@ -4976,7 +5183,8 @@ "latest_commit": "2024-08-21 11:08:38", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": null }, { "description": "Parakeet TDT-CTC 0.6B (ja) | | parakeet-tdt_ctc-0.6b-ja is an ASR model that transcribes Japanese speech with Punctuations.", @@ -4989,7 +5197,8 @@ "latest_commit": "2024-05-17 17:20:17", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese-StableLM-Instruct-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-7b is a 7B-parameter decoder-only language model based on", @@ -5002,7 +5211,8 @@ "latest_commit": "2023-12-19 06:43:49", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "Polyglot-math-4x7b-24b Polyglot-4x7b is a Mixture of Experts approach to a multilingual model.", @@ -5015,7 +5225,8 @@ "latest_commit": "2024-03-04 19:25:12", "languages": [], "model_or_dataset": "model", - "model_size": 24.2 + "model_size": 24.2, + "model_architectures": "MixtralForCausalLM" }, { "description": "This is a Japanese translated version of HumanEval, an evaluation harness for the HumanEval problem solving dataset described in the paper \"Evaluating Large Language Models Trained on Code\".", @@ -5028,7 +5239,8 @@ "latest_commit": "2024-01-10 21:52:35", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "HODACHI様の Llama-3.1-8B-EZO-1.1-it をGGUF形式に変換したものです。 ", @@ -5041,7 +5253,8 @@ "latest_commit": "2024-07-31 18:13:59", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "japanese-gpt-neox-3.6b-instruction-sft-v2 Overview", @@ -5054,7 +5267,8 @@ "latest_commit": "2024-07-20 07:57:35", "languages": [], "model_or_dataset": "model", - "model_size": 3.76 + "model_size": 3.76, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "このモデルは何? 
", @@ -5067,7 +5281,8 @@ "latest_commit": "2023-11-08 07:37:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "QuantFactory/Llama3.1-ArrowSE-v0.4-GGUF This is quantized version of DataPilot/Llama3.1-ArrowSE-v0.4 created using llama.cpp Original Model Card 概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", @@ -5080,7 +5295,8 @@ "latest_commit": "2024-07-28 06:57:40", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", @@ -5093,7 +5309,8 @@ "latest_commit": "2024-07-02 04:31:26", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "hotchpotch/japanese-bge-reranker-v2-m3-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", @@ -5106,7 +5323,8 @@ "latest_commit": "2024-04-01 02:40:22", "languages": [], "model_or_dataset": "model", - "model_size": 0.5680000000000001 + "model_size": 0.5680000000000001, + "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "rinna/nekomata-14b Overview We conduct continual pre-training of qwen-14b on 66B tokens from a mixture of Japanese and English datasets.", @@ -5119,7 +5337,8 @@ "latest_commit": "2024-07-22 07:58:40", "languages": [], "model_or_dataset": "model", - "model_size": 14.2 + "model_size": 14.2, + "model_architectures": "QWenLMHeadModel" }, { "description": "ELYZA-japanese-Llama-2-7b-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7bのggufフォーマット変換版です。 ", @@ -5132,7 +5351,8 @@ "latest_commit": "2023-11-16 14:27:12", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "Japanese StableLM-3B-4E1T Base Model Description This is a 3B-parameter decoder-only language model with a focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", @@ -5145,7 +5365,8 @@ "latest_commit": "2024-04-26 03:20:34", "languages": [], "model_or_dataset": "model", - "model_size": 2.8 + "model_size": 2.8, + "model_architectures": "StableLMEpochForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -5158,7 +5379,8 @@ "latest_commit": "2024-07-06 15:18:24", "languages": [], "model_or_dataset": "model", - "model_size": 69.0 + "model_size": 69.0, + "model_architectures": "LlamaForCausalLM" }, { "description": "External dictionary importer for Yomichan.", @@ -5197,7 +5419,8 @@ "latest_commit": "2024-06-29 08:56:25", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", @@ -5210,7 +5433,8 @@ "latest_commit": "2024-05-07 09:00:06", "languages": [], "model_or_dataset": "model", - "model_size": 69.2 + "model_size": 69.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "gpt2-large-japanese This repository provides a large sized Japanese GPT-2 model.", @@ -5223,7 +5447,8 @@ "latest_commit": "2022-08-29 16:10:11", "languages": [], "model_or_dataset": "model", - "model_size": null + 
"model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf umiyukiさんが公開しているJapanese-Chat-Umievo-itr001-7bのggufフォーマット変換版です。 ", @@ -5236,7 +5461,8 @@ "latest_commit": "2024-04-27 10:52:17", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "PLaMo-13B-Instruct-NC Model Description PLaMo-13B-Instruct-NC is a noncommercial instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", @@ -5249,7 +5475,8 @@ "latest_commit": "2024-01-25 07:46:45", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "PlamoForCausalLM" }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Commonのggufフォーマット変換版です。 ", @@ -5262,7 +5489,8 @@ "latest_commit": "2024-08-21 11:42:56", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": null }, { "description": "rinna/japanese-cloob-vit-b-16", @@ -5275,7 +5503,8 @@ "latest_commit": "2024-07-22 08:09:24", "languages": [], "model_or_dataset": "model", - "model_size": 0.197 + "model_size": 0.197, + "model_architectures": "CLOOBModel" }, { "description": "Ninja-v1-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1のggufフォーマット変換版です。 ", @@ -5288,7 +5517,8 @@ "latest_commit": "2024-05-04 13:26:22", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-13b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fastのggufフォーマット変換版です。 ", @@ -5301,7 +5531,8 @@ "latest_commit": "2023-12-27 13:18:46", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "DataPilot-ArrowPro-7B-KUJIRA-gguf DataPilotさんが公開しているArrowPro-7B-KUJIRAのggufフォーマット変換版です。 ", @@ -5314,7 +5545,8 @@ "latest_commit": "2024-05-11 07:24:16", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Japanese-StableLM-Base-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", @@ -5327,7 +5559,8 @@ "latest_commit": "2023-12-19 06:45:58", "languages": [], "model_or_dataset": "model", - "model_size": 6.88 + "model_size": 6.88, + "model_architectures": "LlamaForCausalLM" }, { "description": "lightblue-suzume-llama-3-8B-multilingual-gguf lightblueさんが公開しているsuzume-llama-3-8B-multilingualのggufフォーマット変換版です。 ", @@ -5340,7 +5573,8 @@ "latest_commit": "2024-05-07 12:59:57", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Japanese-Starling-ChatV-7B-GGUF GGUF conversion of \"Japanese-Starling-ChatV-7B\" \"Japanese-Starling-ChatV-7B\" is a Japanese chat model built on top of \"chatntq-ja-7b-v1.0\", originally based on Mistral-7B-v0.1.", @@ -5353,7 +5587,8 @@ "latest_commit": "2024-04-20 01:23:10", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + 
"model_architectures": null }, { "description": "Llama3-ArrowSE-8B-v0.3-gguf DataPilotさんが公開しているLlama3-ArrowSE-8B-v0.3のggufフォーマット変換版です。 ", @@ -5366,7 +5601,8 @@ "latest_commit": "2024-07-07 09:30:16", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "japanese-gpt-neox-small This repository provides a small-sized Japanese GPT-NeoX model.", @@ -5379,7 +5615,8 @@ "latest_commit": "2024-07-20 07:53:40", "languages": [], "model_or_dataset": "model", - "model_size": 0.20400000000000001 + "model_size": 0.20400000000000001, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "lightblue-suzume-llama-3-8B-japanese-gguf lightblueさんが公開しているsuzume-llama-3-8B-japaneseのggufフォーマット変換版です。 ", @@ -5392,7 +5629,8 @@ "latest_commit": "2024-05-07 12:58:06", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "aya-23-35B-gguf CohereForAIさんが公開しているaya-23-35Bのggufフォーマット変換版です。 ", @@ -5405,7 +5643,8 @@ "latest_commit": "2024-05-27 00:47:56", "languages": [], "model_or_dataset": "model", - "model_size": 35.0 + "model_size": 35.0, + "model_architectures": null }, { "description": "bert-base-japanese-v3-unsup-simcse-jawiki 「大規模言語モデル入門」の第8章で紹介している教師なしSimCSEのモデルです。 ", @@ -5418,7 +5657,8 @@ "latest_commit": "2023-07-24 07:07:44", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "Meta-Llama-3-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3-8B-Instructのggufフォーマット変換版です。 ", @@ -5431,7 +5671,8 @@ "latest_commit": "2024-05-12 08:08:38", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", @@ -5444,7 +5685,8 @@ "latest_commit": "2024-05-07 09:00:17", "languages": [], "model_or_dataset": "model", - "model_size": 69.2 + "model_size": 69.2, + "model_architectures": "LlamaForCausalLM" }, { "description": "※llama.cpp Releases b3428(7/21)", @@ -5457,7 +5699,8 @@ "latest_commit": "2024-07-21 11:26:08", "languages": [], "model_or_dataset": "model", - "model_size": 9.24 + "model_size": 9.24, + "model_architectures": null }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", @@ -5470,7 +5713,8 @@ "latest_commit": "2023-03-08 01:44:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "japanese-large-lm-1.7b-instruction-sft This repository provides a 1.7B parameters Japanese language model, fine-tuned and trained by LINE Corporation.", @@ -5483,7 +5727,8 @@ "latest_commit": "2023-08-14 17:19:11", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -5496,7 +5741,8 @@ "latest_commit": "2023-11-02 18:22:05", "languages": [], "model_or_dataset": "model", - "model_size": 69.0 + "model_size": 69.0, + "model_architectures": null }, { "description": "Sarashina1-65B", @@ -5509,7 +5755,8 @@ "latest_commit": "2024-06-27 06:56:36", "languages": [], 
"model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "old? ", @@ -5522,7 +5769,8 @@ "latest_commit": "2024-07-23 07:24:33", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "rinna/japanese-hubert-large Overview This is a Japanese HuBERT Large model trained by rinna Co.", @@ -5535,7 +5783,8 @@ "latest_commit": "2024-07-22 08:12:21", "languages": [], "model_or_dataset": "model", - "model_size": 0.315 + "model_size": 0.315, + "model_architectures": "HubertModel" }, { "description": "bert-base-japanese-v3-jnli 「大規模言語モデル入門」の第5章で紹介している(自然言語推論)のモデルです。 ", @@ -5548,7 +5797,8 @@ "latest_commit": "2023-07-24 06:49:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット 近年、大規模言語モデル(LLM)の台頭により、一般的な日本語を用いた自然な検索クエリで質問するユースケースが増えています。", @@ -5561,7 +5811,8 @@ "latest_commit": "2024-04-01 02:34:34", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "[Llama-3.1-70B-EZO-1.1-it] Model Card モデル情報 / Model Information このモデルは、Meta AI の Llama 3.1 をベースに、日本語タスクでの性能を向上させるためにファインチューニングを行ったものです。", @@ -5574,7 +5825,8 @@ "latest_commit": "2024-08-23 10:52:31", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -5587,7 +5839,8 @@ "latest_commit": "2024-06-29 08:56:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "(简体中文|English|日本語) Introduction github repo : https://github.com/FunAudioLLM/SenseVoice SenseVoice is a speech foundation model with multiple speech understanding capabilities, including automatic speech recognition (ASR), spoken language identification (LID), speech emotion recognition (SER), and audio event detection (AED).", @@ -5600,7 +5853,8 @@ "latest_commit": "2024-07-31 05:47:48", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -5613,7 +5867,8 @@ "latest_commit": "2023-10-28 19:07:41", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Sarashina1-13B", @@ -5626,7 +5881,8 @@ "latest_commit": "2024-06-27 06:56:06", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Phi-3-medium-128k-instruct-gguf microsoftさんが公開しているPhi-3-medium-128k-instructのggufフォーマット変換版です。 ", @@ -5639,7 +5895,8 @@ "latest_commit": "2024-05-22 16:56:55", "languages": [], "model_or_dataset": "model", - "model_size": 14.0 + "model_size": 14.0, + "model_architectures": null }, { "description": "Sarashina1-7B This repository provides Japanese language models trained by SB Intuitions.", @@ -5652,7 +5909,8 @@ "latest_commit": "2024-06-27 06:55:38", "languages": [], "model_or_dataset": "model", - 
"model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-13b-instruct-v0.1のggufフォーマット変換版です。 ", @@ -5665,7 +5923,8 @@ "latest_commit": "2024-05-03 04:36:24", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "This repository is publicly accessible, but you have to accept the conditions to access its files and content.", @@ -5678,7 +5937,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "tokyotech-llm様の Llama-3-Swallow-8B-Instruct-v0.1 をGGUF形式に変換したものです。 ", @@ -5691,7 +5951,8 @@ "latest_commit": "2024-07-01 17:54:05", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "HODACHI様の EZO-Common-T2-2B-gemma-2-it をGGUF形式に変換したものです。 ", @@ -5704,7 +5965,8 @@ "latest_commit": "2024-08-01 13:42:20", "languages": [], "model_or_dataset": "model", - "model_size": 2.61 + "model_size": 2.61, + "model_architectures": null }, { "description": "hotchpotch/japanese-reranker-cross-encoder-small-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", @@ -5717,7 +5979,8 @@ "latest_commit": "2024-04-01 02:39:19", "languages": [], "model_or_dataset": "model", - "model_size": 0.11800000000000001 + "model_size": 0.11800000000000001, + "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -5730,7 +5993,8 @@ "latest_commit": "2023-11-03 12:54:55", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "pfnet-Llama3-Preferred-MedSwallow-70B-gguf pfnetさんが公開しているLlama3-Preferred-MedSwallow-70Bのggufフォーマット変換版です��� ", @@ -5743,7 +6007,8 @@ "latest_commit": "2024-07-19 09:14:38", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "nlp-waseda/roberta-large-japanese-seq512 Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100 with the maximum sequence length of 512.", @@ -5756,7 +6021,8 @@ "latest_commit": "2022-10-21 14:49:40", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "gemma-2-2b-it-gguf googleさんが公開しているgemma-2-2b-itのggufフォーマット変換版です。 ", @@ -5769,7 +6035,8 @@ "latest_commit": "2024-08-01 18:29:08", "languages": [], "model_or_dataset": "model", - "model_size": 2.61 + "model_size": 2.61, + "model_architectures": null }, { "description": "QuantFactory/Llama3-ArrowSE-8B-v0.3-GGUF This is quantized version of DataPilot/Llama3-ArrowSE-8B-v0.3 created using llama.cpp Original Model Card 概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", @@ -5782,7 +6049,8 @@ "latest_commit": "2024-07-28 16:29:51", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", @@ -5795,7 +6063,8 @@ "latest_commit": 
"2022-11-09 15:23:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForMaskedLM" }, { "description": "JQaRA : Japanese Question Answering with Retrieval Augmentation - 検索拡張(RAG)評価のための日本語 Q&A データセット 高性能な LLM の台頭に伴い、LLM を用いた質疑応答のユースケースが増加しています。", @@ -5808,7 +6077,8 @@ "latest_commit": "2024-08-10 02:56:05", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-70b-instruct-v0.1のggufフォーマット変換版です。 ", @@ -5821,7 +6091,8 @@ "latest_commit": "2024-05-04 06:52:16", "languages": [], "model_or_dataset": "model", - "model_size": 69.2 + "model_size": 69.2, + "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", @@ -5834,7 +6105,8 @@ "latest_commit": "2024-06-29 08:56:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Instruct-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on japanese-stablelm-ja_vocab-beta-7b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", @@ -5847,7 +6119,8 @@ "latest_commit": "2023-12-19 06:46:01", "languages": [], "model_or_dataset": "model", - "model_size": 6.88 + "model_size": 6.88, + "model_architectures": "LlamaForCausalLM" }, { "description": "c4ai-command-r-plus-gguf CohereForAIさんが公開しているc4ai-command-r-plusのggufフォーマット変換版です。 ", @@ -5860,7 +6133,8 @@ "latest_commit": "2024-04-23 16:13:37", "languages": [], "model_or_dataset": "model", - "model_size": 104.0 + "model_size": 104.0, + "model_architectures": null }, { "description": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf pfnetさんが公開しているnekomata-14b-pfn-qfin-inst-mergeのggufフォーマット変換版です。 ", @@ -5873,7 +6147,8 @@ "latest_commit": "2024-04-24 14:39:32", "languages": [], "model_or_dataset": "model", - "model_size": 14.2 + "model_size": 14.2, + "model_architectures": null }, { "description": "databricks/dolly-v2-12b の学習データに使用されたdatabricks-dolly-15k.jsonl を���本語に翻訳したデータセットになります。", @@ -5898,7 +6173,8 @@ "latest_commit": "2024-05-03 04:35:34", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": null }, { "description": "whisper-large-v2-japanese-5k-steps This model is a fine-tuned version of openai/whisper-large-v2 on the Japanese CommonVoice dataset (v11)..", @@ -5911,7 +6187,8 @@ "latest_commit": "2023-03-03 21:11:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "hh-rlhf-12k-ja This repository provides a human preference dataset developed by LLM-jp, a collaborative project launched in Japan.", @@ -5924,7 +6201,8 @@ "latest_commit": "2024-02-04 21:45:59", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf yuisekiさんが公開しているYuisekinAIEvol-Mistral-7B-ja-math-v0.1.1のggufフォーマット変換版です。 ", @@ -5937,7 +6215,8 @@ 
"latest_commit": "2024-04-29 15:52:08", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "rinna/japanese-gpt-neox-3.6b-instruction-ppo rinnaさんが公開しているjapanese-gpt-neox-3.6b-instruction-ppoのgguf変換版です。 ", @@ -5950,7 +6229,8 @@ "latest_commit": "2023-09-08 02:39:00", "languages": [], "model_or_dataset": "model", - "model_size": 3.61 + "model_size": 3.61, + "model_architectures": null }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", @@ -5963,7 +6243,8 @@ "latest_commit": "2022-11-09 15:22:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForMaskedLM" }, { "description": "ichikara-instruction (Non Commercial) LLMのための日本語インストラクションデータ 公開ページ 公開ページより、 本データに関して、言語処理学会第30回年次大会において発表を行います。", @@ -5976,7 +6257,8 @@ "latest_commit": "2024-03-12 08:36:40", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "BERT small Japanese finance This is a BERT model pretrained on texts in the Japanese language.", @@ -5989,7 +6271,8 @@ "latest_commit": "2022-12-09 00:40:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf alfredplplさんが公開しているLlama-3-8B-Instruct-Jaのggufフォーマット変換版です。 ", @@ -6002,7 +6285,8 @@ "latest_commit": "2024-04-23 15:24:47", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "mt5_summarize_japanese (Japanese caption : 日本語の要約のモデル)", @@ -6015,7 +6299,8 @@ "latest_commit": "2024-07-12 00:01:31", "languages": [], "model_or_dataset": "model", - "model_size": 0.3 + "model_size": 0.3, + "model_architectures": "MT5ForConditionalGeneration" }, { "description": "rinna/japanese-gpt-neox-3.6b rinnaさんが公開しているjapanese-gpt-neox-3.6bのgguf変換版です。 ", @@ -6028,7 +6313,8 @@ "latest_commit": "2023-09-08 02:37:19", "languages": [], "model_or_dataset": "model", - "model_size": 3.61 + "model_size": 3.61, + "model_architectures": null }, { "description": "aya-23-8B-gguf CohereForAIさんが公開しているaya-23-8Bのggufフォーマット変換版です。 ", @@ -6041,7 +6327,8 @@ "latest_commit": "2024-05-27 00:54:36", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "This repository contains some GGUF quantizations of the VNTL Gemma 2 27B model.", @@ -6054,7 +6341,8 @@ "latest_commit": "2024-07-08 16:13:54", "languages": [], "model_or_dataset": "model", - "model_size": 27.2 + "model_size": 27.2, + "model_architectures": null }, { "description": "This repository contains some GGUF quantizations of the merge of the VNTL LLaMA 3 8B qlora.", @@ -6067,7 +6355,8 @@ "latest_commit": "2024-06-15 17:33:02", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf ryota39さんが公開しているPhi-3-mini-4k-instruct-dpoのggufフォーマット変換版です。 ", @@ -6080,7 +6369,8 @@ "latest_commit": "2024-04-29 16:53:45", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": null }, { 
"description": "llm-book/t5-base-long-livedoor-news-corpus 「大規模言語モデル入門」の第7章で紹介している要約生成のモデルです。 ", @@ -6093,7 +6383,8 @@ "latest_commit": "2023-07-25 13:10:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-7b-instruct-v0.1のggufフォーマット変換版です。 ", @@ -6106,7 +6397,8 @@ "latest_commit": "2024-05-03 04:53:43", "languages": [], "model_or_dataset": "model", - "model_size": 6.83 + "model_size": 6.83, + "model_architectures": null }, { "description": "line-corporation/japanese-large-lm-1.7b line-corporationさんが公開しているjapanese-large-lm-1.7bのgguf変換版です。 ", @@ -6119,7 +6411,8 @@ "latest_commit": "2024-03-24 05:54:30", "languages": [], "model_or_dataset": "model", - "model_size": 1.77 + "model_size": 1.77, + "model_architectures": null }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", @@ -6132,7 +6425,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 13.2 + "model_size": 13.2, + "model_architectures": null }, { "description": "日本語版CLIPモデル This is a CLIP text/image encoder model for Japanese. ", @@ -6145,7 +6439,8 @@ "latest_commit": "2022-04-19 14:18:58", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)", @@ -6158,7 +6453,8 @@ "latest_commit": "2023-05-19 00:54:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "bert-base-japanese-wikipedia-ud-head Model Description", @@ -6171,7 +6467,8 @@ "latest_commit": "2023-03-04 20:16:55", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForQuestionAnswering" }, { "description": "※llama.cpp Releases b3428(7/21)", @@ -6184,7 +6481,8 @@ "latest_commit": "2024-07-21 18:11:21", "languages": [], "model_or_dataset": "model", - "model_size": 9.24 + "model_size": 9.24, + "model_architectures": null }, { "description": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf umiyukiさんが公開しているUmievo-itr012-Gleipnir-7Bのggufフォーマット変換版です。 ", @@ -6197,7 +6495,8 @@ "latest_commit": "2024-05-29 15:53:40", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Model Card for Japanese character-level DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -6210,7 +6509,8 @@ "latest_commit": "2023-03-26 03:32:27", "languages": [], "model_or_dataset": "model", - "model_size": 0.122 + "model_size": 0.122, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "pfnet-nekomata-14b-pfn-qfin-gguf pfnetさんが公開しているnekomata-14b-pfn-qfinのggufフォーマット変換版です。 ", @@ -6223,7 +6523,8 @@ "latest_commit": "2024-04-24 14:46:15", "languages": [], "model_or_dataset": "model", - "model_size": 14.2 + "model_size": 14.2, + "model_architectures": null }, { "description": 
"Oumuamua-7b-RP GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. ", @@ -6236,7 +6537,8 @@ "latest_commit": "2024-06-23 17:06:53", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": "MistralForCausalLM" }, { "description": "DataPilot-ArrowPro-7B-RobinHood-gguf DataPilotさんが公開しているArrowPro-7B-RobinHoodのggufフォーマット変換版です。 ", @@ -6249,7 +6551,8 @@ "latest_commit": "2024-05-11 13:43:09", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -6262,7 +6565,8 @@ "latest_commit": "2023-11-17 05:01:00", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "line-corporation/japanese-large-lm-1.7b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-1.7b-instruction-sftのgguf変換版です。 ", @@ -6275,7 +6579,8 @@ "latest_commit": "2024-03-24 05:54:56", "languages": [], "model_or_dataset": "model", - "model_size": 1.77 + "model_size": 1.77, + "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressive-v2の量子化済みGGUF版です。", @@ -6288,7 +6593,8 @@ "latest_commit": "2024-05-26 15:22:01", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "ku-nlp/roberta-base-japanese-char-wwm Model description This is a Japanese RoBERTa base model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", @@ -6301,7 +6607,8 @@ "latest_commit": "2023-03-20 08:05:45", "languages": [], "model_or_dataset": "model", - "model_size": 0.1 + "model_size": 0.1, + "model_architectures": "RobertaForMaskedLM" }, { "description": "LLaVA-JP Model Card Model detail Model type: LLaVA-JP is a vision-language model that can converse about input images.", @@ -6314,7 +6621,8 @@ "latest_commit": "2023-12-18 10:21:11", "languages": [], "model_or_dataset": "model", - "model_size": 1.73 + "model_size": 1.73, + "model_architectures": "LlavaGpt2ForCausalLM" }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。 ", @@ -6327,7 +6635,8 @@ "latest_commit": "2023-03-27 02:43:35", "languages": [], "model_or_dataset": "model", - "model_size": 0.112 + "model_size": 0.112, + "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "Japanese GPT2 Lyric Model Model description", @@ -6340,7 +6649,8 @@ "latest_commit": "2023-10-23 12:46:36", "languages": [], "model_or_dataset": "model", - "model_size": 0.123 + "model_size": 0.123, + "model_architectures": "GPT2LMHeadModel" }, { "description": "This is a model for named entity recognition of Japanese medical documents.", @@ -6353,7 +6663,8 @@ "latest_commit": "2024-07-31 07:44:00", "languages": [], "model_or_dataset": "model", - "model_size": 0.11 + "model_size": 0.11, + "model_architectures": "BertForTokenClassification" }, { "description": "Llama-3-8B-Japanese-Instruct-GGUF Original Model haqishen/Llama-3-8B-Japanese-Instruct Run with Gaianet Prompt template: prompt template: llama-3-chat Context size: chat_ctx_size: 4096 Run with GaiaNet:", @@ -6366,7 +6677,8 @@ "latest_commit": "2024-05-16 13:44:53", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 
8.03, + "model_architectures": null }, { "description": "aixsatoshi-Honyaku-13b-gguf aixsatoshiさんが公開しているHonyaku-13bのggufフォーマット変換版です。 ", @@ -6379,7 +6691,8 @@ "latest_commit": "2024-05-19 09:24:59", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "t5-base-japanese-web (with Byte-fallback, 32K) Description megagonlabs/t5-base-japanese-web is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", @@ -6392,7 +6705,8 @@ "latest_commit": "2021-09-06 19:32:21", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Model card for model ID", @@ -6405,7 +6719,8 @@ "latest_commit": "2023-05-10 10:00:00", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", @@ -6418,7 +6733,8 @@ "latest_commit": "2024-07-13 13:29:45", "languages": [], "model_or_dataset": "model", - "model_size": 1.1 + "model_size": 1.1, + "model_architectures": null }, { "description": "Model card for model ID", @@ -6431,7 +6747,8 @@ "latest_commit": "2023-05-10 10:00:35", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "ElanMT ElanMT-BT-en-ja is a English to Japanese translation model developed by ELAN MITSUA Project / Abstract Engine.", @@ -6444,7 +6761,8 @@ "latest_commit": "2024-05-20 01:53:38", "languages": [], "model_or_dataset": "model", - "model_size": 0.0606 + "model_size": 0.0606, + "model_architectures": "MarianMTModel" }, { "description": "回答と回答が出てくるパラグラフを与えると質問文を生成するモデル SEE: https://github.com/sonoisa/deep-question-generation 本モデルの作成ステップ概要 SQuAD 1.1を日本語に機械翻訳し、不正なデータをクレンジング(有効なデータは約半分)。", @@ -6457,7 +6775,8 @@ "latest_commit": "2022-03-11 02:50:33", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Dataset Preprocessing Supported Tasks and Leaderboards Languages 注釈はすべて日本語を主要言語としています。 ", @@ -6470,7 +6789,8 @@ "latest_commit": "2022-12-12 16:36:58", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Overview This dataset provides a convenient and user-friendly format of data from Aozora Bunko (青空文庫), a website that compiles public-domain books in Japan, ideal for Machine Learning applications.", @@ -6483,7 +6803,8 @@ "latest_commit": "2023-10-27 13:22:32", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Dataset.", @@ -6496,7 +6817,8 @@ "latest_commit": "2023-04-11 15:18:09", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Ninja-v1-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-128kのggufフォーマット変換版です。 ", @@ -6509,7 +6831,8 @@ "latest_commit": "2024-05-04 13:25:20", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "License:CreativeML Open RAIL-M Additional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of June 25, 2023 このモデルは『CreativeML Open 
RAIL-M』でLicenseそのものに変更はありません。 ", @@ -6522,7 +6845,8 @@ "latest_commit": "2023-07-04 10:47:46", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Llama-3-EZO-VLM-1 Based on SakanaAI/Llama-3-EvoVLM-JP-v2, it has been enhanced for Japanese usage through additional pre-training and instruction tuning.", @@ -6535,7 +6859,8 @@ "latest_commit": "2024-08-23 10:55:53", "languages": [], "model_or_dataset": "model", - "model_size": 8.48 + "model_size": 8.48, + "model_architectures": "LlavaForConditionalGeneration" }, { "description": "Githubリポジトリstockmarkteam/ner-wikipedia-datasetで公開されているデータセットを利用しています。", @@ -6548,7 +6873,8 @@ "latest_commit": "2023-12-12 11:25:51", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ArrowPro-7B-KillerWhale-gguf DataPilotさんが公開しているArrowPro-7B-KillerWhaleのggufフォーマット変換版です。 ", @@ -6561,7 +6887,8 @@ "latest_commit": "2024-05-29 15:53:17", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "What’s this?", @@ -6574,7 +6901,8 @@ "latest_commit": "2024-07-05 05:48:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "OcuteusのGGUF版です。 ", @@ -6587,7 +6915,8 @@ "latest_commit": "2024-05-10 06:18:35", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Mistral-7B-Instruct-v0.3-gguf mistralaiさんが公開しているMistral-7B-Instruct-v0.3のggufフォーマット変換版です。 ", @@ -6600,7 +6929,8 @@ "latest_commit": "2024-05-23 15:58:46", "languages": [], "model_or_dataset": "model", - "model_size": 7.25 + "model_size": 7.25, + "model_architectures": null }, { "description": "オリジナルのサイトと同じものを使用しています。 ", @@ -6613,7 +6943,8 @@ "latest_commit": "2023-12-12 11:19:43", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", @@ -6626,7 +6957,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-base - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-base StableLM", @@ -6639,7 +6971,8 @@ "latest_commit": "2023-11-16 11:18:48", "languages": [], "model_or_dataset": "model", - "model_size": 2.8 + "model_size": 2.8, + "model_architectures": null }, { "description": "stockmark-100b-gguf stockmarkさんが公開しているstockmark-100bのggufフォーマット変換版です。 ", @@ -6652,7 +6985,8 @@ "latest_commit": "2024-05-18 09:14:46", "languages": [], "model_or_dataset": "model", - "model_size": 96.2 + "model_size": 96.2, + "model_architectures": null }, { "description": "JaQuAD is developed to provide a SQuAD-like QA dataset in Japanese.", @@ -6665,7 +6999,8 @@ "latest_commit": "2022-10-25 09:06:40", "languages": [], "model_or_dataset": "dataset", - "model_size": null + 
"model_size": null, + "model_architectures": null }, { "description": "fio-base-japanese-v0.1 日本語版は近日公開予定です(日本語を勉強中なので、間違いはご容赦ください!", @@ -6678,7 +7013,8 @@ "latest_commit": "2023-12-19 10:28:16", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertModel" }, { "description": "Japanese Natural Language Inference Model", @@ -6691,7 +7027,8 @@ "latest_commit": "2022-12-23 10:51:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "QuantFactory/llama-3-youko-8b-GGUF", @@ -6704,7 +7041,8 @@ "latest_commit": "2024-06-24 06:35:40", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", @@ -6717,7 +7055,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 13.4 + "model_size": 13.4, + "model_architectures": null }, { "description": "Qwen1.5-110B-Chat-gguf Qwenさんが公開しているQwen1.5-110B-Chatのggufフォーマット変換版です。 ", @@ -6730,7 +7069,8 @@ "latest_commit": "2024-04-28 08:09:17", "languages": [], "model_or_dataset": "model", - "model_size": 111.0 + "model_size": 111.0, + "model_architectures": null }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", @@ -6743,7 +7083,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-A-v1-7Bのggufフォーマット変換版です。 ", @@ -6756,7 +7097,8 @@ "latest_commit": "2024-03-21 14:48:28", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "SpeechT5 (TTS task) for Japanese SpeechT5 model fine-tuned for Japanese speech synthesis (text-to-speech)", @@ -6769,7 +7111,8 @@ "latest_commit": "2023-08-09 09:25:38", "languages": [], "model_or_dataset": "model", - "model_size": 0.14400000000000002 + "model_size": 0.14400000000000002, + "model_architectures": "SpeechT5ForTextToSpeech" }, { "description": "Python library for CJK (Chinese, Japanese, and Korean) language dictionary", @@ -6808,7 +7151,8 @@ "latest_commit": "2024-05-01 19:16:01", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-CodeLlama-7b-gguf ELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。 ", @@ -6821,7 +7165,8 @@ "latest_commit": "2023-11-16 14:28:03", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "upskyy/gte-korean-base This model is korsts and kornli finetuning model from Alibaba-NLP/gte-multilingual-base.", @@ -6834,7 +7179,8 @@ "latest_commit": "2024-08-08 15:29:27", "languages": [], "model_or_dataset": "model", - "model_size": 0.305 + "model_size": 0.305, + "model_architectures": "NewModel" }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for 
VecTeus-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 VecTeus has the following changes compared to Mistral-7B-v0.1.", @@ -6847,7 +7193,8 @@ "latest_commit": "2024-05-04 04:07:22", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "DataPilot様の Llama3-ArrowSE-8B-v0.3 をGGUF形式に変換したものです。 ", @@ -6860,7 +7207,8 @@ "latest_commit": "2024-07-07 13:40:26", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Japanese CLIP ViT-H/14 (Wider) Table of Contents Overview Usage Model Details Evaluation Limitations and Biases Citation See Also Contact Information Overview Developed by:", @@ -6873,7 +7221,8 @@ "latest_commit": "2024-03-06 21:46:11", "languages": [], "model_or_dataset": "model", - "model_size": 0.91 + "model_size": 0.91, + "model_architectures": "CustomCLIPModel" }, { "description": "Model Card for Japanese BART large Model description", @@ -6886,7 +7235,8 @@ "latest_commit": "2023-05-12 11:05:03", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MBartForConditionalGeneration" }, { "description": "This is a model for named entity recognition of Japanese medical documents.", @@ -6899,7 +7249,8 @@ "latest_commit": "2024-02-26 13:53:06", "languages": [], "model_or_dataset": "model", - "model_size": 0.11 + "model_size": 0.11, + "model_architectures": "BertForTokenClassification" }, { "description": "Evaluation on MIRACL japanese These models don't train on the MIRACL training data.", @@ -6912,7 +7263,8 @@ "latest_commit": "2024-05-22 02:59:37", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "XLNet-japanese Model description This model require Mecab and senetencepiece with XLNetTokenizer.", @@ -6925,7 +7277,8 @@ "latest_commit": "2023-01-05 04:28:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "XLNetLMHeadModel" }, { "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-instruct - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-instruct StableLM", @@ -6938,7 +7291,8 @@ "latest_commit": "2023-11-16 12:53:33", "languages": [], "model_or_dataset": "model", - "model_size": 2.8 + "model_size": 2.8, + "model_architectures": null }, { "description": "This is a Japanese sentence-LUKE model.", @@ -6951,7 +7305,8 @@ "latest_commit": "2023-10-05 05:13:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeModel" }, { "description": "Umievo-itr012-Gleipnir-7B-GGUF", @@ -6964,7 +7319,8 @@ "latest_commit": "2024-06-09 13:12:32", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "[Llama-3-EZO model card]", @@ -6977,7 +7333,8 @@ "latest_commit": "2024-08-23 10:52:05", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and 
Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", @@ -6990,7 +7347,8 @@ "latest_commit": "2023-02-08 00:36:47", "languages": [], "model_or_dataset": "model", - "model_size": 0.316 + "model_size": 0.316, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -7003,7 +7361,8 @@ "latest_commit": "2023-11-06 12:14:36", "languages": [], "model_or_dataset": "model", - "model_size": 69.0 + "model_size": 69.0, + "model_architectures": null }, { "description": "Dataset Summary This is the Business Scene Dialogue (BSD) dataset, a Japanese-English parallel corpus containing written conversations in various business scenarios.", @@ -7016,7 +7375,8 @@ "latest_commit": "2024-01-11 07:36:44", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "HODACHI様の Llama-3-EZO-8b-Common-it をGGUF形式に変換したものです。 ", @@ -7029,7 +7389,8 @@ "latest_commit": "2024-07-15 20:08:22", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Mistral-Large-Instruct-2407-gguf mistralaiさんが公開しているMistral-Large-Instruct-2407のggufフォーマット変換版です。 ", @@ -7042,7 +7403,8 @@ "latest_commit": "2024-07-26 12:21:45", "languages": [], "model_or_dataset": "model", - "model_size": 123.0 + "model_size": 123.0, + "model_architectures": null }, { "description": "概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", @@ -7055,7 +7417,8 @@ "latest_commit": "2024-07-24 12:00:46", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "llm-japanese-dataset LLM構築用の日本語インストラクション(チャット)データセット 主に,英語で構築されたLLMモデルなどに対して,チャット(Instruction)応答タスクに関してLoRAなどでチューニングするために使用できます. 
", @@ -7068,7 +7431,8 @@ "latest_commit": "2024-01-18 13:42:50", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Rakuda - Questions for Japanese models Repository:", @@ -7081,7 +7445,8 @@ "latest_commit": "2023-06-23 08:01:35", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "shisa-7b-v1-gguf augmxntさんが公開しているshisa-7b-v1のggufフォーマット変換版です。 ", @@ -7094,7 +7459,8 @@ "latest_commit": "2023-12-10 12:24:25", "languages": [], "model_or_dataset": "model", - "model_size": 7.96 + "model_size": 7.96, + "model_architectures": null }, { "description": "SakanaAI-EvoLLM-JP-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-v1-7Bのggufフォーマット変換版です。 ", @@ -7107,7 +7473,8 @@ "latest_commit": "2024-03-21 14:41:04", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "モデル説明 (model explanation) CoolJapanDiffusion 2.1.1とWaifuDiffusion 1.4 anime epoch2のマージ。", @@ -7120,7 +7487,8 @@ "latest_commit": "2023-01-22 19:16:59", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", @@ -7133,7 +7501,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 1.64 + "model_size": 1.64, + "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressiveの量子化済みGGUF版です。", @@ -7146,7 +7515,8 @@ "latest_commit": "2024-05-24 15:11:25", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Japanese Laws This dataset comprises 8.75K law records retrieved from the official Japanese government website e-Gov. 
", @@ -7159,7 +7529,8 @@ "latest_commit": "2023-07-20 06:45:14", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", @@ -7172,7 +7543,8 @@ "latest_commit": "2022-02-21 13:38:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "QuantFactory/shisa-7b-v1-GGUF This is quantized version of augmxnt/shisa-base-7b-v1 created using llama.cpp Model Description shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", @@ -7185,7 +7557,8 @@ "latest_commit": "2024-06-18 05:53:41", "languages": [], "model_or_dataset": "model", - "model_size": 7.96 + "model_size": 7.96, + "model_architectures": null }, { "description": "Additional pretrained BERT base Japanese finance This is a BERT model pretrained on texts in the Japanese language.", @@ -7198,7 +7571,8 @@ "latest_commit": "2022-12-09 00:40:25", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "Anime with caption CC-0 dataset このデータセットはイラストに対する日本語キャプションを 倫理的に学習しやすくするためのデータセットです。 ", @@ -7211,7 +7585,8 @@ "latest_commit": "2024-06-03 05:49:20", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model Card for Tanrei/GPTSAN-japanese General-purpose Swich transformer based Japanese language model GPTSAN has some unique features.", @@ -7224,7 +7599,8 @@ "latest_commit": "2023-04-21 19:04:49", "languages": [], "model_or_dataset": "model", - "model_size": 2.78 + "model_size": 2.78, + "model_architectures": "GPTSanJapaneseForConditionalGeneration" }, { "description": "JA-VG-VQA-500 Dataset Description JA-VG-VQA-500 is a 500-sample subset of Japanese Visual Genome VQA dataset.", @@ -7237,7 +7613,8 @@ "latest_commit": "2024-05-14 04:11:31", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "モデルの説明(English explanation is below.", @@ -7250,7 +7627,8 @@ "latest_commit": "2024-06-28 05:56:23", "languages": [], "model_or_dataset": "model", - "model_size": 13.7 + "model_size": 13.7, + "model_architectures": null }, { "description": "DataPilot様の ArrowPro-7B-KUJIRA をGGUF形式に変換したものです。 ", @@ -7263,7 +7641,8 @@ "latest_commit": "2024-05-09 23:32:52", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", @@ -7276,7 +7655,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 0.41400000000000003 + "model_size": 0.41400000000000003, + "model_architectures": null }, { "description": "Japanese Anime Speech Dataset 日本語はこちら japanese-anime-speech is an audio-text dataset designed for the training of automatic speech recognition models.", @@ -7289,7 +7669,8 @@ "latest_commit": "2024-06-30 10:06:34", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Local-Novel-LLM-project様の Assistance をGGUF形式に変換したものです。 ", @@ -7302,7 +7683,8 @@ "latest_commit": "2024-05-04 
07:48:41", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "QuantFactory/shisa-gamma-7b-v1-GGUF", @@ -7315,7 +7697,8 @@ "latest_commit": "2024-06-18 06:17:30", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Sakura_dataset 商用利用可能な超小規模高品質日本語データセット。 ", @@ -7328,7 +7711,8 @@ "latest_commit": "2023-06-08 11:31:06", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model card for model ID", @@ -7341,7 +7725,8 @@ "latest_commit": "2023-05-10 10:01:29", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "range3/wiki40b-ja This dataset consists of three parquet files from the wiki40b dataset with only Japanese data extracted.", @@ -7354,7 +7739,8 @@ "latest_commit": "2023-02-04 05:44:21", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Hibiki ASR Phonemizer This model is a Phoneme Level Speech Recognition network, originally a fine-tuned version of openai/whisper-large-v3 on a mixture of Different Japanese datasets.", @@ -7367,7 +7753,8 @@ "latest_commit": "2024-08-19 18:13:01", "languages": [], "model_or_dataset": "model", - "model_size": 1.54 + "model_size": 1.54, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Stanza model for Japanese (ja)", @@ -7380,7 +7767,8 @@ "latest_commit": "2024-07-31 05:09:43", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", @@ -7393,7 +7781,8 @@ "latest_commit": "2024-04-14 15:26:06", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "Deepreneur-blue-lizard-gguf Deepreneurさんが公開しているblue-lizardのggufフォーマット変換版です。 ", @@ -7406,7 +7795,8 @@ "latest_commit": "2024-02-13 16:26:26", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": null }, { "description": "Vecteus-V2-7B このモデルは、ベクトルマージなどを用い作成された高性能ベースモデルです。 ", @@ -7419,7 +7809,8 @@ "latest_commit": "2024-07-06 13:39:41", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "DeBERTa V2 base Japanese This is a DeBERTaV2 model pretrained on Japanese texts.", @@ -7432,7 +7823,8 @@ "latest_commit": "2024-07-19 03:07:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Oumuamua-7b-instruct-GGUF This is quantized version of nitky/Oumuamua-7b-instruct created using llama.cpp Model Description This is a merge of pre-trained language models created using mergekit. 
", @@ -7445,7 +7837,8 @@ "latest_commit": "2024-06-19 11:40:58", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": null }, { "description": "databricks-dolly-15k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", @@ -7458,7 +7851,8 @@ "latest_commit": "2024-01-30 18:09:37", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。 ", @@ -7471,7 +7865,8 @@ "latest_commit": "2023-10-25 15:22:05", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1.", @@ -7484,7 +7879,8 @@ "latest_commit": "2024-01-28 16:24:30", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", @@ -7497,7 +7893,8 @@ "latest_commit": "2022-02-10 07:50:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VisionEncoderDecoderModel" }, { "description": "Llama3ベースの日本語医療LLM MedLlama3-JP このモデルはLlama3の継続学習により作成された4種類のLLMから成るマージモデルです。 ", @@ -7510,7 +7907,8 @@ "latest_commit": "2024-07-13 06:12:43", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Local-Novel-LLM-project様の Vecteus-V2-7B をGGUF形式に変換したものです。 ", @@ -7523,7 +7921,8 @@ "latest_commit": "2024-06-16 11:32:15", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Kotoba-Speech-v0.1 Kotoba-Speech v0.1 is a 1.2B Transformer-based speech generative model.", @@ -7536,7 +7935,8 @@ "latest_commit": "2024-04-17 07:54:48", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese-WizardLM2-ChatV-7B-GGUF GGUF conversion of \"Japanese-WizardLM2-ChatV-7B\" This model, Japanese-WizardLM2-ChatV-7B, is based on \"chatntq-ja-7b-v1.0 \", and was created by subtracting \"Mistral-7B-v0.1\" from \"WizardLM-2-7b\" ChatVector was added by a factor of 1.0.", @@ -7549,7 +7949,8 @@ "latest_commit": "2024-04-17 01:41:16", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", @@ -7562,7 +7963,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 7.57 + "model_size": 7.57, + "model_architectures": null }, { "description": "MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: 
MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1.", @@ -7575,7 +7977,8 @@ "latest_commit": "2024-01-26 06:36:22", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "What’s this?", @@ -7588,7 +7991,8 @@ "latest_commit": "2024-07-05 05:49:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "oasst2-33k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", @@ -7601,7 +8005,8 @@ "latest_commit": "2024-04-28 16:39:03", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "モデル概要 AWSのtrn1インスタンスを用いて開発した大喜利言語モデルです。", @@ -7614,7 +8019,8 @@ "latest_commit": "2024-03-04 05:21:14", "languages": [], "model_or_dataset": "model", - "model_size": 5.83 + "model_size": 5.83, + "model_architectures": "GPT2LMHeadModel" }, { "description": "MobileBERT 日本語事前学習済みモデル爆誕!! ", @@ -7627,7 +8033,8 @@ "latest_commit": "2022-01-24 05:25:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "mbpp-ja", @@ -7640,7 +8047,8 @@ "latest_commit": "2024-04-20 06:26:51", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Local-Novel-LLM-project様の Ninja-V3 をGGUF形式に変換したものです。 ", @@ -7653,7 +8061,8 @@ "latest_commit": "2024-07-03 16:59:05", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "DataPilot様の ArrowPro-7B-RobinHood をGGUF形式に変換したものです。 ", @@ -7666,7 +8075,8 @@ "latest_commit": "2024-05-10 18:14:28", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "line-corporation/japanese-large-lm-3.6b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-3.6b-instruction-sftのgguf変換版です。 ", @@ -7679,7 +8089,8 @@ "latest_commit": "2023-09-08 02:52:29", "languages": [], "model_or_dataset": "model", - "model_size": 3.71 + "model_size": 3.71, + "model_architectures": null }, { "description": "Tanuki-ZeRo-gguf kanhatakeyamaさんが公開しているTanuki-ZeRoのggufフォーマット変換版です。 ", @@ -7692,7 +8103,8 @@ "latest_commit": "2024-03-30 17:01:16", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "mathstral-7B-v0.1-gguf mistralaiさんが公開しているmathstral-7B-v0.1のggufフォーマット変換版です。 ", @@ -7705,7 +8117,8 @@ "latest_commit": "2024-07-17 18:54:27", "languages": [], "model_or_dataset": "model", - "model_size": 7.25 + "model_size": 7.25, + "model_architectures": null }, { "description": "WRIME-fine-tuned BERT base Japanese This model is a Japanese BERTBASE fine-tuned on the WRIME dataset.", @@ -7718,7 +8131,8 @@ "latest_commit": "2023-03-22 08:11:34", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "Local-Novel-LLM-project様の Ninja-V2-7B 
をGGUF形式に変換したものです。 ", @@ -7731,7 +8145,8 @@ "latest_commit": "2024-06-15 21:25:59", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "c4ai-command-r-v01-japanese-instruct-GGUF 概要 Aratako/c4ai-command-r-v01-japanese-instructの量子化済みGGUF版です。", @@ -7744,7 +8159,8 @@ "latest_commit": "2024-04-07 03:19:34", "languages": [], "model_or_dataset": "model", - "model_size": 35.0 + "model_size": 35.0, + "model_architectures": null }, { "description": "umiyuki様の Japanese-Chat-Umievo-itr004-7b をGGUF形式に変換したものです。 ", @@ -7757,7 +8173,8 @@ "latest_commit": "2024-05-13 23:33:49", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Ninja-v1-RP-GGUF 概要 Aratako/Ninja-v1-RPの量子化済みGGUF版です。", @@ -7770,7 +8187,8 @@ "latest_commit": "2024-05-24 15:11:08", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。 ", @@ -7783,7 +8201,8 @@ "latest_commit": "2023-10-25 15:31:08", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", @@ -7796,7 +8215,8 @@ "latest_commit": "2024-03-26 10:04:46", "languages": [], "model_or_dataset": "model", - "model_size": 2.69 + "model_size": 2.69, + "model_architectures": "OrionForCausalLM" }, { "description": "What’s this?", @@ -7809,7 +8229,8 @@ "latest_commit": "2024-07-05 05:50:06", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "更新履歴 2023年5月7日 「oasst1-89k-ja」データセットを追加して対話システムに対応しました。", @@ -7822,7 +8243,8 @@ "latest_commit": "2023-08-01 07:55:27", "languages": [], "model_or_dataset": "model", - "model_size": 1.33 + "model_size": 1.33, + "model_architectures": "GPT2LMHeadModel" }, { "description": "ryota39様の Tora-7B-v0.1 をGGUF形式に変換したものです。 ", @@ -7835,7 +8257,8 @@ "latest_commit": "2024-06-15 03:16:21", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "275.86Mのmixtralを日本語データセットでpretrainingしたものです sample from transformers import AutoTokenizer, AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained(\"if001/tiny_mixtral_ja\")", @@ -7848,7 +8271,8 @@ "latest_commit": "2024-01-23 00:42:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.276 + "model_size": 0.276, + "model_architectures": "MixtralForCausalLM" }, { "description": "abc-multiple-choice Dataset abc-multiple-choice は、競技クイズの大会「abc」で使用された4択問題を元に作成された、多肢選択式の質問応答データセットです。 ", @@ -7861,7 +8285,8 @@ "latest_commit": "2024-03-12 07:32:13", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Oumuamua-7b-RPの量子化済みGGUF版です。", @@ -7874,7 +8299,8 @@ "latest_commit": "2024-06-23 14:45:14", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": null }, { "description": "deberta-base-japanese-aozora-ud-head Model Description", @@ -7887,7 +8313,8 @@ "latest_commit": "2023-03-04 20:10:16", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": 
"DebertaV2ForQuestionAnswering" }, { "description": "[github].", @@ -7900,7 +8327,8 @@ "latest_commit": "2023-05-19 12:54:13", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "whisper-large-v3-japanese-4k-steps This model is a fine-tuned version of openai/whisper-large-v3 on the Common Voice 16.1 dataset.", @@ -7913,7 +8341,8 @@ "latest_commit": "2024-02-18 01:31:35", "languages": [], "model_or_dataset": "model", - "model_size": 1.54 + "model_size": 1.54, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Model Card for Japanese character-level GPT-2 Medium Model description This is a Japanese character-level GPT-2 Medium (310M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -7926,7 +8355,8 @@ "latest_commit": "2023-06-08 05:34:26", "languages": [], "model_or_dataset": "model", - "model_size": 0.335 + "model_size": 0.335, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Model card for model ID", @@ -7939,7 +8369,8 @@ "latest_commit": "2023-05-10 10:00:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf lightblueさんが公開しているKarasu-Mixtral-8x22B-v0.1のggufフォーマット変換版です。 ", @@ -7952,7 +8383,8 @@ "latest_commit": "2024-05-07 18:07:43", "languages": [], "model_or_dataset": "model", - "model_size": 141.0 + "model_size": 141.0, + "model_architectures": null }, { "description": "Japanese-TextGen-Kage-v0.1-2x7B Kage is \"影\" in Japanese or \"Shadow\" in English.", @@ -7965,7 +8397,8 @@ "latest_commit": "2024-05-19 08:54:19", "languages": [], "model_or_dataset": "model", - "model_size": 12.9 + "model_size": 12.9, + "model_architectures": null }, { "description": "Cross-Encoder for Natural Language Inference(NLI) for Japanese Considering the results of the JNLI evaluation result, we recommend using akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick for natural language inference in Japanese.", @@ -7978,7 +8411,8 @@ "latest_commit": "2024-04-26 06:27:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "To load a language pair which isn't part of the config, all you need to do is specify the language code as pairs.", @@ -7991,7 +8425,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese ELECTRA-small We provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", @@ -8004,7 +8439,8 @@ "latest_commit": "2020-12-11 22:26:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraForPreTraining" }, { "description": "Dataset 5M (5121625) clean Japanese full sentence with the context.", @@ -8017,7 +8453,8 @@ "latest_commit": "2023-07-11 12:22:09", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ryota39様の Tora-7B-v0.2 をGGUF形式に変換したものです。 ", @@ -8030,7 +8467,8 @@ "latest_commit": "2024-06-15 03:17:32", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + 
"model_size": 7.24, + "model_architectures": null }, { "description": "AIBunCho/japanese-novel-gpt-j-6b AI BunChoで利用しているモデルです。", @@ -8043,7 +8481,8 @@ "latest_commit": "2023-08-26 04:20:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTJForCausalLM" }, { "description": "モデルの説明(English explanation is below.", @@ -8056,7 +8495,8 @@ "latest_commit": "2024-07-04 07:20:41", "languages": [], "model_or_dataset": "model", - "model_size": 13.7 + "model_size": 13.7, + "model_architectures": null }, { "description": "Finetuned Waseda RoBERTa to evaluate the generated answers on JTruthfulQA.", @@ -8069,7 +8509,8 @@ "latest_commit": "2023-12-06 04:31:12", "languages": [], "model_or_dataset": "model", - "model_size": 0.337 + "model_size": 0.337, + "model_architectures": "RobertaForSequenceClassification" }, { "description": "Japanese stopwords for nagisa", @@ -8082,7 +8523,8 @@ "latest_commit": "2023-08-07 02:58:31", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "doc2query/msmarco-japanese-mt5-base-v1 This is a doc2query model based on mT5 (also known as docT5query).", @@ -8095,7 +8537,8 @@ "latest_commit": "2022-04-29 14:05:37", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MT5ForConditionalGeneration" }, { "description": "Japanese ELECTRA-small We provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", @@ -8108,7 +8551,8 @@ "latest_commit": "2020-12-11 22:26:17", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraForMaskedLM" }, { "description": "Japanese Anime Speech Dataset V2 日本語はこちら japanese-anime-speech-v2 is an audio-text dataset designed for training automatic speech recognition models.", @@ -8121,7 +8565,8 @@ "latest_commit": "2024-07-24 19:06:51", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "https://huggingface.co./kotoba-tech/kotoba-whisper-v1.1 上記のモデルを訓練し、アダルト用語を認識できるようにしたものです。", @@ -8134,7 +8579,8 @@ "latest_commit": "2024-07-24 10:29:47", "languages": [], "model_or_dataset": "model", - "model_size": 0.756 + "model_size": 0.756, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "reazonspeech-espnet-next ReazonSpeech is a project to maintain freely-available Japanese audio datasets and ML models.", @@ -8147,7 +8593,8 @@ "latest_commit": "2023-03-29 17:28:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", @@ -8160,7 +8607,8 @@ "latest_commit": "2023-11-15 00:38:12", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow-8Bは追加の日本語継続事前学習により日本語が大変流暢なLlama-3派生モデルです。", @@ -8173,7 +8621,8 @@ "latest_commit": "2024-07-24 04:03:21", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.2 Model Details: Built with Meta Llama 3", @@ -8186,7 
+8635,8 @@ "latest_commit": "2024-05-15 12:56:34", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Dataset Summary SNOW T15:The simplified corpus for the Japanese language.", @@ -8199,7 +8649,8 @@ "latest_commit": "2024-01-18 11:16:01", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "LLM-jp Toxicity Dataset 日本語有害文書データセット「LLM-jp Toxicity Dataset」 See https://gitlab.llm-jp.nii.ac.jp/datasets/llm-jp-toxicity-dataset", @@ -8212,7 +8663,8 @@ "latest_commit": "2024-08-07 07:21:07", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "GGUF conversion of NTQAI/chatntq-ja-7b-v1.0 ChatNTQ-JA-7b-v1.0 is a Japanese chat fine-tuned model built on top of the stabilityai/japanese-stablelm-base-gamma-7b, which is originally based on Mistral 7B v0.1.", @@ -8225,7 +8677,8 @@ "latest_commit": "2024-04-04 23:10:54", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "Wikidata parallel descriptions en-ja Parallel corpus for machine translation generated from wikidata dump (2024-05-06).", @@ -8238,7 +8691,8 @@ "latest_commit": "2024-05-17 00:25:10", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "alpaca-guanaco-japanese-gpt-1b 1.3Bパラメータの日本語GPTモデルを使用した対話AIです。", @@ -8251,7 +8705,8 @@ "latest_commit": "2023-04-13 10:25:48", "languages": [], "model_or_dataset": "model", - "model_size": 1.33 + "model_size": 1.33, + "model_architectures": "GPT2LMHeadModel" }, { "description": "RoBERTa base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", @@ -8264,7 +8719,8 @@ "latest_commit": "2022-04-08 11:38:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForQuestionAnswering" }, { "description": "Model Card for Japanese DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -8277,7 +8733,8 @@ "latest_commit": "2023-05-12 14:10:35", "languages": [], "model_or_dataset": "model", - "model_size": 0.373 + "model_size": 0.373, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Synthetic-JP-Conversations-Magpie-Nemotron-4-10k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、約10000件の日本語instruction tuning用データセットです。 ", @@ -8290,7 +8747,8 @@ "latest_commit": "2024-07-05 13:57:08", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Dataset Details Dataset Type:Japanese LLaVA Instruct 150K is a localized version of the original LLaVA Visual Instruct 150K dataset.", @@ -8303,7 +8761,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "We provide an Amazon product reviews dataset for multilingual text classification.", @@ -8316,7 +8775,8 @@ "latest_commit": "2023-11-02 14:52:21", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { 
"description": "日本語 gpt2 蒸留モデル このモデルはrinna/japanese-gpt2-meduimを教師として蒸留したものです。 ", @@ -8329,7 +8789,8 @@ "latest_commit": "2022-04-15 06:00:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model Card for Japanese character-level GPT-2 Small Model description This is a Japanese character-level GPT-2 Small (90M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -8342,7 +8803,8 @@ "latest_commit": "2023-05-08 10:08:13", "languages": [], "model_or_dataset": "model", - "model_size": 0.10300000000000001 + "model_size": 0.10300000000000001, + "model_architectures": "GPT2LMHeadModel" }, { "description": "albert-base-japanese-v1-with-japanese 日本語事前学習済みALBERTモデルですこのモデルではTokenizerにBertJapaneseTokenizerクラスを利用していますalbert-base-japanese-v1よりトークナイズ処理が楽になっています How to use ファインチューニング このモデルはPreTrainedモデルです基本的には各種タスク用にファインチューニングして使用されることを想定しています Fill-Mask for PyTorch from transformers import ( AutoModelForMaskedLM, AutoTokenizer ) tokenizer = AutoTokenizer.from_pretrained(\"ken11/albert-base-japanese-v1-with-japanese-tokenizer\")", @@ -8355,7 +8817,8 @@ "latest_commit": "2022-04-21 02:28:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "AlbertForMaskedLM" }, { "description": "Orion-14B 🌐English | 🇨", @@ -8368,7 +8831,8 @@ "latest_commit": "2024-03-07 19:33:53", "languages": [], "model_or_dataset": "model", - "model_size": 14.5 + "model_size": 14.5, + "model_architectures": null }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", @@ -8381,7 +8845,8 @@ "latest_commit": "2022-08-27 09:21:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Model card for model ID", @@ -8394,7 +8859,8 @@ "latest_commit": "2023-05-10 10:01:16", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "sonoisa/t5-base-japaneseをファインチューニングして、タイトル生成に用いれるようにしたモデルです。 ", @@ -8407,7 +8873,8 @@ "latest_commit": "2023-07-21 14:11:13", "languages": [], "model_or_dataset": "model", - "model_size": 0.223 + "model_size": 0.223, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。 ", @@ -8420,7 +8887,8 @@ "latest_commit": "2023-07-21 14:11:02", "languages": [], "model_or_dataset": "model", - "model_size": 0.132 + "model_size": 0.132, + "model_architectures": "LukeForQuestionAnswering" }, { "description": "Japanese-LLaMA-3-8B Japanese-LLaMA-3-8Bは基盤モデル、フルモデルです。 ", @@ -8433,7 +8901,8 @@ "latest_commit": "2024-06-21 06:35:41", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Orion-14B 🌐English | 🇨", @@ -8446,7 +8915,8 @@ "latest_commit": "2024-03-26 10:08:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "OrionForCausalLM" }, { "description": "Dataset overview This dataset identifies whether a GitHub repository description pertains to Japanese natural language processing (NLP).", @@ -8459,7 +8929,8 @@ "latest_commit": "2023-09-09 20:09:04", 
"languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "recruit-jp/japanese-typo-detector-roberta-base モデルの概要 日本語の文章を入力すると各文字ごとに誤字脱字である確率を出力します 各ラベルの意味は以下の通りです id label meaning 0 OK 誤字なし 1 deletion 1文字の抜け 2 insertion_a 余分な1文字の挿入 3 insertion_b 直前の文字列と一致する2文字以上の余分な文字の挿入 4 kanji-conversion_a 同一の読みを持つ漢字の入れ替え(誤変換) 5 kanji-conversion_b 近い読みを持つ漢字の入れ替え(誤変換) 6 substitution 1文字の入れ替え 7 transposition 隣接する2文字間の転置 8 others その他の入力誤り 誤り種類の詳細については学習データセットの元論文をご参照ください 日本語 Wikipedia の編集履歴に基づく 入力誤りデータセットと訂正システムの改良 その他、モデルの詳細については当社ブログ記事をご参照ください 誤字脱字検出モデルをHugging Face Hubに公開しました (Re", @@ -8472,7 +8943,8 @@ "latest_commit": "2023-12-21 03:07:31", "languages": [], "model_or_dataset": "model", - "model_size": 0.0996 + "model_size": 0.0996, + "model_architectures": "RobertaForTokenClassification" }, { "description": "alabnii/jmedroberta-base-sentencepiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", @@ -8485,7 +8957,8 @@ "latest_commit": "2023-06-27 03:44:17", "languages": [], "model_or_dataset": "model", - "model_size": 0.124 + "model_size": 0.124, + "model_architectures": "BertForMaskedLM" }, { "description": "Wikipediaを用いた日本語の固有表現抽出データセット GitHub: https://github.com/stockmarkteam/ner-wikipedia-dataset/ LICENSE: CC-BY-SA 3.0 Developed by Stockmark Inc.", @@ -8498,7 +8971,8 @@ "latest_commit": "2023-09-02 14:42:18", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", @@ -8511,7 +8985,8 @@ "latest_commit": "2024-03-26 10:10:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "OrionForCausalLM" }, { "description": "zenz-v1 zenz-v1はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", @@ -8524,7 +8999,8 @@ "latest_commit": "2024-05-13 16:34:02", "languages": [], "model_or_dataset": "model", - "model_size": 0.09509999999999999 + "model_size": 0.09509999999999999, + "model_architectures": "GPT2LMHeadModel" }, { "description": "概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", @@ -8537,7 +9013,8 @@ "latest_commit": "2024-07-07 14:18:02", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Llama 3 Youko 70B (rinna/llama-3-youko-70b)", @@ -8550,7 +9027,8 @@ "latest_commit": "2024-07-25 05:16:28", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": "LlamaForCausalLM" }, { "description": "Umievo-itr012-Gleipnir-7B このモデルは強力な4つの日本語モデルを進化的アルゴリズムで進化的マージしたものです。", @@ -8563,7 +9041,8 @@ "latest_commit": "2024-05-29 13:51:31", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "Heron BLIP Japanese StableLM", @@ -8576,7 +9055,8 @@ "latest_commit": "2024-02-27 13:59:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VideoBlipForConditionalGeneration" }, { "description": "c4ai-command-r-v01-japanese-instruct GGUF版はこちら/Click here for the GGUF version 概要 CohereForAI/c4ai-command-r-v01を、ichikara-instructionを使って追加で日本語インストラクションチューニングを施したモデルです。 ", @@ -8589,7 +9069,8 @@ "latest_commit": "2024-04-07 15:18:37", 
"languages": [], "model_or_dataset": "model", - "model_size": 35.0 + "model_size": 35.0, + "model_architectures": "CohereForCausalLM" }, { "description": "Miwa-Keita/zenz-v1-checkpoints を optimum 用に ONNX に変換したモデルです。", @@ -8602,7 +9083,8 @@ "latest_commit": "2024-06-29 03:40:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Aerner LM-v2 事前学習から全部日本語で学習させたモデルのバージョン2です。 ", @@ -8615,7 +9097,8 @@ "latest_commit": "2023-06-09 16:08:47", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "OpenLlamaForCausalLM" }, { "description": "[EZO model card]", @@ -8628,7 +9111,8 @@ "latest_commit": "2024-08-23 10:56:47", "languages": [], "model_or_dataset": "model", - "model_size": 25.5 + "model_size": 25.5, + "model_architectures": "InternVLChatModel" }, { "description": "このデータセットについて このデータは、日本の官公庁のWebサイトに掲載されている「よくある質問」を手作業で抽出し、インストラクション用のデータセットとしたものです。 ", @@ -8641,7 +9125,8 @@ "latest_commit": "2024-02-29 02:51:20", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese-LLaMA-2-13B-GGUF Japanese-LLaMA-2-13B-GGUFはJapanese-LLaMA-2-13BのGGUF形式です。 ", @@ -8654,7 +9139,8 @@ "latest_commit": "2023-12-26 11:45:15", "languages": [], "model_or_dataset": "model", - "model_size": 13.3 + "model_size": 13.3, + "model_architectures": null }, { "description": "Heron GIT Japanese StableLM", @@ -8667,7 +9153,8 @@ "latest_commit": "2024-05-02 07:55:57", "languages": [], "model_or_dataset": "model", - "model_size": 7.32 + "model_size": 7.32, + "model_architectures": "GitJapaneseStableLMAlphaForCausalLM" }, { "description": "japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", @@ -8680,7 +9167,8 @@ "latest_commit": "2023-09-28 00:02:06", "languages": [], "model_or_dataset": "model", - "model_size": 1.17 + "model_size": 1.17, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Fine-tuned XLSR-53 large model for speech diarization in Japanese phone-call 2 speakers diarization model which was fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using phone-call data CallHome.", @@ -8693,7 +9181,8 @@ "latest_commit": "2023-05-10 00:32:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForAudioFrameClassification" }, { "description": "Japanese-LLaMA-2-7B-GGUF Japanese-LLaMA-2-7B-GGUFはJapanese-LLaMA-2-7BのGGUF形式です。 ", @@ -8706,7 +9195,8 @@ "latest_commit": "2024-06-05 02:30:01", "languages": [], "model_or_dataset": "model", - "model_size": 6.97 + "model_size": 6.97, + "model_architectures": null }, { "description": "Japanese-Alpaca-2-13B-GGUF Japanese-Alpaca-2-13B-GGUFはJapanese-Alpaca-2-13BのGGUF形式です。 ", @@ -8719,7 +9209,8 @@ "latest_commit": "2023-12-26 11:46:41", "languages": [], "model_or_dataset": "model", - "model_size": 13.3 + "model_size": 13.3, + "model_architectures": null }, { "description": "line-corporation/japanese-large-lm-3.6b line-corporationさんが公開しているjapanese-large-lm-3.6bのgguf変換版です。 ", @@ -8732,7 +9223,8 @@ "latest_commit": "2023-09-08 02:53:05", "languages": [], "model_or_dataset": "model", - "model_size": 3.71 + "model_size": 3.71, + "model_architectures": null }, { "description": "range3/cc100-ja This dataset consists of parquet files from the cc100 dataset with only the Japanese language extracted and sharded.", @@ -8745,7 +9237,8 @@ "latest_commit": 
"2023-02-04 05:43:32", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "AutoWikiQA 東工大が公開しているSwallow-MXを用いて、Wikipedia中のテキストを入力として「質問(query)」と「回答(answer)」を生成し、生成された質問と回答についてフィルタリングを行ったデータセットです。", @@ -8758,7 +9251,8 @@ "latest_commit": "2024-04-20 12:17:33", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Kanji Diffusion v1-4 Model Card Kanji Diffusion is a latent text-to-image diffusion model capable of hallucinating Kanji characters given any English prompt.", @@ -8771,7 +9265,8 @@ "latest_commit": "2024-08-16 12:14:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This is a Japanese+English sentence-BERT model.", @@ -8784,7 +9279,8 @@ "latest_commit": "2022-05-08 03:29:28", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "自動生成Q&A 種々のデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 CC-BY系またはApatch-2.0のデータソースを改変して生成しています。 ", @@ -8797,7 +9293,8 @@ "latest_commit": "2024-05-19 03:22:08", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Oumuamua-7b-instruct-v2 🚨 If you want to avoid outputs that appear to be literal translations, please prompt this model to role-play as a Japanese person.", @@ -8810,7 +9307,8 @@ "latest_commit": "2024-06-19 22:29:07", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": "MistralForCausalLM" }, { "description": "GPT-2 small Japanese model This repository contains a GPT2-small model trained on Japanese Wikipedia dataset.", @@ -8823,7 +9321,8 @@ "latest_commit": "2021-09-27 20:50:17", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "ChatNTQ JA 7B V1.0 Model Description", @@ -8836,7 +9335,8 @@ "latest_commit": "2023-12-26 09:22:34", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "roberta_qa_japanese (Japanese caption : 日本語の (抽出型) 質問応答のモデル)", @@ -8849,7 +9349,8 @@ "latest_commit": "2024-07-12 00:00:07", "languages": [], "model_or_dataset": "model", - "model_size": 0.11 + "model_size": 0.11, + "model_architectures": "RobertaForQuestionAnswering" }, { "description": "t5-base-japanese-web-8k (with Byte-fallback, 8K) Description megagonlabs/t5-base-japanese-web-8k is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", @@ -8862,7 +9363,8 @@ "latest_commit": "2023-07-04 07:05:38", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "The English document is here. 
", @@ -8875,7 +9377,8 @@ "latest_commit": "2024-03-04 05:24:31", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "Model card for model ID", @@ -8888,7 +9391,8 @@ "latest_commit": "2023-05-10 10:00:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "モデル概要 このモデルは、 sonoisa/sentence-luke-japanese-base-lite をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました。 ", @@ -8901,7 +9405,8 @@ "latest_commit": "2024-03-24 12:35:36", "languages": [], "model_or_dataset": "model", - "model_size": 0.133 + "model_size": 0.133, + "model_architectures": "OffensivenessEstimationModel" }, { "description": "実験モデルです。", @@ -8914,7 +9419,8 @@ "latest_commit": "2024-06-15 14:58:10", "languages": [], "model_or_dataset": "model", - "model_size": 7.62 + "model_size": 7.62, + "model_architectures": "Qwen2ForCausalLM" }, { "description": "Japanese-LLaMA-3-8B-Instruct-v2 Japanese-LLaMA-3-8B-Instruct-v2は指示実行モデル、フルモデルです。 ", @@ -8927,7 +9433,8 @@ "latest_commit": "2024-06-21 06:35:31", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -8940,7 +9447,8 @@ "latest_commit": "2024-03-07 06:48:27", "languages": [], "model_or_dataset": "model", - "model_size": 6.95 + "model_size": 6.95, + "model_architectures": null }, { "description": "つくよみちゃんデータセットを用いて calm-2-7b-chat をファインチューニングしたモデルです。", @@ -8953,7 +9461,8 @@ "latest_commit": "2023-12-27 04:07:20", "languages": [], "model_or_dataset": "model", - "model_size": 7.01 + "model_size": 7.01, + "model_architectures": "LlamaForCausalLM" }, { "description": "bert-base-japanese-v3-jcommonsenseqa 「大規模言語モデル入門」の第5章で紹介している(多肢選択式質問応答)のモデルです。 ", @@ -8966,7 +9475,8 @@ "latest_commit": "2023-07-24 06:49:16", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMultipleChoice" }, { "description": "whisper-large-v2-mix-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-mix-jp to the CTranslate2 model format.", @@ -8979,7 +9489,8 @@ "latest_commit": "2023-07-07 17:56:03", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "BERT base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", @@ -8992,7 +9503,8 @@ "latest_commit": "2022-02-04 02:39:25", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForQuestionAnswering" }, { "description": "bert-japanese_finetuned-sentiment-analysis This model was trained from scratch on the Japanese Sentiment Polarity Dictionary dataset.", @@ -9005,7 +9517,8 @@ "latest_commit": "2023-03-31 13:13:37", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "nlp-waseda/bigbird-base-japanese Model description This is a Japanese BigBird base model pretrained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", @@ -9018,7 +9531,8 @@ "latest_commit": "2023-06-20 10:49:17", "languages": [], 
"model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BigBirdForMaskedLM" }, { "description": "BERT for Japanese Twitter", @@ -9031,7 +9545,8 @@ "latest_commit": "2024-08-09 12:24:35", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForMaskedLM" }, { "description": "Japanese to Korean translator Japanese to Korean translator model based on EncoderDecoderModel(bert-japanese+kogpt2)", @@ -9044,7 +9559,8 @@ "latest_commit": "2024-06-28 06:38:39", "languages": [], "model_or_dataset": "model", - "model_size": 0.265 + "model_size": 0.265, + "model_architectures": "EncoderDecoderModel" }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", @@ -9057,7 +9573,8 @@ "latest_commit": "2023-09-27 23:54:44", "languages": [], "model_or_dataset": "model", - "model_size": 0.771 + "model_size": 0.771, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "自動生成Q&A データソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 Common Crawlをもとに生成しています。 ", @@ -9070,7 +9587,8 @@ "latest_commit": "2024-05-19 09:25:43", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "japanese-large-lm-1.7b-instruction-sft-8bit-1g-actorder_True", @@ -9083,7 +9601,8 @@ "latest_commit": "2023-09-29 03:09:03", "languages": [], "model_or_dataset": "model", - "model_size": 0.625 + "model_size": 0.625, + "model_architectures": "GPT2LMHeadModel" }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", @@ -9096,7 +9615,8 @@ "latest_commit": "2023-09-29 03:19:23", "languages": [], "model_or_dataset": "model", - "model_size": 0.446 + "model_size": 0.446, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Original Model Optical character recognition for Japanese text, with the main focus being Japanese manga.", @@ -9109,7 +9629,8 @@ "latest_commit": "2024-06-03 05:10:11", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "VisionEncoderDecoderModel" }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", @@ -9122,7 +9643,8 @@ "latest_commit": "2023-09-27 23:56:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.861 + "model_size": 0.861, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp Model description", @@ -9135,7 +9657,8 @@ "latest_commit": "2022-10-21 15:56:38", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "Oumuamua-7b-instruct This is a merge of pre-trained language models created using mergekit. 
", @@ -9148,7 +9671,8 @@ "latest_commit": "2024-06-01 15:55:51", "languages": [], "model_or_dataset": "model", - "model_size": 7.33 + "model_size": 7.33, + "model_architectures": "MistralForCausalLM" }, { "description": "Japanese InstructBLIP Alpha Model Details Japanese InstructBLIP Alpha is a vision-language instruction-following model that enables to generate Japanese descriptions for input images and optionally input texts such as questions.", @@ -9161,7 +9685,8 @@ "latest_commit": "2023-11-17 03:57:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "JapaneseInstructBlipAlphaForConditionalGeneration" }, { "description": "名言推論モデル", @@ -9174,7 +9699,8 @@ "latest_commit": "2021-10-26 01:19:59", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "roberta-long-japanese (jumanpp + sentencepiece, mC4 Japanese)", @@ -9187,7 +9713,8 @@ "latest_commit": "2022-10-04 23:36:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "Heron BLIP Japanese StableLM", @@ -9200,7 +9727,8 @@ "latest_commit": "2024-02-27 13:57:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VideoBlipForConditionalGeneration" }, { "description": "bert-base-japanese-v3-bpr-question-aio 「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRの質問エンコーダです。 ", @@ -9213,7 +9741,8 @@ "latest_commit": "2023-07-24 07:12:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "jpn-heb source group: Japanese target group:", @@ -9226,7 +9755,8 @@ "latest_commit": "2023-08-16 11:59:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MarianMTModel" }, { "description": "bert-base-japanese-jsnli This model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on the JSNLI dataset.", @@ -9239,7 +9769,8 @@ "latest_commit": "2022-10-18 12:13:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "モデル ベースモデル:microsoft/Phi-3-mini-4k-instruct 学習データセット:llm-jp/hh-rlhf-12k-ja 学習方式:フルパラメータチューニング サンプル import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", trust_remote_code=True, ) model = AutoModelForCausalLM.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", device_map=\"auto\", torch_dtype='auto', trust_remote_code=True, ) text = \"<|user|>\\n与えられた質問に対して英語で思考し、日本語で答えてください。", @@ -9252,7 +9783,8 @@ "latest_commit": "2024-05-01 07:41:46", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": "Phi3ForCausalLM" }, { "description": "275.86Mのmixtralを日本語データセットでpretrainingしたものです sample from transformers import AutoTokenizer, AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained(\"if001/tiny_mixtral_ja\")", @@ -9265,7 +9797,8 @@ "latest_commit": "2024-07-20 05:33:38", "languages": [], "model_or_dataset": "model", - "model_size": 0.276 + "model_size": 0.276, + "model_architectures": "MixtralForCausalLM" }, { "description": "Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k 
nvidia/Nemotron-4-340B-Instructを用いて作成した、約1000件・各10ターンの日本語ロールプレイの対話を収録した合成対話データセットです。 ", @@ -9278,7 +9811,8 @@ "latest_commit": "2024-07-03 13:53:20", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "One more step before getting this model.", @@ -9291,7 +9825,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "studio-ousia/luke-japanese-baseに対して次の変更を加えたモデルです。 ", @@ -9304,7 +9839,8 @@ "latest_commit": "2023-11-28 13:35:07", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForMaskedLM" }, { "description": "Overview of bert-japanese-12M The bert-japanese-12M model is a transformer-based model with BERT architecture, which is designed to be used on Japanese text.", @@ -9317,7 +9853,8 @@ "latest_commit": "2024-08-19 02:56:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "📄 ライセンス / License 修正 CreativeML OpenRAIL-M ライセンス / Modified CreativeML OpenRAIL-M license このモデルのクレジットを入れずに使用する Use the model without crediting the creator このモデルで生成した画像を商用利用する Sell images they generate このモデルを商用の画像生成サービスで利用する Run on services that generate images for money このモデルを使用したマージモデルを共有する Share merges using this model このモデル、またはこのモデルをマージしたモデルを販売する Sell this model or merges using this model このモデルをマージしたモデルに異なる権限を設定する Have different permissions when sharing merges", @@ -9330,7 +9867,8 @@ "latest_commit": "2023-08-25 07:32:49", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "COMET-GPT2 ja Finetuned GPT-2 on ATOMIC ja using a causal language modeling (CLM) objective.", @@ -9343,7 +9881,8 @@ "latest_commit": "2023-02-13 10:26:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "nlp-waseda/roberta-large-japanese Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", @@ -9356,7 +9895,8 @@ "latest_commit": "2022-10-21 14:48:46", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "自動生成Q&A データソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 チームで作成したデータおよび「Common Crawlをもとに生成しています。 ", @@ -9369,7 +9909,8 @@ "latest_commit": "2024-05-19 14:17:58", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Asian Language Treebank (ALT) Project ALT Parallel Corpusのうち、日英対訳部分のみを抽出したデータセットです。", @@ -9382,7 +9923,8 @@ "latest_commit": "2024-03-21 12:40:15", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", @@ -9395,7 +9937,8 @@ "latest_commit": "2023-09-27 01:23:34", "languages": [], "model_or_dataset": "model", - "model_size": 0.487 + "model_size": 0.487, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Model Card for Japanese character-level GPT-2 Large Model description", @@ -9408,7 +9951,8 @@ "latest_commit": "2023-12-27 
12:07:30", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -9421,7 +9965,8 @@ "latest_commit": "2023-11-02 20:04:07", "languages": [], "model_or_dataset": "model", - "model_size": 9.1 + "model_size": 9.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-LLaMA-3-8B-Instruct-v2-GGUF Japanese-LLaMA-3-8B-Instruct-v2-GGUFはJapanese-LLaMA-3-8B-Instruct-v2のGGUF形式です。 ", @@ -9434,7 +9979,8 @@ "latest_commit": "2024-06-21 06:35:03", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "Danbooru2023:", @@ -9447,7 +9993,8 @@ "latest_commit": "2024-05-22 18:43:24", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ShareGPT-Processed The RyokoAI/ShareGPT52K dataset, converted to Markdown and labeled with the language used.", @@ -9460,7 +10007,8 @@ "latest_commit": "2023-05-21 03:50:14", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This dataset is a clarified version of the image, context, and question set included in the Japanese-Heron-Bench for the construction of the Japanese evaluation benchmark suite.", @@ -9473,7 +10021,8 @@ "latest_commit": "2024-07-28 12:33:15", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "cosmopedia-100k のindex 20k ~ 100k を日本語に自動翻訳したデータになります(テキストが長すぎて翻訳エラーになったレコードは除外しています)。", @@ -9486,7 +10035,8 @@ "latest_commit": "2024-03-05 23:30:38", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ELECTRA base Japanese discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -9499,7 +10049,8 @@ "latest_commit": "2022-12-09 00:43:19", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraForPreTraining" }, { "description": "Kokuwa lamettaの改良でマージさせるモデル探しをしていたらKiwiMixという面白そうなモデルを見つけました。 ", @@ -9512,7 +10063,8 @@ "latest_commit": "2023-10-26 04:22:46", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Wikipedia日本語版データセット(izumi-lab/wikipedia-ja-20230720)", @@ -9525,7 +10077,8 @@ "latest_commit": "2023-11-10 22:46:29", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "nlp-waseda/roberta-large-japanese-with-auto-jumanpp Model description", @@ -9538,7 +10091,8 @@ "latest_commit": "2022-10-21 15:55:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", @@ -9551,7 +10105,8 @@ "latest_commit": "2023-03-08 01:47:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { 
"description": "Ninja-v1-RP-expressive GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. ", @@ -9564,7 +10119,8 @@ "latest_commit": "2024-05-24 15:11:43", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "karasu-lora-jp-qa-chat karasu fine tuned model by lora method with the original Q&A dataset.", @@ -9577,7 +10133,8 @@ "latest_commit": "2024-06-03 01:02:33", "languages": [], "model_or_dataset": "model", - "model_size": 1.1 + "model_size": 1.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "bert-base-japanese-v3-bpr-passage-aio 「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRのパッセージエンコーダです。 ", @@ -9590,7 +10147,8 @@ "latest_commit": "2023-07-24 07:14:59", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertModel" }, { "description": "rinna/japanese-data2vec-audio-base Overview This is a Japanese data2vec Audio Base model trained by rinna Co.", @@ -9603,7 +10161,8 @@ "latest_commit": "2024-07-22 08:12:56", "languages": [], "model_or_dataset": "model", - "model_size": 0.0932 + "model_size": 0.0932, + "model_architectures": "Data2VecAudioModel" }, { "description": "mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-4bit", @@ -9616,7 +10175,8 @@ "latest_commit": "2024-07-26 21:37:02", "languages": [], "model_or_dataset": "model", - "model_size": 11.0 + "model_size": 11.0, + "model_architectures": "LlamaForCausalLM" }, { "description": "Google's mt5-base fine-tuned in Japanese to solve error detection and correction task. ", @@ -9629,7 +10189,8 @@ "latest_commit": "2022-05-26 13:50:56", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MT5ForConditionalGeneration" }, { "description": "J-ResearchCorpus Update: 2024/3/16言語処理学会第30回年次大会(NLP2024)を含む、論文 1,343 本のデータを追加 2024/2/25言語処理学会誌「自然言語処理」のうち CC-BY-4.0 で公開されている論文 360 本のデータを追加 概要 CC-BY-* ライセンスで公開されている日本語論文や学会誌等から抜粋した高品質なテキストのデータセットです。", @@ -9642,7 +10203,8 @@ "latest_commit": "2024-03-16 07:55:08", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "japanese-gpt-1b-PII-masking Model Description japanese-gpt-1b-PII-masking は、 日本語事前学習済み1B GPTモデルをベースとして、日本語の文章から個人情報をマスキングするように学習したモデルです。 ", @@ -9655,7 +10217,8 @@ "latest_commit": "2024-05-17 11:42:00", "languages": [], "model_or_dataset": "model", - "model_size": 1.3 + "model_size": 1.3, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Ninja-v1-RP GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. ", @@ -9668,7 +10231,8 @@ "latest_commit": "2024-05-24 15:10:41", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "This is my conversion of NilanE/ParallelFiction-Ja_En-100k into json which can be read by text-generation-webui when training a model.", @@ -9681,7 +10245,8 @@ "latest_commit": "2024-04-02 04:46:10", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "llm-japanese-dataset-vanilla LLM構築用の日本語チャットデータセット izumi-lab/llm-japanese-dataset から,日英翻訳のデータセット等を抜いたものです. 
", @@ -9694,7 +10259,8 @@ "latest_commit": "2024-02-17 16:17:18", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59362 Validation Metrics Loss: 0.13092292845249176 Accuracy: 0.9527127414314258 Precision: 0.9634070704982427 Recall: 0.9842171959602166 AUC: 0.9667289746092403 F1: 0.9737009564152002 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", @@ -9707,7 +10273,8 @@ "latest_commit": "2021-05-18 22:55:03", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "jpn-msa source group: Japanese target group: Malay (macrolanguage) OPUS readme: jpn-msa model: transformer-align source language(s): jpn jpn_Hani jpn_Hira jpn_Kana target language(s): ind", @@ -9720,7 +10287,8 @@ "latest_commit": "2023-08-16 11:59:16", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MarianMTModel" }, { "description": "English - Japanese pairs taken from https://tatoeba.org/en/downloads and then deduplicated.", @@ -9733,7 +10301,8 @@ "latest_commit": "2024-03-06 08:34:02", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Heron BLIP Japanese StableLM", @@ -9746,7 +10315,8 @@ "latest_commit": "2023-09-07 16:59:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VideoBlipForConditionalGeneration" }, { "description": "Model trained on 800,000 Japanese sentences after reducing oshizo/japanese-e5-mistral-7b_slerp to 8 layers.", @@ -9759,7 +10329,8 @@ "latest_commit": "2024-02-03 00:28:28", "languages": [], "model_or_dataset": "model", - "model_size": 1.88 + "model_size": 1.88, + "model_architectures": "MistralForEmbedding" }, { "description": "JSNLI Version 1.1 のデータセットのうち、フィルタリング後の訓練セット (train_w_filtering)", @@ -9772,7 +10343,8 @@ "latest_commit": "2023-10-25 15:22:46", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "cyberagent/calm2-7b-chatの出力を人手でチェック・修正することで作成した日本語Instructionデータセットです。 ", @@ -9785,7 +10357,8 @@ "latest_commit": "2024-04-25 12:49:28", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This is a little bit different version of kunishou/hh-rlhf-49k-ja without ng_translation == 1 examples.", @@ -9798,7 +10371,8 @@ "latest_commit": "2023-05-28 06:08:04", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k 概要 gpt-4o-miniを用いて作成した、約19800件の日本語ロールプレイの対話を収録した合成データセットです。", @@ -9811,7 +10385,8 @@ "latest_commit": "2024-08-16 16:45:26", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model Card for Model ID このモデルはrinna/japanese-gpt-1bをベースモデルとして、 コンテキストからの抽出型QAと、解答を新たなコンテキストでリファインするための学習を行ったモデルです。 ", @@ 
-9824,7 +10399,8 @@ "latest_commit": "2023-01-19 10:14:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "japanese-sexual-moderation-v2は、studio-ousia/luke-japanese-large-liteをファインチューニングしたモデルです。", @@ -9837,7 +10413,8 @@ "latest_commit": "2024-01-03 07:09:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.41400000000000003 + "model_size": 0.41400000000000003, + "model_architectures": "LukeForSequenceClassification" }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.3 Model Details: Built with Meta Llama 3 llama-3-8bの日本語継続学習モデルにChatVectorを適用し、さらにQLoraでファインチューニングしたモデルです。 ", @@ -9850,7 +10427,8 @@ "latest_commit": "2024-05-22 11:02:28", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Mixtral-8x7B-v0.1-japanese Mixtral-8x7B-v0.1-japaneseはMixtral-8x7B-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", @@ -9863,7 +10441,8 @@ "latest_commit": "2024-04-20 09:14:10", "languages": [], "model_or_dataset": "model", - "model_size": 46.9 + "model_size": 46.9, + "model_architectures": "MixtralForCausalLM" }, { "description": "rinna/nekomata-14b-gguf Overview The model is the GGUF version of rinna/nekomata-14b.", @@ -9876,7 +10455,8 @@ "latest_commit": "2024-07-20 08:29:58", "languages": [], "model_or_dataset": "model", - "model_size": 14.2 + "model_size": 14.2, + "model_architectures": null }, { "description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", @@ -9889,7 +10469,8 @@ "latest_commit": "2022-11-01 07:20:52", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "Synthetic-JP-EN-Translation-Dataset-Magpie-Nemotron-4-20k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、20000件の日⇔英翻訳データセットです。 ", @@ -9902,7 +10483,8 @@ "latest_commit": "2024-07-07 11:13:47", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese Mixtral-8x7B-Instruct-v0.1-japaneseはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", @@ -9915,7 +10497,8 @@ "latest_commit": "2024-04-20 09:14:27", "languages": [], "model_or_dataset": "model", - "model_size": 46.9 + "model_size": 46.9, + "model_architectures": "MixtralForCausalLM" }, { "description": "モデルの説明(English explanation is below.", @@ -9928,7 +10511,8 @@ "latest_commit": "2024-06-11 07:39:45", "languages": [], "model_or_dataset": "model", - "model_size": 13.7 + "model_size": 13.7, + "model_architectures": "MixtralForCausalLM" }, { "description": "Japanese Stable Diffusion Pokemon Model Card Stable-Diffusion-Pokemon-ja is a Japanese-specific latent text-to-image diffusion model capable of generating Pokemon images given any text input.", @@ -9941,7 +10525,8 @@ "latest_commit": "2023-05-16 09:23:49", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "mqaデータセットのquery--passageのペアについて重複を削除したデータセットです。 ", @@ -9954,7 +10539,8 @@ "latest_commit": "2024-04-07 15:16:42", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "日本語T5 Prefix Language Model", @@ -9967,7 +10553,8 @@ 
"latest_commit": "2022-11-05 09:34:10", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -9980,7 +10567,8 @@ "latest_commit": "2024-04-16 22:32:15", "languages": [], "model_or_dataset": "model", - "model_size": 6.95 + "model_size": 6.95, + "model_architectures": "LlamaForCausalLM" }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged Mixtral-8x7B-Instruct-v0.1-japanese-alpha-mergedはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルに対して、差分マージを実施したモデルです。", @@ -9993,7 +10581,8 @@ "latest_commit": "2024-04-20 09:14:59", "languages": [], "model_or_dataset": "model", - "model_size": 46.9 + "model_size": 46.9, + "model_architectures": "MixtralForCausalLM" }, { "description": "This is a Japanese sentence-T5 model.", @@ -10006,7 +10595,8 @@ "latest_commit": "2022-07-31 07:54:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5Model" }, { "description": "JaWiki WikipediaのHTML形式のダンプファイルから抽出したテキストデータセットです。 ", @@ -10019,7 +10609,8 @@ "latest_commit": "2024-02-13 15:19:49", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-breadcrumbs GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. ", @@ -10032,7 +10623,8 @@ "latest_commit": "2024-06-01 11:54:18", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "zenz-v2 zenz-v2はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", @@ -10045,7 +10637,8 @@ "latest_commit": "2024-08-04 09:35:48", "languages": [], "model_or_dataset": "model", - "model_size": 0.09509999999999999 + "model_size": 0.09509999999999999, + "model_architectures": null }, { "description": "rinna/nekomata-7b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-7b-instruction.", @@ -10058,7 +10651,8 @@ "latest_commit": "2024-07-20 08:38:34", "languages": [], "model_or_dataset": "model", - "model_size": 7.72 + "model_size": 7.72, + "model_architectures": null }, { "description": "Genji-JP 6B Please check our blog post for more details, samples, evaluations and more: Blogpost Model Description Genji-JP 6B is a model finetuned on our Japanese storytelling dataset based on EleutherAI's GPT-J 6B model.", @@ -10071,7 +10665,8 @@ "latest_commit": "2022-08-09 17:36:02", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTJForCausalLM" }, { "description": "日本語医療固有表現抽出モデル 概要 ソーシャル・コンピューティング研究室さまより公開されているMedTxt-CRを用いて、alabniiさまより公開されているRoBERTaをfine-tuningした固有表現抽出モデルです。 ", @@ -10084,7 +10679,8 @@ "latest_commit": "2023-02-15 13:43:48", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "Heron GIT Japanese ELYZA Llama 2 Fast 7B Model Details Heron GIT Japanese ELYZA Llama 2 Fast 7B is a vision-language model that can converse about input images.", @@ -10097,7 +10693,8 @@ "latest_commit": "2023-09-11 16:56:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": 
"GitLlamaForCausalLM" }, { "description": "About This model is Lightblue's QLoRA finetune of OpenOrca's Open-Orca/OpenOrcaxOpenChat-Preview2-13B model on Japanese fine-tuning datasets.", @@ -10110,7 +10707,8 @@ "latest_commit": "2023-10-02 10:25:36", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "nlp-waseda/gpt2-xl-japanese This is Japanese GPT2 with approximately 1.5B parameters pretrained on Japanese Wikipedia and CC-100", @@ -10123,7 +10721,8 @@ "latest_commit": "2023-06-21 04:29:10", "languages": [], "model_or_dataset": "model", - "model_size": 1.61 + "model_size": 1.61, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Dataset Details Dataset Type:Japanese LLaVA v1.5", @@ -10136,7 +10735,8 @@ "latest_commit": "2024-04-12 09:18:42", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "GPT-Neo 1.3B pre-trained model for Japanese Model Description GPT2/GPT3 like model trained on Japanese.corpus.", @@ -10149,7 +10749,8 @@ "latest_commit": "2021-12-09 17:59:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoForCausalLM" }, { "description": "Model card for model ID", @@ -10162,7 +10763,8 @@ "latest_commit": "2023-05-10 10:01:04", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", @@ -10175,7 +10777,8 @@ "latest_commit": "2023-07-21 14:10:18", "languages": [], "model_or_dataset": "model", - "model_size": 0.336 + "model_size": 0.336, + "model_architectures": "BertForTokenClassification" }, { "description": "t5-base-xlsum-ja", @@ -10188,7 +10791,8 @@ "latest_commit": "2023-11-20 09:25:16", "languages": [], "model_or_dataset": "model", - "model_size": 0.248 + "model_size": 0.248, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -10201,7 +10805,8 @@ "latest_commit": "2023-11-09 18:16:05", "languages": [], "model_or_dataset": "model", - "model_size": 9.68 + "model_size": 9.68, + "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/nekomata-7b-gguf Overview The model is the GGUF version of rinna/nekomata-7b.", @@ -10214,7 +10819,8 @@ "latest_commit": "2024-07-20 08:36:15", "languages": [], "model_or_dataset": "model", - "model_size": 7.72 + "model_size": 7.72, + "model_architectures": null }, { "description": "モデル説明 (model explanation) V1 = MoeDiffusion 1.0 + (HassanBlend 1.5 - VMix03) * 0.2 V2 = MoeDiffusion 0.6 : HassanBlend 1.5 0.2 : VMix03 : 0.2 マージ元のルーツにNAIリークやInsta系モデルが含まれるという噂があるので、NAIリークアンチ・Insta系モデルアンチには非推奨 理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。 ", @@ -10227,7 +10833,8 @@ "latest_commit": "2023-01-21 02:05:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", @@ -10240,7 +10847,8 @@ "latest_commit": "2022-11-01 07:18:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, 
{ "description": "yacis-electra-small-cyberbullying", @@ -10253,7 +10861,8 @@ "latest_commit": "2022-01-16 13:51:28", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraForSequenceClassification" }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", @@ -10266,7 +10875,8 @@ "latest_commit": "2023-02-08 00:15:23", "languages": [], "model_or_dataset": "model", - "model_size": 0.318 + "model_size": 0.318, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "GPT2 Japanese base model version 2 Prerequisites transformers==4.19.2 Model architecture This model uses GPT2 base setttings except vocabulary size.", @@ -10279,7 +10889,8 @@ "latest_commit": "2022-06-25 15:36:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "英語+日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on English and Japanese balanced corpus. ", @@ -10292,7 +10903,8 @@ "latest_commit": "2022-08-27 09:07:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-base Fine-tuned openai/whisper-base on Japanese using Common Voice, JVS and JSUT.", @@ -10305,7 +10917,8 @@ "latest_commit": "2023-06-08 00:17:50", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Convert from: drewschaub/whisper-large-v3-japanese-4k-steps Whisper large-v3 model for CTranslate2 This repository contains the conversion of drewschaub/whisper-large-v3-japanese-4k-steps to the CTranslate2 model format.", @@ -10318,7 +10931,8 @@ "latest_commit": "2024-02-22 01:11:59", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "BERT for Sentiment Analysis of Japanese Twitter", @@ -10331,7 +10945,8 @@ "latest_commit": "2024-08-09 12:03:25", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "This model is a merged version of qwen-14b-vntl and Qwen1.5-14B-Chat , aiming for the translation of Japanese context into Chinese.", @@ -10344,7 +10959,8 @@ "latest_commit": "2024-03-03 03:17:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Qwen2ForCausalLM" }, { "description": "Deepreneur-blue-lizard Model Description Deepreneur-blue-lizardは、MetaのLlama-2-7bに対して、Wikipediaや書籍等の日本語の学習データを用いて追加事前学習と独自データによるファインチューニングを実施したモデルです。", @@ -10357,7 +10973,8 @@ "latest_commit": "2024-02-12 14:43:33", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -10370,7 +10987,8 @@ "latest_commit": "2023-11-09 18:16:16", "languages": [], "model_or_dataset": "model", - "model_size": 9.68 + "model_size": 9.68, + "model_architectures": "LlamaForCausalLM" }, { "description": "日本語でtrainingしたllama2 model size: 
417.12M trainingは以下のscript参照https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_small\")", @@ -10383,7 +11001,8 @@ "latest_commit": "2023-10-14 13:50:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "luke-large-defamation-detection-japanese 日本語誹謗中傷検出器", @@ -10396,7 +11015,8 @@ "latest_commit": "2023-02-07 15:49:33", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LukeForSequenceClassification" }, { "description": "deberta-large-japanese-wikipedia Model Description", @@ -10409,7 +11029,8 @@ "latest_commit": "2023-02-27 10:15:35", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "nlp-waseda/gpt2-small-japanese This model is Japanese GPT-2 pretrained on Japanese Wikipedia and CC-100.", @@ -10422,7 +11043,8 @@ "latest_commit": "2022-03-30 04:28:17", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "alpaca_jp_python alpaca_jp_pythonは、 Stanford Alpacaの手法 mistralai/Mixtral-8x22B-Instruct-v0.1 で作った合成データ(Synthetic data)です。", @@ -10435,7 +11057,8 @@ "latest_commit": "2024-05-20 01:44:32", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、MARC-ja(positive or negativeの二値分類)に用いれるようにしたものです。 ", @@ -10448,7 +11071,8 @@ "latest_commit": "2023-07-21 14:10:48", "languages": [], "model_or_dataset": "model", - "model_size": 0.279 + "model_size": 0.279, + "model_architectures": "LukeForSequenceClassification" }, { "description": "Model card for model ID", @@ -10461,7 +11085,8 @@ "latest_commit": "2023-05-10 10:00:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "HuggingFaceFW/fineweb-edu-classifierを再現するために、日本語データでtohoku-nlp/bert-base-japanese-v3を学習したモデルです。 ", @@ -10474,7 +11099,8 @@ "latest_commit": "2024-06-14 13:28:08", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "rinna/nekomata-14b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-14b-instruction.", @@ -10487,7 +11113,8 @@ "latest_commit": "2024-07-20 08:34:05", "languages": [], "model_or_dataset": "model", - "model_size": 14.2 + "model_size": 14.2, + "model_architectures": null }, { "description": "NLLB-200 1.3B fine-tuned on Ascendance of a Bookworm", @@ -10500,7 +11127,8 @@ "latest_commit": "2024-04-14 18:45:22", "languages": [], "model_or_dataset": "model", - "model_size": 1.37 + "model_size": 1.37, + "model_architectures": "M2M100ForConditionalGeneration" }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/30876467ad5a8a81821f", @@ -10513,7 +11141,8 @@ "latest_commit": "2022-02-21 13:39:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { 
"description": "Kendamarron/jimba-wiki-instruction-calm3 grapevine-AI/CALM3-22B-Chat-GGUFのQ4_K_Mを��った合成instructionデータセットです。 ", @@ -10526,7 +11155,8 @@ "latest_commit": "2024-07-20 12:57:05", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This pre-trained model is work in progress!", @@ -10539,7 +11169,8 @@ "latest_commit": "2021-11-10 15:28:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTJForCausalLM" }, { "description": "deberta-base-japanese-wikipedia Model Description", @@ -10552,7 +11183,8 @@ "latest_commit": "2023-01-27 17:51:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "deberta-large-japanese-unidic-ud-head Model Description", @@ -10565,7 +11197,8 @@ "latest_commit": "2023-11-05 17:51:08", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "Donut (base-sized model, fine-tuned on visual novel like synthetic dataset ) ビジュアルノベル風画像の合成データセットでnaver-clova-ix/donut-baseを訓練したモデルです。 ", @@ -10578,7 +11211,8 @@ "latest_commit": "2023-05-03 09:25:19", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "VisionEncoderDecoderModel" }, { "description": "JAINU-Model (T5 fine-tuned model) JAINU is a Japanese - Ainu language machine translation model. ", @@ -10591,7 +11225,8 @@ "latest_commit": "2022-05-22 05:51:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Orion-14B 🌐English | 🇨", @@ -10604,7 +11239,8 @@ "latest_commit": "2024-03-26 09:55:37", "languages": [], "model_or_dataset": "model", - "model_size": 2.69 + "model_size": 2.69, + "model_architectures": "OrionForCausalLM" }, { "description": "Chat-Vector-LLaVA-v1.5-7b-JA Model Card Model detail Model type: Chat-Vector-LLaVA-v1.5-7b-JA is a vision-language model that can converse about input images in Japanese.", @@ -10617,7 +11253,8 @@ "latest_commit": "2024-05-06 11:33:32", "languages": [], "model_or_dataset": "model", - "model_size": 7.06 + "model_size": 7.06, + "model_architectures": "LlavaLlamaForCausalLM" }, { "description": "[Under Construction]", @@ -10630,7 +11267,8 @@ "latest_commit": "2023-12-24 18:52:04", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "This is for (private) DEMO only.", @@ -10643,7 +11281,8 @@ "latest_commit": "2023-10-19 01:31:17", "languages": [], "model_or_dataset": "model", - "model_size": 0.316 + "model_size": 0.316, + "model_architectures": "HubertForSequenceClassification" }, { "description": "Japanese GPT2 Lyric Model Model description", @@ -10656,7 +11295,8 @@ "latest_commit": "2023-10-21 14:53:57", "languages": [], "model_or_dataset": "model", - "model_size": 0.361 + "model_size": 0.361, + "model_architectures": "GPT2LMHeadModel" }, { "description": "COMET-T5 ja Finetuned T5 on ATOMIC ja using a text-to-text language modeling objective.", @@ -10669,7 +11309,8 @@ "latest_commit": "2023-02-08 09:26:55", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, 
{ "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -10682,7 +11323,8 @@ "latest_commit": "2023-10-28 15:16:25", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -10695,7 +11337,8 @@ "latest_commit": "2023-10-28 20:24:40", "languages": [], "model_or_dataset": "model", - "model_size": 1.2 + "model_size": 1.2, + "model_architectures": "MistralForCausalLM" }, { "description": "Japanese DialoGPT trained with Aozora (ja) 青��文庫のセリフで学習した日本語のDialoGPT Smallです(en) Japanese DialoGPT Small trained on Aozora Bunko.", @@ -10708,7 +11351,8 @@ "latest_commit": "2023-02-09 00:55:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Summary This is a text classifier for assigning a JLPT level.", @@ -10721,7 +11365,8 @@ "latest_commit": "2024-07-10 13:41:08", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, jawiki-20200831)", @@ -10734,7 +11379,8 @@ "latest_commit": "2021-09-23 15:45:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "yuyuyui-chatbot", @@ -10747,7 +11393,8 @@ "latest_commit": "2021-05-23 13:27:10", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Fine-tuned Japanese Wav2Vec2 model for speech recognition using XLSR-53 large Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using Common Voice, JVS and JSUT.", @@ -10760,7 +11407,8 @@ "latest_commit": "2023-05-12 02:15:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Aerner LM-v1 事前学習から全部日本語で学習させたモデルです。 ", @@ -10773,7 +11421,8 @@ "latest_commit": "2023-05-25 13:35:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Heron GIT Japanese StableLM", @@ -10786,7 +11435,8 @@ "latest_commit": "2023-09-11 16:55:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GitJapaneseStableLMAlphaForCausalLM" }, { "description": "llm-jp-13b-instruct-lora-jaster-v1.0", @@ -10799,7 +11449,8 @@ "latest_commit": "2023-10-20 08:41:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -10812,7 +11463,8 @@ "latest_commit": "2023-12-07 18:54:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "youhansun/Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF", @@ -10825,7 +11477,8 @@ "latest_commit": "2024-06-02 04:52:45", "languages": [], "model_or_dataset": "model", - "model_size": 70.6 + "model_size": 70.6, + "model_architectures": null }, { "description": "SpiralAI Spiral-RetNet-3b-base We have conducted pre-training from scratch on the RetNet 
(https://arxiv.org/abs/2307.08621)", @@ -10838,7 +11491,8 @@ "latest_commit": "2024-05-01 04:54:26", "languages": [], "model_or_dataset": "model", - "model_size": 2.86 + "model_size": 2.86, + "model_architectures": "RetNetForCausalLM" }, { "description": "モデルの概略 東方Projectのキャラクターである霧雨魔理沙とおしゃべりできるモデルです。 ", @@ -10851,7 +11505,8 @@ "latest_commit": "2023-09-06 18:42:50", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTJForCausalLM" }, { "description": "データセットについて オープンソースLLMの出力を人手でチェック・修正したinstructionにSwallow-MXでoutputを生成したデータセットです。 ", @@ -10864,7 +11519,8 @@ "latest_commit": "2024-04-01 04:30:44", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for Ninja-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 Ninja has the following changes compared to Mistral-7B-v0.1.", @@ -10877,7 +11533,8 @@ "latest_commit": "2024-05-04 04:07:09", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -10890,7 +11547,8 @@ "latest_commit": "2024-03-07 06:51:42", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", @@ -10903,7 +11561,8 @@ "latest_commit": "2021-09-23 18:29:58", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": ", 2023) was trained on.", @@ -10916,7 +11575,8 @@ "latest_commit": "2023-07-08 13:39:45", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "NLLB 1.3B fine-tuned on Japanese to English Light Novel translation This model was fine-tuned on light and web novel for Japanese to English translation.", @@ -10929,7 +11589,8 @@ "latest_commit": "2023-06-04 13:38:43", "languages": [], "model_or_dataset": "model", - "model_size": 1.37 + "model_size": 1.37, + "model_architectures": "M2M100ForConditionalGeneration" }, { "description": "このモデルはdeberta-v2-large-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", @@ -10942,7 +11603,8 @@ "latest_commit": "2023-07-21 14:10:02", "languages": [], "model_or_dataset": "model", - "model_size": 0.339 + "model_size": 0.339, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "MPT-7B-base このモデルは、MosaicMLのllm-foundryリポジトリを使用してmosaicml/mpt-7bをファインチューニングしたモデルです。 ", @@ -10955,7 +11617,8 @@ "latest_commit": "2023-06-26 01:08:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MPTForCausalLM" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -10968,7 +11631,8 @@ "latest_commit": "2023-12-07 18:54:33", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Model Card for Model ID Fine tunned ASR model from distil-whisper/distil-large-v2.", @@ -10981,7 +11645,8 @@ 
"latest_commit": "2024-08-12 12:39:52", "languages": [], "model_or_dataset": "model", - "model_size": 0.756 + "model_size": 0.756, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "NikolayKozloff/h2o-Llama-3-8B-Japanese-Instruct-Q8_0-GGUF", @@ -10994,7 +11659,8 @@ "latest_commit": "2024-06-24 13:28:33", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": null }, { "description": "BERT for Sentiment Analysis of Japanese Twitter", @@ -11007,7 +11673,8 @@ "latest_commit": "2024-08-09 12:10:35", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -11020,7 +11687,8 @@ "latest_commit": "2023-11-03 12:54:41", "languages": [], "model_or_dataset": "model", - "model_size": 1.13 + "model_size": 1.13, + "model_architectures": "LlamaForCausalLM" }, { "description": "This model is traned with llm-japanese-dataset dataset.", @@ -11033,7 +11701,8 @@ "latest_commit": "2023-08-17 16:51:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-small Fine-tuned openai/whisper-small on Japanese using Common Voice, JVS and JSUT.", @@ -11046,7 +11715,8 @@ "latest_commit": "2023-05-19 10:50:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "WhisperForConditionalGeneration" }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", @@ -11059,7 +11729,8 @@ "latest_commit": "2023-03-27 08:05:06", "languages": [], "model_or_dataset": "model", - "model_size": 0.112 + "model_size": 0.112, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "japanese-soseki-gpt2-1b", @@ -11072,7 +11743,8 @@ "latest_commit": "2023-03-27 12:09:04", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Model Overview: 日本語で質問すると、日本語で回答を得られます。", @@ -11085,7 +11757,8 @@ "latest_commit": "2024-08-21 15:31:43", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "データセット概要 手動で作成したDatabricksに関する質問と回答ペアの日本語データセットです。 ", @@ -11098,7 +11771,8 @@ "latest_commit": "2023-05-15 14:55:06", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "friendly_JA-Model (T5 fine-tuned model) MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon Examples input output 最適化を応用した機械翻訳モデルは高精度だ オプティマイゼーションを応用したマシントランスレーションモデルは高いアキュラシーだ 彼は架空の世界に住んでいる 彼はイマジナリー世界に住んでいる 新型コロナウイルスに感染してしまった コロナウイルスにかかってしまった 深層学習は難しい ディープラーニングはむずかしい 新たな概念を紹介する 新しいコンセプトを紹介する 津波の警報が流れた ツナミのアラートが流れた 南海トラフの災害は震源地による 南海トラフのディザスターはエピ", @@ -11111,7 +11785,8 @@ "latest_commit": "2022-05-22 14:57:21", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "yacis-electra-small", @@ 
-11124,7 +11799,8 @@ "latest_commit": "2022-01-13 01:43:17", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -11137,7 +11813,8 @@ "latest_commit": "2023-11-06 16:00:08", "languages": [], "model_or_dataset": "model", - "model_size": 9.1 + "model_size": 9.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "This is a BERT Base model for emotion analysis in Japanese additionally fine-tuned for emotion detection and classification.", @@ -11150,7 +11827,8 @@ "latest_commit": "2024-06-17 01:44:16", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForSequenceClassification" }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したモデルです。 ", @@ -11163,7 +11841,8 @@ "latest_commit": "2024-05-08 12:44:05", "languages": [], "model_or_dataset": "model", - "model_size": 0.464 + "model_size": 0.464, + "model_architectures": "Qwen2ForCausalLM" }, { "description": "Wav2Vec2-XLS-R-300M-Japanese-Hiragana Fine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using the Common Voice and JSUT.", @@ -11176,7 +11855,8 @@ "latest_commit": "2022-09-16 11:01:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。 ", @@ -11189,7 +11869,8 @@ "latest_commit": "2023-06-03 03:04:43", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "BERT base Japanese model This repository contains a BERT base model trained on Japanese Wikipedia dataset.", @@ -11202,7 +11883,8 @@ "latest_commit": "2021-09-23 15:46:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForPreTraining" }, { "description": "ELECTRA small Japanese finance generator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -11215,7 +11897,8 @@ "latest_commit": "2023-10-21 13:21:24", "languages": [], "model_or_dataset": "model", - "model_size": 0.00491 + "model_size": 0.00491, + "model_architectures": "ElectraForMaskedLM" }, { "description": "シサム語による説明 アイヌ語と日本語の双方向機械翻訳モデルです。 ", @@ -11228,7 +11911,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "transformer-lm-japanese-0.1b", @@ -11241,7 +11925,8 @@ "latest_commit": "2024-06-03 06:17:19", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "TransformerLMForCausalLM" }, { "description": "(English part follows Japanese one.", @@ -11254,7 +11939,8 @@ "latest_commit": "2023-11-06 05:37:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "zenz-v1 Checkpoints zenz-v1 is a language model specialized for kana-kanji conversion tasks based on the GPT-2 architecture.", @@ -11267,7 +11953,8 @@ "latest_commit": "2024-06-28 14:53:43", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "gpt2-small-japanese-upos Model Description", @@ -11280,7 +11967,8 
@@ "latest_commit": "2024-07-27 07:49:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2ForTokenClassification" }, { "description": "llm-jp-1.3b-v1.0-aya llm-jp's llm-jp-1.3b-v1.0 model fine-tuned on the Japanese examples from Cohere's aya dataset Model llm-jp-eval AVG kcoopermiller/llm-jp-1.3b-v1.0-aya 0.0698 llm-jp/llm-jp-1.3b-v1.0 0.047 How to use import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"kcoopermiller/llm-jp-1.3b-v1.0-aya\")", @@ -11293,7 +11981,8 @@ "latest_commit": "2024-02-29 23:48:58", "languages": [], "model_or_dataset": "model", - "model_size": 1.32 + "model_size": 1.32, + "model_architectures": "GPT2LMHeadModel" }, { "description": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-base-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", @@ -11306,7 +11995,8 @@ "latest_commit": "2024-01-17 04:46:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-instruct-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", @@ -11319,7 +12009,8 @@ "latest_commit": "2024-01-16 12:27:54", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "JPNsensei-V2 Model Application", @@ -11332,7 +12023,8 @@ "latest_commit": "2024-03-11 10:19:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "こちらでアップロードできないので、civitaiにて先に公開しています。 ", @@ -11345,7 +12037,8 @@ "latest_commit": "2023-10-01 18:12:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "whisper-large-v2-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-jp to the CTranslate2 model format.", @@ -11358,7 +12051,8 @@ "latest_commit": "2023-07-07 18:09:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "nagisa_bert A BERT model for nagisa.", @@ -11371,7 +12065,8 @@ "latest_commit": "2023-09-15 01:28:14", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForPreTraining" }, { "description": "bert-base-japanese-unidic-luw-upos Model Description", @@ -11384,7 +12079,8 @@ "latest_commit": "2023-11-05 18:44:10", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "ELECTRA base Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -11397,7 +12093,8 @@ "latest_commit": "2023-10-21 13:21:16", "languages": [], "model_or_dataset": "model", - "model_size": 0.035500000000000004 + 
"model_size": 0.035500000000000004, + "model_architectures": "ElectraForMaskedLM" }, { "description": "nlp-waseda/gpt2-small-japanese-wikipedia This model is Japanese GPT-2 pretrained on Japanese Wikipedia.", @@ -11410,7 +12107,8 @@ "latest_commit": "2021-12-28 15:31:38", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "This model is traned with guanaco dataset.", @@ -11423,7 +12121,8 @@ "latest_commit": "2023-08-10 13:00:34", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "日本語でtrainingしたllama2をinstruction用のデータセットでsftしたものになります base: https://huggingface.co./if001/llama2_ja_small trainingは以下のscript参照 https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_small\")", @@ -11436,7 +12135,8 @@ "latest_commit": "2023-10-23 19:39:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -11449,7 +12149,8 @@ "latest_commit": "2023-12-07 18:54:26", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf ELYZA-japanese-Llama-2-13b-fast-instructの GGUF 変換モデルです。", @@ -11462,7 +12163,8 @@ "latest_commit": "2024-01-25 06:30:57", "languages": [], "model_or_dataset": "model", - "model_size": 13.1 + "model_size": 13.1, + "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1の量子化済みGGUF版です。", @@ -11475,7 +12177,8 @@ "latest_commit": "2024-03-07 13:47:58", "languages": [], "model_or_dataset": "model", - "model_size": 11.2 + "model_size": 11.2, + "model_architectures": null }, { "description": "Introduction Who am I: Qishen Ha", @@ -11488,7 +12191,8 @@ "latest_commit": "2024-05-02 03:36:10", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "This repository contains a model trained (QLoRA-SFT)", @@ -11501,7 +12205,8 @@ "latest_commit": "2024-05-31 11:28:45", "languages": [], "model_or_dataset": "model", - "model_size": 3.82 + "model_size": 3.82, + "model_architectures": null }, { "description": "gpt2-large-japanese-upos Model Description", @@ -11514,7 +12219,8 @@ "latest_commit": "2024-07-27 07:49:47", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2ForTokenClassification" }, { "description": "Introduction Who am I: Qishen Ha", @@ -11527,7 +12233,8 @@ "latest_commit": "2024-06-24 08:57:49", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "はじめに GoogleのGemma-2Bを日本語で使えるように継続事前学習を施した、商用利用可能なベースモデルです。 ", @@ -11540,7 +12247,8 @@ "latest_commit": "2024-03-17 15:05:20", "languages": [], "model_or_dataset": "model", - "model_size": 2.51 + "model_size": 2.51, + "model_architectures": "GemmaForCausalLM" }, { "description": "SambaLingo-Japanese-Chat 
SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -11553,7 +12261,8 @@ "latest_commit": "2024-03-07 07:00:51", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -11566,7 +12275,8 @@ "latest_commit": "2024-03-07 06:57:49", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -11579,7 +12289,8 @@ "latest_commit": "2023-10-28 15:44:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -11592,7 +12303,8 @@ "latest_commit": "2023-10-28 15:37:11", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese instruction.", @@ -11605,7 +12317,8 @@ "latest_commit": "2023-09-17 04:24:55", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "TakoMT", @@ -11618,7 +12331,8 @@ "latest_commit": "2023-08-15 17:32:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MarianMTModel" }, { "description": "日本語ByT5事前学習済みモデル This is a ByT5 (a tokenizer-free extension of the Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", @@ -11631,7 +12345,8 @@ "latest_commit": "2021-09-23 18:29:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MT5ForConditionGeneration" }, { "description": "It covers multiple fields such as tourism, medical treatment, daily life, news, etc. 
", @@ -11644,7 +12359,8 @@ "latest_commit": "2024-08-05 03:14:27", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "固有表現ラベルはllm-book/ner-wikipedia-datasetと同様のものを採用しており、全部で8種類 (人名、法人名、地名、製品名、政治的組織名、施設名、その他の組織名、イベント名)あります。 ", @@ -11657,7 +12373,8 @@ "latest_commit": "2023-12-12 11:22:26", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "deberta-base-japanese-juman-ud-goeswith Model Description", @@ -11670,7 +12387,8 @@ "latest_commit": "2023-05-12 01:16:53", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", @@ -11683,7 +12401,8 @@ "latest_commit": "2023-05-26 15:05:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMultipleChoice" }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -11696,7 +12415,8 @@ "latest_commit": "2023-10-28 15:23:16", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -11709,7 +12429,8 @@ "latest_commit": "2023-11-09 18:16:33", "languages": [], "model_or_dataset": "model", - "model_size": 1.2 + "model_size": 1.2, + "model_architectures": "MistralForCausalLM" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -11722,7 +12443,8 @@ "latest_commit": "2023-12-07 18:58:23", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", @@ -11735,7 +12457,8 @@ "latest_commit": "2023-12-07 18:54:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -11748,7 +12471,8 @@ "latest_commit": "2024-03-07 06:55:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", @@ -11761,7 +12485,8 @@ "latest_commit": "2023-10-28 15:30:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "タイトルから記事本文を生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", @@ -11774,7 +12499,8 @@ "latest_commit": "2024-04-17 11:39:12", "languages": [], "model_or_dataset": "model", - "model_size": 0.223 + "model_size": 0.223, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "Japanese-Heron-Bench Dataset Description Japanese-Heron-Bench is a benchmark for evaluating Japanese VLMs (Vision-Language Models).", @@ -11787,7 +12513,8 @@ "latest_commit": "2024-04-12 08:59:36", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "deberta-large-japanese-wikipedia-luw-upos Model Description", @@ -11800,7 +12527,8 @@ 
"latest_commit": "2024-08-20 17:54:58", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "roberta-base-japanese-aozora Model Description", @@ -11813,7 +12541,8 @@ "latest_commit": "2022-10-15 14:20:11", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForMaskedLM" }, { "description": "bert-base-japanese-luw-upos Model Description", @@ -11826,7 +12555,8 @@ "latest_commit": "2022-09-18 19:43:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "deberta-large-japanese-upos Model Description", @@ -11839,7 +12569,8 @@ "latest_commit": "2024-07-26 16:00:59", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "japanese-gpt2-medium-unidic This is a medium-sized Japanese GPT-2 model using BERT-like tokenizer.", @@ -11852,7 +12583,8 @@ "latest_commit": "2023-03-22 06:22:32", "languages": [], "model_or_dataset": "model", - "model_size": 0.362 + "model_size": 0.362, + "model_architectures": "GPT2LMHeadModel" }, { "description": "Model Card for Model ID", @@ -11865,7 +12597,8 @@ "latest_commit": "2023-10-15 10:56:23", "languages": [], "model_or_dataset": "model", - "model_size": 0.08040000000000001 + "model_size": 0.08040000000000001, + "model_architectures": "MT5ForConditionalGeneration" }, { "description": "Model card for model ID", @@ -11878,7 +12611,8 @@ "latest_commit": "2023-05-10 10:00:45", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "T5ForConditionalGeneration" }, { "description": "このモデルはdeberta-v2-tiny-japaneseをファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", @@ -11891,7 +12625,8 @@ "latest_commit": "2023-05-26 15:01:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMultipleChoice" }, { "description": "Only for Japanese Please use AutoTokenizer and AutoModelForCausalLM And must use Unifine format to input and output. 
", @@ -11904,7 +12639,8 @@ "latest_commit": "2023-11-17 16:44:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "COMET-GPT2 ja v2 Finetuned GPT-2 xl on the large version of ATOMIC ja using a causal language modeling (CLM) objective.", @@ -11917,7 +12653,8 @@ "latest_commit": "2024-03-11 04:16:02", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "お知らせ より回答が適切になるように学習させたモデル、https://huggingface.co./hotchpotch/youri-7b-stf-qa-context-jaqket-jsquad-gptq もあります。 ", @@ -11930,7 +12667,8 @@ "latest_commit": "2024-02-25 06:40:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-novel-gpt-j-6b https://huggingface.co./AIBunCho/japanese-novel-gpt-j-6b\" に合計216個の評価の高いなろう小説、青空文庫、ウィキペディアなどの文章をQLoRA学習させた小説生成用モデルです。 ", @@ -11943,7 +12681,8 @@ "latest_commit": "2024-03-16 15:00:14", "languages": [], "model_or_dataset": "model", - "model_size": 6.05 + "model_size": 6.05, + "model_architectures": "GPTJForCausalLM" }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したTokara-0.5B-v0.1にchat vectorで対話能力を加えたモデルになります。 ", @@ -11956,7 +12695,8 @@ "latest_commit": "2024-05-08 13:30:12", "languages": [], "model_or_dataset": "model", - "model_size": 0.464 + "model_size": 0.464, + "model_architectures": "Qwen2ForCausalLM" }, { "description": "Overview This model is based on rinna's [rinna/llama-3-youko-8b], fine-tuned using LoRA on a small number of parallel sentences from English to Japanese.", @@ -11969,7 +12709,8 @@ "latest_commit": "2024-05-21 14:54:46", "languages": [], "model_or_dataset": "model", - "model_size": 8.03 + "model_size": 8.03, + "model_architectures": "LlamaForCausalLM" }, { "description": "Sarashina2-7B Instruct sarashina2-7Bを会話できるようにフルファインチューニングしたものです。", @@ -11982,7 +12723,8 @@ "latest_commit": "2024-06-12 03:00:35", "languages": [], "model_or_dataset": "model", - "model_size": 7.32 + "model_size": 7.32, + "model_architectures": "LlamaForCausalLM" }, { "description": "ChatGLM3-6B是一个中英双语大模型,本项目为ChatGLM3-6B加入日文能力。", @@ -11995,7 +12737,8 @@ "latest_commit": "2024-06-09 15:37:04", "languages": [], "model_or_dataset": "model", - "model_size": 6.35 + "model_size": 6.35, + "model_architectures": "ChatGLMForConditionalGeneration" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", @@ -12008,7 +12751,8 @@ "latest_commit": "2024-03-07 06:53:26", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "In-progess long-context Japanese-English translation model based on tinyllama.", @@ -12021,7 +12765,8 @@ "latest_commit": "2024-03-28 16:36:13", "languages": [], "model_or_dataset": "model", - "model_size": 1.1 + "model_size": 1.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "This model is a voice clone of myself created specifically for Style Bert VITS2.", @@ -12034,7 +12779,8 @@ "latest_commit": "2024-03-04 10:43:27", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "お知らせ より回答が適切になるように学習させたモデル、https://huggingface.co./hotchpotch/youri-7b-stf-qa-context-jaqket-jsquad-gptq もあります。 ", @@ -12047,7 
+12793,8 @@ "latest_commit": "2024-02-25 06:40:30", "languages": [], "model_or_dataset": "model", - "model_size": 1.13 + "model_size": 1.13, + "model_architectures": "LlamaForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", @@ -12060,7 +12807,8 @@ "latest_commit": "2023-11-09 18:16:12", "languages": [], "model_or_dataset": "model", - "model_size": 1.13 + "model_size": 1.13, + "model_architectures": "LlamaForCausalLM" }, { "description": "◆REV-Mix \"レボリューション\"なモデルです。 ", @@ -12073,7 +12821,8 @@ "latest_commit": "2023-08-26 16:19:02", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "deberta-base-japanese-upos Model Description", @@ -12086,7 +12835,8 @@ "latest_commit": "2024-07-26 15:59:24", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_lg Japanese pipeline optimized for CPU.", @@ -12099,7 +12849,8 @@ "latest_commit": "2023-10-10 06:46:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Google's mt5-base fine-tuned in Japanese to summarize patent claims in a limited Pharmaceutical domain. ", @@ -12112,7 +12863,8 @@ "latest_commit": "2022-05-19 06:50:32", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MT5ForConditionalGeneration" }, { "description": "The JaNLI (Japanese Adversarial NLI) dataset, inspired by the English HANS dataset, is designed to necessitate an understanding of Japanese linguistic phenomena and to illuminate the vulnerabilities of models.", @@ -12125,7 +12877,8 @@ "latest_commit": "2023-04-11 13:40:37", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "cosmopedia-japanese-20kのデータに、kunishou様から20k-100kをご提供いただけることになり100kまで拡大しました。 ", @@ -12138,7 +12891,8 @@ "latest_commit": "2024-03-03 16:20:35", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "gpt2-small-japanese-ud-causal Model Description", @@ -12151,7 +12905,8 @@ "latest_commit": "2024-08-25 17:54:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2ForTokenClassification" }, { "description": "bert-large-japanese-unidic-luw-upos Model Description", @@ -12164,7 +12919,8 @@ "latest_commit": "2023-11-05 18:44:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "ELECTRA small Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -12177,7 +12933,8 @@ "latest_commit": "2023-10-21 13:21:28", "languages": [], "model_or_dataset": "model", - "model_size": 0.013800000000000002 + "model_size": 0.013800000000000002, + "model_architectures": "ElectraForMaskedLM" }, { "description": "ELECTRA small Japanese discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -12190,7 +12947,8 @@ "latest_commit": "2022-12-09 00:41:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": 
"ElectraForPreTraining" }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、JNLI(文章の関係性判別)に用いれるようにしたものです。 ", @@ -12203,7 +12961,8 @@ "latest_commit": "2023-07-21 14:09:44", "languages": [], "model_or_dataset": "model", - "model_size": 0.279 + "model_size": 0.279, + "model_architectures": "LukeForSequenceClassification" }, { "description": "MPT-7B-inst このモデルは、MosaicMLのllm-foundryリポジトリを使用してmosaicml/mpt-7b-instructをファインチューニングしたモデルです。 ", @@ -12216,7 +12975,8 @@ "latest_commit": "2023-06-26 01:09:06", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MPTForCausalLM" }, { "description": "OpenCALM-LARGE Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc. ", @@ -12229,7 +12989,8 @@ "latest_commit": "2023-07-02 14:30:47", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "This model is traned with guanaco dataset.", @@ -12242,7 +13003,8 @@ "latest_commit": "2023-08-10 13:11:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "日本語でtrainingしたllama2 model size: 130.78M trainingは以下のscript参照 https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_ss\")", @@ -12255,7 +13017,8 @@ "latest_commit": "2023-10-16 13:49:48", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Model description Cyberagent様のcyberagent/calm2-7b-chatを追加学習した、作家さん用アシスタントAIのアルファ版です。 ", @@ -12268,7 +13031,8 @@ "latest_commit": "2023-12-09 15:19:57", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "Orion-14B 🌐English | 🇨", @@ -12281,7 +13045,8 @@ "latest_commit": "2024-01-25 21:01:29", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "OrionForCausalLM" }, { "description": "The English document is here モデル概要 Watashiha-Llama-2-13B-Ogiri-sftをAWSのinf2インスタンスで動作するようにコンパイルされたモデルです。 ", @@ -12294,7 +13059,8 @@ "latest_commit": "2024-02-02 06:39:21", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "モデル概要 Watashiha-Llama-2-13B-Ogiri-sftをLLaVAで学習し、画像に対応した大喜利言語モデルです。", @@ -12307,7 +13073,8 @@ "latest_commit": "2024-03-04 05:22:43", "languages": [], "model_or_dataset": "model", - "model_size": 13.3 + "model_size": 13.3, + "model_architectures": "LlavaLlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1の量子化済みGGUF版です。", @@ -12320,7 +13087,8 @@ "latest_commit": "2024-03-03 13:39:01", "languages": [], "model_or_dataset": "model", - "model_size": 21.5 + "model_size": 21.5, + "model_architectures": null }, { "description": "TaCOMET_ja", @@ -12333,7 +13101,8 @@ "latest_commit": "2024-06-05 09:41:05", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { 
"description": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1の量子化済みGGUF版です。", @@ -12346,7 +13115,8 @@ "latest_commit": "2024-03-07 13:23:01", "languages": [], "model_or_dataset": "model", - "model_size": 11.1 + "model_size": 11.1, + "model_architectures": null }, { "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", @@ -12359,7 +13129,8 @@ "latest_commit": "2023-05-26 15:02:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMultipleChoice" }, { "description": "Bloom model trained on Japanese corpus.", @@ -12372,7 +13143,8 @@ "latest_commit": "2023-04-24 05:12:10", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BloomForCausalLM" }, { "description": "deberta-large-japanese-wikipedia-ud-goeswith Model Description", @@ -12385,7 +13157,8 @@ "latest_commit": "2023-05-12 01:29:13", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "ku-nlp/roberta-large-japanese-char-wwm Model description This is a Japanese RoBERTa large model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", @@ -12398,7 +13171,8 @@ "latest_commit": "2023-03-19 01:58:12", "languages": [], "model_or_dataset": "model", - "model_size": 0.323 + "model_size": 0.323, + "model_architectures": "RobertaForMaskedLM" }, { "description": "deberta-large-japanese-aozora Model Description", @@ -12411,7 +13185,8 @@ "latest_commit": "2023-01-14 00:27:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "bert-japanese-ner このモデルは日本語の固有表現抽出タスクを目的として、京都大学 黒橋・褚・村脇研究室が公開しているBERT日本語Pretrainedモデルをベースにストックマーク株式会社が公開しているner-wikipedia-datasetでファインチューニングしたものです。 ", @@ -12424,7 +13199,8 @@ "latest_commit": "2021-11-14 02:34:01", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "Tanuki-Zero Base model: llm-jp/llm-jp-13b-v1.0 Instruction data: Randomly sampled, 15k Jaster dataset (train) Code is here.", @@ -12437,7 +13213,8 @@ "latest_commit": "2024-03-30 00:51:03", "languages": [], "model_or_dataset": "model", - "model_size": 12.9 + "model_size": 12.9, + "model_architectures": "GPT2LMHeadModel" }, { "description": "mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-8bit The Model mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-8bit was converted to MLX format from cyberagent/Llama-3.1-70B-Japanese-Instruct-2407 using mlx-lm version 0.16.1.", @@ -12450,7 +13227,8 @@ "latest_commit": "2024-07-26 14:05:31", "languages": [], "model_or_dataset": "model", - "model_size": 19.8 + "model_size": 19.8, + "model_architectures": "LlamaForCausalLM" }, { "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", @@ -12463,7 +13241,8 @@ "latest_commit": "2022-03-23 18:34:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "Wav2Vec2ForCTC" }, { "description": "ELECTRA small Japanese finance discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", @@ -12476,7 +13255,8 @@ "latest_commit": "2022-12-09 
00:42:10", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "ElectraForPreTraining" }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_trf Japanese transformer pipeline (Transformer(name='cl-tohoku/bert-base-japanese-char-v2', piece_encoder='char', stride=160, type='bert', width=768, window=216, vocab_size=6144)).", @@ -12489,7 +13269,8 @@ "latest_commit": "2023-10-10 06:27:03", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "deberta-large-japanese-unidic Model Description", @@ -12502,7 +13283,8 @@ "latest_commit": "2022-06-19 09:15:35", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました ファインチューニングには、内定者の二万件以上のESを用いました。 ", @@ -12515,7 +13297,8 @@ "latest_commit": "2022-08-14 05:47:18", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2LMHeadModel" }, { "description": "ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k elyzaさんが公開しているELYZA-japanese-CodeLlama-7b-instructを 日本語のキャリブレーションセットで生成したGPTQモデルになります。 ", @@ -12528,7 +13311,8 @@ "latest_commit": "2023-11-16 14:28:39", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "This model was created by merging intfloat/e5-mistral-7b-instruct and stabilityai/japanese-stablelm-base-gamma-7b.", @@ -12541,7 +13325,8 @@ "latest_commit": "2024-01-05 15:48:24", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralModel" }, { "description": "This model is the fine-tuned version of Helsinki-NLP/opus-mt-ja-en on bsd_ja_en dataset.", @@ -12554,7 +13339,8 @@ "latest_commit": "2024-03-20 05:41:04", "languages": [], "model_or_dataset": "model", - "model_size": 0.07529999999999999 + "model_size": 0.07529999999999999, + "model_architectures": "MarianMTModel" }, { "description": "SambaLingo-Japanese-Base SambaLingo-Japanese-Base is a pretrained Bi-lingual Japanese and English model that adapts Llama-2-7b to Japanese by training on 42 billion tokens from the Japanese split of the Cultura-X dataset.", @@ -12567,7 +13353,8 @@ "latest_commit": "2024-04-16 22:33:28", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "LlamaForCausalLM" }, { "description": "DavidAU/alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF", @@ -12580,7 +13367,8 @@ "latest_commit": "2024-04-20 08:49:19", "languages": [], "model_or_dataset": "model", - "model_size": 1.39 + "model_size": 1.39, + "model_architectures": null }, { "description": "Model Card for Model ID 料理を検索するための質問文から、検索検索用キーワードである固有表現を抽出します Model Details Model Description 例えば、「東京の肉料理で、春に食べられる、鶏肉を使った料理を教えてください」という文章を入力すると、 「東京 → 都道府県/地方(AREA)」 「肉料理 → 種類(TYPE)」 「春 → 季節(SZN)", @@ -12593,7 +13381,8 @@ "latest_commit": "2024-05-12 07:20:39", "languages": [], "model_or_dataset": "model", - "model_size": 0.111 + "model_size": 0.111, + "model_architectures": "BertForTokenClassification" }, { "description": "Assistance のGGUF版 Our Models for GGUF Vecteus-GGUF Ninja-v1-GGUF Ninja-v1-NSFW-GGUF Ninja-v1-128k-GGUF Ninja-v1-NSFW-128k-GGUF", @@ -12606,7 +13395,8 @@ "latest_commit": "2024-05-03 04:30:45", "languages": [], 
"model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": null }, { "description": "VecteusをベースにLLavaに対応させたモデルです。 ", @@ -12619,7 +13409,8 @@ "latest_commit": "2024-05-10 05:39:04", "languages": [], "model_or_dataset": "model", - "model_size": 7.57 + "model_size": 7.57, + "model_architectures": "LlavaMistralForCausalLM" }, { "description": "Ninja-v1-RP-WIP 概要 Local-Novel-LLM-project/Ninja-v1-NSFWをロールプレイ用にLoRAでファインチューニングしたモデルです。 ", @@ -12632,7 +13423,8 @@ "latest_commit": "2024-05-20 16:56:00", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "spekulatius マージしているとたまに出てくる「目的の意図とは違うのだけどなんだか消すにはもったいないモデル」をおすそ分けするシリーズです。 ", @@ -12645,7 +13437,8 @@ "latest_commit": "2023-10-26 04:21:35", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "bart-large-japanese This model is converted from the original Japanese BART Pretrained model released by Kyoto University.", @@ -12658,7 +13451,8 @@ "latest_commit": "2022-11-07 12:06:32", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MBartForConditionalGeneration" }, { "description": "deberta-base-japanese-wikipedia-ud-goeswith Model Description", @@ -12671,7 +13465,8 @@ "latest_commit": "2024-08-20 19:38:50", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-small-japanese-aozora Model Description", @@ -12684,7 +13479,8 @@ "latest_commit": "2023-01-15 15:25:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_md Japanese pipeline optimized for CPU.", @@ -12697,7 +13493,8 @@ "latest_commit": "2023-10-10 06:45:12", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59363 Validation Metrics Loss: 0.12651239335536957 Accuracy: 0.9532079853817648 Precision: 0.9729688278823665 Recall: 0.9744633462616643 AUC: 0.9717333684823413 F1: 0.9737155136027014 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", @@ -12710,7 +13507,8 @@ "latest_commit": "2021-05-18 22:56:15", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForSequenceClassification" }, { "description": "roberta-large-japanese-char-luw-upos Model Description", @@ -12723,7 +13521,8 @@ "latest_commit": "2022-09-18 19:44:49", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForTokenClassification" }, { "description": "roberta-base-japanese-luw-upos Model Description", @@ -12736,7 +13535,8 @@ "latest_commit": "2022-09-18 19:44:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "RobertaForTokenClassification" }, { "description": "bert-large-japanese-luw-upos Model 
Description", @@ -12749,7 +13549,8 @@ "latest_commit": "2022-09-18 19:43:45", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForTokenClassification" }, { "description": "bert-base-japanese-char-extended Model Description", @@ -12762,7 +13563,8 @@ "latest_commit": "2022-06-21 07:21:54", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "BertForMaskedLM" }, { "description": "deberta-base-japanese-unidic Model Description", @@ -12775,7 +13577,8 @@ "latest_commit": "2022-06-18 23:02:31", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "fasttext-jp-embedding This model is experimental.", @@ -12788,7 +13591,8 @@ "latest_commit": "2022-11-16 22:21:49", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "FastTextJpModel" }, { "description": "モデル説明 (model explanation) YaguruMagiku 0.6 : AbyssOrangeMix2_sfw 0.4 マージ元のルーツにNAIリークが含まれるという噂があるので、NAIリークアンチには非推奨 理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。 ", @@ -12801,7 +13605,8 @@ "latest_commit": "2023-01-21 02:10:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "ebisuke/liz-nojaloli-ja License MIT Licenseベースとしてrinna/japanese-gpt-neox-3.6bを使用しています。 ", @@ -12814,7 +13619,8 @@ "latest_commit": "2023-05-30 16:01:20", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Model Card Summary This model was trained using H2O LLM Studio.", @@ -12827,7 +13633,8 @@ "latest_commit": "2023-06-08 00:48:50", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Japanese Stable LM Instruct Gamma 7B +", @@ -12840,7 +13647,8 @@ "latest_commit": "2024-03-21 14:33:07", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "概要 「LOCAL AI HACKATHON」における、チームDataPilot,4つめの成果品です。", @@ -12853,7 +13661,8 @@ "latest_commit": null, "languages": [], "model_or_dataset": "model", - "model_size": 3.56 + "model_size": 3.56, + "model_architectures": "MistralForCausalLM" }, { "description": "TigerBot-7B Japanese", @@ -12866,7 +13675,8 @@ "latest_commit": "2024-04-22 09:05:05", "languages": [], "model_or_dataset": "model", - "model_size": 6.74 + "model_size": 6.74, + "model_architectures": "LlamaForCausalLM" }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for Ninja-v1-128k The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 Ninja-128k has the following changes compared to Mistral-7B-v0.1.", @@ -12879,7 +13689,8 @@ "latest_commit": "2024-05-04 04:07:00", "languages": [], "model_or_dataset": "model", - "model_size": 7.24 + "model_size": 7.24, + "model_architectures": "MistralForCausalLM" }, { "description": "Style-Bert-VITS2 Japanese Only Sakura Miko こちらは「さくらみこ」の音声データセットに基づいて学習されたVITS-TTSモデルです。 ", @@ -12892,7 +13703,8 @@ "latest_commit": "2024-05-28 03:02:14", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + 
"model_architectures": null }, { "description": "TinySlime-1.1B-Chat-v1.0 TinySlime は日本語に特化した小規模言語モデルです。 ", @@ -12905,7 +13717,8 @@ "latest_commit": "2024-07-02 08:53:11", "languages": [], "model_or_dataset": "model", - "model_size": 1.1 + "model_size": 1.1, + "model_architectures": "LlamaForCausalLM" }, { "description": "gpt2-medium-japanese-upos Model Description", @@ -12918,7 +13731,8 @@ "latest_commit": "2024-07-27 07:49:41", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "GPT2ForTokenClassification" }, { "description": "モデルの説明(English explanation is below.", @@ -12931,7 +13745,8 @@ "latest_commit": "2024-06-11 07:41:22", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MixtralForCausalLM" }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", @@ -12944,7 +13759,8 @@ "latest_commit": "2024-04-22 09:39:09", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "MistralForCausalLM" }, { "description": "dolly-japanese-gpt-1b-clone 概要 rinna社の「japanese-gpt-1b」を、日本語データセット「databricks-dolly-15k-ja」を使用して学習させた推論モデルです。 ", @@ -12957,7 +13773,8 @@ "latest_commit": "2023-05-07 15:47:23", "languages": [], "model_or_dataset": "model", - "model_size": 1.33 + "model_size": 1.33, + "model_architectures": "GPT2LMHeadModel" }, { "description": "distilhubert-ft-japanese-50k Fine-tuned (more precisely, continue trained)", @@ -12970,7 +13787,8 @@ "latest_commit": "2023-04-21 18:00:04", "languages": [], "model_or_dataset": "model", - "model_size": null + "model_size": null, + "model_architectures": "HubertModel" }, { "description": "bart-base-japanese-news(base-sized model)", @@ -12983,7 +13801,8 @@ "latest_commit": "2023-12-08 03:39:50", "languages": [], "model_or_dataset": "model", - "model_size": 0.125 + "model_size": 0.125, + "model_architectures": "BartForConditionalGeneration" }, { "description": "Dataset Details Dataset Type:Japanese LLaVA Pretrain is a localized version of the original LLaVA Pretrain dataset.", @@ -12996,7 +13815,8 @@ "latest_commit": "2024-04-12 09:15:37", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "HF Datasets version of Tanaka Corpus.", @@ -13009,7 +13829,8 @@ "latest_commit": "2024-03-21 12:50:28", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "common voice, google fleurs, JSUTv1.1, JAS_v2 (joujiboi/japanese-anime-speech-v2)", @@ -13022,7 +13843,8 @@ "latest_commit": "2024-07-24 18:58:08", "languages": [], "model_or_dataset": "dataset", - "model_size": null + "model_size": null, + "model_architectures": null }, { "description": "Japanese Dictionary",