Update README.md
Browse files
README.md
CHANGED
@@ -114,6 +114,8 @@ model-index:
|
|
114 |
This model is a fine-tuned version of [fblgit/juanako-7b-UNA-v2-phase-1](https://huggingface.co/fblgit/juanako-7b-UNA-v2-phase-1) on the HuggingFaceH4/ultrafeedback_binarized dataset.
|
115 |
It outperforms most current Mistral-based models in many aspects.
|
116 |
|
|
|
|
|
117 |
## Scoring and records (26-November-2023)
|
118 |
Here are some results:
|
119 |
* Scores #1 7B Model
|
@@ -259,15 +261,82 @@ The following hyperparameters were used during training:
|
|
259 |
- Tokenizers 0.14.1
|
260 |
|
261 |
## Citations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
```
|
263 |
@misc{lin2021truthfulqa,
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
archivePrefix={arXiv},
|
269 |
-
primaryClass={cs.CL}
|
270 |
}
|
271 |
```
|
272 |
-
|
273 |
-
Author [Xavier M.](mailto:[email protected]) @fblgit
|
|
|
114 |
This model is a fine-tuned version of [fblgit/juanako-7b-UNA-v2-phase-1](https://huggingface.co/fblgit/juanako-7b-UNA-v2-phase-1) on the HuggingFaceH4/ultrafeedback_binarized dataset.
|
115 |
It outperforms most current Mistral-based models in many aspects.
|
116 |
|
117 |
+
Author [Xavier M.](mailto:[email protected]) @fblgit
|
118 |
+
|
119 |
## Scoring and records (26-November-2023)
|
120 |
Here are some results:
|
121 |
* Scores #1 7B Model
|
|
|
261 |
- Tokenizers 0.14.1
|
262 |
|
263 |
## Citations
|
264 |
+
If you find juanako useful, please cite:
|
265 |
+
```
|
266 |
+
@misc{juanako7buna,
|
267 |
+
title={Juanako: Uniform Neural Alignment},
|
268 |
+
author={Xavier Murias},
|
269 |
+
year={2023},
|
270 |
+
publisher = {HuggingFace},
|
271 |
+
journal = {HuggingFace repository},
|
272 |
+
howpublished = {\url{https://huggingface.co/fblgit/juanako-7b-UNA}},
|
273 |
+
}
|
274 |
+
```
|
275 |
+
|
276 |
```
|
277 |
@misc{lin2021truthfulqa,
|
278 |
+
title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
|
279 |
+
author={Stephanie Lin and Jacob Hilton and Owain Evans},
|
280 |
+
year={2021},
|
281 |
+
eprint={2109.07958},
|
282 |
+
archivePrefix={arXiv},
|
283 |
+
primaryClass={cs.CL}
|
284 |
+
}
|
285 |
+
@misc{tunstall2023zephyr,
|
286 |
+
title={Zephyr: Direct Distillation of LM Alignment},
|
287 |
+
author={Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Kashif Rasul and Younes Belkada and Shengyi Huang and Leandro von Werra and Clémentine Fourrier and Nathan Habib and Nathan Sarrazin and Omar Sanseviero and Alexander M. Rush and Thomas Wolf},
|
288 |
+
year={2023},
|
289 |
+
eprint={2310.16944},
|
290 |
+
archivePrefix={arXiv},
|
291 |
+
primaryClass={cs.LG}
|
292 |
+
}
|
293 |
+
@article{cobbe2021gsm8k,
|
294 |
+
title={Training Verifiers to Solve Math Word Problems},
|
295 |
+
author={Cobbe, Karl and Kosaraju, Vineet and Bavarian, Mohammad and Chen, Mark and Jun, Heewoo and Kaiser, Lukasz and Plappert, Matthias and Tworek, Jerry and Hilton, Jacob and Nakano, Reiichiro and Hesse, Christopher and Schulman, John},
|
296 |
+
journal={arXiv preprint arXiv:2110.14168},
|
297 |
+
year={2021}
|
298 |
+
}
|
299 |
+
@inproceedings{Bisk2020,
|
300 |
+
author = {Yonatan Bisk and Rowan Zellers and
|
301 |
+
Ronan Le Bras and Jianfeng Gao
|
302 |
+
and Yejin Choi},
|
303 |
+
title = {PIQA: Reasoning about Physical Commonsense in
|
304 |
+
Natural Language},
|
305 |
+
booktitle = {Thirty-Fourth AAAI Conference on
|
306 |
+
Artificial Intelligence},
|
307 |
+
year = {2020},
|
308 |
+
}
|
309 |
+
@software{eval-harness,
|
310 |
+
author = {Gao, Leo and
|
311 |
+
Tow, Jonathan and
|
312 |
+
Biderman, Stella and
|
313 |
+
Black, Sid and
|
314 |
+
DiPofi, Anthony and
|
315 |
+
Foster, Charles and
|
316 |
+
Golding, Laurence and
|
317 |
+
Hsu, Jeffrey and
|
318 |
+
McDonell, Kyle and
|
319 |
+
Muennighoff, Niklas and
|
320 |
+
Phang, Jason and
|
321 |
+
Reynolds, Laria and
|
322 |
+
Tang, Eric and
|
323 |
+
Thite, Anish and
|
324 |
+
Wang, Ben and
|
325 |
+
Wang, Kevin and
|
326 |
+
Zou, Andy},
|
327 |
+
title = {A framework for few-shot language model evaluation},
|
328 |
+
month = sep,
|
329 |
+
year = 2021,
|
330 |
+
publisher = {Zenodo},
|
331 |
+
version = {v0.0.1},
|
332 |
+
doi = {10.5281/zenodo.5371628},
|
333 |
+
url = {https://doi.org/10.5281/zenodo.5371628}
|
334 |
+
}
|
335 |
+
@misc{rafailov2023direct,
|
336 |
+
title={Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
|
337 |
+
author={Rafael Rafailov and Archit Sharma and Eric Mitchell and Stefano Ermon and Christopher D. Manning and Chelsea Finn},
|
338 |
+
year={2023},
|
339 |
+
eprint={2305.18290},
|
340 |
archivePrefix={arXiv},
|
|
|
341 |
}
|
342 |
```
|
|
|
|