Shaylin Chetty committed
Commit 9b74f2d • 1 Parent(s): 2bfe655
Initial commit
- .gitattributes +2 -0
- README.md +71 -3
- config.json +32 -0
- generation_config.json +7 -0
- img/dut.png +0 -0
- img/mosaic.jpg +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +3 -0
- tokenizer_config.json +45 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,3 +1,71 @@
----
-license: bigscience-bloom-rail-1.0
----
+---
+license: bigscience-bloom-rail-1.0
+tags:
+- bloom
+- text-generation
+- recipe-generation
+widget:
+- text: '{"prompt": ["onion", "pepper", "brown rice", "egg", "soy sauce"]'
+- text: '{"prompt": ["whole wheat flour", "berries", "baking powder", "milk"]'
+---
+
+# Durban University of Technology Recipe Generator
+
+![image/png](img/dut.png)
+
+## Model Details
+
+### Description
+
+This model is a fine-tuned version of [bloom-560m](https://huggingface.co/bigscience/bloom-560m) aimed at recipe generation for a given set of ingredients.
+Its focus is on the creation of healthier, diabetic-friendly recipes using the set of ingredients that someone has on
+hand.
+
+This model further differentiates itself by prioritising common kitchen measurements (e.g. 1 cup, 1 teaspoon) and the
+metric system (e.g. 500g, 100ml) over US customary or imperial measurement systems.
+
+During model training, unfamiliar recipes were tested by the Department of Food and Nutrition, with feedback incorporated to enhance the model's performance.
+
+
+![image/jpeg](img/mosaic.jpg)
+
+### Usage Example
+```python
+import json
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+tokenizer = AutoTokenizer.from_pretrained("shaylinc/dut-recipe-generator")
+model = AutoModelForCausalLM.from_pretrained("shaylinc/dut-recipe-generator")
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+input_ingredients = ["mushrooms", "cabbage", "soy sauce", "sesame seeds", "honey"]
+
+input_text = '{"prompt": ' + json.dumps(input_ingredients)
+
+output = pipe(input_text, max_length=1024, temperature=0.2, do_sample=True, truncation=True)[0]["generated_text"]
+
+# JSON-formatted output with "title", "ingredients" and "method" nodes available
+print(output)
+```
+
+### Application Example
+You can test the model on Hugging Face Spaces here:
+https://huggingface.co/spaces/Ashikan/dut-recipe-generator
+
+### Training Data
+This model was first trained on a heavily filtered and processed version of the existing [Recipe NLG](https://huggingface.co/datasets/mbien/recipe_nlg) dataset.
+Pre-processing involved the removal of invalid recipes, the conversion of units such as degrees Fahrenheit to degrees Celsius, and the filtering out of recipes written entirely in US customary or imperial measurements.
+
+After the initial training, the model was further trained on a new recipe dataset written by the Durban University of Technology.
+This dataset was tailored to feature healthier, diabetic-friendly recipes with a focus on ease of preparation and nutritional content.
+These recipes can be found in the [DUT Diabetic Friendly Recipes](https://huggingface.co/datasets/Ashikan/diabetic-friendly-recipes) dataset.
+
+
+## Citation Information
+
+```
+Prof. Ashika Naicker*, Mr. Shaylin Chetty, Ms. Riashnee Thaver*, Ms. Anjellah Reddy*, Dr. Evonne Singh*, Dr. Imana Pal*, Dr. Lisebo Mothepu*.
+
+*Durban University of Technology, Faculty of Applied Sciences, Department of Food and Nutrition, Durban, South Africa
+
+```
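The card states that the generated text is a JSON object with "title", "ingredients" and "method" nodes. As a minimal sketch (assuming the model closes the JSON object it was prompted with and that generation is not truncated by `max_length`), the `output` string from the usage example could be unpacked as below; the `parse_recipe` helper is illustrative and not part of the repository:

```python
import json

def parse_recipe(generated_text: str) -> dict:
    """Illustrative helper: split a generated recipe into its named nodes."""
    try:
        recipe = json.loads(generated_text)
    except json.JSONDecodeError:
        # Generation may have been cut off before the closing brace;
        # fall back to returning the raw text for inspection.
        return {"raw": generated_text}
    return {
        "title": recipe.get("title"),
        "ingredients": recipe.get("ingredients", []),
        "method": recipe.get("method", []),
    }

# recipe = parse_recipe(output)   # `output` from the usage example above
# print(recipe["title"])
```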
config.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "_name_or_path": "./output/checkpoint-340906",
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "BloomForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "bias_dropout_fusion": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "masked_softmax_fusion": true,
+  "model_type": "bloom",
+  "n_head": 16,
+  "n_inner": null,
+  "n_layer": 24,
+  "offset_alibi": 100,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "skip_bias_add": true,
+  "skip_bias_add_qkv": false,
+  "slow_but_exact": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.43.1",
+  "unk_token_id": 0,
+  "use_cache": true,
+  "vocab_size": 250880
+}
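The configuration above describes the bloom-560m backbone (24 layers, hidden size 1024, 16 attention heads, a 250880-token vocabulary). A minimal sketch of reading those values back with `transformers.AutoConfig`, assuming the repo id used in the README:

```python
from transformers import AutoConfig

# Read config.json from the Hub and inspect the architecture it declares.
config = AutoConfig.from_pretrained("shaylinc/dut-recipe-generator")

print(config.model_type)    # "bloom"
print(config.n_layer)       # 24 transformer blocks
print(config.hidden_size)   # 1024
print(config.n_head)        # 16 attention heads
print(config.vocab_size)    # 250880, shared with the BLOOM tokenizer
```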
generation_config.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 3,
+  "transformers_version": "4.43.1"
+}
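generation_config.json pins the special-token ids that `model.generate()` falls back to when a call does not override them. A small sketch of loading those defaults directly, assuming the same repo id:

```python
from transformers import GenerationConfig

# Defaults applied by generate() unless overridden per call
# (e.g. the temperature and max_length arguments in the README example).
gen_config = GenerationConfig.from_pretrained("shaylinc/dut-recipe-generator")

print(gen_config.bos_token_id)  # 1 -> <s>
print(gen_config.eos_token_id)  # 2 -> </s>
print(gen_config.pad_token_id)  # 3 -> <pad>
```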
img/dut.png
ADDED
img/mosaic.jpg
ADDED
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958f49012f6f43cf2257f210bf28ad35fa66cab9fd053118fdb6fa1bf173e3b9
+size 2236892304
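The three lines above are a Git LFS pointer, not the weights themselves; the actual safetensors file is about 2.2 GB per the `size` field. A sketch of fetching the real file and peeking at its tensors, assuming the `huggingface_hub` and `safetensors` packages are installed:

```python
from huggingface_hub import hf_hub_download
from safetensors import safe_open

# Resolve the LFS pointer to the real weights file in the local cache.
path = hf_hub_download(repo_id="shaylinc/dut-recipe-generator",
                       filename="model.safetensors")

# List a few tensor names and shapes without loading the full state dict.
with safe_open(path, framework="pt") as f:
    for name in list(f.keys())[:5]:
        print(name, f.get_tensor(name).shape)
```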
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
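special_tokens_map.json declares the four BLOOM special tokens; once the tokenizer is loaded they are exposed as attributes and map to the ids referenced in config.json and generation_config.json. A brief sketch:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("shaylinc/dut-recipe-generator")

# Token strings come from special_tokens_map.json; ids match the config files.
print(tokenizer.unk_token, tokenizer.unk_token_id)  # <unk>  0
print(tokenizer.bos_token, tokenizer.bos_token_id)  # <s>    1
print(tokenizer.eos_token, tokenizer.eos_token_id)  # </s>   2
print(tokenizer.pad_token, tokenizer.pad_token_id)  # <pad>  3
```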
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d88424e6275945e41b83991c2dea7a2fce7b65bc995961300cd77512aecff2a
+size 14500598
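Like model.safetensors, tokenizer.json is stored as an LFS pointer; it holds the serialized fast (Rust-backed) tokenizer of roughly 14.5 MB. A quick sketch confirming that the fast backend is picked up and that its vocabulary matches config.json:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("shaylinc/dut-recipe-generator")

print(tokenizer.is_fast)     # True when tokenizer.json backs the tokenizer
print(tokenizer.vocab_size)  # 250880, matching "vocab_size" in config.json
```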
tokenizer_config.json
ADDED
@@ -0,0 +1,45 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "tokenizer_class": "BloomTokenizer",
+  "unk_token": "<unk>"
+}
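Note the `"padding_side": "left"` setting above: for decoder-only models such as BLOOM, left padding keeps the prompt tokens adjacent to the generated continuation when prompts are batched. A minimal sketch of what that looks like in practice (the prompt strings follow the format from the README widget examples):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("shaylinc/dut-recipe-generator")

prompts = [
    '{"prompt": ["onion", "pepper", "brown rice"]',
    '{"prompt": ["milk", "berries"]',
]

# The shorter prompt is padded on the left with <pad> (id 3),
# so both sequences end where generation continues.
batch = tokenizer(prompts, padding=True, return_tensors="pt")
print(batch["input_ids"])
print(batch["attention_mask"])
```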