Shaylin Chetty committed on
Commit
9b74f2d
1 Parent(s): 2bfe655

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,71 @@
1
- ---
2
- license: bigscience-bloom-rail-1.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: bigscience-bloom-rail-1.0
3
+ tags:
4
+ - bloom
5
+ - text-generation
6
+ - recipe-generation
7
+ widget:
8
+ - text: '{"prompt": ["onion", "pepper", "brown rice", "egg", "soy sauce"]'
9
+ - text: '{"prompt": ["whole wheat flour", "berries", "baking powder", "milk"]'
10
+ ---
11
+
12
+ # Durban University of Technology Recipe Generator
13
+
14
+ ![image/png](img/dut.png)
15
+
16
+ ## Model Details
17
+
18
+ ### Description
19
+
20
+ This model is a fine-tuned version of [bloom-560m](https://huggingface.co/bigscience/bloom-560m) aimed at recipe generation for a given set of ingredients.
21
+ Its focus is on the creation of healthier, diabetic-friendly recipes with the set of ingredients that someone has on
22
+ hand.
23
+
24
+ This model further differentiates itself by prioritising common kitchen measurements (e.g. 1 cup, 1 teaspoon) and the
25
+ metric system (e.g. 500g, 100ml) over US customary or imperial measurement systems.
26
+
27
+ During model training, unfamiliar recipes were tested by the Department of Food and Nutrition, with feedback incorporated to enhance the model's performance.
28
+
29
+
30
+ ![image/png](img/mosaic.jpg)
31
+
32
+ ### Usage Example
33
+ ```python
34
+ import json
35
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
36
+
37
+ tokenizer = AutoTokenizer.from_pretrained("shaylinc/dut-recipe-generator")
38
+ model = AutoModelForCausalLM.from_pretrained("shaylinc/dut-recipe-generator")
39
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
40
+
41
+ input_ingredients = ["mushrooms", "cabbage", "soy sauce", "sesame seeds", "honey"]
42
+
43
+ input_text = '{"prompt": ' + json.dumps(input_ingredients)
44
+
45
+ output = pipe(input_text, max_length=1024, temperature=0.2, do_sample=True, truncation=True)[0]["generated_text"]
46
+
47
+ # JSON-formatted output with "title", "ingredients" and "method" nodes available
48
+ print(output)
49
+ ```
50
+
51
+ ### Application Example
52
+ You may test out the model on HuggingFace spaces here:
53
+ https://huggingface.co/spaces/Ashikan/dut-recipe-generator
54
+
55
+ ### Training Data
56
+ This model was first trained on a heavily filtered and processed version of the existing [Recipe NLG](https://huggingface.co/datasets/mbien/recipe_nlg) dataset.
57
+ Pre-processing involved the removal of invalid recipes, conversion of units such as degrees Fahrenheit to degrees Celsius and the filtering out of recipes completely written in US customary or imperial measurements.
58
+
59
+ After the initial training, the model was then trained on a new recipe dataset written by the Durban University of Technology.
60
+ This dataset was tailored to feature healthier, diabetic-friendly recipes with a focus on ease of preparation and nutritional content.
61
+ These recipes can be found within the [DUT Diabetic Friendly Recipes](https://huggingface.co/datasets/Ashikan/diabetic-friendly-recipes) dataset.
62
+
63
+
64
+ ## Citation Information
65
+
66
+ ```
67
+ Prof. Ashika Naicker*, Mr. Shaylin Chetty, Ms. Riashnee Thaver*, Ms. Anjellah Reddy*, Dr. Evonne Singh*, Dr. Imana Pal*, Dr. Lisebo Mothepu*.
68
+
69
+ *Durban University of Technology, Faculty of Applied Sciences, Department of Food and Nutrition, Durban, South Africa
70
+
71
+ ```
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./output/checkpoint-340906",
3
+ "apply_residual_connection_post_layernorm": false,
4
+ "architectures": [
5
+ "BloomForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "attention_softmax_in_fp32": true,
9
+ "bias_dropout_fusion": true,
10
+ "bos_token_id": 1,
11
+ "eos_token_id": 2,
12
+ "hidden_dropout": 0.0,
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "layer_norm_epsilon": 1e-05,
16
+ "masked_softmax_fusion": true,
17
+ "model_type": "bloom",
18
+ "n_head": 16,
19
+ "n_inner": null,
20
+ "n_layer": 24,
21
+ "offset_alibi": 100,
22
+ "pad_token_id": 3,
23
+ "pretraining_tp": 1,
24
+ "skip_bias_add": true,
25
+ "skip_bias_add_qkv": false,
26
+ "slow_but_exact": false,
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.43.1",
29
+ "unk_token_id": 0,
30
+ "use_cache": true,
31
+ "vocab_size": 250880
32
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 3,
6
+ "transformers_version": "4.43.1"
7
+ }
img/dut.png ADDED
img/mosaic.jpg ADDED
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:958f49012f6f43cf2257f210bf28ad35fa66cab9fd053118fdb6fa1bf173e3b9
3
+ size 2236892304
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d88424e6275945e41b83991c2dea7a2fce7b65bc995961300cd77512aecff2a
3
+ size 14500598
tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<unk>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<pad>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ }
36
+ },
37
+ "bos_token": "<s>",
38
+ "clean_up_tokenization_spaces": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "<pad>",
42
+ "padding_side": "left",
43
+ "tokenizer_class": "BloomTokenizer",
44
+ "unk_token": "<unk>"
45
+ }