---
base_model:
- meta-llama/Llama-3.2-3B-Instruct
---
### HOW TO USE
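The snippet below is an end-to-end sketch: it renders a Llama-3-style chat prompt with a hand-written Jinja2 template, loads the model through `transformers`, and extracts the assistant's reply from the generated text. The system prompt (an ingredient risk analyzer) is just an example; swap in your own.
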
````python
import torch
from jinja2 import Template
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# 1. Define the chat template using Jinja2 syntax
chat_template = """
<|start_header_id|>system<|end_header_id|>
Cutting Knowledge Date: December 2023
{% if System %}
{{ System }}
{% endif %}
{% if Tools %}
When you receive a tool call response, use the output to format an answer to the original user question.
You are a helpful assistant with tool calling capabilities.
{% endif %}<|eot_id|>
{% for message in Messages %}
{% set last = loop.last %}
{% if message.role == "user" %}
<|start_header_id|>user<|end_header_id|>
{% if Tools and last %}
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
Respond in the format {"name": function_name, "parameters": {"arg1": "value1", "arg2": "value2"}}. Do not use variables.
{% for tool in Tools %}
{{ tool }}
{% endfor %}
{{ message.content }}<|eot_id|>
{% else %}
{{ message.content }}<|eot_id|>
{% endif %}
{% if last %}
<|start_header_id|>assistant<|end_header_id|>
{% endif %}
{% elif message.role == "assistant" %}
<|start_header_id|>assistant<|end_header_id|>
{% if message.ToolCalls %}
{% for call in message.ToolCalls %}
{"name": "{{ call.Function.Name }}", "parameters": {{ call.Function.Arguments }}}
{% endfor %}
{% else %}
{{ message.content }}
{% endif %}
{% if not last %}
<|eot_id|>
{% endif %}
{% elif message.role == "tool" %}
<|start_header_id|>ipython<|end_header_id|>
{{ message.content }}<|eot_id|>{% if last %}<|start_header_id|>assistant<|end_header_id|>
{% endif %}
{% endif %}
{% endfor %}
"""
# 2. Compile the template; trim_blocks/lstrip_blocks keep the Jinja2 block
#    tags from injecting stray newlines and indentation into the prompt
template = Template(chat_template, trim_blocks=True, lstrip_blocks=True)
# 3. Define conversation messages and tools (if any)
messages = [
    {
        "role": "system",
        "content": """
YOU ARE AN INGREDIENT DATA ANALYZER, A LEADING AUTHORITY IN EVALUATING AND CLASSIFYING RISKS ASSOCIATED WITH INGREDIENTS BASED ON CANCER, ALLERGEN, AND ENDOCRINE DISRUPTOR SCORES. YOUR TASK IS TO PROCESS THE PROVIDED INGREDIENT DATA AND OUTPUT A FULL, COMPLETE, AND WELL-STRUCTURED JSON OBJECT.
###INSTRUCTIONS###
1. **ANALYZE THE INGREDIENT DATA**:
- IDENTIFY any potential risks associated with the ingredient.
- CALCULATE or CLASSIFY the cancer, allergen, and endocrine disruptor scores based on the input data.
2. **ASSIGN A COLOR CODE**:
- DETERMINE the appropriate risk color for the ingredient:
- **G (Green)**: Low or no risk.
- **Y (Yellow)**: Moderate risk.
- **R (Red)**: High risk.
3. **OUTPUT RESULTS IN JSON FORMAT**:
- ENSURE the JSON object includes:
- `cancer_score`: Numerical or categorical representation of cancer risk.
- `allergen_score`: Numerical or categorical representation of allergen risk.
- `endocrine_disruptor_score`: Numerical or categorical representation of endocrine disruptor risk.
- `risk_color`: The assigned color based on the evaluated scores.
4. **PROVIDE COMPLETE INFORMATION**:
- NEVER leave any field empty or incomplete.
- IF any score cannot be determined, PROVIDE an explicit reason or placeholder in the JSON (e.g., `"unknown"` or `"not applicable"`).
###JSON STRUCTURE FORMAT###
```json
{
  "cancer_score": <True/False/Unknown>,
  "allergen_score": <True/False/Unknown>,
  "endocrine_disruptor_score": <True/False/Unknown>,
  "risk_color": "<G/Y/R>",
  "description": ""
}
```
"""
    },
    # Few-shot or live user turn: fill in the ingredient data to analyze
    {
        "role": "user",
        "content": "",
    },
]
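# A hypothetical user turn (illustrative only; not from the model card) might
# carry the raw ingredient data as plain text, e.g.:
#   {"role": "user", "content": "Ingredient: sodium benzoate (preservative, E211)"}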
# Define any tools if applicable (empty in this case)
tools = []  # Populate with tool/function definitions if you have any

# 4. Prepare the context for the template
context = {
    "System": messages[0]["content"],
    "Tools": tools,
    "Messages": messages[1:],  # everything after the system message
}
# 5. Render the template with the context
formatted_input = template.render(context)
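# Sanity check: uncomment to inspect the exact prompt string sent to the model
# print(formatted_input)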
# 6. Load the model and tokenizer
model_id = "noxneural/kungullama-v0.9-3B"  # or a local path to the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Register the template's special tokens with the tokenizer.
# (Llama 3.x tokenizers already define these, so this is usually a no-op.)
special_tokens = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"]
tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
# Load the model with the updated tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)
# Resizing the embeddings only matters if new tokens were actually added above
model.resize_token_embeddings(len(tokenizer))
# 7. Initialize the pipeline (dtype and device placement were already set
#    when the model was loaded, so they are not repeated here)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# 8. Generate the response
response = pipe(
    formatted_input,
    max_new_tokens=256,
    temperature=0.01,  # near-greedy; raise for more diverse outputs
    top_p=0.95,        # adjust top_p as needed
    do_sample=True,    # sampling enabled so temperature/top_p take effect
)
# 9. Extract and print the assistant's reply.
# The template ends the prompt with an assistant header, so everything after
# the last occurrence of that header is the model's completion.
generated_text = response[0]["generated_text"]
header = "<|start_header_id|>assistant<|end_header_id|>"
if header in generated_text:
    assistant_reply = generated_text.split(header)[-1].strip()
    print("Assistant's Reply:")
    print(assistant_reply)
else:
    print("Full Generated Text:")
    print(generated_text)
````
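
If the tokenizer shipped with this checkpoint retains the Llama 3.2 chat template (inspect `tokenizer.chat_template` to check), the manual Jinja2 rendering above can be replaced by `apply_chat_template`. A minimal sketch, assuming the bundled template is present:

```python
# Assumes the tokenizer bundles a chat template (Llama 3.2 Instruct
# checkpoints do); `messages` and `pipe` are the objects defined above.
prompt = tokenizer.apply_chat_template(
    messages,                    # the same list of {"role", "content"} dicts
    tokenize=False,              # return the rendered prompt as a string
    add_generation_prompt=True,  # append the assistant header for generation
)
response = pipe(prompt, max_new_tokens=256, do_sample=True,
                temperature=0.01, top_p=0.95)
```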