import os
import re
import subprocess
import logging

import torch
import gradio as gr
import spaces
from unsloth import FastLanguageModel
# Set up logging for debugging
logging.basicConfig(
    level=logging.DEBUG,  # capture everything, including DEBUG messages
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()  # send logs to the console
    ]
)
logger = logging.getLogger(__name__)

logger.info("HELLO WORLD...")
# Hugging Face read token from the Space's environment/secrets
# (os.environ[...] raises KeyError if the secret is missing)
READ_HF = os.environ["read_hf"]
# Alpaca prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
# Inventory management instructions (prepended to the inventory list at prompt time)
INSTRUCTIONS = '''
You are an AI assistant tasked with managing inventory based on user instructions. You must meticulously analyze each user request to determine the appropriate action and execute it with the correct parameters.

**Here's your step-by-step thought process:**

1. **Identify the Function:** Carefully examine the user's input to determine the primary function they want to perform. The available functions are:
    - `transaction`: Record a new item transaction.
    - `last n days transactions`: Retrieve transaction records within a specific timeframe.
    - `view inventory`: View inventory details for a specific category and risk level.
    - `generate report`: Generate an inventory report.

2. **Extract Parameters:** Once you've identified the function, carefully extract the necessary parameters from the user's input. Each function requires specific parameters:

    **`transaction`:**
    - `ItemName`: (string) **Must be an exact match from the provided Item List.**
    - `ItemQt`: (integer) The quantity of the item.
    - `Type`: (string) "sale", "purchase", or "return".

    **`last n days transactions`:**
    - `ItemCategory`: (string) **Must be from the provided Item Category List.**
    - `Duration`: (integer) Number of days (convert weeks, months, or years to days).

    **`view inventory`:**
    - `ItemCategory`: (string) **Must be from the provided Item Category List.**
    - `RiskType`: (string) "overstock", "understock", "All" (both overstock and understock), or "Null" (when risk inventory is not requested).

    **`generate report`:**
    - `ItemCategory`: (string) **Must be from the provided Item Category List.**
    - `Duration`: (integer) Number of days (convert weeks, months, or years to days).
    - `ReportType`: (string) "profit", "revenue", "inventory", or "Null" (for all reports).

3. **Validate Inputs:** Before proceeding, validate the extracted parameters:
    - **ItemName:** Ensure the `ItemName` is an exact match from the provided Item List.
    - **ItemCategory:** Ensure the `ItemCategory` is from the provided Category List.
    - **Data Types:** Verify that all parameters are of the correct data type (string or integer).

4. **Output in JSON:** Always format your response as a JSON object.

**Additional Notes:**
- Pay close attention to the case and spelling of function names and parameters.

Category List: ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
'''
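
# For illustration only: a plausible JSON reply for a `transaction` request.
# Field names follow the parameter names above; the exact schema emitted by
# the fine-tuned model may differ, and the item name here is hypothetical.
#
#   {
#     "function": "transaction",
#     "ItemName": "Whole Milk 1L",
#     "ItemQt": 3,
#     "Type": "sale"
#   }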
# NOTE: the @spaces.GPU decorator is an assumption; `spaces` is imported and
# this app appears to target Hugging Face ZeroGPU Spaces, where GPU work must
# run inside a decorated function. Remove it on a Space with a dedicated GPU.
@spaces.GPU
def chunk_it(inventory_list, user_input_text):
    # Check for CUDA and NVIDIA-related problems before loading the model
    try:
        # Check for GPU devices
        device_count = torch.cuda.device_count()
        logger.info(f"Number of GPU devices: {device_count}")
        if device_count == 0:
            raise RuntimeError("No GPU devices found.")

        # Check the CUDA toolkit version via nvcc. If nvcc is missing,
        # subprocess.run raises FileNotFoundError (so a string check for
        # 'not found' in stdout would never trigger).
        try:
            process = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
            logger.info(f"CUDA version: {process.stdout.strip()}")
        except FileNotFoundError:
            raise RuntimeError("CUDA not found (nvcc is not on PATH).")
        # Load the 4-bit LoRA model and its tokenizer
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name="VanguardAI/CoT_multi_llama_LoRA_4bit",
            max_seq_length=2048,
            dtype=torch.bfloat16,
            load_in_4bit=True,
            token=READ_HF,
        )
        logger.info("Model and tokenizer loaded.")
        # Build the prompt: instructions plus the current inventory form the
        # instruction block, and the user's request forms the input block
        formatted_prompt = alpaca_prompt.format(
            INSTRUCTIONS + inventory_list,  # instruction
            user_input_text,                # input
            "",                             # response, left blank for generation
        )
        logger.debug(f"Formatted prompt: {formatted_prompt}")
        try:
            inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
            logger.debug(f"Tokenized inputs: {inputs}")
        except Exception as e:
            logger.error(f"Failed to tokenize inputs: {e}")
            raise

        logger.info("Generating output...")
        try:
            outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
            logger.info("Output generated.")
        except Exception as e:
            logger.error(f"Failed to generate output: {e}")
            raise

        try:
            # batch_decode returns one string per prompt; a single prompt was
            # sent, so take the first element rather than returning the list
            reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            logger.debug(f"Decoded output: {reply}")
        except Exception as e:
            logger.error(f"Failed to decode output: {e}")
            raise

        logger.debug(f"Final reply: {reply}")
        return reply
    except Exception as e:
        logger.error(f"Error loading model or CUDA issues: {e}")
        return "There seems to be an issue with CUDA or the model. Please check the Hugging Face Spaces environment."
# Gradio interface; the input order must match chunk_it's positional
# signature (inventory_list first, then user_input_text)
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs=gr.Textbox(label="output", lines=23),
    title="Testing",
)
# Logs already stream to the console via the logging.basicConfig call above.
# logging.StreamHandler requires a file-like object, and Gradio exposes no
# log stream to attach one to, so no Gradio-facing handler is added here.
logger.info("Launching Gradio interface...") | |
try: | |
iface.launch(inline=False) | |
logger.info("Gradio interface launched.") | |
except Exception as e: | |
logger.error(f"Failed to launch Gradio interface: {e}") |