"""Gradio demo that asks a hosted LLM to assign a UNSPSC family label to text.

The script builds a prompt containing every candidate family label, sends it
to a Hugging Face inference endpoint, parses the JSON reply, and shows the
label together with the elapsed time in a small Gradio UI.
"""

import time

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import JsonOutputParser
from langchain_huggingface import HuggingFaceEndpoint

# Initialize the LLM and other components.
# NOTE(review): do_sample=False usually selects greedy decoding, in which case
# temperature would have no effect -- confirm which of the two is intended.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.7,
    do_sample=False,
)

# Label returned when the text is blank or the model reply cannot be used.
# It matches the fallback word the prompt tells the model to use.
UNKNOWN_LABEL = "Unknown"

# Candidate UNSPSC family labels offered to the model in the prompt.
# Every entry MUST be followed by a comma: Python silently concatenates
# adjacent string literals, which would collapse the list into one string.
family_labels = [
    "Containers and storage",
    "Components for information technology or broadcasting or telecommunications",
    "Industrial process machinery and equipment and supplies",
    "Clothing",
    "Computer Equipment and Accessories",
    "Data Voice or Multimedia Network Equipment or Platforms and Accessories",
    "Software",
    "Communications Devices and Accessories",
    "Timepieces",
    "Transport services",
    "Measuring and observing and testing instruments",
    "Heating and ventilation and air circulation",
    "Power sources",
    "Structural components and basic shapes",
    "Electronic hardware and component parts and accessories",
    "Insurance and retirement services",
    "Electrical wire and cable and harness",
    "Electrical equipment and components and supplies",
    "Hand tools",
    "Resins and rosins and other resin derived materials",
    "Part cleaning machinery and accessories",
    "Heavy construction machinery and equipment",
    "Power generation",
    "Pneumatic machinery and equipment",
    "Vehicle bodies and trailers",
    "Workplace safety equipment and supplies and training materials",
    "Public safety and control",
    "Motor vehicles",
    "Batteries and generators and kinetic power transmission",
    "Domestic appliances",
    "Lamps and lightbulbs and lamp components",
    "Automotive specialty tools",
    "Vocational training",
    "Electrical wire management devices and accessories and supplies",
    "Lighting Fixtures and Accessories",
    "Automation control devices and components and accessories",
    "Fluid and gas distribution",
    "Industrial filtering and purification",
    "Printed circuits and integrated circuits and microassemblies",
    "Portable Structure Building Components",
    "Security surveillance and detection",
    "Structural building products",
    "Work related organizations",
    "Management advisory services",
    "Civic organizations and associations and movements",
    "Printed media",
    "Clubs",
    "Industrial pumps and compressors",
    "Elements and gases",
    "Office machines and their supplies and accessories",
    "Consumer electronics",
    "Audio and visual presentation and composing equipment",
    "Photographic or filming or video equipment",
    "Printing and publishing equipment",
    "Rubber and elastomers",
    "Compounds and mixtures",
    "Paper products",
    "Laboratory and scientific equipment",
    "Building and facility maintenance and repair services",
    "Nonresidential building construction services",
    "Specialized trade construction and maintenance services",
    "Heavy construction services",
    "Insurable interest contracts",
    "Computer services",
    "Information Technology Service Delivery",
    "Real estate services",
    "Seasonings and preservatives",
    "Legal services",
    "Business administration services",
    "General agreements and contracts",
    "Law enforcement",
    "Prepared and preserved foods",
    "Office and desk accessories",
    "Office supply",
    "Security and personal safety",
    "Commercial and industrial furniture",
    "Fuels",
    "Passenger transport",
    "Mail and cargo transport",
    "Medical facility products",
    "Paper materials",
    "Beverages",
    "Cleaning and janitorial supplies",
    "Janitorial equipment",
    "Marketing and distribution",
    "Vending machines",
    "Monetary instruments or currency",
    "Institutional food services equipment",
    "Comprehensive health services",
    "Human resources services",
    "Bank offered products",
    "Advertising",
    "Signage and accessories",
    "Fabrics and leather materials",
    "Luggage and handbags and packs and cases",
    "Jewelry",
    "Chocolate and sugars and sweeteners and confectionary products",
    "Merchandising furniture and accessories",
    "Bread and bakery products",
    "Photographic services",
    "Manufacturing support services",
    "Electronic reference material",
    "Banking and investment",
    "Professional service robot",
    "Material packing and handling",
    "Transportation components and systems",
    "Professional engineering services",
    "Alternative educational systems",
    "Specialized educational services",
    "Restaurants and catering",
    "Travel facilitation",
    "Hotels and lodging and meeting facilities",
    "Public relations and professional communications services",
    "Independent living aids for the physically challenged",
    "Structural materials",
    "Material handling machinery and equipment",
    "Marine construction and installation equipment",
    "Construction and maintenance support equipment",
    "Electronic manufacturing machinery and equipment and accessories",
    "Concrete and cement and plaster",
    "Roads and landscape",
    # The commas below are part of this single label, not list separators.
    "Administrative interventions or procedures, physiological systems and anatomical regions, introduction and irrigation and circulatory",
    "Fire protection",
    "Passive discrete components",
    "Footwear",
    "Public order and safety",
    "Industrial use papers",
    "Electron tube devices and accessories",
    "Transportation services equipment",
    "Machine made parts",
    "Accounting and bookkeeping services",
    "Commercial sports",
    "Performing arts",
    "Governmental property right conferrals",
    "Non edible plant and forestry products",
    "Doors and windows and glass",
    "Rope and chain and cable and wire and strap",
    "Machinery and transport equipment manufacture",
    "Castings and casting assemblies",
    "Industrial food and beverage equipment",
    "Earth and stone",
    "Alloys",
    "Hardware",
    "Housings and cabinets and casings",
    "Residential building construction services",
    "Discrete semiconductor devices",
    "Packings glands boots and covers",
    "Wire machinery and equipment",
    "Entertainment services",
    "Explosive materials",
    "Industrial optics",
    "Diagnostic and microbiological devices",
    "Trade policy and services",
    "Machined castings",
    "Decorative adornments",
    "Medical diagnostic imaging and nuclear medicine products",
    "Graphic design",
    "Dairy products and eggs",
    "Film and theater production support services",
    "Metal and mineral industries",
    "Welding and soldering and brazing machinery and accessories and supplies",
    "Packaging materials",
    "Machined raw stock",
]

# Prompt that asks the model to pick one UNSPSC family label from the list.
# Doubled braces are literal braces; {family_labels} and {TEXT} are filled in
# by the PromptTemplate.
template_classify = '''
You are a classifier bot that assigns a UNSPSC family label to the given text.
Your task is to classify the text into one of the following UNSPSC family labels:
{family_labels}

Provide only the family label in your answer. If unsure, label as "Unknown".

Convert it to JSON format using 'Answer' as the key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["Family Label"]}}

What is the UNSPSC family label for the following text?:
{TEXT}
'''

json_output_parser = JsonOutputParser()

# Built once at import time; the template never changes between requests.
_CLASSIFY_PROMPT = PromptTemplate(
    template=template_classify,
    input_variables=["TEXT", "family_labels"],
)


def _extract_label(parsed_output):
    """Return the label held under the "Answer" key of a parsed model reply.

    The prompt asks for ``{"Answer": ["Label"]}``, but a bare string under
    "Answer" is accepted as well. Anything else (non-dict reply, missing key,
    empty list, blank or non-string value) yields ``UNKNOWN_LABEL``.
    """
    if not isinstance(parsed_output, dict):
        return UNKNOWN_LABEL

    answer = parsed_output.get("Answer")
    if isinstance(answer, (list, tuple)):
        answer = answer[0] if answer else None

    # Indexing a bare string with [0] would return only its first character,
    # so strings are returned whole.
    if isinstance(answer, str) and answer.strip():
        return answer.strip()
    return UNKNOWN_LABEL


def classify_text(text):
    """Classify *text* into a UNSPSC family label using the LLM.

    Args:
        text: Free-form text to classify.

    Returns:
        A ``(label, duration)`` tuple. ``label`` is the label string reported
        by the model, or ``"Unknown"`` when *text* is blank or the reply is
        not usable JSON in the requested shape. ``duration`` is the elapsed
        time in seconds as a float (``0.0`` when the model is not called).

    Raises:
        Whatever ``llm.invoke`` raises; endpoint errors are not caught here.
    """
    # Skip the remote call entirely when there is nothing to classify.
    if not text or not text.strip():
        return UNKNOWN_LABEL, 0.0

    start = time.perf_counter()

    formatted_prompt = _CLASSIFY_PROMPT.format(
        TEXT=text,
        family_labels="\n".join(family_labels),
    )
    raw_reply = llm.invoke(formatted_prompt)

    try:
        parsed_output = json_output_parser.parse(raw_reply)
    except OutputParserException:
        # The model did not return parseable JSON; fall back instead of
        # surfacing a parser error in the UI.
        label = UNKNOWN_LABEL
    else:
        label = _extract_label(parsed_output)

    duration = time.perf_counter() - start
    return label, duration


def create_gradio_interface():
    """Build the Gradio Blocks UI for the classifier and launch it."""
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Text")
        output_text = gr.Textbox(label="Detected UNSPSC Family")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Classify UNSPSC Family")

        def on_submit(text):
            """Run the classifier and format both outputs for display."""
            classification, duration = classify_text(text)
            return classification, f"Time taken: {duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[output_text, time_taken],
        )

    iface.launch()


if __name__ == "__main__":
    create_gradio_interface()