Spaces:
Sleeping
Sleeping
File size: 8,109 Bytes
ae5819c bd8d834 ae5819c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
import time
# Shared Hugging Face endpoint client, created once at import time and reused
# by every classification request.
# NOTE(review): temperature=0.7 is passed together with do_sample=False --
# confirm whether the temperature has any effect when sampling is disabled.
_LLM_SETTINGS = {
    "repo_id": "mistralai/Mistral-7B-Instruct-v0.3",
    "task": "text-generation",
    "max_new_tokens": 128,
    "temperature": 0.7,
    "do_sample": False,
}
llm = HuggingFaceEndpoint(**_LLM_SETTINGS)
# UNSPSC family labels offered to the model as the closed set of answers.
# Every entry must be followed by a comma: Python concatenates adjacent string
# literals, so a missing comma silently merges two labels into one element.
family_labels = [
    'Containers and storage',
    'Components for information technology or broadcasting or telecommunications',
    'Industrial process machinery and equipment and supplies',
    'Clothing',
    'Computer Equipment and Accessories',
    'Data Voice or Multimedia Network Equipment or Platforms and Accessories',
    'Software',
    'Communications Devices and Accessories',
    'Timepieces',
    'Transport services',
    'Measuring and observing and testing instruments',
    'Heating and ventilation and air circulation',
    'Power sources',
    'Structural components and basic shapes',
    'Electronic hardware and component parts and accessories',
    'Insurance and retirement services',
    'Electrical wire and cable and harness',
    'Electrical equipment and components and supplies',
    'Hand tools',
    'Resins and rosins and other resin derived materials',
    'Part cleaning machinery and accessories',
    'Heavy construction machinery and equipment',
    'Power generation',
    'Pneumatic machinery and equipment',
    'Vehicle bodies and trailers',
    'Workplace safety equipment and supplies and training materials',
    'Public safety and control',
    'Motor vehicles',
    'Batteries and generators and kinetic power transmission',
    'Domestic appliances',
    'Lamps and lightbulbs and lamp components',
    'Automotive specialty tools',
    'Vocational training',
    'Electrical wire management devices and accessories and supplies',
    'Lighting Fixtures and Accessories',
    'Automation control devices and components and accessories',
    'Fluid and gas distribution',
    'Industrial filtering and purification',
    'Printed circuits and integrated circuits and microassemblies',
    'Portable Structure Building Components',
    'Security surveillance and detection',
    'Structural building products',
    'Work related organizations',
    'Management advisory services',
    'Civic organizations and associations and movements',
    'Printed media',
    'Clubs',
    'Industrial pumps and compressors',
    'Elements and gases',
    'Office machines and their supplies and accessories',
    'Consumer electronics',
    'Audio and visual presentation and composing equipment',
    'Photographic or filming or video equipment',
    'Printing and publishing equipment',
    'Rubber and elastomers',
    'Compounds and mixtures',
    'Paper products',
    'Laboratory and scientific equipment',
    'Building and facility maintenance and repair services',
    'Nonresidential building construction services',
    'Specialized trade construction and maintenance services',
    'Heavy construction services',
    'Insurable interest contracts',
    'Computer services',
    'Information Technology Service Delivery',
    'Real estate services',
    'Seasonings and preservatives',
    'Legal services',
    'Business administration services',
    'General agreements and contracts',
    'Law enforcement',
    'Prepared and preserved foods',
    'Office and desk accessories',
    'Office supply',
    'Security and personal safety',
    'Commercial and industrial furniture',
    'Fuels',
    'Passenger transport',
    'Mail and cargo transport',
    'Medical facility products',
    'Paper materials',
    'Beverages',
    'Cleaning and janitorial supplies',
    'Janitorial equipment',
    'Marketing and distribution',
    'Vending machines',
    'Monetary instruments or currency',
    'Institutional food services equipment',
    'Comprehensive health services',
    'Human resources services',
    'Bank offered products',
    'Advertising',
    'Signage and accessories',
    'Fabrics and leather materials',
    'Luggage and handbags and packs and cases',
    'Jewelry',
    'Chocolate and sugars and sweeteners and confectionary products',
    'Merchandising furniture and accessories',
    'Bread and bakery products',
    'Photographic services',
    'Manufacturing support services',
    'Electronic reference material',
    'Banking and investment',
    'Professional service robot',
    'Material packing and handling',
    'Transportation components and systems',
    'Professional engineering services',
    'Alternative educational systems',
    'Specialized educational services',
    'Restaurants and catering',
    'Travel facilitation',
    'Hotels and lodging and meeting facilities',
    'Public relations and professional communications services',
    'Independent living aids for the physically challenged',
    'Structural materials',
    'Material handling machinery and equipment',
    'Marine construction and installation equipment',
    'Construction and maintenance support equipment',
    'Electronic manufacturing machinery and equipment and accessories',
    'Concrete and cement and plaster',
    'Roads and landscape',
    'Administrative interventions or procedures, physiological systems and anatomical regions, introduction and irrigation and circulatory',
    'Fire protection',
    'Passive discrete components',
    'Footwear',
    'Public order and safety',
    'Industrial use papers',
    'Electron tube devices and accessories',
    'Transportation services equipment',
    'Machine made parts',
    'Accounting and bookkeeping services',
    'Commercial sports',
    'Performing arts',
    'Governmental property right conferrals',
    'Non edible plant and forestry products',
    'Doors and windows and glass',
    'Rope and chain and cable and wire and strap',
    'Machinery and transport equipment manufacture',
    'Castings and casting assemblies',
    'Industrial food and beverage equipment',
    'Earth and stone',
    'Alloys',
    'Hardware',
    'Housings and cabinets and casings',
    'Residential building construction services',
    'Discrete semiconductor devices',
    'Packings glands boots and covers',
    'Wire machinery and equipment',
    'Entertainment services',
    'Explosive materials',
    'Industrial optics',
    'Diagnostic and microbiological devices',
    'Trade policy and services',
    'Machined castings',
    'Decorative adornments',
    'Medical diagnostic imaging and nuclear medicine products',
    'Graphic design',
    'Dairy products and eggs',
    'Film and theater production support services',
    'Metal and mineral industries',
    'Welding and soldering and brazing machinery and accessories and supplies',
    'Packaging materials',
    'Machined raw stock',
]
# Prompt template that asks the model to pick one UNSPSC family label.
# Placeholders filled in by classify_text:
#   {family_labels} - the candidate labels the model must choose from
#   {TEXT}          - the text to classify
# The example line uses doubled braces so that formatting emits literal JSON
# braces instead of treating them as a placeholder.
template_classify = '''
You are a classifier bot that assigns a UNSPSC family label to the given text.
Your task is to classify the text into one of the following UNSPSC family labels:
{family_labels}
Provide only the family label in your answer. If unsure, label as "Unknown".
Convert it to JSON format using 'Answer' as the key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["Family Label"]}}
What is the UNSPSC family label for the following text?:
<text>
{TEXT}
</text>
'''
# Parser used by classify_text to turn the model's raw reply into Python data.
json_output_parser = JsonOutputParser()
# Classification entry point plus the helper that unwraps the model's answer
def _extract_label(parsed_output):
    """Return the family label held in the parsed model reply, or "Unknown".

    The prompt asks for ``{"Answer": ["Family Label"]}``, but the reply may
    also carry a bare string, an empty list, or no "Answer" key at all, so
    each shape is handled explicitly instead of blindly indexing ``[0]``.
    """
    if not isinstance(parsed_output, dict):
        return "Unknown"
    answer = parsed_output.get("Answer")
    if isinstance(answer, str):
        # Indexing a string with [0] would return only its first character.
        return answer.strip() or "Unknown"
    if isinstance(answer, (list, tuple)) and answer:
        return answer[0]
    return "Unknown"


def classify_text(text):
    """Classify ``text`` into a UNSPSC family label using the shared LLM.

    Args:
        text: Free-form text to classify.

    Returns:
        A ``(label, duration)`` tuple: the label taken from the model's JSON
        reply (``"Unknown"`` when the reply has no usable answer) and the
        elapsed time in seconds. Blank or missing input returns
        ``("Unknown", 0.0)`` without calling the model.

    Errors raised by ``llm.invoke`` or by the JSON parser are not caught
    here and propagate to the caller.
    """
    # Nothing to classify: skip the remote call entirely.
    if not text or not text.strip():
        return "Unknown", 0.0

    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the wall clock is adjusted mid-request.
    start = time.perf_counter()
    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT", "family_labels"],
    )
    formatted_prompt = prompt_classify.format(
        TEXT=text,
        # One label per line in the prompt.
        family_labels="\n".join(family_labels),
    )
    raw_reply = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(raw_reply)
    duration = time.perf_counter() - start
    return _extract_label(parsed_output), duration
# Build and launch the Gradio UI
def create_gradio_interface():
    """Assemble the Blocks UI for UNSPSC classification and launch it."""

    def on_submit(text):
        # Run the classifier and format the elapsed time for display.
        label, elapsed = classify_text(text)
        return label, f"Time taken: {elapsed:.2f} seconds"

    with gr.Blocks() as iface:
        query_box = gr.Textbox(label="Text")
        family_box = gr.Textbox(label="Detected UNSPSC Family")
        timing_box = gr.Textbox(label="Time Taken (seconds)")
        classify_button = gr.Button("Classify UNSPSC Family")
        # One click fills both output boxes from on_submit's two return values.
        classify_button.click(
            fn=on_submit,
            inputs=query_box,
            outputs=[family_box, timing_box],
        )

    iface.launch()


if __name__ == "__main__":
    create_gradio_interface()
|