Spaces:

alpcansoydas
/

irrelevant-content-detection

Sleeping

File size: 3,177 Bytes

4a0ad23
 
 
 
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
495b6d0
512f3d4
 
495b6d0
4a0ad23
512f3d4
495b6d0
a08048c
 
 
495b6d0
4a0ad23
495b6d0
4a0ad23
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
 
a08048c
4a0ad23
 
 
 
 
 
a08048c
4a0ad23
495b6d0
a08048c
9ccf0ee
a08048c
 
 
495b6d0
4a0ad23
 
 
 
 
 
 
9ccf0ee
 
4a0ad23
 
 
 
a08048c
 
 
4a0ad23
a08048c
4a0ad23
a08048c
4a0ad23
ff9b078
4a0ad23
 
 
 
512f3d4

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
import time

# Module-level LLM client shared by every classify_text call: a
# HuggingFaceEndpoint configured for text generation against the
# mistralai/Mistral-7B-Instruct-v0.3 repository.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    # NOTE(review): a temperature is configured while do_sample=False;
    # whether the temperature has any effect with sampling disabled depends
    # on the serving backend -- confirm which behaviour is intended.
    temperature=0.5,
    do_sample=False,
)

# Prompt text for the relevance classifier. The single-brace fields
# ({COMPANY_NAME}, {COMPANY_SECTOR}, {ABOUT_COMPANY}, {TEXT}) are the
# variables substituted by classify_text. The doubled braces around the
# example are presumably brace escapes, so that a literal JSON object
# appears in the rendered prompt (assumes PromptTemplate's default
# f-string-style formatting -- TODO confirm).
# The wording is sent to the model as-is, so edits here change model input.
template_classify = '''
You are an twitter irrelevant text detector.
You will be provided company informations such as company name, company sector and information about company.
Using this informations about company, think about its services and sector, if given text is relevant to the company classify it as RELEVANT,
if the given text is not relevant to the company classify it as IRRELEVANT.

Provided information:
[
Company name: {COMPANY_NAME}
Company sector: {COMPANY_SECTOR}
About Company: {ABOUT_COMPANY}
]

Detect following text as RELEVANT OR IRRELEVANT based provided information:

<text>  
{TEXT}  
</text>

convert it to json format using 'Answer' as key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["RELEVANT"]}}
'''

# Shared parser that classify_text uses to parse the model's raw reply.
json_output_parser = JsonOutputParser()

# Core classification routine: build the prompt, query the LLM, parse the reply.
def classify_text(text, company_name_input, company_sector_input, about_company_input):
    """Ask the LLM whether ``text`` is relevant to the described company.

    The module-level ``template_classify`` prompt is filled with the four
    inputs, sent to the module-level ``llm``, and the raw reply is parsed
    with ``json_output_parser``.

    Args:
        text: The text to classify (substituted for ``{TEXT}``).
        company_name_input: Company name (``{COMPANY_NAME}``).
        company_sector_input: Company sector (``{COMPANY_SECTOR}``).
        about_company_input: Free-form company description
            (``{ABOUT_COMPANY}``).

    Returns:
        A ``(parsed_output, duration)`` tuple, where ``parsed_output`` is
        whatever ``json_output_parser.parse`` produced from the model reply
        and ``duration`` is the elapsed time in seconds as a float, covering
        prompt construction, the LLM call and parsing.

    Raises:
        Nothing is caught here: any exception raised by ``llm.invoke`` or by
        ``json_output_parser.parse`` propagates to the caller.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-request.
    started = time.perf_counter()

    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT", "COMPANY_NAME", "COMPANY_SECTOR", "ABOUT_COMPANY"],
    )
    formatted_prompt = prompt_classify.format(
        TEXT=text,
        COMPANY_NAME=company_name_input,
        COMPANY_SECTOR=company_sector_input,
        ABOUT_COMPANY=about_company_input,
    )

    # Echo the exact prompt to stdout (flushed immediately) for debugging.
    print(formatted_prompt, flush=True)

    classify = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(classify)

    duration = time.perf_counter() - started
    return parsed_output, duration

# UI-facing wrapper: same inputs as classify_text, display-ready outputs.
def gradio_app(text, company_name_input, company_sector_input, about_company_input):
    """Run ``classify_text`` and format its timing for display.

    Args:
        text: The text to classify.
        company_name_input: Company name.
        company_sector_input: Company sector.
        about_company_input: Free-form company description.

    Returns:
        A ``(classification, message)`` tuple: the classification exactly as
        returned by ``classify_text``, and a string of the form
        ``"Time taken: X.XX seconds"`` with the duration to two decimals.
    """
    outcome = classify_text(
        text,
        company_name_input,
        company_sector_input,
        about_company_input,
    )
    verdict, elapsed = outcome
    timing_message = f"Time taken: {elapsed:.2f} seconds"
    return verdict, timing_message

def create_gradio_interface():
    """Build the Gradio Blocks UI for the classifier and launch it.

    The layout has four input textboxes (company name, sector, description
    and the text to classify), two output textboxes (result and time taken)
    and a "Detect" button. Clicking the button calls ``gradio_app`` with the
    four inputs and writes its two return values to the output textboxes.
    """
    with gr.Blocks() as iface:
        company_name_input = gr.Textbox(label="Enter Company Name")
        company_sector_input = gr.Textbox(label="Enter Company Sector")
        about_company_input = gr.Textbox(label="Enter Information About Company")
        text_input = gr.Textbox(label="Text")
        output_text = gr.Textbox(label="Result")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Detect")

        # Route the click through gradio_app (not classify_text directly) so
        # the elapsed time is shown as a formatted two-decimal message rather
        # than a raw float. Input order matches gradio_app's parameters.
        submit_btn.click(
            fn=gradio_app,
            inputs=[text_input, company_name_input, company_sector_input, about_company_input],
            outputs=[output_text, time_taken],
        )

    iface.launch()

# Build and launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    create_gradio_interface()