Spaces:

alpcansoydas
/

irrelevant-content-detection

Sleeping

File size: 3,289 Bytes

4a0ad23
 
 
 
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
0c62f77
512f3d4
 
 
 
4a0ad23
512f3d4
a08048c
 
 
4a0ad23
512f3d4
4a0ad23
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
 
a08048c
4a0ad23
 
 
 
 
 
a08048c
4a0ad23
a08048c
9ccf0ee
a08048c
 
 
4a0ad23
 
 
 
 
 
 
9ccf0ee
 
4a0ad23
 
 
 
a08048c
 
 
4a0ad23
a08048c
4a0ad23
a08048c
4a0ad23
a08048c
4a0ad23
 
 
 
512f3d4

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
import time

# Initialize the LLM and other components
# Hugging Face endpoint client for the Mistral-7B-Instruct model. It is created
# once at module import and reused by every classify_text() call.
# NOTE(review): `temperature=0.5` is set together with `do_sample=False`. These
# look contradictory (temperature is normally only meaningful when sampling) --
# confirm which decoding mode is intended and drop the other setting.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.5,
    do_sample=False,
)

# Prompt for the relevance classifier. The {COMPANY_NAME}, {COMPANY_SECTOR},
# {ABOUT_COMPANY} and {TEXT} placeholders are filled in by classify_text().
# The doubled braces in the example line are escapes, so the formatted prompt
# contains literal JSON braces there.
template_classify = '''
You are an irrelevant text detector bot.
In social media, there are lots of bot accounts or real people and they produce irrelevant/non-related text about company and its services.
You will be provided company informations such as company name, company sector and information about company.
Using this informations about company, think about its services and sector, if given text is relevant to the company classify it as RELEVANT,
if the given text is not relevant to the company classify it as IRRELEVANT

Provided information:
Company name: {COMPANY_NAME}
Company sector: {COMPANY_SECTOR}
About Company: {ABOUT_COMPANY}

Detect following text as RELEVANT OR IRRELEVANT based on Company Name, Company Sector, About Company:

<text>  
{TEXT}  
</text>

convert it to json format using 'Answer' as key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["RELEVANT"]}}
'''

# Shared parser used by classify_text() to turn the model's raw completion into
# a Python object; the prompt asks the model for {"Answer": [...]}-shaped JSON.
json_output_parser = JsonOutputParser()

# Define the classify_text function
def classify_text(text, company_name_input, company_sector_input, about_company_input):
    """Classify ``text`` as relevant or irrelevant to the described company.

    Fills the module-level ``template_classify`` prompt with the arguments,
    sends it to the shared ``llm`` and parses the completion with
    ``json_output_parser``.

    Args:
        text: The text to classify.
        company_name_input: Company name inserted into the prompt.
        company_sector_input: Company sector inserted into the prompt.
        about_company_input: Company description inserted into the prompt.

    Returns:
        A ``(parsed_output, duration)`` tuple: the parsed model answer (the
        prompt requests ``{"Answer": ["RELEVANT"]}``-style JSON) and the
        elapsed time in seconds as a float.

    Note:
        Nothing is caught here: errors raised by the endpoint call or by
        parsing a malformed completion propagate to the caller.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the request.
    start = time.perf_counter()

    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT", "COMPANY_NAME", "COMPANY_SECTOR", "ABOUT_COMPANY"],
    )
    formatted_prompt = prompt_classify.format(
        TEXT=text,
        COMPANY_NAME=company_name_input,
        COMPANY_SECTOR=company_sector_input,
        ABOUT_COMPANY=about_company_input,
    )
    classify = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(classify)

    duration = time.perf_counter() - start
    return parsed_output, duration

# Wrapper around classify_text that formats the elapsed time for display
def gradio_app(text, company_name_input, company_sector_input, about_company_input):
    """Run the classifier and render its timing as a display string.

    Returns:
        A 2-tuple of the classification exactly as produced by
        ``classify_text`` and a message giving the elapsed time in seconds,
        formatted to two decimal places.
    """
    verdict, seconds = classify_text(
        text,
        company_name_input,
        company_sector_input,
        about_company_input,
    )
    timing_message = f"Time taken: {seconds:.2f} seconds"
    return verdict, timing_message

def create_gradio_interface():
    """Build the Gradio Blocks UI for the relevance detector and launch it.

    The form has three company fields and one text field as inputs, plus a
    result box and a timing box as outputs. Clicking "Detect" calls
    ``gradio_app`` with the field values and shows its two return values.
    """
    with gr.Blocks() as iface:
        company_name_input = gr.Textbox(label="Enter Company Name")
        company_sector_input = gr.Textbox(label="Enter Company Sector")
        about_company_input = gr.Textbox(label="Enter Information About Company")
        text_input = gr.Textbox(label="Text")
        output_text = gr.Textbox(label="Result")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Detect")

        # Component values are passed to the callback positionally, so this
        # list must follow the callback's parameter order: text first, then
        # the company fields. Listing the company fields first would feed the
        # company name in as the text to classify.
        submit_btn.click(
            fn=gradio_app,
            inputs=[
                text_input,
                company_name_input,
                company_sector_input,
                about_company_input,
            ],
            outputs=[output_text, time_taken],
        )

    iface.launch()

# Build and launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    create_gradio_interface()