Spaces:

alpcansoydas
/

irrelevant-content-detection

Sleeping

File size: 3,302 Bytes

4a0ad23
 
 
 
 
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
0c62f77
 
 
 
4a0ad23
a08048c
 
 
4a0ad23
a08048c
4a0ad23
 
 
 
 
 
 
 
a08048c
4a0ad23
 
 
 
 
a08048c
4a0ad23
 
 
 
 
 
a08048c
4a0ad23
a08048c
9ccf0ee
a08048c
 
 
4a0ad23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ccf0ee
 
4a0ad23
 
 
 
a08048c
 
 
4a0ad23
a08048c
4a0ad23
a08048c
4a0ad23
a08048c
4a0ad23

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
import time

# Initialize the LLM and other components
# Module-level HuggingFaceEndpoint client shared by every classify_text call.
# It is constructed at import time, so importing this module builds the client.
# NOTE(review): temperature=0.5 is set while do_sample=False; confirm which of
# the two the serving backend actually honors (greedy vs. sampled decoding).
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.5,
    do_sample=False,
)

# Prompt template for the relevance classifier.
# The single-brace placeholders ({COMPANY_NAME}, {COMPANY_SECTOR},
# {ABOUT_COMPANY}, {TEXT}) are filled in by PromptTemplate.format inside
# classify_text. The doubled braces in the example line are escapes so that a
# literal JSON object appears in the rendered prompt.
template_classify = '''
You are an irrelevant text detector bot.
In social media, there are lots of bot accounts and they produce non related text on company hashtag.
You will be provided company informations such as company name, company sector and information about company and text from social media.
You will classify it as RELEVANT or IRRELEVANT.

Company name: {COMPANY_NAME}
Company sector: {COMPANY_SECTOR}
About Company: {ABOUT_COMPANY}

Detect following text as RELEVANT OR IRRELEVANT:

<text>  
{TEXT}  
</text>

convert it to json format using 'Answer' as key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["RELEVANT"]}}
'''

# Shared parser used by classify_text to turn the raw LLM completion into a
# Python object (the prompt asks the model for a JSON object keyed by 'Answer').
json_output_parser = JsonOutputParser()

# Define the classify_text function
def classify_text(text, company_name_input, company_sector_input, about_company_input):
    """Ask the LLM whether ``text`` is relevant to the described company.

    The module-level ``template_classify`` prompt is filled with the company
    details and the text, sent to the module-level ``llm``, and the raw
    completion is parsed with ``json_output_parser``.

    Args:
        text: The social-media text to classify.
        company_name_input: Company name inserted into the prompt.
        company_sector_input: Company sector inserted into the prompt.
        about_company_input: Free-form company description inserted into the
            prompt.

    Returns:
        A ``(parsed_output, duration)`` tuple. ``parsed_output`` is whatever
        ``json_output_parser.parse`` returns for the completion (the prompt
        requests a JSON object such as ``{"Answer": ["RELEVANT"]}``, but the
        model's actual output is not validated here). ``duration`` is the
        elapsed wall time in seconds as a float, covering prompt building,
        the LLM call and parsing.

    Raises:
        Any exception raised by the LLM call or by the JSON parser propagates
        to the caller unchanged.
    """
    # perf_counter is monotonic, so the measured interval cannot go negative
    # or jump if the system clock is adjusted mid-request.
    start = time.perf_counter()

    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT", "COMPANY_NAME", "COMPANY_SECTOR", "ABOUT_COMPANY"],
    )
    formatted_prompt = prompt_classify.format(
        TEXT=text,
        COMPANY_NAME=company_name_input,
        COMPANY_SECTOR=company_sector_input,
        ABOUT_COMPANY=about_company_input,
    )

    # `llm` is only read here, so no `global` declaration is required.
    raw_response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(raw_response)

    duration = time.perf_counter() - start
    return parsed_output, duration

# Adapter that shapes classify_text results for display in the Gradio UI
def gradio_app(text, company_name_input, company_sector_input, about_company_input):
    """Run the classifier and return display-ready values.

    Forwards the four arguments to ``classify_text`` in the same order and
    returns a 2-tuple: the classification exactly as ``classify_text``
    produced it, and a message string with the elapsed time formatted to two
    decimal places.
    """
    result = classify_text(
        text,
        company_name_input,
        company_sector_input,
        about_company_input,
    )
    label, elapsed = result
    elapsed_message = f"Time taken: {elapsed:.2f} seconds"
    return label, elapsed_message

def create_gradio_interface():
    """Build the Gradio Blocks UI for the classifier and launch it.

    The page shows three company fields, the text to classify, a result box
    and a timing box, plus a "Detect" button. Clicking the button calls
    ``gradio_app`` and writes its two return values into the result and timing
    boxes. ``iface.launch()`` is called before this function returns.
    """
    with gr.Blocks() as iface:
        company_name_input = gr.Textbox(label="Enter Company Name")
        company_sector_input = gr.Textbox(label="Enter Company Sector")
        about_company_input = gr.Textbox(label="Enter Information About Company")
        text_input = gr.Textbox(label="Text")
        output_text = gr.Textbox(label="Result")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Detect")

        # Gradio passes `inputs` to `fn` positionally, so the list must follow
        # the handler's signature: (text, company_name, company_sector,
        # about_company). The previous order put the company name into the
        # `text` slot and shifted every other field by one.
        submit_btn.click(
            fn=gradio_app,
            inputs=[
                text_input,
                company_name_input,
                company_sector_input,
                about_company_input,
            ],
            outputs=[output_text, time_taken],
        )

    iface.launch()

# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    create_gradio_interface()