alpcansoydas committed on
Commit
4a0ad23
1 Parent(s): 00f486b

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +6 -6
  2. app.py +110 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Irrelevant Content Detection
3
- emoji:
4
- colorFrom: gray
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.40.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Topic Detection
3
+ emoji: 🐨
4
+ colorFrom: pink
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.40.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain_huggingface import HuggingFaceEndpoint
4
+ from langchain_core.output_parsers import JsonOutputParser
5
+ from langdetect import detect
6
+ import time
7
+
8
# Initialize the LLM and other components.
# `llm` is the shared model client; classify_text sends each formatted prompt
# through `llm.invoke`.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    # NOTE(review): a non-default temperature is passed together with
    # do_sample=False; these two settings look contradictory — confirm which
    # one the endpoint actually honours and drop the other.
    temperature=0.7,
    do_sample=False,
)
16
+
17
# Prompt used by classify_text. The input text is substituted at {TEXT}; the
# model is asked for exactly one short, general topic, returned as JSON under
# the 'Answer' key. The doubled braces in the example are escapes so that the
# formatted prompt contains literal `{` and `}`.
# NOTE(review): classify_text also passes a LANG value when formatting, but
# this template has no {LANG} placeholder, so it is not part of the prompt.
template_classify = '''
You are a topic detector bot. Your task is to determine the main topic of given text phrase.

Answer general main topic not specific words.
Your answer does not contain specific information from given text.
Answer just one general main topic. Do not answer two or more topic.
Answer shortly with two or three word phrase. Do not answer with long sentence.
If you do not know the topic just answer as General.

What is the main topic of given text?:

<text>
{TEXT}
</text>

convert it to json format using 'Answer' as key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["General"]}}
'''
37
+
38
+ """
39
+ template_json = '''
40
+ Your task is to read the following text, convert it to json format using 'Answer' as key and return it.
41
+ <text>
42
+ {RESPONSE}
43
+ </text>
44
+
45
+ Your final response MUST contain only the response, no other text.
46
+ Example:
47
+ {{"Answer":["General"]}}
48
+ '''
49
+ """
50
+
51
# Shared parser instance; classify_text calls `.parse` on the model's raw reply.
json_output_parser = JsonOutputParser()
52
+
53
+ # Define the classify_text function
54
def classify_text(text):
    """Ask the LLM for the main topic of ``text`` and time the round trip.

    The language of ``text`` is detected with ``langdetect`` and mapped to a
    language name, the classification prompt is formatted and sent to the
    module-level ``llm``, and the reply is parsed with ``json_output_parser``.

    Args:
        text: The text whose main topic should be detected.

    Returns:
        A ``(parsed_output, duration)`` tuple: the object produced by
        ``json_output_parser.parse`` from the model reply, and the elapsed
        time in seconds as a float.

    Raises:
        Whatever ``llm.invoke`` or ``json_output_parser.parse`` raise, e.g.
        when the endpoint is unreachable or the reply is not valid JSON.
    """
    # Imported locally so this function does not depend on a file-level import
    # of the exception type.
    from langdetect.lang_detect_exception import LangDetectException

    language_map = {
        "tr": "turkish",
        "en": "english",
        "ar": "arabic",
        "es": "spanish",
        "it": "italian",
    }
    default_language = "english"

    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the wall clock is adjusted while the request is in flight.
    start = time.perf_counter()

    try:
        lang_code = detect(text)
    except LangDetectException:
        # langdetect raises when the text has no usable features (empty
        # string, digits or punctuation only); fall back instead of crashing.
        lang_code = None
    # Unsupported or undetected languages map to the default language name,
    # keeping the value consistent with the names stored in language_map.
    lang = language_map.get(lang_code, default_language)

    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["LANG", "TEXT"],
    )
    # NOTE: template_classify has no {LANG} placeholder at the moment, so the
    # detected language does not change the prompt text.
    formatted_prompt = prompt_classify.format(TEXT=text, LANG=lang)
    classify = llm.invoke(formatted_prompt)

    parsed_output = json_output_parser.parse(classify)
    duration = time.perf_counter() - start
    return parsed_output, duration
92
+
93
+ # Create the Gradio interface
94
def gradio_app(text):
    """Classify ``text`` and return display-ready values.

    Returns:
        A pair of the classification returned by ``classify_text`` and a
        message reporting the elapsed time rounded to two decimals.
    """
    topics, elapsed_seconds = classify_text(text)
    timing_message = f"Time taken: {elapsed_seconds:.2f} seconds"
    return topics, timing_message
97
+
98
def create_gradio_interface():
    """Build the topic-detection UI and launch it.

    The page has one input textbox, two output textboxes (detected topics and
    elapsed time) and a button that triggers the detection.
    """
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Text")
        output_text = gr.Textbox(label="Detected Topics")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Detect topic")

        # Route the click through gradio_app rather than classify_text so the
        # timing box receives the formatted "Time taken: X.XX seconds" string
        # instead of a raw float.
        submit_btn.click(
            fn=gradio_app,
            inputs=text_input,
            outputs=[output_text, time_taken],
        )

    iface.launch()
108
+
109
+ if __name__ == "__main__":
110
+ create_gradio_interface()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain==0.2.1
2
+ langchain-community==0.2.1
3
+ langchain-huggingface==0.0.3
4
+ langdetect
5
+ sentencepiece