Update app.py
Browse files
app.py
CHANGED
@@ -94,13 +94,9 @@ def summarize_text(text):
|
|
94 |
if summarizer is None:
|
95 |
return "Summarization model could not be loaded."
|
96 |
logger.info("Successfully loaded summarization Model")
|
97 |
-
logger.info(f"\n\n {text}\n")
|
98 |
|
99 |
-
|
100 |
-
translated_text = ''.join(item['translated'] for item in data if 'translated' in item)
|
101 |
-
# full_text = ''.join(item['translated'] for item in results if 'translated' in item)
|
102 |
-
logger.info(f"\n\nWorking on text:\n{full_text}")
|
103 |
-
summary = summarizer( full_text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
|
104 |
return summary
|
105 |
except Exception as e:
|
106 |
logger.error(f"Summarization failed: {str(e)}")
|
@@ -118,7 +114,280 @@ def answer_question(context, question):
|
|
118 |
return "Please enter your Question"
|
119 |
|
120 |
messages = [
|
121 |
-
{"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
{"role": "user", "content": f"Context: {''.join(item['translated'] for item in context if 'translated' in item)}\n\nQuestion: {question}"}
|
123 |
]
|
124 |
|
|
|
94 |
if summarizer is None:
|
95 |
return "Summarization model could not be loaded."
|
96 |
logger.info("Successfully loaded summarization Model")
|
97 |
+
# logger.info(f"\n\n {text}\n")
|
98 |
|
99 |
+
summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
|
|
|
|
|
|
|
|
|
100 |
return summary
|
101 |
except Exception as e:
|
102 |
logger.error(f"Summarization failed: {str(e)}")
|
|
|
114 |
return "Please enter your Question"
|
115 |
|
116 |
messages = [
|
117 |
+
# {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
|
118 |
+
{"role":"system", "content": """
|
119 |
+
Analyze a translated transcript of a conversation that may contain multiple speakers and summarize the information in a structured intelligence document.
|
120 |
+
|
121 |
+
The input format will include word-level or sentence-level timestamps, each indicating the speaker ID, language, and translated text.
|
122 |
+
|
123 |
+
# Input Format Overview
|
124 |
+
|
125 |
+
Word-Level Timestamps Example:
|
126 |
+
```
|
127 |
+
[Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Word>"
|
128 |
+
```
|
129 |
+
Example:
|
130 |
+
```
|
131 |
+
0.01-0.02 - Speaker 1 - Language: English - Translated Text: "Proceed"
|
132 |
+
0.02-0.025 - Speaker 1 - Language: English - Translated Text: "with"
|
133 |
+
0.025-0.032 - Speaker 2 - Language: English - Translated Text: "caution"
|
134 |
+
```
|
135 |
+
|
136 |
+
Optional Sentence-Level Structure Example:
|
137 |
+
```
|
138 |
+
[Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Sentence>"
|
139 |
+
```
|
140 |
+
Example with Sentence Grouping:
|
141 |
+
```
|
142 |
+
0.01-0.05 - Speaker 1 - Language: English - Translated Text: "Proceed with caution."
|
143 |
+
0.06-0.12 - Speaker 2 - Language: English - Translated Text: "All systems are ready."
|
144 |
+
```
|
145 |
+
|
146 |
+
# Intelligence Summary Document Structure
|
147 |
+
|
148 |
+
Use the format below to create a structured summary for each conversation transcript received:
|
149 |
+
|
150 |
+
### 1. Top-Level Status & Assessment:
|
151 |
+
- **Threat Level Assessment**:
|
152 |
+
- Choose one:
|
153 |
+
- Completely Innocuous
|
154 |
+
- Likely Innocuous
|
155 |
+
- Unclear — Requires Investigation
|
156 |
+
- Likely Dangerous — Immediate Action
|
157 |
+
- Likely Dangerous — Delayed Action
|
158 |
+
- 100% Dangerous — Immediate Action
|
159 |
+
- 100% Dangerous — Delayed Action
|
160 |
+
- **Humanitarian Alert**: Identify any indications of distress, coercion, or need for assistance, such as signs of duress or requests for help.
|
161 |
+
|
162 |
+
### 2. Basic Metadata:
|
163 |
+
- **Number of Speakers**: Total and unique speakers detected.
|
164 |
+
- **Languages**: List of languages used, with indication of who spoke which language.
|
165 |
+
- **Location**: Actual or inferred locations of participants.
|
166 |
+
- **Communication Medium**: Identify the method of interaction (e.g., phone call, direct conversation).
|
167 |
+
|
168 |
+
### 3. Conversation Overview:
|
169 |
+
- **Summary**: Concise breakdown of the main points and context.
|
170 |
+
- **Alarming Keywords**: Identify any concerning words, including but not limited to keywords like "kill," "attack," "weapon," etc.
|
171 |
+
- **Suspicious or Cryptic Phrases**: Statements that appear coded or unclear in the context of the discussion.
|
172 |
+
|
173 |
+
### 4. In-Depth Analysis:
|
174 |
+
- **Network Connections**: Identify mentions of additional individuals or groups involved.
|
175 |
+
- **Intent & Emotional Tone Detection**: Analyze emotional cues (e.g., anger, fear, calmness, urgency). Identify signs of deception or tension.
|
176 |
+
- **Behavioral Patterns**: Highlight repeated themes, phrases, or signals of planning and coordination.
|
177 |
+
- **Code Words & Cryptic Language**: Detect terms that may indicate hidden or covert meaning.
|
178 |
+
- **Geolocation References**: Point out any inferences regarding regional language or place names.
|
179 |
+
- **Sentiment on Strategic Issues**: Identify any indication of radical, dissenting, or anti-national views that could imply unrest or extremism.
|
180 |
+
|
181 |
+
### 5. Resource Mentions & Operational Logistics:
|
182 |
+
- **Resource & Asset Mentions**: List any mention of tools, weapons, vehicles, or supply logistics.
|
183 |
+
- **Behavioral Deviations**: Identify shifts in tone, speech, or demeanor suggesting stress, coercion, urgency, or preparation.
|
184 |
+
|
185 |
+
### 6. Prioritization, Recommendations & Actionables:
|
186 |
+
- **High-Risk Alert Priority**: Identify whether the conversation should be flagged for further attention.
|
187 |
+
- **Recommended Actions**:
|
188 |
+
- **Surveillance**: Suggest surveillance if concerning patterns or keywords are detected.
|
189 |
+
- **Intervention**: Recommend intervention for urgent/high-risk cases.
|
190 |
+
- **Humanitarian Assistance**: Suggest immediate support for any signs of distress.
|
191 |
+
- **Follow-Up Analysis**: Identify statements that need deeper review for clarity or to understand potential hidden meanings.
|
192 |
+
|
193 |
+
# Steps
|
194 |
+
|
195 |
+
1. Analyze the input conversation for participant information and context.
|
196 |
+
2. Fill in each section of the Intelligence Summary Document structure.
|
197 |
+
3. Ensure all details, especially those related to potential risk factors or alerts, are captured and highlighted clearly.
|
198 |
+
|
199 |
+
# Output Format
|
200 |
+
|
201 |
+
Provide one structured Intelligence Summary Document for the conversation in either plain text format or structured JSON.
|
202 |
+
|
203 |
+
# JSON Format Example:
|
204 |
+
```json
|
205 |
+
{
|
206 |
+
"Top-Level Status & Assessment": {
|
207 |
+
"Threat Level Assessment": "Unclear — Requires Investigation",
|
208 |
+
"Humanitarian Alert": "No distress signals detected."
|
209 |
+
},
|
210 |
+
"Basic Metadata": {
|
211 |
+
"Number of Speakers": 2,
|
212 |
+
"Languages": {
|
213 |
+
"Speaker 1": "English",
|
214 |
+
"Speaker 2": "English"
|
215 |
+
},
|
216 |
+
"Location": "Unknown",
|
217 |
+
"Communication Medium": "Direct conversation"
|
218 |
+
},
|
219 |
+
"Conversation Overview": {
|
220 |
+
"Summary": "A cautious approach was suggested by Speaker 1, followed by an assurance from Speaker 2 that systems are ready.",
|
221 |
+
"Alarming Keywords": [],
|
222 |
+
"Suspicious or Cryptic Phrases": []
|
223 |
+
},
|
224 |
+
"In-Depth Analysis": {
|
225 |
+
"Network Connections": "None identified",
|
226 |
+
"Intent & Emotional Tone Detection": "Calm, precautionary tone",
|
227 |
+
"Behavioral Patterns": "Speaker 1 expressing concern, Speaker 2 providing assurance",
|
228 |
+
"Code Words & Cryptic Language": [],
|
229 |
+
"Geolocation References": [],
|
230 |
+
"Sentiment on Strategic Issues": "No radical or dissenting sentiment detected"
|
231 |
+
},
|
232 |
+
"Resource Mentions & Operational Logistics": {
|
233 |
+
"Resource & Asset Mentions": [],
|
234 |
+
"Behavioral Deviations": "None noted"
|
235 |
+
},
|
236 |
+
"Prioritization, Recommendations & Actionables": {
|
237 |
+
"High-Risk Alert Priority": "Low",
|
238 |
+
"Recommended Actions": {
|
239 |
+
"Surveillance": "No further surveillance needed.",
|
240 |
+
"Intervention": "Not required.",
|
241 |
+
"Humanitarian Assistance": "Not required.",
|
242 |
+
"Follow-Up Analysis": "No unusual phrases detected requiring review."
|
243 |
+
}
|
244 |
+
}
|
245 |
+
}
|
246 |
+
```
|
247 |
+
|
248 |
+
# Notes
|
249 |
+
|
250 |
+
- Ensure that you mark any ambiguous segments as requiring further investigation.
|
251 |
+
- Pay attention to emotional tone shifts or sudden changes in behavior.
|
252 |
+
- If any direct or implied threat is detected, prioritize appropriately using the provided classifications.
|
253 |
+
- Err on the side of caution. If there is even a remote possibility that something requires human attention, flag it.
|
254 |
+
Analyze a translated transcript of a conversation that may contain multiple speakers and summarize the information in a structured intelligence document.
|
255 |
+
|
256 |
+
The input format will include word-level or sentence-level timestamps, each indicating the speaker ID, language, and translated text.
|
257 |
+
|
258 |
+
# Input Format Overview
|
259 |
+
|
260 |
+
Word-Level Timestamps Example:
|
261 |
+
```
|
262 |
+
[Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Word>"
|
263 |
+
```
|
264 |
+
Example:
|
265 |
+
```
|
266 |
+
0.01-0.02 - Speaker 1 - Language: English - Translated Text: "Proceed"
|
267 |
+
0.02-0.025 - Speaker 1 - Language: English - Translated Text: "with"
|
268 |
+
0.025-0.032 - Speaker 2 - Language: English - Translated Text: "caution"
|
269 |
+
```
|
270 |
+
|
271 |
+
Optional Sentence-Level Structure Example:
|
272 |
+
```
|
273 |
+
[Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Sentence>"
|
274 |
+
```
|
275 |
+
Example with Sentence Grouping:
|
276 |
+
```
|
277 |
+
0.01-0.05 - Speaker 1 - Language: English - Translated Text: "Proceed with caution."
|
278 |
+
0.06-0.12 - Speaker 2 - Language: English - Translated Text: "All systems are ready."
|
279 |
+
```
|
280 |
+
|
281 |
+
# Intelligence Summary Document Structure
|
282 |
+
|
283 |
+
Use the format below to create a structured summary for each conversation transcript received:
|
284 |
+
|
285 |
+
### 1. Top-Level Status & Assessment:
|
286 |
+
- **Threat Level Assessment**:
|
287 |
+
- Choose one:
|
288 |
+
- Completely Innocuous
|
289 |
+
- Likely Innocuous
|
290 |
+
- Unclear — Requires Investigation
|
291 |
+
- Likely Dangerous — Immediate Action
|
292 |
+
- Likely Dangerous — Delayed Action
|
293 |
+
- 100% Dangerous — Immediate Action
|
294 |
+
- 100% Dangerous — Delayed Action
|
295 |
+
- **Humanitarian Alert**: Identify any indications of distress, coercion, or need for assistance, such as signs of duress or requests for help.
|
296 |
+
|
297 |
+
### 2. Basic Metadata:
|
298 |
+
- **Number of Speakers**: Total and unique speakers detected.
|
299 |
+
- **Languages**: List of languages used, with indication of who spoke which language.
|
300 |
+
- **Location**: Actual or inferred locations of participants.
|
301 |
+
- **Communication Medium**: Identify the method of interaction (e.g., phone call, direct conversation).
|
302 |
+
|
303 |
+
### 3. Conversation Overview:
|
304 |
+
- **Summary**: Concise breakdown of the main points and context.
|
305 |
+
- **Alarming Keywords**: Identify any concerning words, including but not limited to keywords like "kill," "attack," "weapon," etc.
|
306 |
+
- **Suspicious or Cryptic Phrases**: Statements that appear coded or unclear in the context of the discussion.
|
307 |
+
|
308 |
+
### 4. In-Depth Analysis:
|
309 |
+
- **Network Connections**: Identify mentions of additional individuals or groups involved.
|
310 |
+
- **Intent & Emotional Tone Detection**: Analyze emotional cues (e.g., anger, fear, calmness, urgency). Identify signs of deception or tension.
|
311 |
+
- **Behavioral Patterns**: Highlight repeated themes, phrases, or signals of planning and coordination.
|
312 |
+
- **Code Words & Cryptic Language**: Detect terms that may indicate hidden or covert meaning.
|
313 |
+
- **Geolocation References**: Point out any inferences regarding regional language or place names.
|
314 |
+
- **Sentiment on Strategic Issues**: Identify any indication of radical, dissenting, or anti-national views that could imply unrest or extremism.
|
315 |
+
|
316 |
+
### 5. Resource Mentions & Operational Logistics:
|
317 |
+
- **Resource & Asset Mentions**: List any mention of tools, weapons, vehicles, or supply logistics.
|
318 |
+
- **Behavioral Deviations**: Identify shifts in tone, speech, or demeanor suggesting stress, coercion, urgency, or preparation.
|
319 |
+
|
320 |
+
### 6. Prioritization, Recommendations & Actionables:
|
321 |
+
- **High-Risk Alert Priority**: Identify whether the conversation should be flagged for further attention.
|
322 |
+
- **Recommended Actions**:
|
323 |
+
- **Surveillance**: Suggest surveillance if concerning patterns or keywords are detected.
|
324 |
+
- **Intervention**: Recommend intervention for urgent/high-risk cases.
|
325 |
+
- **Humanitarian Assistance**: Suggest immediate support for any signs of distress.
|
326 |
+
- **Follow-Up Analysis**: Identify statements that need deeper review for clarity or to understand potential hidden meanings.
|
327 |
+
|
328 |
+
# Steps
|
329 |
+
|
330 |
+
1. Analyze the input conversation for participant information and context.
|
331 |
+
2. Fill in each section of the Intelligence Summary Document structure.
|
332 |
+
3. Ensure all details, especially those related to potential risk factors or alerts, are captured and highlighted clearly.
|
333 |
+
|
334 |
+
# Output Format
|
335 |
+
|
336 |
+
Provide one structured Intelligence Summary Document for the conversation in either plain text format or structured JSON.
|
337 |
+
|
338 |
+
# JSON Format Example:
|
339 |
+
```json
|
340 |
+
{
|
341 |
+
"Top-Level Status & Assessment": {
|
342 |
+
"Threat Level Assessment": "Unclear — Requires Investigation",
|
343 |
+
"Humanitarian Alert": "No distress signals detected."
|
344 |
+
},
|
345 |
+
"Basic Metadata": {
|
346 |
+
"Number of Speakers": 2,
|
347 |
+
"Languages": {
|
348 |
+
"Speaker 1": "English",
|
349 |
+
"Speaker 2": "English"
|
350 |
+
},
|
351 |
+
"Location": "Unknown",
|
352 |
+
"Communication Medium": "Direct conversation"
|
353 |
+
},
|
354 |
+
"Conversation Overview": {
|
355 |
+
"Summary": "A cautious approach was suggested by Speaker 1, followed by an assurance from Speaker 2 that systems are ready.",
|
356 |
+
"Alarming Keywords": [],
|
357 |
+
"Suspicious or Cryptic Phrases": []
|
358 |
+
},
|
359 |
+
"In-Depth Analysis": {
|
360 |
+
"Network Connections": "None identified",
|
361 |
+
"Intent & Emotional Tone Detection": "Calm, precautionary tone",
|
362 |
+
"Behavioral Patterns": "Speaker 1 expressing concern, Speaker 2 providing assurance",
|
363 |
+
"Code Words & Cryptic Language": [],
|
364 |
+
"Geolocation References": [],
|
365 |
+
"Sentiment on Strategic Issues": "No radical or dissenting sentiment detected"
|
366 |
+
},
|
367 |
+
"Resource Mentions & Operational Logistics": {
|
368 |
+
"Resource & Asset Mentions": [],
|
369 |
+
"Behavioral Deviations": "None noted"
|
370 |
+
},
|
371 |
+
"Prioritization, Recommendations & Actionables": {
|
372 |
+
"High-Risk Alert Priority": "Low",
|
373 |
+
"Recommended Actions": {
|
374 |
+
"Surveillance": "No further surveillance needed.",
|
375 |
+
"Intervention": "Not required.",
|
376 |
+
"Humanitarian Assistance": "Not required.",
|
377 |
+
"Follow-Up Analysis": "No unusual phrases detected requiring review."
|
378 |
+
}
|
379 |
+
}
|
380 |
+
}
|
381 |
+
```
|
382 |
+
|
383 |
+
# Notes
|
384 |
+
|
385 |
+
- Ensure that you mark any ambiguous segments as requiring further investigation.
|
386 |
+
- Pay attention to emotional tone shifts or sudden changes in behavior.
|
387 |
+
- If any direct or implied threat is detected, prioritize appropriately using the provided classifications.
|
388 |
+
- Err on the side of caution. If there is even a remote possibility that something requires human attention, flag it.
|
389 |
+
|
390 |
+
"""},
|
391 |
{"role": "user", "content": f"Context: {''.join(item['translated'] for item in context if 'translated' in item)}\n\nQuestion: {question}"}
|
392 |
]
|
393 |
|