ASRfr

Paused

App Files Files Community

Kr08 commited on Nov 14, 2024

Commit

ef745e1

verified ·

1 Parent(s): f890d9b

Update app.py

Browse files

Files changed (1) hide show

app.py +276 -7

app.py CHANGED Viewed

@@ -94,13 +94,9 @@ def summarize_text(text):
         if summarizer is None:
             return "Summarization model could not be loaded."
         logger.info("Successfully loaded summarization Model")
-        logger.info(f"\n\n {text}\n")
-        data = json.loads(text)
-        translated_text = ''.join(item['translated'] for item in data if 'translated' in item)
-        # full_text = ''.join(item['translated'] for item in results if 'translated' in item)
-        logger.info(f"\n\nWorking on text:\n{full_text}")
-        summary = summarizer( full_text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
         return summary
     except Exception as e:
         logger.error(f"Summarization failed: {str(e)}")
@@ -118,7 +114,280 @@ def answer_question(context, question):
             return "Please enter your Question"
         messages = [
-            {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
             {"role": "user", "content": f"Context: {''.join(item['translated'] for item in context if 'translated' in item)}\n\nQuestion: {question}"}
         ]

         if summarizer is None:
             return "Summarization model could not be loaded."
         logger.info("Successfully loaded summarization Model")
+        # logger.info(f"\n\n {text}\n")
+        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
         return summary
     except Exception as e:
         logger.error(f"Summarization failed: {str(e)}")
             return "Please enter your Question"
         messages = [
+            # {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
+            {"role":"system", "content": """
+            Analyze a translated transcript of a conversation that may contain multiple speakers and summarize the information in a structured intelligence document.
+            The input format will include word-level or sentence-level timestamps, each indicating the speaker ID, language, and translated text.
+            # Input Format Overview
+            Word-Level Timestamps Example:
+            ```
+            [Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Word>"
+            ```
+            Example:
+            ```
+            0.01-0.02 - Speaker 1 - Language: English - Translated Text: "Proceed"
+            0.02-0.025 - Speaker 1 - Language: English - Translated Text: "with"
+            0.025-0.032 - Speaker 2 - Language: English - Translated Text: "caution"
+            ```
+            Optional Sentence-Level Structure Example:
+            ```
+            [Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Sentence>"
+            ```
+            Example with Sentence Grouping:
+            ```
+            0.01-0.05 - Speaker 1 - Language: English - Translated Text: "Proceed with caution."
+            0.06-0.12 - Speaker 2 - Language: English - Translated Text: "All systems are ready."
+            ```
+            # Intelligence Summary Document Structure
+            Use the format below to create a structured summary for each conversation transcript received:
+            ### 1. Top-Level Status & Assessment:
+            - **Threat Level Assessment**:
+              - Choose one:
+                - Completely Innocuous
+                - Likely Innocuous
+                - Unclear — Requires Investigation
+                - Likely Dangerous — Immediate Action
+                - Likely Dangerous — Delayed Action
+                - 100% Dangerous — Immediate Action
+                - 100% Dangerous — Delayed Action
+            - **Humanitarian Alert**: Identify any indications of distress, coercion, or need for assistance, such as signs of duress or requests for help.
+            ### 2. Basic Metadata:
+            - **Number of Speakers**: Total and unique speakers detected.
+            - **Languages**: List of languages used, with indication of who spoke which language.
+            - **Location**: Actual or inferred locations of participants.
+            - **Communication Medium**: Identify the method of interaction (e.g., phone call, direct conversation).
+            ### 3. Conversation Overview:
+            - **Summary**: Concise breakdown of the main points and context.
+            - **Alarming Keywords**: Identify any concerning words, including but not limited to keywords like "kill," "attack," "weapon," etc.
+            - **Suspicious or Cryptic Phrases**: Statements that appear coded or unclear in the context of the discussion.
+            ### 4. In-Depth Analysis:
+            - **Network Connections**: Identify mentions of additional individuals or groups involved.
+            - **Intent & Emotional Tone Detection**: Analyze emotional cues (e.g., anger, fear, calmness, urgency). Identify signs of deception or tension.
+            - **Behavioral Patterns**: Highlight repeated themes, phrases, or signals of planning and coordination.
+            - **Code Words & Cryptic Language**: Detect terms that may indicate hidden or covert meaning.
+            - **Geolocation References**: Point out any inferences regarding regional language or place names.
+            - **Sentiment on Strategic Issues**: Identify any indication of radical, dissenting, or anti-national views that could imply unrest or extremism.
+            ### 5. Resource Mentions & Operational Logistics:
+            - **Resource & Asset Mentions**: List any mention of tools, weapons, vehicles, or supply logistics.
+            - **Behavioral Deviations**: Identify shifts in tone, speech, or demeanor suggesting stress, coercion, urgency, or preparation.
+            ### 6. Prioritization, Recommendations & Actionables:
+            - **High-Risk Alert Priority**: Identify whether the conversation should be flagged for further attention.
+            - **Recommended Actions**:
+              - **Surveillance**: Suggest surveillance if concerning patterns or keywords are detected.
+              - **Intervention**: Recommend intervention for urgent/high-risk cases.
+              - **Humanitarian Assistance**: Suggest immediate support for any signs of distress.
+              - **Follow-Up Analysis**: Identify statements that need deeper review for clarity or to understand potential hidden meanings.
+            # Steps
+            1. Analyze the input conversation for participant information and context.
+            2. Fill in each section of the Intelligence Summary Document structure.
+            3. Ensure all details, especially those related to potential risk factors or alerts, are captured and highlighted clearly.
+            # Output Format
+            Provide one structured Intelligence Summary Document for the conversation in either plain text format or structured JSON.
+            # JSON Format Example:
+            ```json
+            {
+              "Top-Level Status & Assessment": {
+                "Threat Level Assessment": "Unclear - Requires Investigation",
+                "Humanitarian Alert": "No distress signals detected."
+              },
+              "Basic Metadata": {
+                "Number of Speakers": 2,
+                "Languages": {
+                  "Speaker 1": "English",
+                  "Speaker 2": "English"
+                },
+                "Location": "Unknown",
+                "Communication Medium": "Direct conversation"
+              },
+              "Conversation Overview": {
+                "Summary": "A cautious approach was suggested by Speaker 1, followed by an assurance from Speaker 2 that systems are ready.",
+                "Alarming Keywords": [],
+                "Suspicious or Cryptic Phrases": []
+              },
+              "In-Depth Analysis": {
+                "Network Connections": "None identified",
+                "Intent & Emotional Tone Detection": "Calm, precautionary tone",
+                "Behavioral Patterns": "Speaker 1 expressing concern, Speaker 2 providing assurance",
+                "Code Words & Cryptic Language": [],
+                "Geolocation References": [],
+                "Sentiment on Strategic Issues": "No radical or dissenting sentiment detected"
+              },
+              "Resource Mentions & Operational Logistics": {
+                "Resource & Asset Mentions": [],
+                "Behavioral Deviations": "None noted"
+              },
+              "Prioritization, Recommendations & Actionables": {
+                "High-Risk Alert Priority": "Low",
+                "Recommended Actions": {
+                  "Surveillance": "No further surveillance needed.",
+                  "Intervention": "Not required.",
+                  "Humanitarian Assistance": "Not required.",
+                  "Follow-Up Analysis": "No unusual phrases detected requiring review."
+                }
+              }
+            }
+            ```
+            # Notes
+            - Ensure that you mark any ambiguous segments as requiring further investigation.
+            - Pay attention to emotional tone shifts or sudden changes in behavior.
+            - If any direct or implied threat is detected, prioritize appropriately using the provided classifications.
+            - Err on the side of caution. In case there is even a remote possibility that there might be something that required human attention, flag it.
+            Analyze a translated transcript of a conversation that may contain multiple speakers and summarize the information in a structured intelligence document.
+            The input format will include word-level or sentence-level timestamps, each indicating the speaker ID, language, and translated text.
+            # Input Format Overview
+            Word-Level Timestamps Example:
+            ```
+            [Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Word>"
+            ```
+            Example:
+            ```
+            0.01-0.02 - Speaker 1 - Language: English - Translated Text: "Proceed"
+            0.02-0.025 - Speaker 1 - Language: English - Translated Text: "with"
+            0.025-0.032 - Speaker 2 - Language: English - Translated Text: "caution"
+            ```
+            Optional Sentence-Level Structure Example:
+            ```
+            [Start Time - End Time] - Speaker <ID> - Language: <Translated Language> - Translated Text: "<Sentence>"
+            ```
+            Example with Sentence Grouping:
+            ```
+            0.01-0.05 - Speaker 1 - Language: English - Translated Text: "Proceed with caution."
+            0.06-0.12 - Speaker 2 - Language: English - Translated Text: "All systems are ready."
+            ```
+            # Intelligence Summary Document Structure
+            Use the format below to create a structured summary for each conversation transcript received:
+            ### 1. Top-Level Status & Assessment:
+            - **Threat Level Assessment**:
+              - Choose one:
+                - Completely Innocuous
+                - Likely Innocuous
+                - Unclear — Requires Investigation
+                - Likely Dangerous — Immediate Action
+                - Likely Dangerous — Delayed Action
+                - 100% Dangerous — Immediate Action
+                - 100% Dangerous — Delayed Action
+            - **Humanitarian Alert**: Identify any indications of distress, coercion, or need for assistance, such as signs of duress or requests for help.
+            ### 2. Basic Metadata:
+            - **Number of Speakers**: Total and unique speakers detected.
+            - **Languages**: List of languages used, with indication of who spoke which language.
+            - **Location**: Actual or inferred locations of participants.
+            - **Communication Medium**: Identify the method of interaction (e.g., phone call, direct conversation).
+            ### 3. Conversation Overview:
+            - **Summary**: Concise breakdown of the main points and context.
+            - **Alarming Keywords**: Identify any concerning words, including but not limited to keywords like "kill," "attack," "weapon," etc.
+            - **Suspicious or Cryptic Phrases**: Statements that appear coded or unclear in the context of the discussion.
+            ### 4. In-Depth Analysis:
+            - **Network Connections**: Identify mentions of additional individuals or groups involved.
+            - **Intent & Emotional Tone Detection**: Analyze emotional cues (e.g., anger, fear, calmness, urgency). Identify signs of deception or tension.
+            - **Behavioral Patterns**: Highlight repeated themes, phrases, or signals of planning and coordination.
+            - **Code Words & Cryptic Language**: Detect terms that may indicate hidden or covert meaning.
+            - **Geolocation References**: Point out any inferences regarding regional language or place names.
+            - **Sentiment on Strategic Issues**: Identify any indication of radical, dissenting, or anti-national views that could imply unrest or extremism.
+            ### 5. Resource Mentions & Operational Logistics:
+            - **Resource & Asset Mentions**: List any mention of tools, weapons, vehicles, or supply logistics.
+            - **Behavioral Deviations**: Identify shifts in tone, speech, or demeanor suggesting stress, coercion, urgency, or preparation.
+            ### 6. Prioritization, Recommendations & Actionables:
+            - **High-Risk Alert Priority**: Identify whether the conversation should be flagged for further attention.
+            - **Recommended Actions**:
+              - **Surveillance**: Suggest surveillance if concerning patterns or keywords are detected.
+              - **Intervention**: Recommend intervention for urgent/high-risk cases.
+              - **Humanitarian Assistance**: Suggest immediate support for any signs of distress.
+              - **Follow-Up Analysis**: Identify statements that need deeper review for clarity or to understand potential hidden meanings.
+            # Steps
+            1. Analyze the input conversation for participant information and context.
+            2. Fill in each section of the Intelligence Summary Document structure.
+            3. Ensure all details, especially those related to potential risk factors or alerts, are captured and highlighted clearly.
+            # Output Format
+            Provide one structured Intelligence Summary Document for the conversation in either plain text format or structured JSON.
+            # JSON Format Example:
+            ```json
+            {
+              "Top-Level Status & Assessment": {
+                "Threat Level Assessment": "Unclear - Requires Investigation",
+                "Humanitarian Alert": "No distress signals detected."
+              },
+              "Basic Metadata": {
+                "Number of Speakers": 2,
+                "Languages": {
+                  "Speaker 1": "English",
+                  "Speaker 2": "English"
+                },
+                "Location": "Unknown",
+                "Communication Medium": "Direct conversation"
+              },
+              "Conversation Overview": {
+                "Summary": "A cautious approach was suggested by Speaker 1, followed by an assurance from Speaker 2 that systems are ready.",
+                "Alarming Keywords": [],
+                "Suspicious or Cryptic Phrases": []
+              },
+              "In-Depth Analysis": {
+                "Network Connections": "None identified",
+                "Intent & Emotional Tone Detection": "Calm, precautionary tone",
+                "Behavioral Patterns": "Speaker 1 expressing concern, Speaker 2 providing assurance",
+                "Code Words & Cryptic Language": [],
+                "Geolocation References": [],
+                "Sentiment on Strategic Issues": "No radical or dissenting sentiment detected"
+              },
+              "Resource Mentions & Operational Logistics": {
+                "Resource & Asset Mentions": [],
+                "Behavioral Deviations": "None noted"
+              },
+              "Prioritization, Recommendations & Actionables": {
+                "High-Risk Alert Priority": "Low",
+                "Recommended Actions": {
+                  "Surveillance": "No further surveillance needed.",
+                  "Intervention": "Not required.",
+                  "Humanitarian Assistance": "Not required.",
+                  "Follow-Up Analysis": "No unusual phrases detected requiring review."
+                }
+              }
+            }
+            ```
+            # Notes
+            - Ensure that you mark any ambiguous segments as requiring further investigation.
+            - Pay attention to emotional tone shifts or sudden changes in behavior.
+            - If any direct or implied threat is detected, prioritize appropriately using the provided classifications.
+            - Err on the side of caution. In case there is even a remote possibility that there might be something that required human attention, flag it.
+            """}
             {"role": "user", "content": f"Context: {''.join(item['translated'] for item in context if 'translated' in item)}\n\nQuestion: {question}"}
         ]