dwb2023 committed on
Commit
c3af845
·
1 Parent(s): 5659037

initial update

Browse files
Files changed (13) hide show
  1. .gitignore +6 -0
  2. .python-version +1 -0
  3. README.md +37 -1
  4. app.py +0 -610
  5. main.py +41 -0
  6. src/__init__.py +0 -0
  7. src/analyzer.py +214 -0
  8. src/ontology.py +57 -0
  9. src/relationships.py +204 -0
  10. templates/results.html +80 -0
  11. ui/__init__.py +0 -0
  12. ui/format.py +106 -0
  13. ui/styles.py +102 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv/
2
+ .env/
3
+
4
+ # Exclude Python bytecode and cache directories
5
+ __pycache__/
6
+ *.pyc
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
README.md CHANGED
@@ -10,4 +10,40 @@ pinned: false
10
  license: cc-by-sa-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: cc-by-sa-4.0
11
  ---
12
 
13
+ # Requirements (requirements.txt)
14
+ gradio>=4.0.0
15
+ transformers>=4.36.0
16
+ torch>=2.0.0
17
+ protobuf>=4.25.1
18
+ aiohttp>=3.8.0
19
+ python-dateutil>=2.8.2
20
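+ sqlite3>=3.35.0 (bundled with Python's standard-library sqlite3 module; not installable via pip, so omit it from requirements.txt)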
21
+
22
+ # Project Structure
23
+ event_analysis/
24
+ ├── src/
25
+ │ ├── __init__.py
26
+ │ ├── analyzer.py
27
+ │ ├── ontology.py
28
+ │ └── relationships.py
29
+ ├── ui/
30
+ │ ├── __init__.py
31
+ │ ├── format.py
32
+ │ └── styles.py
33
+ ├── templates/
34
+ │ └── results.html
35
+ ├── main.py
36
+ └── requirements.txt
37
+
38
+ # Installation and Running
39
+ ```bash
40
+ # Create and activate virtual environment
41
+ python -m venv .venv
42
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
43
+
44
+ # Install requirements
45
+ pip install -r requirements.txt
46
+
47
+ # Run the application
48
+ python main.py
49
+ ```
app.py DELETED
@@ -1,610 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import json
4
- from datetime import datetime
5
- import sqlite3
6
- import asyncio
7
- from concurrent.futures import ThreadPoolExecutor
8
- import re
9
-
10
- # Initialize NLP pipelines
11
- ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
12
- classifier = pipeline("zero-shot-classification")
13
-
14
- class OntologyRegistry:
15
- def __init__(self):
16
- self.temporal_patterns = [
17
- r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
18
- r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
19
- r'\btomorrow\b',
20
- r'\bin \d+ (?:days?|weeks?|months?)\b'
21
- ]
22
-
23
- self.location_patterns = [
24
- r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
25
- r'\b[A-Z][a-zA-Z]+ Base\b',
26
- r'\bHeadquarters\b',
27
- r'\bHQ\b'
28
- ]
29
-
30
- self.entity_types = {
31
- 'PER': 'person',
32
- 'ORG': 'organization',
33
- 'LOC': 'location',
34
- 'MISC': 'miscellaneous'
35
- }
36
-
37
- def validate_pattern(self, text, pattern_type):
38
- patterns = getattr(self, f"{pattern_type}_patterns", [])
39
- matches = []
40
- for pattern in patterns:
41
- matches.extend(re.finditer(pattern, text))
42
- return [m.group() for m in matches]
43
-
44
- class RelationshipEngine:
45
- def __init__(self, db_path=':memory:'):
46
- self.conn = sqlite3.connect(db_path, check_same_thread=False) # Add this flag
47
- self.setup_database()
48
-
49
- def setup_database(self):
50
- # Events table
51
- self.conn.execute('''
52
- CREATE TABLE IF NOT EXISTS events (
53
- id INTEGER PRIMARY KEY,
54
- text TEXT,
55
- timestamp DATETIME,
56
- confidence REAL
57
- )
58
- ''')
59
-
60
- # Entities table
61
- self.conn.execute('''
62
- CREATE TABLE IF NOT EXISTS entities (
63
- id INTEGER PRIMARY KEY,
64
- entity_text TEXT,
65
- entity_type TEXT, -- person, organization, location, hashtag, temporal
66
- first_seen DATETIME,
67
- last_seen DATETIME,
68
- frequency INTEGER DEFAULT 1,
69
- confidence REAL
70
- )
71
- ''')
72
-
73
- # Event-Entity relationships
74
- self.conn.execute('''
75
- CREATE TABLE IF NOT EXISTS event_entities (
76
- event_id INTEGER,
77
- entity_id INTEGER,
78
- FOREIGN KEY (event_id) REFERENCES events(id),
79
- FOREIGN KEY (entity_id) REFERENCES entities(id),
80
- PRIMARY KEY (event_id, entity_id)
81
- )
82
- ''')
83
-
84
- # Entity relationships (e.g., person-organization affiliations)
85
- self.conn.execute('''
86
- CREATE TABLE IF NOT EXISTS entity_relationships (
87
- id INTEGER PRIMARY KEY,
88
- source_entity_id INTEGER,
89
- target_entity_id INTEGER,
90
- relationship_type TEXT,
91
- confidence REAL,
92
- first_seen DATETIME,
93
- last_seen DATETIME,
94
- FOREIGN KEY (source_entity_id) REFERENCES entities(id),
95
- FOREIGN KEY (target_entity_id) REFERENCES entities(id)
96
- )
97
- ''')
98
-
99
- self.conn.commit()
100
-
101
- def store_entities(self, event_id, entities_dict):
102
- now = datetime.now().isoformat()
103
-
104
- for entity_type, entities in entities_dict.items():
105
- if not isinstance(entities, list):
106
- continue
107
-
108
- for entity_text in entities:
109
- # Check if entity exists
110
- cursor = self.conn.execute(
111
- 'SELECT id, frequency FROM entities WHERE entity_text = ? AND entity_type = ?',
112
- (entity_text, entity_type)
113
- )
114
- result = cursor.fetchone()
115
-
116
- if result:
117
- # Update existing entity
118
- entity_id, freq = result
119
- self.conn.execute('''
120
- UPDATE entities
121
- SET frequency = ?, last_seen = ?
122
- WHERE id = ?
123
- ''', (freq + 1, now, entity_id))
124
- else:
125
- # Insert new entity
126
- cursor = self.conn.execute('''
127
- INSERT INTO entities (entity_text, entity_type, first_seen, last_seen, confidence)
128
- VALUES (?, ?, ?, ?, ?)
129
- ''', (entity_text, entity_type, now, now, 1.0))
130
- entity_id = cursor.lastrowid
131
-
132
- # Create event-entity relationship
133
- self.conn.execute('''
134
- INSERT OR IGNORE INTO event_entities (event_id, entity_id)
135
- VALUES (?, ?)
136
- ''', (event_id, entity_id))
137
-
138
- self.conn.commit()
139
-
140
- def find_related_events(self, event_data):
141
- # Find events sharing entities
142
- entity_texts = []
143
- for entity_type, entities in event_data.get('entities', {}).items():
144
- if isinstance(entities, list):
145
- entity_texts.extend(entities)
146
-
147
- if not entity_texts:
148
- return []
149
-
150
- # Build query using entity relationships
151
- query = '''
152
- SELECT DISTINCT e.*, COUNT(ee.entity_id) as shared_entities
153
- FROM events e
154
- JOIN event_entities ee ON e.id = ee.event_id
155
- JOIN entities ent ON ee.entity_id = ent.id
156
- WHERE ent.entity_text IN ({})
157
- GROUP BY e.id
158
- ORDER BY shared_entities DESC, e.timestamp DESC
159
- LIMIT 5
160
- '''.format(','.join('?' * len(entity_texts)))
161
-
162
- cursor = self.conn.execute(query, entity_texts)
163
- return cursor.fetchall()
164
-
165
- def find_entity_relationships(self, entity_id):
166
- # Find direct relationships
167
- query = '''
168
- SELECT er.*,
169
- e1.entity_text as source_text, e1.entity_type as source_type,
170
- e2.entity_text as target_text, e2.entity_type as target_type
171
- FROM entity_relationships er
172
- JOIN entities e1 ON er.source_entity_id = e1.id
173
- JOIN entities e2 ON er.target_entity_id = e2.id
174
- WHERE er.source_entity_id = ? OR er.target_entity_id = ?
175
- '''
176
- cursor = self.conn.execute(query, (entity_id, entity_id))
177
- return cursor.fetchall()
178
-
179
- def update_entity_relationships(self, event_id):
180
- # Find all entities in the event
181
- query = '''
182
- SELECT e.id, e.entity_text, e.entity_type
183
- FROM entities e
184
- JOIN event_entities ee ON e.id = ee.entity_id
185
- WHERE ee.event_id = ?
186
- '''
187
- cursor = self.conn.execute(query, (event_id,))
188
- entities = cursor.fetchall()
189
-
190
- now = datetime.now().isoformat()
191
-
192
- # Create/update relationships between entities in same event
193
- for i, entity1 in enumerate(entities):
194
- for entity2 in entities[i+1:]:
195
- # Skip same entity type relationships
196
- if entity1[2] == entity2[2]:
197
- continue
198
-
199
- relationship_type = f"{entity1[2]}_to_{entity2[2]}"
200
-
201
- # Check if relationship exists
202
- cursor = self.conn.execute('''
203
- SELECT id FROM entity_relationships
204
- WHERE (source_entity_id = ? AND target_entity_id = ?)
205
- OR (source_entity_id = ? AND target_entity_id = ?)
206
- ''', (entity1[0], entity2[0], entity2[0], entity1[0]))
207
-
208
- result = cursor.fetchone()
209
- if result:
210
- # Update existing relationship
211
- self.conn.execute('''
212
- UPDATE entity_relationships
213
- SET last_seen = ?, confidence = confidence + 0.1
214
- WHERE id = ?
215
- ''', (now, result[0]))
216
- else:
217
- # Create new relationship
218
- self.conn.execute('''
219
- INSERT INTO entity_relationships
220
- (source_entity_id, target_entity_id, relationship_type, confidence, first_seen, last_seen)
221
- VALUES (?, ?, ?, ?, ?, ?)
222
- ''', (entity1[0], entity2[0], relationship_type, 0.5, now, now))
223
-
224
- self.conn.commit()
225
-
226
- class EventAnalyzer:
227
- def __init__(self):
228
- self.ontology = OntologyRegistry()
229
- self.relationship_engine = RelationshipEngine()
230
- self.executor = ThreadPoolExecutor(max_workers=3)
231
-
232
- async def extract_entities(self, text):
233
- def _extract():
234
- return ner_pipeline(text)
235
-
236
- # Run NER in thread pool
237
- ner_results = await asyncio.get_event_loop().run_in_executor(
238
- self.executor, _extract
239
- )
240
-
241
- entities = {
242
- "people": [],
243
- "organizations": [],
244
- "locations": [],
245
- "hashtags": [word for word in text.split() if word.startswith('#')]
246
- }
247
-
248
- for item in ner_results:
249
- if item["entity"].endswith("PER"):
250
- entities["people"].append(item["word"])
251
- elif item["entity"].endswith("ORG"):
252
- entities["organizations"].append(item["word"])
253
- elif item["entity"].endswith("LOC"):
254
- entities["locations"].append(item["word"])
255
-
256
- return entities
257
-
258
- def extract_temporal(self, text):
259
- return self.ontology.validate_pattern(text, 'temporal')
260
-
261
- async def extract_locations(self, text):
262
- entities = await self.extract_entities(text)
263
- ml_locations = entities.get('locations', [])
264
- pattern_locations = self.ontology.validate_pattern(text, 'location')
265
- return list(set(ml_locations + pattern_locations))
266
-
267
- def calculate_confidence(self, entities, temporal_data, related_events):
268
- # Base confidence from entity presence
269
- base_confidence = min(1.0, (
270
- 0.2 * bool(entities["people"]) +
271
- 0.2 * bool(entities["organizations"]) +
272
- 0.3 * bool(entities["locations"]) +
273
- 0.3 * bool(temporal_data)
274
- ))
275
-
276
- # Adjust confidence based on entity frequency
277
- entity_params = [
278
- *entities["people"],
279
- *entities["organizations"],
280
- *entities["locations"]
281
- ]
282
-
283
- cursor = self.relationship_engine.conn.execute(
284
- f'''
285
- SELECT AVG(frequency) as avg_freq
286
- FROM entities
287
- WHERE entity_text IN (
288
- SELECT DISTINCT entity_text
289
- FROM entities
290
- WHERE entity_text IN ({','.join(['?']*len(entity_params))})
291
- )
292
- ''',
293
- entity_params # Pass parameters here
294
- )
295
-
296
- avg_frequency = cursor.fetchone()[0] or 1
297
- frequency_boost = min(0.2, (avg_frequency - 1) * 0.05) # Max 0.2 boost for frequency
298
-
299
- # Adjust confidence based on relationships
300
- relationship_confidence = 0
301
- if related_events:
302
- relationship_scores = []
303
- for event in related_events:
304
- cursor = self.relationship_engine.conn.execute('''
305
- SELECT COUNT(*) as shared_entities
306
- FROM event_entities ee1
307
- JOIN event_entities ee2 ON ee1.entity_id = ee2.entity_id
308
- WHERE ee1.event_id = ? AND ee2.event_id = ?
309
- ''', (event[0], event[0])) # event[0] is the event_id
310
- shared_count = cursor.fetchone()[0]
311
- relationship_scores.append(min(0.3, shared_count * 0.1)) # Max 0.3 boost per relationship
312
-
313
- if relationship_scores:
314
- relationship_confidence = max(relationship_scores)
315
-
316
- final_confidence = min(1.0, base_confidence + frequency_boost + relationship_confidence)
317
- return final_confidence
318
-
319
- async def analyze_event(self, text):
320
- try:
321
- # Parallel extraction
322
- entities_future = self.extract_entities(text)
323
- temporal_data = self.extract_temporal(text)
324
- locations_future = self.extract_locations(text)
325
-
326
- # Gather async results
327
- entities, locations = await asyncio.gather(
328
- entities_future, locations_future
329
- )
330
-
331
- # Add temporal and locations to entities
332
- entities['locations'] = locations
333
- entities['temporal'] = temporal_data
334
-
335
- # Find related events
336
- related_events = self.relationship_engine.find_related_events({
337
- 'text': text,
338
- 'entities': entities
339
- })
340
-
341
- # Calculate confidence with enhanced logic
342
- confidence = self.calculate_confidence(entities, temporal_data, related_events)
343
-
344
- # Store event if confidence meets threshold
345
- cursor = None
346
- if confidence >= 0.6:
347
- cursor = self.relationship_engine.conn.execute(
348
- 'INSERT INTO events (text, timestamp, confidence) VALUES (?, ?, ?)',
349
- (text, datetime.now().isoformat(), confidence)
350
- )
351
- event_id = cursor.lastrowid
352
-
353
- # Store entities and their relationships
354
- self.relationship_engine.store_entities(event_id, {
355
- 'person': entities['people'],
356
- 'organization': entities['organizations'],
357
- 'location': entities['locations'],
358
- 'temporal': temporal_data,
359
- 'hashtag': entities['hashtags']
360
- })
361
-
362
- # Update entity relationships
363
- self.relationship_engine.update_entity_relationships(event_id)
364
-
365
- self.relationship_engine.conn.commit()
366
-
367
- # Get entity relationships for rich output
368
- entity_relationships = []
369
- if cursor and cursor.lastrowid:
370
- query = '''
371
- SELECT DISTINCT er.*,
372
- e1.entity_text as source_text, e1.entity_type as source_type,
373
- e2.entity_text as target_text, e2.entity_type as target_type
374
- FROM event_entities ee
375
- JOIN entity_relationships er ON ee.entity_id IN (er.source_entity_id, er.target_entity_id)
376
- JOIN entities e1 ON er.source_entity_id = e1.id
377
- JOIN entities e2 ON er.target_entity_id = e2.id
378
- WHERE ee.event_id = ?
379
- '''
380
- entity_relationships = self.relationship_engine.conn.execute(query, (cursor.lastrowid,)).fetchall()
381
-
382
- result = {
383
- "text": text,
384
- "entities": entities,
385
- "confidence": confidence,
386
- "verification_needed": confidence < 0.6,
387
- "related_events": [
388
- {
389
- "text": event[1],
390
- "timestamp": event[2],
391
- "confidence": event[3],
392
- "shared_entities": event[4] if len(event) > 4 else None
393
- }
394
- for event in related_events
395
- ],
396
- "entity_relationships": [
397
- {
398
- "type": rel[3],
399
- "source": rel[6],
400
- "target": rel[8],
401
- "confidence": rel[4]
402
- }
403
- for rel in entity_relationships
404
- ] if entity_relationships else []
405
- }
406
-
407
- return result
408
-
409
- except Exception as e:
410
- return {"error": str(e)}
411
-
412
- def get_entity_statistics(self):
413
- """Get statistics about stored entities and relationships"""
414
- stats = {}
415
-
416
- # Entity counts by type
417
- cursor = self.relationship_engine.conn.execute('''
418
- SELECT entity_type, COUNT(*) as count, AVG(frequency) as avg_frequency
419
- FROM entities
420
- GROUP BY entity_type
421
- ''')
422
- stats['entity_counts'] = cursor.fetchall()
423
-
424
- # Most frequent entities
425
- cursor = self.relationship_engine.conn.execute('''
426
- SELECT entity_text, entity_type, frequency
427
- FROM entities
428
- ORDER BY frequency DESC
429
- LIMIT 10
430
- ''')
431
- stats['frequent_entities'] = cursor.fetchall()
432
-
433
- # Relationship statistics
434
- cursor = self.relationship_engine.conn.execute('''
435
- SELECT relationship_type, COUNT(*) as count, AVG(confidence) as avg_confidence
436
- FROM entity_relationships
437
- GROUP BY relationship_type
438
- ''')
439
- stats['relationship_stats'] = cursor.fetchall()
440
-
441
- return stats
442
-
443
- # Initialize analyzer
444
- analyzer = EventAnalyzer()
445
-
446
- # Custom CSS for UI
447
- css = """
448
- .container { max-width: 1200px; margin: auto; padding: 20px; }
449
- .results { padding: 20px; border: 1px solid #ddd; border-radius: 8px; margin-top: 20px; }
450
- .confidence-high { color: #22c55e; font-weight: bold; }
451
- .confidence-low { color: #f97316; font-weight: bold; }
452
- .entity-section { margin: 15px 0; }
453
- .alert-warning { background: #fff3cd; padding: 10px; border-radius: 5px; margin: 10px 0; }
454
- .alert-success { background: #d1fae5; padding: 10px; border-radius: 5px; margin: 10px 0; }
455
- .related-events { background: #f3f4f6; padding: 15px; border-radius: 5px; margin-top: 15px; }
456
- """
457
-
458
- def format_results(analysis_result):
459
- if "error" in analysis_result:
460
- return f"<div style='color: red'>Error: {analysis_result['error']}</div>"
461
-
462
- confidence_class = "confidence-high" if analysis_result["confidence"] >= 0.6 else "confidence-low"
463
-
464
- html = f"""
465
- <div class="results">
466
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
467
- <h3 style="margin: 0;">Analysis Results</h3>
468
- <div>
469
- Confidence Score: <span class="{confidence_class}">{int(analysis_result['confidence'] * 100)}%</span>
470
- </div>
471
- </div>
472
-
473
- {f'''
474
- <div class="alert-warning">
475
- &#9888; <strong>Verification Required:</strong> Low confidence score detected. Please verify the extracted information.
476
- </div>
477
- ''' if analysis_result["verification_needed"] else ''}
478
-
479
- <div class="grid grid-cols-2 gap-4">
480
- <div class="space-y-4">
481
- <div class="entity-section">
482
- <h4>People Detected</h4>
483
- <ul>{''.join(f'<li>{person}</li>' for person in analysis_result['entities']['people']) or '<li>None detected</li>'}</ul>
484
- </div>
485
-
486
- <div class="entity-section">
487
- <h4>Organizations</h4>
488
- <ul>{''.join(f'<li>{org}</li>' for org in analysis_result['entities']['organizations']) or '<li>None detected</li>'}</ul>
489
- </div>
490
-
491
- <div class="entity-section">
492
- <h4>Locations</h4>
493
- <ul>{''.join(f'<li>{loc}</li>' for loc in analysis_result['entities']['locations']) or '<li>None detected</li>'}</ul>
494
- </div>
495
- </div>
496
-
497
- <div class="space-y-4">
498
- <div class="entity-section">
499
- <h4>Temporal References</h4>
500
- <ul>{''.join(f'<li>{time}</li>' for time in analysis_result['entities']['temporal']) or '<li>None detected</li>'}</ul>
501
- </div>
502
-
503
- <div class="entity-section">
504
- <h4>Hashtags</h4>
505
- <ul>{''.join(f'<li>{tag}</li>' for tag in analysis_result['entities']['hashtags']) or '<li>None detected</li>'}</ul>
506
- </div>
507
-
508
- {f'''
509
- <div class="entity-section">
510
- <h4>Entity Relationships</h4>
511
- <ul>
512
- {''.join(f"""
513
- <li class="mb-2">
514
- <strong>{rel['source']}</strong> &rarr;
515
- <span class="text-blue-600">{rel['type'].replace('_to_', ' to ')}</span> &rarr;
516
- <strong>{rel['target']}</strong>
517
- <br/>
518
- <small class="text-gray-600">Confidence: {int(rel['confidence'] * 100)}%</small>
519
- </li>
520
- """ for rel in analysis_result['entity_relationships'])}
521
- </ul>
522
- </div>
523
- ''' if analysis_result.get('entity_relationships') else ''}
524
- </div>
525
- </div>
526
-
527
- {f'''
528
- <div class="alert-success mt-4">
529
- &#9989; <strong>Event Validated:</strong> The extracted information meets confidence thresholds.
530
- </div>
531
- ''' if not analysis_result["verification_needed"] else ''}
532
-
533
- {f'''
534
- <div class="related-events">
535
- <h4>Related Events</h4>
536
- <ul>
537
- {''.join(f"""
538
- <li class="mb-2">
539
- <div class="flex justify-between items-center">
540
- <div>{event["text"]}</div>
541
- <div class="text-sm text-gray-600">
542
- {event["timestamp"]} |
543
- Confidence: {int(event["confidence"] * 100)}%
544
- {f' | Shared Entities: {event["shared_entities"]}' if event.get("shared_entities") else ''}
545
- </div>
546
- </div>
547
- </li>
548
- """ for event in analysis_result['related_events'])}
549
- </ul>
550
- </div>
551
- ''' if analysis_result.get('related_events') else ''}
552
-
553
- <div class="entity-stats mt-4 p-4 bg-gray-50 rounded-lg">
554
- <h4 class="mb-2">Analysis Metrics</h4>
555
- <div class="grid grid-cols-3 gap-4 text-sm">
556
- <div>
557
- <strong>Confidence Breakdown:</strong>
558
- <ul class="mt-1">
559
- <li>Base Confidence: {int(analysis_result['confidence'] * 70)}%</li>
560
- <li>Entity Boost: {int((analysis_result['confidence'] - 0.7 if analysis_result['confidence'] > 0.7 else 0) * 100)}%</li>
561
- </ul>
562
- </div>
563
- <div>
564
- <strong>Entity Coverage:</strong>
565
- <ul class="mt-1">
566
- <li>Types Detected: {len([t for t in ['people', 'organizations', 'locations', 'temporal', 'hashtags'] if analysis_result['entities'].get(t)])}</li>
567
- <li>Total Entities: {sum(len(e) for e in analysis_result['entities'].values() if isinstance(e, list))}</li>
568
- </ul>
569
- </div>
570
- <div>
571
- <strong>Relationships:</strong>
572
- <ul class="mt-1">
573
- <li>Direct: {len(analysis_result.get('entity_relationships', []))}</li>
574
- <li>Related Events: {len(analysis_result.get('related_events', []))}</li>
575
- </ul>
576
- </div>
577
- </div>
578
- </div>
579
- </div>
580
- """
581
- return html
582
-
583
- # Modified to properly handle async
584
- async def process_input(text):
585
- result = await analyzer.analyze_event(text)
586
- return format_results(result)
587
-
588
- demo = gr.Interface(
589
- fn=process_input,
590
- inputs=[
591
- gr.Textbox(
592
- label="Event Text",
593
- placeholder="Enter text to analyze (e.g., 'John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech')",
594
- lines=3
595
- )
596
- ],
597
- outputs=gr.HTML(),
598
- title="ToY Event Analysis System",
599
- description="Analyze text to extract entities, assess confidence, and identify key event information with relationship tracking.",
600
- css=css,
601
- theme=gr.themes.Soft(),
602
- examples=[
603
- ["John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech"],
604
- ["Sarah Johnson and Mike Smith from Defense Systems Inc. are conducting training in Norfolk, VA on June 15th #defense #training"],
605
- ["Team meeting at headquarters with @commander_smith at 0900 #briefing"]
606
- ]
607
- )
608
-
609
- if __name__ == "__main__":
610
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ import gradio as gr
3
+ from src.analyzer import EventAnalyzer
4
+ from ui.format import ResultFormatter
5
+ from ui.styles import css
6
+
7
+ # Initialize analyzer
8
+ analyzer = EventAnalyzer()
9
+
10
+ async def process_input(text: str) -> str:
11
+ """Process input text and return formatted HTML results."""
12
+ result = await analyzer.analyze_event(text)
13
+ return ResultFormatter.format_results(result)
14
+
15
+ # Define example inputs
16
+ EXAMPLES = [
17
+ ["John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech"],
18
+ ["Sarah Johnson and Mike Smith from Defense Systems Inc. are conducting training in Norfolk, VA on June 15th #defense #training"],
19
+ ["Team meeting at headquarters with @commander_smith at 0900 #briefing"]
20
+ ]
21
+
22
+ # Create Gradio interface
23
+ demo = gr.Interface(
24
+ fn=process_input,
25
+ inputs=[
26
+ gr.Textbox(
27
+ label="Event Text",
28
+ placeholder="Enter text to analyze (e.g., 'John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech')",
29
+ lines=3
30
+ )
31
+ ],
32
+ outputs=gr.HTML(),
33
+ title="Event Analysis System",
34
+ description="Analyze text to extract entities, assess confidence, and identify key event information with relationship tracking.",
35
+ css=css,
36
+ theme=gr.themes.Soft(),
37
+ examples=EXAMPLES
38
+ )
39
+
40
+ if __name__ == "__main__":
41
+ demo.launch()
src/__init__.py ADDED
File without changes
src/analyzer.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/analyzer.py
2
+ from typing import Dict, List, Any, Optional, Union
3
+ import asyncio
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from transformers import pipeline
6
+ from datetime import datetime
7
+
8
+ from .ontology import OntologyRegistry
9
+ from .relationships import RelationshipEngine
10
+
11
+ class EventAnalyzer:
12
+ """Main analyzer class for event processing."""
13
+
14
+ def __init__(self) -> None:
15
+ """Initialize the event analyzer with required components."""
16
+ self.ontology = OntologyRegistry()
17
+ self.relationship_engine = RelationshipEngine()
18
+ self.executor = ThreadPoolExecutor(max_workers=3)
19
+
20
+ # Initialize NLP pipelines
21
+ self.ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
22
+ self.classifier = pipeline("zero-shot-classification")
23
+
24
+ async def extract_entities(self, text: str) -> Dict[str, List[str]]:
25
+ """Extract entities from text using NER pipeline."""
26
+ def _extract():
27
+ return self.ner_pipeline(text)
28
+
29
+ ner_results = await asyncio.get_event_loop().run_in_executor(
30
+ self.executor, _extract
31
+ )
32
+
33
+ entities = {
34
+ "people": [],
35
+ "organizations": [],
36
+ "locations": [],
37
+ "hashtags": [word for word in text.split() if word.startswith('#')]
38
+ }
39
+
40
+ for item in ner_results:
41
+ if item["entity"].endswith("PER"):
42
+ entities["people"].append(item["word"])
43
+ elif item["entity"].endswith("ORG"):
44
+ entities["organizations"].append(item["word"])
45
+ elif item["entity"].endswith("LOC"):
46
+ entities["locations"].append(item["word"])
47
+
48
+ return entities
49
+
50
+ def extract_temporal(self, text: str) -> List[str]:
51
+ """Extract temporal expressions from text."""
52
+ return self.ontology.validate_pattern(text, 'temporal')
53
+
54
+ async def extract_locations(self, text: str) -> List[str]:
55
+ """Extract locations using both NER and pattern matching."""
56
+ entities = await self.extract_entities(text)
57
+ ml_locations = entities.get('locations', [])
58
+ pattern_locations = self.ontology.validate_pattern(text, 'location')
59
+ return list(set(ml_locations + pattern_locations))
60
+
61
+ def calculate_confidence(self,
62
+ entities: Dict[str, List[str]],
63
+ temporal_data: List[str],
64
+ related_events: List[Any]) -> float:
65
+ """Calculate confidence score for extracted information."""
66
+ # Base confidence from entity presence
67
+ base_confidence = min(1.0, (
68
+ 0.2 * bool(entities["people"]) +
69
+ 0.2 * bool(entities["organizations"]) +
70
+ 0.3 * bool(entities["locations"]) +
71
+ 0.3 * bool(temporal_data)
72
+ ))
73
+
74
+ # Get entity parameters for frequency calculation
75
+ entity_params = [
76
+ *entities["people"],
77
+ *entities["organizations"],
78
+ *entities["locations"]
79
+ ]
80
+
81
+ if not entity_params:
82
+ return base_confidence
83
+
84
+ # Calculate entity frequency boost
85
+ query = f'''
86
+ SELECT AVG(frequency) as avg_freq
87
+ FROM entities
88
+ WHERE entity_text IN ({','.join(['?']*len(entity_params))})
89
+ '''
90
+ cursor = self.relationship_engine.conn.execute(query, entity_params)
91
+ avg_frequency = cursor.fetchone()[0] or 1
92
+ frequency_boost = min(0.2, (avg_frequency - 1) * 0.05)
93
+
94
+ # Calculate relationship confidence boost
95
+ relationship_confidence = 0
96
+ if related_events:
97
+ relationship_scores = []
98
+ for event in related_events:
99
+ cursor = self.relationship_engine.conn.execute('''
100
+ SELECT COUNT(*) as shared_entities
101
+ FROM event_entities ee1
102
+ JOIN event_entities ee2 ON ee1.entity_id = ee2.entity_id
103
+ WHERE ee1.event_id = ? AND ee2.event_id = ?
104
+ ''', (event[0], event[0]))
105
+ shared_count = cursor.fetchone()[0]
106
+ relationship_scores.append(min(0.3, shared_count * 0.1))
107
+
108
+ if relationship_scores:
109
+ relationship_confidence = max(relationship_scores)
110
+
111
+ return min(1.0, base_confidence + frequency_boost + relationship_confidence)
112
+
113
+ async def analyze_event(self, text: str) -> Dict[str, Any]:
114
+ """Analyze event text and extract structured information."""
115
+ try:
116
+ # Parallel extraction
117
+ entities_future = self.extract_entities(text)
118
+ temporal_data = self.extract_temporal(text)
119
+ locations_future = self.extract_locations(text)
120
+
121
+ # Gather async results
122
+ entities, locations = await asyncio.gather(
123
+ entities_future, locations_future
124
+ )
125
+
126
+ # Merge locations and add temporal data
127
+ entities['locations'] = locations
128
+ entities['temporal'] = temporal_data
129
+
130
+ # Find related events
131
+ related_events = self.relationship_engine.find_related_events({
132
+ 'text': text,
133
+ 'entities': entities
134
+ })
135
+
136
+ # Calculate confidence
137
+ confidence = self.calculate_confidence(entities, temporal_data, related_events)
138
+
139
+ # Store event if confidence meets threshold
140
+ cursor = None
141
+ if confidence >= 0.6:
142
+ cursor = self.relationship_engine.conn.execute(
143
+ 'INSERT INTO events (text, timestamp, confidence) VALUES (?, ?, ?)',
144
+ (text, datetime.now().isoformat(), confidence)
145
+ )
146
+ event_id = cursor.lastrowid
147
+
148
+ # Store entities and update relationships
149
+ self.relationship_engine.store_entities(event_id, {
150
+ 'person': entities['people'],
151
+ 'organization': entities['organizations'],
152
+ 'location': entities['locations'],
153
+ 'temporal': temporal_data,
154
+ 'hashtag': entities['hashtags']
155
+ })
156
+
157
+ self.relationship_engine.update_entity_relationships(event_id)
158
+ self.relationship_engine.conn.commit()
159
+
160
+ # Get entity relationships for output
161
+ entity_relationships = []
162
+ if cursor and cursor.lastrowid:
163
+ entity_relationships = self.relationship_engine.get_entity_relationships(cursor.lastrowid)
164
+
165
+ return {
166
+ "text": text,
167
+ "entities": entities,
168
+ "confidence": confidence,
169
+ "verification_needed": confidence < 0.6,
170
+ "related_events": [
171
+ {
172
+ "text": event[1],
173
+ "timestamp": event[2],
174
+ "confidence": event[3],
175
+ "shared_entities": event[4] if len(event) > 4 else None
176
+ }
177
+ for event in related_events
178
+ ],
179
+ "entity_relationships": entity_relationships
180
+ }
181
+
182
+ except Exception as e:
183
+ return {"error": str(e)}
184
+
185
+ def get_entity_statistics(self) -> Dict[str, List[tuple]]:
186
+ """Get statistics about stored entities and relationships."""
187
+ stats = {}
188
+
189
+ # Entity counts by type
190
+ cursor = self.relationship_engine.conn.execute('''
191
+ SELECT entity_type, COUNT(*) as count, AVG(frequency) as avg_frequency
192
+ FROM entities
193
+ GROUP BY entity_type
194
+ ''')
195
+ stats['entity_counts'] = cursor.fetchall()
196
+
197
+ # Most frequent entities
198
+ cursor = self.relationship_engine.conn.execute('''
199
+ SELECT entity_text, entity_type, frequency
200
+ FROM entities
201
+ ORDER BY frequency DESC
202
+ LIMIT 10
203
+ ''')
204
+ stats['frequent_entities'] = cursor.fetchall()
205
+
206
+ # Relationship statistics
207
+ cursor = self.relationship_engine.conn.execute('''
208
+ SELECT relationship_type, COUNT(*) as count, AVG(confidence) as avg_confidence
209
+ FROM entity_relationships
210
+ GROUP BY relationship_type
211
+ ''')
212
+ stats['relationship_stats'] = cursor.fetchall()
213
+
214
+ return stats
src/ontology.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/ontology.py
2
+ from typing import List, Dict, Pattern
3
+ import re
4
+
5
class OntologyRegistry:
    """Registry of regex patterns and NER-label mappings for entity validation."""

    def __init__(self) -> None:
        """Define the raw patterns and label map, then pre-compile the regexes."""
        # Clock times, "Mon DD, YYYY" dates, "tomorrow", and "in N days/weeks/months".
        self.temporal_patterns: List[str] = [
            r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b'
        ]

        # The first pattern captures the place name (group 1) that follows a
        # preposition; the remaining patterns have no capture group.
        self.location_patterns: List[str] = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b'
        ]

        # NER tag -> ontology type name.
        self.entity_types: Dict[str, str] = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous'
        }

        # Compile patterns once so repeated validation calls reuse them.
        self._compiled_patterns: Dict[str, List[Pattern]] = {
            'temporal': [re.compile(p) for p in self.temporal_patterns],
            'location': [re.compile(p) for p in self.location_patterns]
        }

    def validate_pattern(self, text: str, pattern_type: str) -> List[str]:
        """
        Find every match of the given pattern family in ``text``.

        When a pattern defines a capture group, the first group is returned
        (so "in Paris" yields "Paris" rather than the preposition plus the
        name); patterns without groups return the whole match.

        Args:
            text: Input text to validate; empty or ``None`` yields no matches
            pattern_type: Type of pattern to match ('temporal' or 'location')

        Returns:
            List of matched strings, in pattern order and then text order.
            An unknown ``pattern_type`` yields an empty list.
        """
        if not text:
            return []

        matches: List[str] = []
        for pattern in self._compiled_patterns.get(pattern_type, []):
            for match in pattern.finditer(text):
                # Prefer the captured payload when the pattern has one.
                captured = match.group(1) if pattern.groups else None
                matches.append(captured if captured is not None else match.group())

        return matches

    def get_entity_type(self, ner_type: str) -> str:
        """Map NER entity type to ontology type ('miscellaneous' if unknown)."""
        return self.entity_types.get(ner_type, 'miscellaneous')
src/relationships.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/relationships.py
2
+ from typing import Dict, List, Tuple, Optional, Any
3
+ import sqlite3
4
+ from datetime import datetime
5
+ from dataclasses import dataclass
6
+
7
@dataclass
class Entity:
    """Record describing one extracted entity and its tracking metadata.

    NOTE(review): the fields appear to mirror the columns of the ``entities``
    table (id, text, type, first/last seen, frequency, confidence); no caller
    is visible in this module -- confirm before relying on that mapping.
    """

    # Optional integer identifier; may be None.
    id: Optional[int]
    # Entity surface text.
    text: str
    # Entity category label.
    type: str
    # Timestamps are carried as strings.
    first_seen: str
    last_seen: str
    # Occurrence counter.
    frequency: int
    # Confidence score as a float.
    confidence: float
17
+
18
@dataclass
class Relationship:
    """Record describing a typed link between two entities.

    NOTE(review): the fields appear to mirror the columns of the
    ``entity_relationships`` table; no caller is visible in this module --
    confirm before relying on that mapping.
    """

    # Optional integer identifier; may be None.
    id: Optional[int]
    # Identifiers of the two linked entities.
    source_id: int
    target_id: int
    # Relationship category label.
    type: str
    # Confidence score as a float.
    confidence: float
    # Timestamps are carried as strings.
    first_seen: str
    last_seen: str
28
+
29
class RelationshipEngine:
    """Engine for managing entity and event relationships.

    All state lives in a SQLite database reached through ``self.conn``.
    Queries return plain tuples unless a method documents otherwise.
    """

    def __init__(self, db_path: str = ':memory:') -> None:
        """Initialize the relationship engine with database connection.

        Args:
            db_path: SQLite database path; the default keeps data in memory.
        """
        # check_same_thread=False allows use of the connection from threads
        # other than the one that created it.
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.setup_database()

    def setup_database(self) -> None:
        """Create the tables and indexes if they do not exist yet."""
        self.conn.executescript('''
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY,
                text TEXT,
                timestamp DATETIME,
                confidence REAL
            );

            CREATE TABLE IF NOT EXISTS entities (
                id INTEGER PRIMARY KEY,
                entity_text TEXT,
                entity_type TEXT,
                first_seen DATETIME,
                last_seen DATETIME,
                frequency INTEGER DEFAULT 1,
                confidence REAL
            );

            CREATE TABLE IF NOT EXISTS event_entities (
                event_id INTEGER,
                entity_id INTEGER,
                FOREIGN KEY (event_id) REFERENCES events(id),
                FOREIGN KEY (entity_id) REFERENCES entities(id),
                PRIMARY KEY (event_id, entity_id)
            );

            CREATE TABLE IF NOT EXISTS entity_relationships (
                id INTEGER PRIMARY KEY,
                source_entity_id INTEGER,
                target_entity_id INTEGER,
                relationship_type TEXT,
                confidence REAL,
                first_seen DATETIME,
                last_seen DATETIME,
                FOREIGN KEY (source_entity_id) REFERENCES entities(id),
                FOREIGN KEY (target_entity_id) REFERENCES entities(id)
            );

            CREATE INDEX IF NOT EXISTS idx_entity_text
            ON entities(entity_text, entity_type);

            CREATE INDEX IF NOT EXISTS idx_event_entities
            ON event_entities(event_id, entity_id);

            CREATE INDEX IF NOT EXISTS idx_entity_relationships
            ON entity_relationships(source_entity_id, target_entity_id);
        ''')
        self.conn.commit()

    def store_entities(self, event_id: int, entities_dict: Dict[str, List[str]]) -> None:
        """Store or update entities and link each one to ``event_id``.

        Args:
            event_id: Id of the event the entities were extracted from.
            entities_dict: Maps an entity type to a list of entity texts.
                Values that are not lists are skipped.
        """
        now = datetime.now().isoformat()

        for entity_type, entities in entities_dict.items():
            if not isinstance(entities, list):
                continue

            for entity_text in entities:
                # An entity is identified by its (text, type) pair.
                existing = self.conn.execute(
                    'SELECT id, frequency FROM entities WHERE entity_text = ? AND entity_type = ?',
                    (entity_text, entity_type)
                ).fetchone()

                if existing:
                    entity_id, freq = existing
                    self.conn.execute('''
                        UPDATE entities
                        SET frequency = ?, last_seen = ?
                        WHERE id = ?
                    ''', (freq + 1, now, entity_id))
                else:
                    cursor = self.conn.execute('''
                        INSERT INTO entities
                        (entity_text, entity_type, first_seen, last_seen, confidence)
                        VALUES (?, ?, ?, ?, ?)
                    ''', (entity_text, entity_type, now, now, 1.0))
                    entity_id = cursor.lastrowid

                # OR IGNORE: the (event_id, entity_id) link may already exist.
                self.conn.execute('''
                    INSERT OR IGNORE INTO event_entities (event_id, entity_id)
                    VALUES (?, ?)
                ''', (event_id, entity_id))

        self.conn.commit()

    def find_related_events(self, event_data: Dict) -> List[Tuple]:
        """Find events related through shared entities.

        Args:
            event_data: Dict whose ``'entities'`` entry maps entity types to
                lists of entity texts; non-list values are ignored.

        Returns:
            Up to five rows ``(id, text, timestamp, confidence,
            shared_entities)``, most shared entities first, then newest
            first. Empty when ``event_data`` carries no entity texts.
        """
        entity_texts = []
        for entities in event_data.get('entities', {}).values():
            if isinstance(entities, list):
                entity_texts.extend(entities)

        if not entity_texts:
            return []

        # Only '?' placeholders are formatted into the SQL; the values
        # themselves are bound as parameters.
        placeholders = ','.join('?' * len(entity_texts))
        query = f'''
            SELECT DISTINCT e.*, COUNT(ee.entity_id) as shared_entities
            FROM events e
            JOIN event_entities ee ON e.id = ee.event_id
            JOIN entities ent ON ee.entity_id = ent.id
            WHERE ent.entity_text IN ({placeholders})
            GROUP BY e.id
            ORDER BY shared_entities DESC, e.timestamp DESC
            LIMIT 5
        '''

        return self.conn.execute(query, entity_texts).fetchall()

    def update_entity_relationships(self, event_id: int) -> None:
        """Create or reinforce relationships between entities of one event.

        Every pair of entities of *different* types linked to ``event_id``
        gets a relationship typed ``"<type1>_to_<type2>"``.
        """
        # ORDER BY keeps the pairing (and therefore source/target and the
        # relationship type string) deterministic.
        entities = self.conn.execute('''
            SELECT e.id, e.entity_text, e.entity_type
            FROM entities e
            JOIN event_entities ee ON e.id = ee.entity_id
            WHERE ee.event_id = ?
            ORDER BY e.id
        ''', (event_id,)).fetchall()

        now = datetime.now().isoformat()

        for i, entity1 in enumerate(entities):
            for entity2 in entities[i+1:]:
                # Same-type pairs are not related to each other.
                if entity1[2] == entity2[2]:
                    continue

                relationship_type = f"{entity1[2]}_to_{entity2[2]}"
                self._update_relationship(entity1[0], entity2[0], relationship_type, now)

        self.conn.commit()

    def _update_relationship(self, source_id: int, target_id: int, rel_type: str, timestamp: str) -> None:
        """Update or create a relationship between two entities.

        An existing relationship in either direction is reinforced by 0.1,
        capped at 1.0; otherwise a new one is inserted with confidence 0.5.
        The caller is responsible for committing.
        """
        result = self.conn.execute('''
            SELECT id FROM entity_relationships
            WHERE (source_entity_id = ? AND target_entity_id = ?)
            OR (source_entity_id = ? AND target_entity_id = ?)
        ''', (source_id, target_id, target_id, source_id)).fetchone()

        if result:
            # MIN(...) keeps confidence from growing past 1.0 on repeats.
            self.conn.execute('''
                UPDATE entity_relationships
                SET last_seen = ?, confidence = MIN(1.0, confidence + 0.1)
                WHERE id = ?
            ''', (timestamp, result[0]))
        else:
            self.conn.execute('''
                INSERT INTO entity_relationships
                (source_entity_id, target_entity_id, relationship_type, confidence, first_seen, last_seen)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (source_id, target_id, rel_type, 0.5, timestamp, timestamp))

    def get_entity_relationships(self, event_id: int) -> List[Dict[str, Any]]:
        """Get all relationships that involve entities of an event.

        Returns:
            One dict per relationship, keyed by column name: the
            ``entity_relationships`` columns plus ``source_text``,
            ``source_type``, ``target_text`` and ``target_type``. The
            aliases ``source``, ``target`` and ``type`` are added because
            ``ui/format.py`` reads those keys.
        """
        query = '''
            SELECT DISTINCT er.*,
                e1.entity_text as source_text, e1.entity_type as source_type,
                e2.entity_text as target_text, e2.entity_type as target_type
            FROM event_entities ee
            JOIN entity_relationships er ON ee.entity_id IN (er.source_entity_id, er.target_entity_id)
            JOIN entities e1 ON er.source_entity_id = e1.id
            JOIN entities e2 ON er.target_entity_id = e2.id
            WHERE ee.event_id = ?
        '''
        cursor = self.conn.execute(query, (event_id,))
        # Rows are plain tuples (no row_factory is set), so dict(row) would
        # raise; pair each value with its column name instead.
        columns = [column[0] for column in cursor.description]

        relationships = []
        for row in cursor.fetchall():
            relationship = dict(zip(columns, row))
            relationship['source'] = relationship['source_text']
            relationship['target'] = relationship['target_text']
            relationship['type'] = relationship['relationship_type']
            relationships.append(relationship)
        return relationships
templates/results.html ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!--
  Results panel template.
  Identifiers prefixed with a dollar sign are placeholders filled in by
  ResultFormatter.format_results (ui/format.py) through string.Template.
  List placeholders receive ready-made <li> markup; the warning and success
  placeholders receive either a complete alert block or an empty string.
-->
<div class="results">
    <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
        <h3 style="margin: 0;">Analysis Results</h3>
        <div>
            Confidence Score: <span class="$confidence_class">$confidence_score%</span>
        </div>
    </div>

    $verification_warning

    <div class="grid grid-cols-2 gap-4">
        <div class="space-y-4">
            <div class="entity-section">
                <h4>People Detected</h4>
                <ul>$people_list</ul>
            </div>

            <div class="entity-section">
                <h4>Organizations</h4>
                <ul>$org_list</ul>
            </div>

            <div class="entity-section">
                <h4>Locations</h4>
                <ul>$location_list</ul>
            </div>
        </div>

        <div class="space-y-4">
            <div class="entity-section">
                <h4>Temporal References</h4>
                <ul>$temporal_list</ul>
            </div>

            <div class="entity-section">
                <h4>Hashtags</h4>
                <ul>$hashtag_list</ul>
            </div>

            <div class="entity-section">
                <h4>Entity Relationships</h4>
                <ul>$entity_relationships</ul>
            </div>
        </div>
    </div>

    $validation_success

    <div class="related-events">
        <h4>Related Events</h4>
        <ul>$related_events</ul>
    </div>

    <div class="entity-stats mt-4 p-4 bg-gray-50 rounded-lg">
        <h4 class="mb-2">Analysis Metrics</h4>
        <div class="grid grid-cols-3 gap-4 text-sm">
            <div>
                <strong>Confidence Breakdown:</strong>
                <ul class="mt-1">
                    <li>Base Confidence: $base_confidence%</li>
                    <li>Entity Boost: $entity_boost%</li>
                </ul>
            </div>
            <div>
                <strong>Entity Coverage:</strong>
                <ul class="mt-1">
                    <li>Types Detected: $types_detected</li>
                    <li>Total Entities: $total_entities</li>
                </ul>
            </div>
            <div>
                <strong>Relationships:</strong>
                <ul class="mt-1">
                    <li>Direct: $direct_relationships</li>
                    <li>Related Events: $related_event_count</li>
                </ul>
            </div>
        </div>
    </div>
</div>
ui/__init__.py ADDED
File without changes
ui/format.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui/format.py
2
+
3
+ from string import Template
4
+
5
class ResultFormatter:
    """Build the HTML shown for an analysis result.

    Values that originate from analysed text (entity names, event text,
    error messages) are HTML-escaped before being placed into markup.
    """

    @staticmethod
    def _esc(value):
        """Return ``str(value)`` with HTML special characters escaped."""
        from html import escape  # function-scope import keeps this block self-contained
        return escape(str(value))

    @staticmethod
    def format_entity_list(entities, entity_type):
        """Render the entities of one type as ``<li>`` items.

        Args:
            entities: Mapping of entity type to a list of entity texts.
            entity_type: Key to look up in ``entities``.

        Returns:
            Concatenated ``<li>`` markup, or a single "None detected" item
            when the type is missing or empty.
        """
        items = entities.get(entity_type, [])
        if not items:
            return '<li>None detected</li>'
        return ''.join(f'<li>{ResultFormatter._esc(item)}</li>' for item in items)

    @staticmethod
    def format_entity_relationships(relationships):
        """Render relationship dicts as ``<li>`` items.

        Each dict is read through the keys ``source``/``target``/``type``,
        falling back to ``source_text``/``target_text``/``relationship_type``
        (the column names produced by the relationship query), plus
        ``confidence``. Returns an empty string for no relationships.
        """
        if not relationships:
            return ''

        esc = ResultFormatter._esc
        items = []
        for rel in relationships:
            source = rel.get('source', rel.get('source_text', ''))
            target = rel.get('target', rel.get('target_text', ''))
            rel_type = str(rel.get('type', rel.get('relationship_type', '')))
            confidence = rel.get('confidence') or 0
            item = f"""
            <li class="mb-2">
                <strong>{esc(source)}</strong> &rarr;
                <span class="text-blue-600">{esc(rel_type.replace('_to_', ' to '))}</span> &rarr;
                <strong>{esc(target)}</strong>
                <br/>
                <small class="text-gray-600">Confidence: {int(confidence * 100)}%</small>
            </li>
            """
            items.append(item)
        return ''.join(items)

    @staticmethod
    def format_related_events(events):
        """Render related-event dicts as ``<li>`` items.

        Each dict provides ``text``, ``timestamp`` and ``confidence``; a
        truthy ``shared_entities`` value is appended to the detail line.
        Returns an empty string for no events.
        """
        if not events:
            return ''

        esc = ResultFormatter._esc
        items = []
        for event in events:
            shared = f" | Shared Entities: {esc(event['shared_entities'])}" if event.get('shared_entities') else ''
            item = f"""
            <li class="mb-2">
                <div class="flex justify-between items-center">
                    <div>{esc(event['text'])}</div>
                    <div class="text-sm text-gray-600">
                        {esc(event['timestamp'])} |
                        Confidence: {int(event['confidence'] * 100)}%{shared}
                    </div>
                </div>
            </li>
            """
            items.append(item)
        return ''.join(items)

    @staticmethod
    def format_results(analysis_result, template_path='templates/results.html'):
        """Render a full analysis result into the results template.

        Args:
            analysis_result: Either ``{"error": ...}`` or a dict with
                ``confidence``, ``verification_needed``, ``entities`` and
                optionally ``entity_relationships`` / ``related_events``.
            template_path: Path of the ``string.Template`` file; the default
                is resolved against the current working directory.

        Returns:
            The rendered HTML, or a red error ``<div>`` for error results.

        Raises:
            OSError: If the template file cannot be read.
            KeyError: If the template uses a placeholder not supplied here.
        """
        if "error" in analysis_result:
            return f"<div style='color: red'>Error: {ResultFormatter._esc(analysis_result['error'])}</div>"

        # Load template
        with open(template_path, 'r', encoding='utf-8') as f:
            template = Template(f.read())

        # Prepare template variables
        confidence = analysis_result['confidence']
        confidence_class = "confidence-high" if confidence >= 0.6 else "confidence-low"
        entities = analysis_result['entities']
        # Either key may be missing or explicitly None.
        relationships = analysis_result.get('entity_relationships') or []
        related_events = analysis_result.get('related_events') or []

        return template.substitute(
            confidence_class=confidence_class,
            confidence_score=int(confidence * 100),
            verification_warning=ResultFormatter._verification_warning(analysis_result),
            people_list=ResultFormatter.format_entity_list(entities, 'people'),
            org_list=ResultFormatter.format_entity_list(entities, 'organizations'),
            location_list=ResultFormatter.format_entity_list(entities, 'locations'),
            temporal_list=ResultFormatter.format_entity_list(entities, 'temporal'),
            hashtag_list=ResultFormatter.format_entity_list(entities, 'hashtags'),
            entity_relationships=ResultFormatter.format_entity_relationships(relationships),
            validation_success=ResultFormatter._validation_success(analysis_result),
            related_events=ResultFormatter.format_related_events(related_events),
            base_confidence=int(confidence * 70),
            entity_boost=int((confidence - 0.7 if confidence > 0.7 else 0) * 100),
            types_detected=len([t for t in ['people', 'organizations', 'locations', 'temporal', 'hashtags']
                                if entities.get(t)]),
            total_entities=sum(len(e) for e in entities.values() if isinstance(e, list)),
            direct_relationships=len(relationships),
            related_event_count=len(related_events)
        )

    @staticmethod
    def _verification_warning(result):
        """Return the warning block when ``verification_needed`` is set, else ''."""
        if not result["verification_needed"]:
            return ''
        return '''
        <div class="alert-warning">
            &#9888; <strong>Verification Required:</strong> Low confidence score detected. Please verify the extracted information.
        </div>
        '''

    @staticmethod
    def _validation_success(result):
        """Return the success block when no verification is needed, else ''."""
        if result["verification_needed"]:
            return ''
        return '''
        <div class="alert-success mt-4">
            &#9989; <strong>Event Validated:</strong> The extracted information meets confidence thresholds.
        </div>
        '''
ui/styles.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet for the analysis results UI, kept as one string.
# The class names are the ones used by templates/results.html and by the
# HTML fragments built in ui/format.py (e.g. results, confidence-high,
# entity-section, alert-warning, related-events, plus the small utility
# classes such as grid, gap-4, mt-4 and text-sm).
# NOTE(review): presumably handed to the UI framework as custom CSS -- the
# consumer is not visible in this file; confirm against main.py.
css = """
.container {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
}

.results {
    padding: 20px;
    border: 1px solid #ddd;
    border-radius: 8px;
    margin-top: 20px;
}

.confidence-high {
    color: #22c55e;
    font-weight: bold;
}

.confidence-low {
    color: #f97316;
    font-weight: bold;
}

.entity-section {
    margin: 15px 0;
}

.alert-warning {
    background: #fff3cd;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}

.alert-success {
    background: #d1fae5;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}

.related-events {
    background: #f3f4f6;
    padding: 15px;
    border-radius: 5px;
    margin-top: 15px;
}

.grid {
    display: grid;
}

.grid-cols-2 {
    grid-template-columns: repeat(2, minmax(0, 1fr));
}

.grid-cols-3 {
    grid-template-columns: repeat(3, minmax(0, 1fr));
}

.gap-4 {
    gap: 1rem;
}

.space-y-4 > * + * {
    margin-top: 1rem;
}

.mt-4 {
    margin-top: 1rem;
}

.mb-2 {
    margin-bottom: 0.5rem;
}

.text-blue-600 {
    color: #2563eb;
}

.text-gray-600 {
    color: #4b5563;
}

.text-sm {
    font-size: 0.875rem;
    line-height: 1.25rem;
}

.bg-gray-50 {
    background-color: #f9fafb;
}

.rounded-lg {
    border-radius: 0.5rem;
}

.p-4 {
    padding: 1rem;
}
"""