AIdeaText committed
Commit 6f5a069 (verified)
1 Parent(s): 3004ff5

Create current_situation_analysis.py

modules/studentact/current_situation_analysis.py ADDED
@@ -0,0 +1,810 @@
# v3/modules/studentact/current_situation_analysis.py

import streamlit as st
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
from collections import Counter
from itertools import combinations
import numpy as np
import matplotlib.patches as patches
import logging

# Basic logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('app.log')
    ]
)

# Module-specific logger
logger = logging.getLogger(__name__)

#########################################################################

def correlate_metrics(scores):
    """
    Adjusts the scores so that logical correlations between metrics are preserved.

    Args:
        scores: dict with the initial vocabulary, structure, cohesion and clarity scores

    Returns:
        dict with the adjusted scores
    """
    try:
        # 1. Structure-cohesion correlation
        # Cohesion cannot be lower than structure * 0.7
        min_cohesion = scores['structure']['normalized_score'] * 0.7
        if scores['cohesion']['normalized_score'] < min_cohesion:
            scores['cohesion']['normalized_score'] = min_cohesion

        # 2. Vocabulary-cohesion correlation
        # Lexical cohesion depends on vocabulary
        vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
        scores['cohesion']['normalized_score'] = max(
            scores['cohesion']['normalized_score'],
            vocab_influence
        )

        # 3. Cohesion-clarity correlation
        # Clarity cannot exceed cohesion * 1.2
        max_clarity = scores['cohesion']['normalized_score'] * 1.2
        if scores['clarity']['normalized_score'] > max_clarity:
            scores['clarity']['normalized_score'] = max_clarity

        # 4. Structure-clarity correlation
        # Clarity cannot exceed structure * 1.1
        struct_max_clarity = scores['structure']['normalized_score'] * 1.1
        scores['clarity']['normalized_score'] = min(
            scores['clarity']['normalized_score'],
            struct_max_clarity
        )

        # Clamp every score to the 0-1 range
        for metric in scores:
            scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))

        return scores

    except Exception as e:
        logger.error(f"Error in correlate_metrics: {str(e)}")
        return scores

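# Illustrative sketch (not part of the module's API): `scores` is assumed to be the
# nested dict produced by analyze_text_dimensions below. For example:
#
#     scores = {
#         'vocabulary': {'normalized_score': 0.80, 'details': {}},
#         'structure':  {'normalized_score': 0.90, 'details': None},
#         'cohesion':   {'normalized_score': 0.40, 'details': None},
#         'clarity':    {'normalized_score': 0.95, 'details': {}},
#     }
#     adjusted = correlate_metrics(scores)
#     # cohesion is raised to structure * 0.7 = 0.63, then clarity is capped at
#     # cohesion * 1.2 = 0.756 (and at structure * 1.1 = 0.99), so
#     # adjusted['clarity']['normalized_score'] comes out at about 0.76.
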
##########################################################################

def analyze_text_dimensions(doc):
    """
    Analyzes the main dimensions of the text while keeping the logical correlations between them.
    """
    try:
        # Get the initial scores
        vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
        struct_score = analyze_structure(doc)
        cohesion_score = analyze_cohesion(doc)
        clarity_score, clarity_details = analyze_clarity(doc)

        # Build the initial score dictionary
        scores = {
            'vocabulary': {
                'normalized_score': vocab_score,
                'details': vocab_details
            },
            'structure': {
                'normalized_score': struct_score,
                'details': None
            },
            'cohesion': {
                'normalized_score': cohesion_score,
                'details': None
            },
            'clarity': {
                'normalized_score': clarity_score,
                'details': clarity_details
            }
        }

        # Adjust the correlations between metrics
        adjusted_scores = correlate_metrics(scores)

        # Diagnostic logging
        logger.info(f"""
            Original vs adjusted scores:
            Vocabulary: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
            Structure: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
            Cohesion: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
            Clarity: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
        """)

        return adjusted_scores

    except Exception as e:
        logger.error(f"Error in analyze_text_dimensions: {str(e)}")
        return {
            'vocabulary': {'normalized_score': 0.0, 'details': {}},
            'structure': {'normalized_score': 0.0, 'details': {}},
            'cohesion': {'normalized_score': 0.0, 'details': {}},
            'clarity': {'normalized_score': 0.0, 'details': {}}
        }

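# Usage sketch (illustrative): this assumes a spaCy pipeline with a parser and
# lemmatizer; the model name used here is an assumption, not fixed by this module.
#
#     import spacy
#     nlp = spacy.load("es_core_news_md")  # assumed model
#     doc = nlp("El texto se analiza aquí. Además, las métricas se correlacionan entre sí.")
#     dims = analyze_text_dimensions(doc)
#     print(dims['clarity']['normalized_score'])
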
#############################################################################################

def analyze_clarity(doc):
    """
    Analyzes text clarity by combining several factors.
    """
    try:
        sentences = list(doc.sents)
        if not sentences:
            return 0.0, {}

        # 1. Sentence length
        sentence_lengths = [len(sent) for sent in sentences]
        avg_length = sum(sentence_lengths) / len(sentences)

        # Normalize using the thresholds defined for clarity
        length_score = normalize_score(
            value=avg_length,
            metric_type='clarity',
            optimal_length=20,      # an ideal sentence has ~20 words
            min_threshold=0.60,     # consistent with METRIC_THRESHOLDS
            target_threshold=0.75   # consistent with METRIC_THRESHOLDS
        )

        # 2. Connector analysis
        connector_count = 0
        connector_weights = {
            'CCONJ': 1.0,  # coordinating conjunctions
            'SCONJ': 1.2,  # subordinating conjunctions
            'ADV': 0.8     # connective adverbs
        }

        for token in doc:
            if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
                connector_count += connector_weights[token.pos_]

        # Normalize connectors per sentence
        connectors_per_sentence = connector_count / len(sentences) if sentences else 0
        connector_score = normalize_score(
            value=connectors_per_sentence,
            metric_type='clarity',
            optimal_connections=1.5,  # ~1.5 connectors per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 3. Structural complexity
        clause_count = 0
        for sent in sentences:
            verbs = [token for token in sent if token.pos_ == 'VERB']
            clause_count += len(verbs)

        complexity_raw = clause_count / len(sentences) if sentences else 0
        complexity_score = normalize_score(
            value=complexity_raw,
            metric_type='clarity',
            optimal_depth=2.0,  # ~2 clauses per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 4. Lexical density
        content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
        total_words = len([token for token in doc if token.is_alpha])
        density = content_words / total_words if total_words > 0 else 0

        density_score = normalize_score(
            value=density,
            metric_type='clarity',
            optimal_connections=0.6,  # 60% content words is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # Weighted final score
        weights = {
            'length': 0.3,
            'connectors': 0.3,
            'complexity': 0.2,
            'density': 0.2
        }

        clarity_score = (
            weights['length'] * length_score +
            weights['connectors'] * connector_score +
            weights['complexity'] * complexity_score +
            weights['density'] * density_score
        )

        details = {
            'length_score': length_score,
            'connector_score': connector_score,
            'complexity_score': complexity_score,
            'density_score': density_score,
            'avg_sentence_length': avg_length,
            'connectors_per_sentence': connectors_per_sentence,
            'density': density
        }

        # Diagnostic logging
        logger.info(f"""
            Clarity scores:
            - Length: {length_score:.2f} (avg={avg_length:.1f} words)
            - Connectors: {connector_score:.2f} (avg={connectors_per_sentence:.1f} per sentence)
            - Complexity: {complexity_score:.2f} (avg={complexity_raw:.1f} clauses)
            - Density: {density_score:.2f} ({density*100:.1f}% content words)
            - Final score: {clarity_score:.2f}
        """)

        return clarity_score, details

    except Exception as e:
        logger.error(f"Error in analyze_clarity: {str(e)}")
        return 0.0, {}

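# Worked example of the weighting above (illustrative numbers only): with
# length_score=0.70, connector_score=0.70, complexity_score=1.00 and density_score=1.00,
# clarity_score = 0.3*0.70 + 0.3*0.70 + 0.2*1.00 + 0.2*1.00 = 0.82.
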
def analyze_vocabulary_diversity(doc):
    """Improved analysis of vocabulary diversity and quality"""
    try:
        # 1. Basic diversity analysis
        unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
        total_words = len([token for token in doc if token.is_alpha])
        basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0

        # 2. Register analysis
        academic_words = 0
        narrative_words = 0
        technical_terms = 0

        # Classify words by register
        for token in doc:
            if token.is_alpha:
                # Detect academic/technical terms
                if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
                    if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
                        technical_terms += 1
                # Detect narrative words
                if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
                    narrative_words += 1

        # 3. Syntactic complexity analysis
        avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))

        # 4. Weighted score
        weights = {
            'diversity': 0.3,
            'technical': 0.3,
            'narrative': 0.2,
            'complexity': 0.2
        }

        scores = {
            'diversity': basic_diversity,
            'technical': technical_terms / total_words if total_words > 0 else 0,
            'narrative': narrative_words / total_words if total_words > 0 else 0,
            'complexity': min(1.0, avg_sentence_length / 20)  # normalized to 20 words
        }

        # Weighted final score
        final_score = sum(weights[key] * scores[key] for key in weights)

        # Extra information for diagnostics
        details = {
            'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
            'scores': scores
        }

        return final_score, details

    except Exception as e:
        logger.error(f"Error in analyze_vocabulary_diversity: {str(e)}")
        return 0.0, {}

def analyze_cohesion(doc):
    """Analyzes textual cohesion"""
    try:
        sentences = list(doc.sents)
        if len(sentences) < 2:
            logger.warning("Text too short for cohesion analysis")
            return 0.0

        # 1. Lexical connections between adjacent sentences
        lexical_connections = 0
        total_possible_connections = 0

        for i in range(len(sentences)-1):
            # Get meaningful lemmas (no stopwords)
            sent1_words = {token.lemma_ for token in sentences[i]
                           if token.is_alpha and not token.is_stop}
            sent2_words = {token.lemma_ for token in sentences[i+1]
                           if token.is_alpha and not token.is_stop}

            if sent1_words and sent2_words:  # make sure neither set is empty
                intersection = len(sent1_words.intersection(sent2_words))
                total_possible = min(len(sent1_words), len(sent2_words))

                if total_possible > 0:
                    lexical_score = intersection / total_possible
                    lexical_connections += lexical_score
                    total_possible_connections += 1

        # 2. Connector analysis
        connector_count = 0
        connector_types = {
            'CCONJ': 1.0,  # coordinating conjunctions
            'SCONJ': 1.2,  # subordinating conjunctions
            'ADV': 0.8     # connective adverbs
        }

        for token in doc:
            if (token.pos_ in connector_types and
                token.dep_ in ['cc', 'mark', 'advmod'] and
                not token.is_stop):
                connector_count += connector_types[token.pos_]

        # 3. Normalized scores
        if total_possible_connections > 0:
            lexical_cohesion = lexical_connections / total_possible_connections
        else:
            lexical_cohesion = 0

        if len(sentences) > 1:
            connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
        else:
            connector_cohesion = 0

        # 4. Weighted final score
        weights = {
            'lexical': 0.7,
            'connectors': 0.3
        }

        cohesion_score = (
            weights['lexical'] * lexical_cohesion +
            weights['connectors'] * connector_cohesion
        )

        # 5. Diagnostic logging
        logger.info(f"""
            Cohesion analysis:
            - Lexical connections found: {lexical_connections}
            - Possible connections: {total_possible_connections}
            - Lexical cohesion score: {lexical_cohesion}
            - Connectors found: {connector_count}
            - Connector cohesion score: {connector_cohesion}
            - Final score: {cohesion_score}
        """)

        return cohesion_score

    except Exception as e:
        logger.error(f"Error in analyze_cohesion: {str(e)}")
        return 0.0

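# Worked micro-example (illustrative): for two adjacent sentences whose non-stopword
# lemma sets might be {"estudiante", "escribir", "ensayo"} and {"ensayo", "revisar", "tarde"},
# the overlap is 1 lemma out of min(3, 3) = 3, so that pair contributes 1/3 ~= 0.33
# to lexical_connections and 1 to total_possible_connections.
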
def analyze_structure(doc):
    """Scores syntactic structure from the dependency tree of each sentence."""
    try:
        if len(doc) == 0:
            return 0.0

        structure_scores = []
        for token in doc:
            if token.dep_ == 'ROOT':
                result = get_dependency_depths(token)
                structure_scores.append(result['final_score'])

        if not structure_scores:
            return 0.0

        return min(1.0, sum(structure_scores) / len(structure_scores))

    except Exception as e:
        logger.error(f"Error in analyze_structure: {str(e)}")
        return 0.0

# Auxiliary analysis functions

def get_dependency_depths(token, depth=0, analyzed_tokens=None):
    """
    Analyzes the depth and quality of the dependency relations.

    Args:
        token: token to analyze
        depth: current depth in the tree
        analyzed_tokens: set used to avoid cycles in the analysis

    Returns:
        dict: detailed information about the dependencies
            - depths: list of depths
            - relations: dict with the relation types found
            - complexity_score: complexity score
    """
    if analyzed_tokens is None:
        analyzed_tokens = set()

    # Avoid cycles
    if token.i in analyzed_tokens:
        return {
            'depths': [],
            'relations': {},
            'complexity_score': 0
        }

    analyzed_tokens.add(token.i)

    # Weights for the different dependency types
    dependency_weights = {
        # Core dependencies
        'nsubj': 1.2,   # nominal subject
        'obj': 1.1,     # direct object
        'iobj': 1.1,    # indirect object
        'ROOT': 1.3,    # root

        # Modifiers
        'amod': 0.8,    # adjectival modifier
        'advmod': 0.8,  # adverbial modifier
        'nmod': 0.9,    # nominal modifier

        # Complex structures
        'csubj': 1.4,   # clausal subject
        'ccomp': 1.3,   # clausal complement
        'xcomp': 1.2,   # open clausal complement
        'advcl': 1.2,   # adverbial clause

        # Coordination and subordination
        'conj': 1.1,    # conjunct
        'cc': 0.7,      # coordination
        'mark': 0.8,    # marker

        # Other
        'det': 0.5,     # determiner
        'case': 0.5,    # case
        'punct': 0.1    # punctuation
    }

    # Initialize the results
    current_result = {
        'depths': [depth],
        'relations': {token.dep_: 1},
        'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
    }

    # Recursively analyze the children
    for child in token.children:
        child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)

        # Combine depths
        current_result['depths'].extend(child_result['depths'])

        # Combine relations
        for rel, count in child_result['relations'].items():
            current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count

        # Accumulate the complexity score
        current_result['complexity_score'] += child_result['complexity_score']

    # Additional metrics
    current_result['max_depth'] = max(current_result['depths'])
    current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
    current_result['relation_diversity'] = len(current_result['relations'])

    # Score weighted by structure type
    structure_bonus = 0

    # Bonus for complex structures
    if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
        structure_bonus += 0.3

    # Bonus for balanced coordination
    if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
        structure_bonus += 0.2

    # Bonus for rich modification
    if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
        structure_bonus += 0.2

    current_result['final_score'] = (
        current_result['complexity_score'] * (1 + structure_bonus)
    )

    return current_result

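# Shape sketch (illustrative, actual values depend on the parse): for the ROOT of a
# four-token sentence with relations ROOT, nsubj, obj and det at depths 0, 1, 1 and 2,
# the weighted sum is 1.3*1 + 1.2*2 + 1.1*2 + 0.5*3 = 7.4 and no bonus applies, so
# the returned dict would look roughly like
#     {
#         'depths': [0, 1, 1, 2],
#         'relations': {'ROOT': 1, 'nsubj': 1, 'obj': 1, 'det': 1},
#         'complexity_score': 7.4,
#         'max_depth': 2,
#         'avg_depth': 1.0,
#         'relation_diversity': 4,
#         'final_score': 7.4
#     }
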
def normalize_score(value, metric_type,
                    min_threshold=0.0, target_threshold=1.0,
                    range_factor=2.0, optimal_length=None,
                    optimal_connections=None, optimal_depth=None):
    """
    Normalizes a value using metric-specific thresholds.

    Args:
        value: value to normalize
        metric_type: metric type ('vocabulary', 'structure', 'cohesion', 'clarity')
        min_threshold: minimum acceptable value
        target_threshold: target value
        range_factor: factor used to adjust the range
        optimal_length: optimal length (optional)
        optimal_connections: optimal number of connections (optional)
        optimal_depth: optimal structural depth (optional)

    Returns:
        float: normalized value between 0 and 1
    """
    try:
        # Thresholds per metric type
        METRIC_THRESHOLDS = {
            'vocabulary': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.5
            },
            'structure': {
                'min': 0.65,
                'target': 0.80,
                'range_factor': 1.8
            },
            'cohesion': {
                'min': 0.55,
                'target': 0.70,
                'range_factor': 1.6
            },
            'clarity': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.7
            }
        }

        # Reject negative values
        if value < 0:
            logger.warning(f"Negative value received: {value}")
            return 0.0

        # Handle the zero case
        if value == 0:
            logger.warning("Zero value received")
            return 0.0

        # Get the thresholds for this metric type
        thresholds = METRIC_THRESHOLDS.get(metric_type, {
            'min': min_threshold,
            'target': target_threshold,
            'range_factor': range_factor
        })

        # Pick the reference value to use
        if optimal_depth is not None:
            reference = optimal_depth
        elif optimal_connections is not None:
            reference = optimal_connections
        elif optimal_length is not None:
            reference = optimal_length
        else:
            reference = thresholds['target']

        # Validate the reference value
        if reference <= 0:
            logger.warning(f"Invalid reference value: {reference}")
            return 0.0

        # Compute the score from the thresholds
        if value < thresholds['min']:
            # Value below the minimum
            score = (value / thresholds['min']) * 0.5  # capped at 0.5 for values below the minimum
        elif value < thresholds['target']:
            # Value between minimum and target
            range_size = thresholds['target'] - thresholds['min']
            progress = (value - thresholds['min']) / range_size
            score = 0.5 + (progress * 0.5)  # scaled between 0.5 and 1.0
        else:
            # Value reaches or exceeds the target
            score = 1.0

        # Penalize values far above the target
        if value > (thresholds['target'] * thresholds['range_factor']):
            excess = (value - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
            score = max(0.7, 1.0 - excess)  # do not drop below 0.7 for high values

        # Keep the result between 0 and 1
        return max(0.0, min(1.0, score))

    except Exception as e:
        logger.error(f"Error in normalize_score: {str(e)}")
        return 0.0

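# Worked example (illustrative): for metric_type='clarity' the thresholds are
# min=0.60, target=0.75, range_factor=1.7. A value of 0.66 lies between the minimum
# and the target, so score = 0.5 + ((0.66 - 0.60) / (0.75 - 0.60)) * 0.5 = 0.70.
# A value of 1.40 exceeds target * range_factor = 1.275, so the high-value penalty
# applies: excess = (1.40 - 0.75) / 1.275 ~= 0.51 and score = max(0.7, 1 - 0.51) = 0.70.
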
# Graph generation functions
def generate_sentence_graphs(doc):
    """Generates sentence structure visualizations"""
    fig, ax = plt.subplots(figsize=(10, 6))
    # TODO: implement the visualization
    plt.close()
    return fig

def generate_word_connections(doc):
    """Generates the word connection network"""
    fig, ax = plt.subplots(figsize=(10, 6))
    # TODO: implement the visualization
    plt.close()
    return fig

def generate_connection_paths(doc):
    """Generates connection patterns"""
    fig, ax = plt.subplots(figsize=(10, 6))
    # TODO: implement the visualization
    plt.close()
    return fig

def create_vocabulary_network(doc):
    """
    Generates the vocabulary network graph.
    """
    G = nx.Graph()

    # Create nodes for meaningful words
    words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
    word_freq = Counter(words)

    # Add nodes sized by frequency
    for word, freq in word_freq.items():
        G.add_node(word, size=freq)

    # Create edges based on co-occurrence
    window_size = 5
    for i in range(len(words) - window_size):
        window = words[i:i+window_size]
        for w1, w2 in combinations(set(window), 2):
            if G.has_edge(w1, w2):
                G[w1][w2]['weight'] += 1
            else:
                G.add_edge(w1, w2, weight=1)

    # Create the visualization
    fig, ax = plt.subplots(figsize=(12, 8))
    pos = nx.spring_layout(G)

    # Draw nodes
    nx.draw_networkx_nodes(G, pos,
                           node_size=[G.nodes[node]['size']*100 for node in G.nodes],
                           node_color='lightblue',
                           alpha=0.7)

    # Draw edges
    nx.draw_networkx_edges(G, pos,
                           width=[G[u][v]['weight']*0.5 for u, v in G.edges],
                           alpha=0.5)

    # Add labels
    nx.draw_networkx_labels(G, pos)

    plt.title("Vocabulary Network")
    plt.axis('off')
    return fig

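# Rendering sketch (illustrative; assumes this module is called from a Streamlit page):
#     fig = create_vocabulary_network(doc)
#     st.pyplot(fig)
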
def create_syntax_complexity_graph(doc):
    """
    Generates the syntactic complexity arc diagram.
    Shows the dependency structure, with colors based on complexity.
    """
    try:
        # Prepare the data for the visualization
        sentences = list(doc.sents)
        if not sentences:
            return None

        # Create the figure
        fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))

        # Colors for the different depth levels
        depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))

        y_offset = 0
        max_x = 0

        for sent in sentences:
            words = [token.text for token in sent]
            x_positions = range(len(words))
            max_x = max(max_x, len(words))

            # Draw the words
            plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
            plt.scatter(x_positions, [y_offset] * len(words), alpha=0)

            # Add the text
            for i, word in enumerate(words):
                plt.annotate(word, (i, y_offset), xytext=(0, -10),
                             textcoords='offset points', ha='center')

            # Draw the dependency arcs
            for token in sent:
                if token.dep_ != "ROOT":
                    # Compute the dependency depth
                    depth = 0
                    current = token
                    while current.head != current:
                        depth += 1
                        current = current.head

                    # Arc endpoints
                    start = token.i - sent[0].i
                    end = token.head.i - sent[0].i

                    # Arc height based on the distance between words
                    height = 0.5 * abs(end - start)

                    # Color based on depth
                    color = depth_colors[min(depth, len(depth_colors)-1)]

                    # Create the arc
                    arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
                                      width=abs(end - start),
                                      height=height,
                                      angle=0,
                                      theta1=0,
                                      theta2=180,
                                      color=color,
                                      alpha=0.6)
                    ax.add_patch(arc)

            y_offset -= 2

        # Configure the plot
        plt.xlim(-1, max_x)
        plt.ylim(y_offset - 1, 1)
        plt.axis('off')
        plt.title("Syntactic Complexity")

        return fig

    except Exception as e:
        logger.error(f"Error in create_syntax_complexity_graph: {str(e)}")
        return None


def create_cohesion_heatmap(doc):
    """Generates a heatmap showing the cohesion between paragraphs/sentences."""
    try:
        sentences = list(doc.sents)
        n_sentences = len(sentences)

        if n_sentences < 2:
            return None

        similarity_matrix = np.zeros((n_sentences, n_sentences))

        for i in range(n_sentences):
            for j in range(n_sentences):
                sent1_lemmas = {token.lemma_ for token in sentences[i]
                                if token.is_alpha and not token.is_stop}
                sent2_lemmas = {token.lemma_ for token in sentences[j]
                                if token.is_alpha and not token.is_stop}

                if sent1_lemmas and sent2_lemmas:
                    intersection = len(sent1_lemmas & sent2_lemmas)
                    union = len(sent1_lemmas | sent2_lemmas)
                    similarity_matrix[i, j] = intersection / union if union > 0 else 0

        # Create the visualization
        fig, ax = plt.subplots(figsize=(10, 8))

        sns.heatmap(similarity_matrix,
                    cmap='YlOrRd',
                    square=True,
                    xticklabels=False,
                    yticklabels=False,
                    cbar_kws={'label': 'Cohesion'},
                    ax=ax)

        plt.title("Textual Cohesion Map")
        plt.xlabel("Sentences")
        plt.ylabel("Sentences")

        plt.tight_layout()
        return fig

    except Exception as e:
        logger.error(f"Error in create_cohesion_heatmap: {str(e)}")
        return None
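

# Minimal smoke-test sketch (illustrative, not part of the committed module): the spaCy
# model name below is an assumption; any pipeline with a parser and lemmatizer should work.
if __name__ == "__main__":
    import spacy

    nlp = spacy.load("es_core_news_md")  # assumed model name
    sample = (
        "La escritura académica requiere práctica. "
        "Además, una buena estructura mejora la claridad del texto."
    )
    doc = nlp(sample)

    dimensions = analyze_text_dimensions(doc)
    for name, data in dimensions.items():
        print(f"{name}: {data['normalized_score']:.2f}")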