AIdeaText committed
Commit: 6fd4adc (verified)
1 Parent(s): c902818

Update modules/studentact/current_situation_analysis.py

modules/studentact/current_situation_analysis.py CHANGED
@@ -232,22 +232,81 @@ def analyze_cohesion(doc):
             logger.warning("Text too short for cohesion analysis")
             return 0.0
 
-        connections = 0
+        # 1. Lexical connection analysis
+        lexical_connections = 0
+        total_possible_connections = 0
+
         for i in range(len(sentences)-1):
-            sent1_words = {token.lemma_ for token in sentences[i]}
-            sent2_words = {token.lemma_ for token in sentences[i+1]}
-            connections += len(sent1_words.intersection(sent2_words))
+            # Keep significant lemmas only (no stopwords)
+            sent1_words = {token.lemma_ for token in sentences[i]
+                           if token.is_alpha and not token.is_stop}
+            sent2_words = {token.lemma_ for token in sentences[i+1]
+                           if token.is_alpha and not token.is_stop}
 
-        # Validate that connections exist before normalizing
-        if connections == 0:
-            logger.warning("No connections found between sentences")
-            return 0.0
+            if sent1_words and sent2_words:  # Check that neither set is empty
+                intersection = len(sent1_words.intersection(sent2_words))
+                total_possible = min(len(sent1_words), len(sent2_words))
+
+                if total_possible > 0:
+                    lexical_score = intersection / total_possible
+                    lexical_connections += lexical_score
+                    total_possible_connections += 1
+
+        # 2. Connector analysis
+        connector_count = 0
+        connector_types = {
+            'CCONJ': 1.0,  # Coordinating conjunctions
+            'SCONJ': 1.2,  # Subordinating conjunctions
+            'ADV': 0.8     # Connective adverbs
+        }
+
+        for token in doc:
+            if (token.pos_ in connector_types and
+                token.dep_ in ['cc', 'mark', 'advmod'] and
+                not token.is_stop):
+                connector_count += connector_types[token.pos_]
+
+        # 3. Normalized score computation
+        if total_possible_connections > 0:
+            lexical_cohesion = lexical_connections / total_possible_connections
+        else:
+            lexical_cohesion = 0
 
-        return normalize_score(connections, optimal_connections=max(5, len(sentences) * 0.2))
+        if len(sentences) > 1:
+            connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
+        else:
+            connector_cohesion = 0
+
+        # 4. Weighted final score
+        weights = {
+            'lexical': 0.7,
+            'connectors': 0.3
+        }
+
+        cohesion_score = (
+            weights['lexical'] * lexical_cohesion +
+            weights['connectors'] * connector_cohesion
+        )
+
+        # 5. Diagnostic logging
+        logger.info(f"""
+        Cohesion analysis:
+        - Lexical connections found: {lexical_connections}
+        - Possible connections: {total_possible_connections}
+        - Lexical cohesion score: {lexical_cohesion}
+        - Connectors found: {connector_count}
+        - Connector cohesion score: {connector_cohesion}
+        - Final score: {cohesion_score}
+        """)
+
+        return cohesion_score
+
     except Exception as e:
         logger.error(f"Error in analyze_cohesion: {str(e)}")
         return 0.0
 
+
+
 def analyze_structure(doc):
     """Analyzes structural complexity"""
     try:
@@ -272,12 +331,111 @@ def analyze_structure(doc):
         return 0.0
 
 # Analysis helper functions
-def get_dependency_depths(token, depth=0):
-    """Gets the dependency depths"""
-    depths = [depth]
+
+def get_dependency_depths(token, depth=0, analyzed_tokens=None):
+    """
+    Analyzes the depth and quality of dependency relations.
+
+    Args:
+        token: Token to analyze
+        depth: Current depth in the tree
+        analyzed_tokens: Set used to avoid cycles in the analysis
+
+    Returns:
+        dict: Detailed information about the dependencies
+            - depths: List of depths
+            - relations: Dictionary of the relation types found
+            - complexity_score: Complexity score
+    """
+    if analyzed_tokens is None:
+        analyzed_tokens = set()
+
+    # Avoid cycles
+    if token.i in analyzed_tokens:
+        return {
+            'depths': [],
+            'relations': {},
+            'complexity_score': 0
+        }
+
+    analyzed_tokens.add(token.i)
+
+    # Weights for the different dependency types
+    dependency_weights = {
+        # Core dependencies
+        'nsubj': 1.2,   # Nominal subject
+        'obj': 1.1,     # Direct object
+        'iobj': 1.1,    # Indirect object
+        'ROOT': 1.3,    # Root
+
+        # Modifiers
+        'amod': 0.8,    # Adjectival modifier
+        'advmod': 0.8,  # Adverbial modifier
+        'nmod': 0.9,    # Nominal modifier
+
+        # Complex structures
+        'csubj': 1.4,   # Clausal subject
+        'ccomp': 1.3,   # Clausal complement
+        'xcomp': 1.2,   # Open clausal complement
+        'advcl': 1.2,   # Adverbial clause
+
+        # Coordination and subordination
+        'conj': 1.1,    # Conjunction
+        'cc': 0.7,      # Coordination
+        'mark': 0.8,    # Marker
+
+        # Others
+        'det': 0.5,     # Determiner
+        'case': 0.5,    # Case
+        'punct': 0.1    # Punctuation
+    }
+
+    # Initialize results
+    current_result = {
+        'depths': [depth],
+        'relations': {token.dep_: 1},
+        'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
+    }
+
+    # Recursively analyze children
     for child in token.children:
-        depths.extend(get_dependency_depths(child, depth + 1))
-    return depths
+        child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)
+
+        # Merge depths
+        current_result['depths'].extend(child_result['depths'])
+
+        # Merge relation counts
+        for rel, count in child_result['relations'].items():
+            current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count
+
+        # Accumulate the complexity score
+        current_result['complexity_score'] += child_result['complexity_score']
+
+    # Compute additional metrics
+    current_result['max_depth'] = max(current_result['depths'])
+    current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
+    current_result['relation_diversity'] = len(current_result['relations'])
+
+    # Compute a score weighted by structure type
+    structure_bonus = 0
+
+    # Bonus for complex structures
+    if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
+        structure_bonus += 0.3
+
+    # Bonus for balanced coordination
+    if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
+        structure_bonus += 0.2
+
+    # Bonus for rich modification
+    if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
+        structure_bonus += 0.2
+
+    current_result['final_score'] = (
+        current_result['complexity_score'] * (1 + structure_bonus)
+    )
+
+    return current_result
 
 def normalize_score(value, optimal_value=1.0, range_factor=2.0, optimal_length=None,
                     optimal_connections=None, optimal_depth=None):
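Usage note (not part of the commit): a minimal sketch of how the rewritten analyze_cohesion could be exercised. The import path mirrors the file location and the spaCy model name is an assumption; any pipeline with a parser and lemmatizer should behave the same way.

import spacy

from modules.studentact.current_situation_analysis import analyze_cohesion

# Hypothetical model choice; the repo may load a different pipeline.
nlp = spacy.load("es_core_news_sm")

doc = nlp(
    "El estudiante redactó un ensayo. El ensayo examina la cohesión textual. "
    "Además, esa cohesión depende de conectores y de la repetición léxica."
)

# The new implementation blends two normalized components:
# 0.7 * lexical overlap between adjacent sentences + 0.3 * connector density.
# Both components lie in [0, 1], so the returned score does too.
score = analyze_cohesion(doc)
print(f"cohesion score: {score:.3f}")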
 
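A matching sketch for the new get_dependency_depths, under the same assumptions about import path and model. It is called once per sentence root; the hunk above suggests the resulting scores are later fed through normalize_score (note its optimal_depth parameter) to bring them onto a comparable scale.

import spacy

from modules.studentact.current_situation_analysis import get_dependency_depths

nlp = spacy.load("es_core_news_sm")  # hypothetical model choice
doc = nlp("Aunque llovía, el equipo que entrenaba cada día salió a correr.")

# Analyze the first sentence starting from its root token.
root = next(doc.sents).root
result = get_dependency_depths(root)

print(result['max_depth'])           # longest path in the parse tree
print(result['avg_depth'])           # mean depth across all tokens
print(result['relation_diversity'])  # number of distinct dependency labels
print(result['final_score'])         # complexity_score scaled by structure bonuses

Because complexity_score accumulates over every token, final_score grows with sentence length; comparisons across texts are only meaningful after normalization.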