Spaces:

AIdeaText
/

v3

Sleeping

App Files Files Community

AIdeaText committed on Dec 20, 2024

Commit

6fd4adc

verified ·

1 Parent(s): c902818

Update modules/studentact/current_situation_analysis.py

Browse files

Files changed (1) hide show

modules/studentact/current_situation_analysis.py +172 -14

modules/studentact/current_situation_analysis.py CHANGED Viewed

@@ -232,22 +232,81 @@ def analyze_cohesion(doc):
             logger.warning("Texto demasiado corto para análisis de cohesión")
             return 0.0
-        connections = 0
         for i in range(len(sentences)-1):
-            sent1_words = {token.lemma_ for token in sentences[i]}
-            sent2_words = {token.lemma_ for token in sentences[i+1]}
-            connections += len(sent1_words.intersection(sent2_words))
-        # Validar que haya conexiones antes de normalizar
-        if connections == 0:
-            logger.warning("No se encontraron conexiones entre oraciones")
-            return 0.0
-        return normalize_score(connections, optimal_connections=max(5, len(sentences) * 0.2))
     except Exception as e:
         logger.error(f"Error en analyze_cohesion: {str(e)}")
         return 0.0
 def analyze_structure(doc):
     """Analiza la complejidad estructural"""
     try:
@@ -272,12 +331,111 @@ def analyze_structure(doc):
         return 0.0
 # Funciones auxiliares de análisis
-def get_dependency_depths(token, depth=0):
-    """Obtiene las profundidades de dependencia"""
-    depths = [depth]
     for child in token.children:
-        depths.extend(get_dependency_depths(child, depth + 1))
-    return depths
 def normalize_score(value, optimal_value=1.0, range_factor=2.0, optimal_length=None,
                    optimal_connections=None, optimal_depth=None):

             logger.warning("Texto demasiado corto para análisis de cohesión")
             return 0.0
+        # 1. Análisis de conexiones léxicas
+        lexical_connections = 0
+        total_possible_connections = 0
         for i in range(len(sentences)-1):
+            # Obtener lemmas significativos (no stopwords)
+            sent1_words = {token.lemma_ for token in sentences[i]
+                         if token.is_alpha and not token.is_stop}
+            sent2_words = {token.lemma_ for token in sentences[i+1]
+                         if token.is_alpha and not token.is_stop}
+            if sent1_words and sent2_words:  # Verificar que ambos conjuntos no estén vacíos
+                intersection = len(sent1_words.intersection(sent2_words))
+                total_possible = min(len(sent1_words), len(sent2_words))
+                if total_possible > 0:
+                    lexical_score = intersection / total_possible
+                    lexical_connections += lexical_score
+                    total_possible_connections += 1
+        # 2. Análisis de conectores
+        connector_count = 0
+        connector_types = {
+            'CCONJ': 1.0,  # Coordinantes
+            'SCONJ': 1.2,  # Subordinantes
+            'ADV': 0.8     # Adverbios conectivos
+        }
+        for token in doc:
+            if (token.pos_ in connector_types and
+                token.dep_ in ['cc', 'mark', 'advmod'] and
+                not token.is_stop):
+                connector_count += connector_types[token.pos_]
+        # 3. Cálculo de scores normalizados
+        if total_possible_connections > 0:
+            lexical_cohesion = lexical_connections / total_possible_connections
+        else:
+            lexical_cohesion = 0
+        if len(sentences) > 1:
+            connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
+        else:
+            connector_cohesion = 0
+        # 4. Score final ponderado
+        weights = {
+            'lexical': 0.7,
+            'connectors': 0.3
+        }
+        cohesion_score = (
+            weights['lexical'] * lexical_cohesion +
+            weights['connectors'] * connector_cohesion
+        )
+        # 5. Logging para diagnóstico
+        logger.info(f"""
+            Análisis de Cohesión:
+            - Conexiones léxicas encontradas: {lexical_connections}
+            - Conexiones posibles: {total_possible_connections}
+            - Lexical cohesion score: {lexical_cohesion}
+            - Conectores encontrados: {connector_count}
+            - Connector cohesion score: {connector_cohesion}
+            - Score final: {cohesion_score}
+        """)
+        return cohesion_score
     except Exception as e:
         logger.error(f"Error en analyze_cohesion: {str(e)}")
         return 0.0
 def analyze_structure(doc):
     """Analiza la complejidad estructural"""
     try:
         return 0.0
 # Funciones auxiliares de análisis
# Weight applied to each dependency label when scoring structural complexity.
# Built once at import time instead of on every recursive call.
_DEPENDENCY_WEIGHTS = {
    # Core arguments
    'nsubj': 1.2,    # nominal subject
    'obj': 1.1,      # direct object
    'iobj': 1.1,     # indirect object
    'ROOT': 1.3,     # sentence root
    # Modifiers
    'amod': 0.8,     # adjectival modifier
    'advmod': 0.8,   # adverbial modifier
    'nmod': 0.9,     # nominal modifier
    # Clausal (complex) structures
    'csubj': 1.4,    # clausal subject
    'ccomp': 1.3,    # clausal complement
    'xcomp': 1.2,    # open clausal complement
    'advcl': 1.2,    # adverbial clause
    # Coordination and subordination
    'conj': 1.1,     # conjunct
    'cc': 0.7,       # coordinating conjunction
    'mark': 0.8,     # subordination marker
    # Other
    'det': 0.5,      # determiner
    'case': 0.5,     # case marker
    'punct': 0.1,    # punctuation
}

# Weight used for any dependency label missing from _DEPENDENCY_WEIGHTS.
_DEFAULT_DEPENDENCY_WEIGHT = 0.5

# Modifier labels; seeing at least two distinct ones earns a bonus.
_MODIFIER_RELATIONS = frozenset({'amod', 'advmod', 'nmod'})


def get_dependency_depths(token, depth=0, analyzed_tokens=None):
    """
    Walk the dependency subtree rooted at ``token`` and summarise it.

    Args:
        token: Node to analyse. Only ``token.i``, ``token.dep_`` and
            ``token.children`` are read.
        depth: Depth of ``token`` in the tree (0 for the starting node).
        analyzed_tokens: Set of ``token.i`` values already visited, used to
            guard against cycles. It is mutated in place; a new set is
            created when ``None`` is passed.

    Returns:
        dict with the following keys:
            - depths: list with the depth of every visited node
            - relations: dict mapping dependency label -> occurrence count
            - complexity_score: sum over nodes of ``weight * (depth + 1)``
            - max_depth: largest value in ``depths``
            - avg_depth: arithmetic mean of ``depths``
            - relation_diversity: number of distinct dependency labels
            - final_score: ``complexity_score`` scaled by structure bonuses

        If ``token`` was already visited, the same keys are returned with
        empty/zero values, so callers can read any key unconditionally.
    """
    if analyzed_tokens is None:
        analyzed_tokens = set()

    # Cycle guard: return a neutral result with the full key set. The
    # original early return omitted the derived metrics, so a caller reading
    # e.g. 'final_score' on this path raised KeyError.
    if token.i in analyzed_tokens:
        return {
            'depths': [],
            'relations': {},
            'complexity_score': 0,
            'max_depth': 0,
            'avg_depth': 0.0,
            'relation_diversity': 0,
            'final_score': 0,
        }
    analyzed_tokens.add(token.i)

    # Contribution of this node alone; deeper nodes weigh more.
    weight = _DEPENDENCY_WEIGHTS.get(token.dep_, _DEFAULT_DEPENDENCY_WEIGHT)
    depths = [depth]
    relations = {token.dep_: 1}
    complexity_score = weight * (depth + 1)

    # Merge every child's subtree summary into this node's summary.
    for child in token.children:
        child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)
        depths.extend(child_result['depths'])
        for rel, count in child_result['relations'].items():
            relations[rel] = relations.get(rel, 0) + count
        complexity_score += child_result['complexity_score']

    # Bonuses reward particular structures found anywhere in the subtree.
    structure_bonus = 0
    # Clausal subject or clausal complement present.
    if 'csubj' in relations or 'ccomp' in relations:
        structure_bonus += 0.3
    # Coordination that has both a conjunct and its conjunction.
    if 'conj' in relations and 'cc' in relations:
        structure_bonus += 0.2
    # At least two different kinds of modifier.
    if len(_MODIFIER_RELATIONS.intersection(relations)) >= 2:
        structure_bonus += 0.2

    return {
        'depths': depths,
        'relations': relations,
        'complexity_score': complexity_score,
        # depths always contains at least this node's own depth, so max()
        # and the division below are safe.
        'max_depth': max(depths),
        'avg_depth': sum(depths) / len(depths),
        'relation_diversity': len(relations),
        'final_score': complexity_score * (1 + structure_bonus),
    }
 def normalize_score(value, optimal_value=1.0, range_factor=2.0, optimal_length=None,
                    optimal_connections=None, optimal_depth=None):