AIdeaText committed
Commit 5804eec (verified)
Parent: 0a86702

Update modules/studentact/current_situation_analysis.py

modules/studentact/current_situation_analysis.py CHANGED
@@ -12,101 +12,77 @@ import logging
 
 logger = logging.getLogger(__name__)
 
+###################################################################
 def analyze_text_dimensions(doc):
     """
     Analyze the main dimensions of the text.
-
-    Args:
-        doc: document processed by spaCy
-
-    Returns:
-        dict: analysis metrics
     """
     try:
         # Vocabulary analysis
-        vocab_score = analyze_vocabulary_diversity(doc)
-        vocab_normalized = normalize_score(
-            value=vocab_score,
-            optimal_connections=len(doc) * 0.4  # 40% of all words as optimal connections
-        )
-
+        vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
+
         # Structure analysis
         struct_score = analyze_structure(doc)
-        struct_normalized = normalize_score(
-            value=struct_score,
-            optimal_length=20  # Optimal average sentence length
-        )
-
+
         # Cohesion analysis
         cohesion_score = analyze_cohesion(doc)
-        cohesion_normalized = normalize_score(
-            value=cohesion_score,
-            optimal_value=0.7  # 70% cohesion as the optimal value
-        )
-
+
         # Clarity analysis
-        clarity_score = analyze_clarity(doc)
-        clarity_normalized = normalize_score(
-            value=clarity_score,
-            optimal_value=0.8  # 80% clarity as the optimal value
-        )
+        clarity_score, clarity_details = analyze_clarity(doc)
 
         return {
             'vocabulary': {
-                'raw_score': vocab_score,
-                'normalized_score': vocab_normalized
+                'normalized_score': vocab_score,
+                'details': vocab_details
             },
             'structure': {
-                'raw_score': struct_score,
-                'normalized_score': struct_normalized
+                'normalized_score': struct_score,
+                'details': None  # No details for now
             },
             'cohesion': {
-                'raw_score': cohesion_score,
-                'normalized_score': cohesion_normalized
+                'normalized_score': cohesion_score,
+                'details': None  # No details for now
             },
             'clarity': {
-                'raw_score': clarity_score,
-                'normalized_score': clarity_normalized
+                'normalized_score': clarity_score,
+                'details': clarity_details
             }
         }
 
     except Exception as e:
         logger.error(f"Error en analyze_text_dimensions: {str(e)}")
-        raise
+        return {
+            'vocabulary': {'normalized_score': 0.0, 'details': {}},
+            'structure': {'normalized_score': 0.0, 'details': {}},
+            'cohesion': {'normalized_score': 0.0, 'details': {}},
+            'clarity': {'normalized_score': 0.0, 'details': {}}
+        }
 
+####################################################################
 def analyze_clarity(doc):
     """
-    Analyze text clarity, taking several factors into account:
-    - Sentence length and variation
-    - Connector usage
-    - Structural complexity
-    - Referential clarity
-    - Lexical density
+    Analyze text clarity, taking several factors into account.
     """
     try:
         # 1. Sentence analysis
         sentences = list(doc.sents)
         if not sentences:
-            return 0.0
+            return 0.0, {}
 
         # Sentence length
         sentence_lengths = [len(sent) for sent in sentences]
         avg_length = sum(sentence_lengths) / len(sentences)
         length_variation = np.std(sentence_lengths) if len(sentences) > 1 else 0
 
-        # Penalize very short or very long sentences
-        length_score = normalize_score(
-            avg_length,
-            optimal_length=20,  # Optimal length
-            range_factor=1.5  # Tolerance factor
-        )
+        # Normalize length
+        length_score = normalize_score(avg_length, optimal_length=20)
 
         # 2. Connector analysis
         connector_count = 0
         connector_types = {
-            'CCONJ': 0.8,  # Coordinating conjunctions
-            'SCONJ': 1.0,  # Subordinating conjunctions
-            'ADV': 0.6  # Connective adverbs
+            'CCONJ': 0.8,
+            'SCONJ': 1.0,
+            'ADV': 0.6
         }
 
         for token in doc:
@@ -121,52 +97,33 @@ def analyze_clarity(doc):
             verbs = [token for token in sent if token.pos_ == 'VERB']
             clause_count += len(verbs)
 
-        complexity_score = normalize_score(
-            clause_count / len(sentences),
-            optimal_value=2.0,  # Optimal average number of clauses per sentence
-            range_factor=1.5
-        )
-
-        # 4. Referential clarity
-        reference_score = analyze_reference_clarity(doc)
+        complexity_raw = clause_count / len(sentences) if len(sentences) > 0 else 0
+        complexity_score = normalize_score(complexity_raw, optimal_value=2.0)
 
-        # 5. Lexical density
+        # 4. Lexical density
         content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
-        function_words = len([token for token in doc if token.pos_ not in ['NOUN', 'VERB', 'ADJ', 'ADV']])
+        total_words = len([token for token in doc])
         density_score = normalize_score(
-            content_words / (content_words + function_words) if (content_words + function_words) > 0 else 0,
-            optimal_value=0.6,  # 60% content words is optimal
-            range_factor=1.5
+            content_words / total_words if total_words > 0 else 0,
+            optimal_value=0.6
         )
 
-        # Weights for each factor
-        weights = {
-            'length': 0.2,
-            'connectors': 0.2,
-            'complexity': 0.2,
-            'reference': 0.2,
-            'density': 0.2
-        }
-
-        # Weighted final score
+        # Final score
         clarity_score = (
-            weights['length'] * length_score +
-            weights['connectors'] * connector_score +
-            weights['complexity'] * complexity_score +
-            weights['reference'] * reference_score +
-            weights['density'] * density_score
+            0.3 * length_score +
+            0.3 * connector_score +
+            0.2 * complexity_score +
+            0.2 * density_score
         )
 
-        # Detailed diagnostic information
         details = {
             'length_score': length_score,
             'connector_score': connector_score,
             'complexity_score': complexity_score,
-            'reference_score': reference_score,
            'density_score': density_score,
             'avg_sentence_length': avg_length,
             'length_variation': length_variation,
-            'connectors_per_sentence': connector_count / len(sentences)
+            'connectors_per_sentence': connector_count / len(sentences) if len(sentences) > 0 else 0
         }
 
         return clarity_score, details
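
For context, a minimal sketch of how a caller might consume the new return shape of analyze_text_dimensions after this commit. It is illustrative only: the spaCy model name and the import path are assumptions, and the module's own helpers (normalize_score, analyze_structure, analyze_cohesion, analyze_vocabulary_diversity) are defined elsewhere in current_situation_analysis.py and are not shown in this diff.

# Hypothetical usage sketch -- the model name and import path are assumptions.
import spacy
from modules.studentact.current_situation_analysis import analyze_text_dimensions

nlp = spacy.load("es_core_news_sm")  # assumed Spanish pipeline
doc = nlp("La claridad de un texto depende de la longitud de sus oraciones y de sus conectores.")

metrics = analyze_text_dimensions(doc)

# Every dimension now exposes the same two keys: 'normalized_score' and 'details'.
for dimension, data in metrics.items():
    print(dimension, round(data['normalized_score'], 2))

# 'clarity' carries the per-factor breakdown produced by analyze_clarity().
clarity_details = metrics['clarity']['details']
if clarity_details:
    print("avg sentence length:", clarity_details['avg_sentence_length'])

Note that the except branch now returns zeroed scores instead of re-raising, so a caller like this no longer needs its own try/except around the call.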