AIdeaText committed on
Commit
f9997f7
verified
1 Parent(s): 7eef08f

Update modules/studentact/current_situation_analysis.py

Browse files
modules/studentact/current_situation_analysis.py CHANGED
@@ -143,41 +143,121 @@ def analyze_clarity(doc):
143
  logger.error(f"Error en analyze_clarity: {str(e)}")
144
  return 0.0, {}
145
 
146
- def analyze_reference_clarity(doc):
 
147
  """
148
- Analiza la claridad de las referencias en el texto
149
  """
150
  try:
151
- # Contar referencias anafóricas
152
- reference_count = 0
153
- unclear_references = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  for token in doc:
156
- # Detectar pronombres y determinantes
157
- if token.pos_ in ['PRON', 'DET']:
158
- reference_count += 1
159
 
160
- # Verificar si tiene antecedente claro
161
- has_antecedent = False
162
- for ancestor in token.ancestors:
163
- if ancestor.pos_ == 'NOUN':
164
- has_antecedent = True
165
- break
166
-
167
- if not has_antecedent:
168
- unclear_references += 1
169
-
170
- # Calcular score
171
- if reference_count == 0:
172
- return 1.0 # No hay referencias = claridad máxima
 
 
173
 
174
- clarity = 1.0 - (unclear_references / reference_count)
175
- return max(0.0, min(1.0, clarity))
 
 
 
 
 
 
 
 
 
 
 
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  except Exception as e:
178
- logger.error(f"Error en analyze_reference_clarity: {str(e)}")
179
- return 0.0
180
 
 
181
  def analyze_vocabulary_diversity(doc):
182
  """Análisis mejorado de la diversidad y calidad del vocabulario"""
183
  try:
@@ -547,9 +627,6 @@ def normalize_score(value, metric_type,
547
  logger.error(f"Error en normalize_score: {str(e)}")
548
  return 0.0
549
 
550
-
551
-
552
-
553
  ##############################################################
554
 
555
  # Funciones de generación de gráficos
 
143
  logger.error(f"Error en analyze_clarity: {str(e)}")
144
  return 0.0, {}
145
 
146
+ ###################################################################################3
147
+ def analyze_clarity(doc):
148
  """
149
+ Analiza la claridad del texto considerando múltiples factores.
150
  """
151
  try:
152
+ sentences = list(doc.sents)
153
+ if not sentences:
154
+ return 0.0, {}
155
+
156
+ # 1. Longitud de oraciones
157
+ sentence_lengths = [len(sent) for sent in sentences]
158
+ avg_length = sum(sentence_lengths) / len(sentences)
159
+
160
+ # Normalizar usando los umbrales definidos para clarity
161
+ length_score = normalize_score(
162
+ value=avg_length,
163
+ metric_type='clarity',
164
+ optimal_length=20, # Una oración ideal tiene ~20 palabras
165
+ min_threshold=0.60, # Consistente con METRIC_THRESHOLDS
166
+ target_threshold=0.75 # Consistente con METRIC_THRESHOLDS
167
+ )
168
+
169
+ # 2. Análisis de conectores
170
+ connector_count = 0
171
+ connector_weights = {
172
+ 'CCONJ': 1.0, # Coordinantes
173
+ 'SCONJ': 1.2, # Subordinantes
174
+ 'ADV': 0.8 # Adverbios conectivos
175
+ }
176
 
177
  for token in doc:
178
+ if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
179
+ connector_count += connector_weights[token.pos_]
 
180
 
181
+ # Normalizar conectores por oración
182
+ connectors_per_sentence = connector_count / len(sentences) if sentences else 0
183
+ connector_score = normalize_score(
184
+ value=connectors_per_sentence,
185
+ metric_type='clarity',
186
+ optimal_connections=1.5, # ~1.5 conectores por oración es óptimo
187
+ min_threshold=0.60,
188
+ target_threshold=0.75
189
+ )
190
+
191
+ # 3. Complejidad estructural
192
+ clause_count = 0
193
+ for sent in sentences:
194
+ verbs = [token for token in sent if token.pos_ == 'VERB']
195
+ clause_count += len(verbs)
196
 
197
+ complexity_raw = clause_count / len(sentences) if sentences else 0
198
+ complexity_score = normalize_score(
199
+ value=complexity_raw,
200
+ metric_type='clarity',
201
+ optimal_depth=2.0, # ~2 cláusulas por oración es óptimo
202
+ min_threshold=0.60,
203
+ target_threshold=0.75
204
+ )
205
+
206
+ # 4. Densidad léxica
207
+ content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
208
+ total_words = len([token for token in doc if token.is_alpha])
209
+ density = content_words / total_words if total_words > 0 else 0
210
 
211
+ density_score = normalize_score(
212
+ value=density,
213
+ metric_type='clarity',
214
+ optimal_connections=0.6, # 60% de palabras de contenido es óptimo
215
+ min_threshold=0.60,
216
+ target_threshold=0.75
217
+ )
218
+
219
+ # Score final ponderado
220
+ weights = {
221
+ 'length': 0.3,
222
+ 'connectors': 0.3,
223
+ 'complexity': 0.2,
224
+ 'density': 0.2
225
+ }
226
+
227
+ clarity_score = (
228
+ weights['length'] * length_score +
229
+ weights['connectors'] * connector_score +
230
+ weights['complexity'] * complexity_score +
231
+ weights['density'] * density_score
232
+ )
233
+
234
+ details = {
235
+ 'length_score': length_score,
236
+ 'connector_score': connector_score,
237
+ 'complexity_score': complexity_score,
238
+ 'density_score': density_score,
239
+ 'avg_sentence_length': avg_length,
240
+ 'connectors_per_sentence': connectors_per_sentence,
241
+ 'density': density
242
+ }
243
+
244
+ # Agregar logging para diagnóstico
245
+ logger.info(f"""
246
+ Scores de Claridad:
247
+ - Longitud: {length_score:.2f} (avg={avg_length:.1f} palabras)
248
+ - Conectores: {connector_score:.2f} (avg={connectors_per_sentence:.1f} por oraci贸n)
249
+ - Complejidad: {complexity_score:.2f} (avg={complexity_raw:.1f} cl谩usulas)
250
+ - Densidad: {density_score:.2f} ({density*100:.1f}% palabras de contenido)
251
+ - Score Final: {clarity_score:.2f}
252
+ """)
253
+
254
+ return clarity_score, details
255
+
256
  except Exception as e:
257
+ logger.error(f"Error en analyze_clarity: {str(e)}")
258
+ return 0.0, {}
259
 
260
+ ##########################################################################3
261
  def analyze_vocabulary_diversity(doc):
262
  """Análisis mejorado de la diversidad y calidad del vocabulario"""
263
  try:
 
627
  logger.error(f"Error en normalize_score: {str(e)}")
628
  return 0.0
629
 
 
 
 
630
  ##############################################################
631
 
632
  # Funciones de generación de gráficos