AIdeaText committed on
Commit
4fe0b02
verified
1 Parent(s): 0dfdca3

Update modules/studentact/current_situation_analysis.py

Browse files
modules/studentact/current_situation_analysis.py CHANGED
@@ -75,16 +75,197 @@ def analyze_text_dimensions(doc):
75
  raise
76
 
77
  def analyze_clarity(doc):
78
- """Analiza la claridad basada en longitud de oraciones"""
79
- sentences = list(doc.sents)
80
- avg_length = sum(len(sent) for sent in sentences) / len(sentences)
81
- return normalize_score(avg_length, optimal_length=20)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def analyze_vocabulary_diversity(doc):
84
- """Analiza la diversidad del vocabulario"""
85
- unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
86
- total_words = len([token for token in doc if token.is_alpha])
87
- return len(unique_lemmas) / total_words if total_words > 0 else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  def analyze_cohesion(doc):
90
  """Analiza la cohesi贸n textual"""
 
75
  raise
76
 
77
  def analyze_clarity(doc):
78
+ """
79
+ Analiza la claridad del texto considerando m煤ltiples factores:
80
+ - Longitud y variaci贸n de oraciones
81
+ - Uso de conectores
82
+ - Complejidad estructural
83
+ - Claridad referencial
84
+ - Densidad l茅xica
85
+ """
86
+ try:
87
+ # 1. An谩lisis de oraciones
88
+ sentences = list(doc.sents)
89
+ if not sentences:
90
+ return 0.0
91
+
92
+ # Longitud de oraciones
93
+ sentence_lengths = [len(sent) for sent in sentences]
94
+ avg_length = sum(sentence_lengths) / len(sentences)
95
+ length_variation = np.std(sentence_lengths) if len(sentences) > 1 else 0
96
+
97
+ # Penalizar oraciones muy cortas o muy largas
98
+ length_score = normalize_score(
99
+ avg_length,
100
+ optimal_length=20, # Longitud 贸ptima
101
+ range_factor=1.5 # Factor de tolerancia
102
+ )
103
+
104
+ # 2. An谩lisis de conectores
105
+ connector_count = 0
106
+ connector_types = {
107
+ 'CCONJ': 0.8, # Coordinantes
108
+ 'SCONJ': 1.0, # Subordinantes
109
+ 'ADV': 0.6 # Adverbios conectivos
110
+ }
111
+
112
+ for token in doc:
113
+ if token.pos_ in connector_types and token.dep_ in ['cc', 'mark', 'advmod']:
114
+ connector_count += connector_types[token.pos_]
115
+
116
+ connector_score = min(1.0, connector_count / (len(sentences) * 0.8))
117
+
118
+ # 3. Complejidad estructural
119
+ clause_count = 0
120
+ for sent in sentences:
121
+ verbs = [token for token in sent if token.pos_ == 'VERB']
122
+ clause_count += len(verbs)
123
+
124
+ complexity_score = normalize_score(
125
+ clause_count / len(sentences),
126
+ optimal_value=2.0, # Promedio 贸ptimo de cl谩usulas por oraci贸n
127
+ range_factor=1.5
128
+ )
129
+
130
+ # 4. Claridad referencial
131
+ reference_score = analyze_reference_clarity(doc)
132
+
133
+ # 5. Densidad l茅xica
134
+ content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
135
+ function_words = len([token for token in doc if token.pos_ not in ['NOUN', 'VERB', 'ADJ', 'ADV']])
136
+ density_score = normalize_score(
137
+ content_words / (content_words + function_words) if (content_words + function_words) > 0 else 0,
138
+ optimal_value=0.6, # 60% de palabras de contenido es 贸ptimo
139
+ range_factor=1.5
140
+ )
141
+
142
+ # Pesos para cada factor
143
+ weights = {
144
+ 'length': 0.2,
145
+ 'connectors': 0.2,
146
+ 'complexity': 0.2,
147
+ 'reference': 0.2,
148
+ 'density': 0.2
149
+ }
150
+
151
+ # C谩lculo del score final ponderado
152
+ clarity_score = (
153
+ weights['length'] * length_score +
154
+ weights['connectors'] * connector_score +
155
+ weights['complexity'] * complexity_score +
156
+ weights['reference'] * reference_score +
157
+ weights['density'] * density_score
158
+ )
159
+
160
+ # Informaci贸n detallada para diagn贸stico
161
+ details = {
162
+ 'length_score': length_score,
163
+ 'connector_score': connector_score,
164
+ 'complexity_score': complexity_score,
165
+ 'reference_score': reference_score,
166
+ 'density_score': density_score,
167
+ 'avg_sentence_length': avg_length,
168
+ 'length_variation': length_variation,
169
+ 'connectors_per_sentence': connector_count / len(sentences)
170
+ }
171
+
172
+ return clarity_score, details
173
+
174
+ except Exception as e:
175
+ logger.error(f"Error en analyze_clarity: {str(e)}")
176
+ return 0.0, {}
177
+
178
def analyze_reference_clarity(doc):
    """
    Score how clearly the referring words in the text are anchored.

    Every PRON or DET token counts as a reference. A reference is treated
    as clear when at least one of its ``ancestors`` is a NOUN.

    Args:
        doc: Iterable of tokens exposing ``pos_`` and ``ancestors``
            (presumably a spaCy ``Doc``; confirm against the caller).

    Returns:
        float: ``1.0 - unclear / total`` clamped to ``[0.0, 1.0]``;
        ``1.0`` when there are no references at all and ``0.0`` when an
        error occurs (the error is logged).
    """
    try:
        # Collect pronouns and determiners: the candidate references.
        referring = [tok for tok in doc if tok.pos_ in ('PRON', 'DET')]

        # No references means maximum clarity.
        if not referring:
            return 1.0

        # A reference with no NOUN among its ancestors is unclear.
        unresolved = sum(
            1
            for tok in referring
            if not any(anc.pos_ == 'NOUN' for anc in tok.ancestors)
        )

        ratio = 1.0 - (unresolved / len(referring))
        return max(0.0, min(1.0, ratio))

    except Exception as e:
        logger.error(f"Error en analyze_reference_clarity: {str(e)}")
        return 0.0
212
 
213
  def analyze_vocabulary_diversity(doc):
214
+ """An谩lisis mejorado de la diversidad y calidad del vocabulario"""
215
+ try:
216
+ # 1. An谩lisis b谩sico de diversidad
217
+ unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
218
+ total_words = len([token for token in doc if token.is_alpha])
219
+ basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0
220
+
221
+ # 2. An谩lisis de registro
222
+ academic_words = 0
223
+ narrative_words = 0
224
+ technical_terms = 0
225
+
226
+ # Clasificar palabras por registro
227
+ for token in doc:
228
+ if token.is_alpha:
229
+ # Detectar t茅rminos acad茅micos/t茅cnicos
230
+ if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
231
+ if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
232
+ technical_terms += 1
233
+ # Detectar palabras narrativas
234
+ if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
235
+ narrative_words += 1
236
+
237
+ # 3. An谩lisis de complejidad sint谩ctica
238
+ avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))
239
+
240
+ # 4. Calcular score ponderado
241
+ weights = {
242
+ 'diversity': 0.3,
243
+ 'technical': 0.3,
244
+ 'narrative': 0.2,
245
+ 'complexity': 0.2
246
+ }
247
+
248
+ scores = {
249
+ 'diversity': basic_diversity,
250
+ 'technical': technical_terms / total_words if total_words > 0 else 0,
251
+ 'narrative': narrative_words / total_words if total_words > 0 else 0,
252
+ 'complexity': min(1.0, avg_sentence_length / 20) # Normalizado a 20 palabras
253
+ }
254
+
255
+ # Score final ponderado
256
+ final_score = sum(weights[key] * scores[key] for key in weights)
257
+
258
+ # Informaci贸n adicional para diagn贸stico
259
+ details = {
260
+ 'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
261
+ 'scores': scores
262
+ }
263
+
264
+ return final_score, details
265
+
266
+ except Exception as e:
267
+ logger.error(f"Error en analyze_vocabulary_diversity: {str(e)}")
268
+ return 0.0, {}
269
 
270
  def analyze_cohesion(doc):
271
  """Analiza la cohesi贸n textual"""