JacobLinCool committed on
Commit bd35af6 · 1 Parent(s): a5be200

feat: more scoring model

Files changed (5)
  1. .gitignore +2 -0
  2. README.md +2 -0
  3. app.py +21 -29
  4. model/Engessay_grading_ML.py +37 -0
  5. model/IELTS_essay_scoring.py +39 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+
+*.pyc
README.md CHANGED
@@ -10,6 +10,8 @@ pinned: false
 license: mit
 preload_from_hub:
   - JacobLinCool/IELTS_essay_scoring_safetensors
+  - KevSun/Engessay_grading_ML
+  - chillies/mistral-7b-ielts-evaluator-q4
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,34 +1,24 @@
 from typing import *
 import gradio as gr
-import numpy as np
-import spaces
-import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from model.IELTS_essay_scoring import grade_IELTS_essay_scoring
+from model.Engessay_grading_ML import grade_Engessay_grading_ML
 
-model_name = "JacobLinCool/IELTS_essay_scoring_safetensors"
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+models = {
+    "IELTS_essay_scoring": grade_IELTS_essay_scoring,
+    "Engessay_grading_ML": grade_Engessay_grading_ML,
+}
 
 
-@spaces.GPU
-def grade(question: str, answer: str) -> Tuple[float, float, float, float, float]:
+# we don't apply @spaces.GPU here because some models run fast on CPU
+def grade(question: str, answer: str, model: str) -> Tuple[float, str]:
     if len(question) < 30 or len(answer) < 30:
         raise gr.Error("Please enter more than 30 characters")
 
-    text = f"{question} {answer}"
-
-    inputs = tokenizer(
-        text, return_tensors="pt", padding=True, truncation=True, max_length=512
-    )
-    with torch.no_grad():
-        outputs = model(**inputs)
-        predictions = outputs.logits.squeeze()
-
-    predicted_scores = predictions.numpy()
-    normalized_scores = (predicted_scores / predicted_scores.max()) * 9
-    rounded_scores = np.round(normalized_scores * 2) / 2
-
-    return tuple(rounded_scores)
+    if model not in models:
+        raise gr.Error(f"Model {model} not found")
+
+    grader = models[model]
+    return grader(question, answer)
 
 
 with gr.Blocks() as app:
@@ -46,18 +36,20 @@ with gr.Blocks() as app:
             placeholder="Write your essay here",
            lines=10,
         )
+        model = gr.Radio(
+            label="Select the grading model",
+            choices=list(models.keys()),
+            value=list(models.keys())[0],
+        )
         btn = gr.Button("Grade Essay", variant="primary")
     with gr.Column():
-        task_achievement = gr.Number(label="Task Achievement")
-        coherence_cohesion = gr.Number(label="Coherence and Cohesion")
-        vocabulary = gr.Number(label="Vocabulary")
-        grammar = gr.Number(label="Grammar")
-        overall = gr.Number(label="Overall")
+        overall = gr.Number(label="Overall Score")
+        comment = gr.Textbox(label="Comment", lines=10)
 
     btn.click(
         fn=grade,
-        inputs=[question, essay],
-        outputs=[task_achievement, coherence_cohesion, vocabulary, grammar, overall],
+        inputs=[question, essay, model],
+        outputs=[overall, comment],
    )
 
     gr.Examples(
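
With this change `grade` is just a dispatcher: the `gr.Radio` value is looked up in the `models` dict and the matching `grade_*` function does the work, so every grader shares the same `(question, answer) -> (overall_score, comment)` contract. A minimal sketch of how a third grader could be registered; the `mistral_ielts_evaluator` module and function below are hypothetical, not part of this commit:

# hypothetical third grader; the module name and function are assumptions
from model.mistral_ielts_evaluator import grade_mistral_ielts_evaluator

models = {
    "IELTS_essay_scoring": grade_IELTS_essay_scoring,
    "Engessay_grading_ML": grade_Engessay_grading_ML,
    "mistral_ielts_evaluator": grade_mistral_ielts_evaluator,
}
# the Radio choices are built from list(models.keys()), so the UI picks this up automatically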
model/Engessay_grading_ML.py ADDED
@@ -0,0 +1,37 @@
+from typing import *
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+model_name = "KevSun/Engessay_grading_ML"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+@torch.no_grad()
+def grade_Engessay_grading_ML(question: str, answer: str) -> Tuple[float, str]:
+    text = f"{question} {answer}"
+
+    inputs = tokenizer(text, return_tensors="pt")
+
+    outputs = model(**inputs)
+    predictions = outputs.logits.squeeze()
+
+    predicted_scores = predictions.numpy()
+    scaled_scores = 2.25 * predicted_scores - 1.25
+    rounded_scores = [round(score * 2) / 2 for score in scaled_scores]
+
+    labels = [
+        "cohesion",
+        "syntax",
+        "vocabulary",
+        "phraseology",
+        "grammar",
+        "conventions",
+    ]
+    overall_score = round(sum(rounded_scores) / len(rounded_scores) * 2) / 2
+
+    comment = ""
+    for label, score in zip(labels, rounded_scores):
+        comment += f"{label}: {score}\n"
+
+    return overall_score, comment
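
This grader returns six trait scores, one per label, and the affine map 2.25·x − 1.25 stretches them before snapping to half-point steps. Assuming raw outputs near the 1–5 range (an assumption; the commit does not state the model's output scale), the endpoints land at 1.0 and 10.0. A quick check of the arithmetic:

# worked check of the rescaling used above (raw 1..5 range is assumed)
for raw in (1.0, 3.0, 5.0):
    scaled = 2.25 * raw - 1.25       # -> 1.0, 5.5, 10.0
    rounded = round(scaled * 2) / 2  # snap to the nearest 0.5
    print(raw, "->", rounded)        # 1.0 -> 1.0, 3.0 -> 5.5, 5.0 -> 10.0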
model/IELTS_essay_scoring.py ADDED
@@ -0,0 +1,39 @@
+from typing import *
+import torch
+import numpy as np
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+model_name = "JacobLinCool/IELTS_essay_scoring_safetensors"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+@torch.no_grad()
+def grade_IELTS_essay_scoring(question: str, answer: str) -> Tuple[float, str]:
+    text = f"{question} {answer}"
+
+    inputs = tokenizer(
+        text, return_tensors="pt", padding=True, truncation=True, max_length=512
+    )
+
+    outputs = model(**inputs)
+    predictions = outputs.logits.squeeze()
+
+    predicted_scores = predictions.numpy()
+    normalized_scores = (predicted_scores / predicted_scores.max()) * 9
+    rounded_scores = np.round(normalized_scores * 2) / 2
+
+    labels = [
+        "Task Achievement",
+        "Coherence and Cohesion",
+        "Vocabulary",
+        "Grammar",
+        "Overall",
+    ]
+    overall_score = float(rounded_scores[-1])
+
+    comment = ""
+    for label, score in zip(labels, rounded_scores):
+        comment += f"{label}: {score}\n"
+
+    return overall_score, comment
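
Here the five logits are normalized so the largest maps exactly to 9 (the top IELTS band) and the rest scale proportionally, then everything is rounded to half-band steps. Note that by construction at least one trait is always 9.0, so this rescales relative differences rather than calibrating absolute bands. A worked example with illustrative logits (the numbers below are made up, not model output):

import numpy as np

predicted_scores = np.array([6.1, 5.8, 5.5, 5.2, 6.3])               # illustrative only
normalized_scores = (predicted_scores / predicted_scores.max()) * 9  # max -> 9.0
rounded_scores = np.round(normalized_scores * 2) / 2                 # half-band steps
print(rounded_scores)  # [8.5 8.5 8.  7.5 9. ]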