File size: 15,872 Bytes
06b4380
 
 
 
 
 
 
 
97907b3
 
 
 
 
 
 
 
 
06b4380
97907b3
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97907b3
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97907b3
 
 
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97907b3
 
 
 
 
 
06b4380
 
97907b3
06b4380
 
 
97907b3
06b4380
 
97907b3
 
06b4380
 
 
97907b3
06b4380
97907b3
 
 
06b4380
 
 
97907b3
06b4380
 
97907b3
 
 
 
06b4380
 
97907b3
 
06b4380
 
97907b3
 
 
 
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12ec080
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fa2d2e
 
97907b3
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8fc7e1
 
 
 
 
06b4380
 
 
97907b3
06b4380
 
 
 
 
97907b3
06b4380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
import streamlit as st
import random
import openai
import os
from PIL import Image
from transformers import pipeline


# Page configuration must be the first Streamlit command of the script, so it
# runs before anything below can emit an element (e.g. the st.error call).
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")


@st.cache_resource
def _load_captioning_model():
    """Build the BLIP image-captioning pipeline.

    Cached with ``st.cache_resource`` so the weights are loaded once and then
    shared, instead of being loaded again for every new browser session.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


# Per-session state: the captioning model handle, the latest captioning result
# and the essay text typed by the user.
if "model" not in st.session_state:
    st.session_state.model = _load_captioning_model()

if "result" not in st.session_state:
    st.session_state.result = ""

if "essay" not in st.session_state:
    st.session_state.essay = ""


# Set OpenAI API key.
# st.secrets raises when the key (or the whole secrets file) is missing, which
# would bypass the friendly error below, so that case is caught explicitly and
# the environment variable of the same name is used as a fallback.
try:
    OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
except (KeyError, FileNotFoundError):
    OPENAI_API_KEY = ""
OPENAI_API_KEY = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    st.error("OPENAI_API_KEY not set in Streamlit secrets or environment variables!")
    # st.stop() ends this script run cleanly while keeping the error visible.
    st.stop()
openai.api_key = OPENAI_API_KEY


def split_image(image, output_dir='.', num_parts=3):
    """Cut an image into equal-width vertical strips, then save and caption each.

    Args:
        image: Object exposing ``size``, ``crop`` and ``save`` (the caller in
            this file passes a PIL image converted to RGB).
        output_dir: Directory that receives ``part_<n>.png``; it is created
            when it does not exist yet.
        num_parts: Number of strips to produce. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        A list of ``(file_path, caption)`` tuples ordered left to right, where
        ``caption`` is whatever ``st.session_state.model`` returns for the
        strip.

    Raises:
        ValueError: If ``num_parts`` is smaller than 1.
    """
    if num_parts < 1:
        raise ValueError("num_parts must be at least 1")

    # An empty string means "current directory" for os.path.join, but
    # os.makedirs('') fails, so only create real paths.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Image dimensions and the nominal width of one strip.
    width, height = image.size
    part_width = width // num_parts

    # Collected (saved file path, caption) pairs.
    results = []

    for index in range(num_parts):
        left = index * part_width
        # The last strip runs to the right edge so the pixels lost to integer
        # division are not dropped.
        right = width if index == num_parts - 1 else left + part_width
        part_img = image.crop((left, 0, right, height))
        file_path = os.path.join(output_dir, f'part_{index + 1}.png')
        part_img.save(file_path)

        # Caption the strip with the model stored in the session state.
        caption = st.session_state.model(part_img)
        results.append((file_path, caption))

    return results

def add_numbering_after_dot(input_string):
    """Append a running 【n】 marker after every sentence of *input_string*.

    The text is cut at each "." character; every non-blank piece is trimmed,
    gets its period back followed by 【n】 (n counts from 1), and the pieces are
    joined with single spaces. Blank pieces are dropped and do not consume a
    number.
    """
    trimmed = (piece.strip() for piece in input_string.split("."))
    kept = [piece for piece in trimmed if piece]
    return " ".join(
        f"{text}.【{position}】" for position, text in enumerate(kept, start=1)
    )

def add_paragraph_numbering(input_text):
    """Prefix each blank-line-separated paragraph with "<n>. ".

    Paragraphs are the pieces obtained by splitting on a double newline. Each
    one is emitted as "<n>. <paragraph>" followed by a blank line, numbered
    from 1; empty pieces are numbered too.
    """
    return "".join(
        f"{number}. {block}\n\n"
        for number, block in enumerate(input_text.split("\n\n"), start=1)
    )

def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo-16k",
                                 temperature=0, max_tokens=1000):
    """Send a chat conversation to OpenAI and return the first reply's text.

    Args:
        messages: Chat messages, passed through to the API unchanged.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on the reply length, forwarded to the API.

    Returns:
        The ``content`` field of the first choice's message.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 openai client
    # interface - confirm the pinned package version still provides it.
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

# Built-in sample student essay (a short story about Jack's family travelling
# to Sanya). The spelling and grammar mistakes are part of the sample text,
# presumably on purpose - do not "correct" them.
student_essay = '''One Saturday, Jack’s parents were sitting in the sofa and reading a magazic. “
I want to go Sanya on holiday!” said Jack in a cheerful manner. “Let’s go!” 
A few minutes later they arrived to the air plane station. 
Suddenly, the sky turned black and it stared to rain. “Oh, it’s a bad weather to go travelling!’ said Jack. 
One hours later, they arrived the Sanya, 
the sky was sunny and they played on the beach. “What a wonderful day!” said Jack.'''

# First worked example embedded into the grading system prompt: a student
# essay, the captions of its three pictures, and the expected report for a
# full-mark answer (15 points: 5 each for Content, Organization and Language).
example = """
student essay: One day, dad was reading a magazine where there have lots of information in it. 
“Would you like to go on holiday in San Ya?” asked dad. He thinks San Ya is a warm city in China,
which is the perfect place to spend holiday. Then, he bought three tickets. 
It rains dogs and cats when they were going to the plane. “What a bad weather!” said Tom. 
One days ago, they arrived at there on Saturday. 
A local person, who is the photographer, helped they took a photo. They had a happy holiday.

image content: a cartoon of a family sitting on a couch reading a book,a cartoon drawing of a group of people climbing up a flight of stairs
,a cartoon drawing of a family standing on the beach

you should return:

Total mark: 15 points
------------------------------------------------------------------------------------
Content

5 points

- Each picture involves content: ✔️
- Each image contains 2-3 sentences; if sub-clauses are used, description can be reduced to 1-2 sentences for each image: ✔️
- Writing is relevant with sufficient information conveyed: ✔️

------------------------------------------------------------------------------------
Organization

5 points

- Structured writing with clear introduction, body, and conclusion. The first two sentences provide clear context regarding time, place, individuals, and actions: ✔️
- Sentences are 90% logically connected: ✔️
- Incorporates 2-3 different linkage techniques such as conjunctions (because, but, so) and adverbs (unexpectedly, fortunately): ✔️
- Uses at least 5 linkage techniques, with a mixture of 2-3 different methods: ✔️

------------------------------------------------------------------------------------
Language 

5 points

Dimension 1: Complexity

- About 20% (15-20%) of words are bi-syllabic or multi-syllabic: ✔️
- Includes 3 words of B1 level or above: ✔️
- Covers 2-3 types of sub-clauses: adverbial and relative: ✔️
- At least 3 compound sentence structures: ✔️

Dimension 2: Diversity

- Diverse word usage without obvious repetitive grammar structures: ✔️

Language Dimension 3: Accuracy

- Word spelling errors (e.g., "the-they") and grammatical errors (e.g., "...Where there were lots of pleasant beaches...; I wanted to travelling to Sanya, Because"): ❌ (Vocabulary richness compensates for this shortcoming)
"""

# Second worked example embedded into the grading system prompt: a weaker
# essay, its picture captions, and the expected report for a mid-range answer
# (9 points: 3 each for Content, Organization and Language).
example_2 = """
student essay:
There is a Jack’s birthday. There are he, his brother, mum and dad. His brother is giving him a ball to football. Next day, Jack and his brother are playing football match. Jack score a goal.

image content: cartoon of a family sitting around a table with a birthday cake,cartoon of a man giving a birthday cake to a woman,
cartoon of a man kicking a soccer ball into a goal

you should return:

Total mark: 9 points
------------------------------------------------------------------------------------
Content 

3 points:

- Each picture involves content: ✔️
- Each picture has at least one sentence: ✔️
- The content is relevant, but the information derived from the image is not exhaustive: ❌

Organization

3 points:

- Complete structure with a clear introduction, development, and conclusion: ✔️
- 50% of the sentences have logical connections: ✔️
- Contains one to two types of cohesive devices, such as pronouns (e.g., he, his brother) and temporal relations (e.g., next day). The number of cohesive devices used ranges between 2-4 for one type: ✔️

Language 

3 points:

*Dimension 1: Complexity*

- Bi-syllabic and multi-syllabic words make up 9%: ✔️
- All words are common, everyday vocabulary: ✔️
- No complex sentences or compound sentences: ❌

*Dimension 2: Diversity*

- Repetition of words: "Brother" appears 3 times, "football" appears 2 times: ❌

*Dimension 3: Accuracy*

- No word spelling mistakes: ✔️
- Did not use past tense: ❌
- Grammatical errors in four places: "There is a Jack’s birthday", "There are he", "to football", "Jack score": ❌
"""

# System prompt for the grading request (written in Chinese). It tells the
# model to act as a Cambridge KET writing teacher, to grade the essay enclosed
# in 【】 on Content, Organization and Language with integer band scores whose
# sum is the total, lists the criteria for Band 5 down to Band 0, asks for
# markdown output without concrete suggestions or rewrites, and finally embeds
# the two worked examples (`example`, `example_2`) through the f-string.
# NOTE(review): the accuracy criteria give 6-8 errors for Band 3 and 5-8 errors
# for Band 2, so the two ranges overlap - confirm the intended thresholds.
system_message = f'''
                你是一个剑桥KET作文老师。
                请你认真批改学生在【】符号中的作文
                首先你会给出你的评分,在评分文章时,你会从以下方面考虑:
                Content 
                Organization
                Language 
                记住KET的Band Score都是整数
                total = Content Band Score + Organization Band Score + Language Band Score。

                以下是评分的要求:
                Band 5:Content:一个高质量的5级作文要确保每一幅图的内容都被涉及,并且每幅图都至少有2-3个句子描述(如果使用从句,描述可以缩减到1-2句)。整体的写作内容需要与主题紧密相关,并确保信息得到充分的传达。
                Organization:作文需要有一个完整的结构,包括明确的开头、经过和结尾,且第一句应清晰地交代时间、地点、人物和行为。句子之间的逻辑连接需要达到90%以上,使用的衔接手段需要多样,涵盖两到三种不同类型(如连词、指代和特定的副词)。而且,衔接手段的使用不能少于5次,且不能都是同一种手段。
                Language:复杂度:这要求作文中双音节和多音节词的占比为15-20%,至少有3-5个非日常生活词汇,包括2-3种不同类型的从句(如宾语从句、状语从句和定语从句),以及不少于3句的复合句式。
                丰富度:作文中的单词和句子结构应该变化丰富,相同的单词和句式最多只能出现1次(人名除外)。
                准确度:作文中的单词拼写和句式使用错误不能超过3个,并特别注意描述语言应使用过去时态,而对话部分则没有时态限制。

                Band 4:Content:1.每幅图的内容都涉及 2.每幅图至少有1个句子 3.写作内容基本相关,1幅图细节不充分
                Organization:1.	结构完整(明确的开头、经过和结尾)2.句与句之间有70%的衔接逻辑 3.包含两种衔接手段:连词、指代或副词 4.衔接手段数量为3-5个(涵盖两个种类)
                Language:维度1:复杂度 1.双音节、多音节词占比为10-15% 2.有1-2个非日常生活词汇 3.1-2种从句:宾从、状从或定从 4.复合句式2-3句
                维度2:丰富度 相同单词、句式结构最多复现2次
                维度3:准确度 单词拼写错误、句式错误:3-5个(描述语言使用过去时态,对话语言不限时态)

                Band 3:Content:1.每幅图内容基本涉及 2.每幅图至少有1个句子 3.写作内容稍偏离主题,至少有1幅图细节不充分
                Organization:1.结构完整(明确的开头、经过和结尾)2.句与句之间有50%的衔接逻辑 3.包含一到两种衔接手段:连词、指代或副词4.衔接手段数量为2-4个(一个种类)
                Language:维度1:复杂度 1.	双音节、多音节词占比为5-10% 2.单词均为日常生活词汇 3.1种从句:宾从、状从或定从 4.复合句式1句
                维度2:丰富度 相同单词、句式复现2次以上
                维度3:准确度 单词拼写错误、句式错误:6-8个(语言错误少于6个,可不写从句)

                Band 2:Content:1.图片内容没有完全覆盖或图片信息有误 2.某一幅图片1个句子的描述都没有 3.写作内容稍偏离主题,至少2幅图片细节不充分
                Organization:1.	结构不完整(有开头没经过或者有开头没结尾)2.句与句之间有一些逻辑(30%) 3.包含一种衔接手段:连词、指代或副词 4.衔接手段数量为1-2个
                Language:维度1:复杂度 1.双音节、多音节词占比少于5% 2.单词均为日常生活词汇 3.无从句或复合句
                维度2:准确度 单词拼写错误、句式错误:5-8个

                Band 1:Content:1.图片信息缺漏、有误 2.两幅图片1个句子的描述都没有 3.写作内容与图片不相干
                Organization:1.故事结构不完整 2.句与句之间几乎无逻辑(10%) 3.包含一种衔接手段:连词、指代或副词 4.衔接手段数量为1个
                Language:维度1:复杂度 1.双音节、多音节词占比少于3% 2.单词均为日常生活词汇 3.无从句或复合句
                维度2:准确度 单词拼写错误、句式错误:10个以上
     
                Band 0:Content:写作内容与图片毫无关联
                Organization:1.	缺乏故事结构 2.句与句之间毫无逻辑(0%) 3.无衔接手段
                Language:1.	无双音节、多音节单词 2.无完整句型

                把回答都整理成markdown格式。不要给出具体建议和修改文章。
                
                这是一个具体的例子:{example}
                这是另一个例子:{example_2}
                
                '''

# Page title, emitted as raw HTML so the heading colour can be set inline.
_TITLE_HTML = """
    <h2 style='color: black;'>
        阅思乐KET作文批改 demo 0.0.2
    </h2>
    """
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Style sheet that enlarges h1 headings inside ".streamlit-container".
# NOTE(review): the title above is an h2, so this h1 rule may not apply to it -
# confirm the selector matches what Streamlit actually renders.
_HEADING_CSS = """
<style>
    /* 修改标题的大小 */
    .streamlit-container h1 {
        font-size: 2.5em !important;
    }
</style>
"""
st.markdown(_HEADING_CSS, unsafe_allow_html=True)

# Two equally weighted columns: inputs in the first, grading output in the second.
col1, col2 = st.columns([3, 3])

with col1:
    # Image upload (jpg / png / jpeg); the file is previewed once present.
    uploaded_image = st.file_uploader("上传作文图片", type=["jpg", "png", "jpeg"])
    if uploaded_image:
        st.image(uploaded_image, caption="已上传的图片。", use_column_width=True)

    # The essay text is mirrored into the session state on every run.
    st.session_state.essay = st.text_area(" 输入你的作文", height=400)

    # st.button reports whether the button was clicked for this run; the flag
    # is consumed by the result column.
    push = st.button("开始打分")
with col2:
    # CSS for a fixed-height, vertically scrollable wrapper class.
    st.markdown(
    """
    <style>
        /* 创建一个固定大小的容器,并允许滚动 */
        .scrollable-container {
            max-height: 400px;  /* 容器的高度 */
            overflow-y: auto;  /* 允许垂直滚动 */
        }
    </style>
    """,
        unsafe_allow_html=True,
    )

    # Grade only when the button was clicked AND the user supplied both an
    # image and a non-blank essay. The check has to look at the text the user
    # typed: the built-in sample essay constant is always truthy and would let
    # an empty essay through.
    essay_text = str(st.session_state.essay)
    inputs_ready = bool(essay_text.strip()) and bool(uploaded_image)

    if push and not inputs_ready:
        # Tell the user why nothing happens instead of silently ignoring the click.
        st.warning("请先上传作文图片并输入作文。")
    elif push:
        # Step 1: split the uploaded picture into strips and caption each one.
        with st.spinner('处理图片...'):
            image = Image.open(uploaded_image).convert("RGB")
            st.session_state.result = split_image(image, '.')

        progress_placeholder = st.empty()
        my_bar = progress_placeholder.progress(0, text="开始打分")
        query = essay_text

        # Step 2: grading request - rubric prompt plus the essay and captions.
        # NOTE(review): the raw split_image result (file paths included) is
        # interpolated here, while the prompt examples show plain
        # comma-separated captions - consider formatting it the same way.
        messages = [
            {'role': 'system',
             'content': system_message},
            {'role': 'user',
             'content': f"remember your settings student essay:【{query}】, image content:{st.session_state.result}"},
        ]
        grading_markdown = get_completion_from_messages(messages)
        my_bar.progress(50, text="开始提供相关素材")

        # Step 3: second request asking for useful expressions for this essay.
        system_message_4 = '''
            你是一个KET老师,根据学生提供的文章,提供一些素材和观点的地道英文表达及中文释义;
            在每句英文的后面的括号里给出中文释义
            并把回答都整理成markdown格式。
            这是一个具体的例子:
            返回:
            ❤ 此类文章可能会用到的高分素材和观点,提供给你参考 ❤
            - it was a scorching summer day.

            - ... along which there were a wide variety of shops.

            - It was terrible to run on such a scorching/burning day.

            - The moment he stopped for a short break, he had sweated heavily. (一般过去时与过去完成时连用)

            - The reason why he went into a convenient store was that he wanted to get something icy to drink.

            - ...sweated heavily; Jimmy asked the shopkeeper politely

            - ...replied Jimmy in a cheerful manner.

            学生:'''
        messages = [
            {'role': 'system',
             'content': system_message_4},
            {'role': 'user',
             'content': f"请记住你的设定【{query}】"},
        ]
        material_markdown = get_completion_from_messages(messages)
        my_bar.progress(100, text="打分完成")

        # Step 4: render the grading report, then the suggested material.
        # NOTE(review): the opening and closing <div> are emitted by separate
        # st.markdown calls; verify they really wrap the elements in between.
        st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
        with st.container():
            st.markdown(grading_markdown)
        st.divider()
        with st.container():
            st.markdown("""
            <h4 style='color: black;'>
                素材建议
            </h4>
            """,
            unsafe_allow_html=True
            )
            st.write(material_markdown)
        # Remove the progress bar once everything has been rendered.
        my_bar.empty()
        st.markdown('</div>', unsafe_allow_html=True)