KET / app.py
xuyingliKepler's picture
Update app.py
959ce7b
raw
history blame
No virus
16.7 kB
import streamlit as st
import random
import openai
import os
from PIL import Image
from transformers import pipeline
# Page configuration goes first: Streamlit documents set_page_config as
# having to be the first Streamlit command executed on a page.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Per-session state: the image-captioning pipeline, the latest captioning
# result, and the essay text entered by the user.
if "model" not in st.session_state:
    # Initialise the image-caption generation model once per session.
    st.session_state.model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
if "result" not in st.session_state:
    st.session_state.result = ""
if "essay" not in st.session_state:
    st.session_state.essay = ""

# Set the OpenAI API key. Indexing st.secrets raises when the key (or the
# secrets file) is missing, which would bypass the emptiness check below, so
# fall back to the environment variable that the error message refers to.
try:
    OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
except (KeyError, FileNotFoundError):
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    st.error("OPENAI_API_KEY not set in environment variables!")
    # Halt this script run so the error above stays visible to the user.
    st.stop()
openai.api_key = OPENAI_API_KEY
def split_image(image, output_dir='.', num_parts=3):
    """Cut an image into vertical strips, save each strip, and caption it.

    The image is divided left-to-right into ``num_parts`` strips of equal
    width; the last strip also absorbs any remainder pixels so the full
    width is covered.

    Args:
        image: Object exposing ``size``, ``crop`` and ``save`` the way a PIL
            image does.
        output_dir: Directory the strips are written to as ``part_<k>.png``.
            It is created if it does not exist yet.
        num_parts: Number of strips to produce (defaults to 3).

    Returns:
        A list of ``(file_path, caption)`` tuples, one per strip, where
        ``caption`` is whatever ``st.session_state.model`` returns for it.

    Raises:
        ValueError: If ``num_parts`` is smaller than 1.
    """
    if num_parts < 1:
        raise ValueError("num_parts must be at least 1")
    # An empty output_dir means "current directory"; makedirs rejects "".
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Image dimensions and the width of each strip.
    width, height = image.size
    part_width = width // num_parts

    # Saved paths and generated captions, in left-to-right order.
    results = []
    for i in range(num_parts):
        left = i * part_width
        # The final strip runs to the right edge to include leftover pixels.
        right = left + part_width if i != num_parts - 1 else width
        part_img = image.crop((left, 0, right, height))
        file_path = os.path.join(output_dir, f'part_{i + 1}.png')
        part_img.save(file_path)
        # Generate the caption for this strip.
        caption = st.session_state.model(part_img)
        results.append((file_path, caption))
    return results
def add_numbering_after_dot(input_string):
    """Append a running index marker after every period-delimited sentence.

    The text is split on ".", each piece is stripped, and empty pieces are
    dropped. Every remaining piece is emitted as ``<piece>.【<n>】`` with
    ``n`` counting from 1, and the pieces are joined by single spaces.
    """
    trimmed = (piece.strip() for piece in input_string.split("."))
    kept = [piece for piece in trimmed if piece]
    return " ".join(
        f"{piece}.【{index}】" for index, piece in enumerate(kept, start=1)
    )
def add_paragraph_numbering(input_text):
    """Prefix each blank-line-separated paragraph with ``<n>. ``.

    Paragraphs are the pieces obtained by splitting on a double newline.
    Each one, including empty pieces, is numbered from 1 and followed by a
    double newline in the returned string.
    """
    chunks = input_text.split("\n\n")
    return "".join(
        f"{position}. {chunk}\n\n"
        for position, chunk in enumerate(chunks, start=1)
    )
def get_completion_from_messages(messages,
                                 model="gpt-4-1106-preview",
                                 temperature=0, max_tokens=1000):
    """Run one chat completion and return the text of the first choice.

    Args:
        messages: Chat messages passed through unchanged to the API call.
        model: Model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion length limit forwarded to the API.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface — confirm against the pinned openai version.
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"]
# Built-in sample student essay (a fixed example text, not the user's input).
student_essay = '''One Saturday, Jack’s parents were sitting in the sofa and reading a magazic. “
I want to go Sanya on holiday!” said Jack in a cheerful manner. “Let’s go!”
A few minutes later they arrived to the air plane station.
Suddenly, the sky turned black and it stared to rain. “Oh, it’s a bad weather to go travelling!’ said Jack.
One hours later, they arrived the Sanya,
the sky was sunny and they played on the beach. “What a wonderful day!” said Jack.'''
# First few-shot example for the grading prompt: a sample essay, the captions
# of its three pictures, and the expected full-mark (15 points) report.
# Interpolated into `system_message`.
example = """
student essay: One day, dad was reading a magazine where there have lots of information in it.
“Would you like to go on holiday in San Ya?” asked dad. He thinks San Ya is a warm city in China,
which is the perfect place to spend holiday. Then, he bought three tickets.
It rains dogs and cats when they were going to the plane. “What a bad weather!” said Tom.
One days ago, they arrived at there on Saturday.
A local person, who is the photographer, helped they took a photo. They had a happy holiday.
image content: a cartoon of a family sitting on a couch reading a book,a cartoon drawing of a group of people climbing up a flight of stairs
,a cartoon drawing of a family standing on the beach
you should return:
Total mark: 15 points
------------------------------------------------------------------------------------
Content
5 points
- Each picture involves content: ✔️
- Each image contains 2-3 sentences; if sub-clauses are used, description can be reduced to 1-2 sentences for each image: ✔️
- Writing is relevant with sufficient information conveyed: ✔️
------------------------------------------------------------------------------------
Organization
5 points
- Structured writing with clear introduction, body, and conclusion. The first two sentences provide clear context regarding time, place, individuals, and actions: ✔️
- Sentences are 90% logically connected: ✔️
- Incorporates 2-3 different linkage techniques such as conjunctions (because, but, so) and adverbs (unexpectedly, fortunately): ✔️
- Uses at least 5 linkage techniques, with a mixture of 2-3 different methods: ✔️
------------------------------------------------------------------------------------
Language
5 points
Dimension 1: Complexity
- About 20% (15-20%) of words are bi-syllabic or multi-syllabic: ✔️
- Includes 3 words of B1 level or above: ✔️
- Covers 2-3 types of sub-clauses: adverbial and relative: ✔️
- At least 3 compound sentence structures: ✔️
Dimension 2: Diversity
- Diverse word usage without obvious repetitive grammar structures: ✔️
Language Dimension 3: Accuracy
- Word spelling errors (e.g., "the-they") and grammatical errors (e.g., "...Where there were lots of pleasant beaches...; I wanted to travelling to Sanya, Because"): ❌ (Vocabulary richness compensates for this shortcoming)
"""
# Second few-shot example for the grading prompt: a weaker sample essay, its
# picture captions, and the expected mid-band (9 points) report.
# Interpolated into `system_message`.
example_2 = """
student essay:
There is a Jack’s birthday. There are he, his brother, mum and dad. His brother is giving him a ball to football. Next day, Jack and his brother are playing football match. Jack score a goal.
image content: cartoon of a family sitting around a table with a birthday cake,cartoon of a man giving a birthday cake to a woman,
cartoon of a man kicking a soccer ball into a goal
you should return:
Total mark: 9 points
------------------------------------------------------------------------------------
Content
3 points:
- Each picture involves content: ✔️
- Each picture has at least one sentence: ✔️
- The content is relevant, but the information derived from the image is not exhaustive: ❌
Organization
3 points:
- Complete structure with a clear introduction, development, and conclusion: ✔️
- 50% of the sentences have logical connections: ✔️
- Contains one to two types of cohesive devices, such as pronouns (e.g., he, his brother) and temporal relations (e.g., next day). The number of cohesive devices used ranges between 2-4 for one type: ✔️
Language
3 points:
*Dimension 1: Complexity*
- Bi-syllabic and multi-syllabic words make up 9%: ✔️
- All words are common, everyday vocabulary: ✔️
- No complex sentences or compound sentences: ❌
*Dimension 2: Diversity*
- Repetition of words: "Brother" appears 3 times, "football" appears 2 times: ❌
*Dimension 3: Accuracy*
- No word spelling mistakes: ✔️
- Did not use past tense: ❌
- Grammatical errors in four places: "There is a Jack’s birthday", "There are he", "to football", "Jack score": ❌
"""
# System prompt for the grading call. It sets the KET writing-teacher role,
# lists the Band 0-5 criteria for Content / Organization / Language (written
# in Chinese), asks for markdown output without suggestions or rewrites, and
# embeds the two few-shot examples `example` and `example_2`.
# This is an f-string: any literal brace added to the text must be doubled.
system_message = f'''
Role and Goal: Act as a Cambridge KET Writing Teacher, meticulously grading student essays within specified brackets. Focus on assessing Content, Organization, and Language, providing an integer Band Score for each, with the total score being the sum of these three.
Constraints: Avoid giving specific suggestions or rewriting the essay. Instead, provide clear, objective grading based on the provided criteria for Band Scores 0 to 5. Essays should be evaluated for how well they cover the content of each picture, the structure and coherence of the writing, and the complexity, richness, and accuracy of the language used.
Guidelines: Respond in a structured, markdown format. Clearly state the Band Score for each category and the total score. Ensure that the assessment aligns with the criteria for each Band Score.
请你认真批改学生在【】符号中的作文
首先你会给出你的评分,在评分文章时,你会从以下方面考虑:
Content
Organization
Language
记住KET的Band Score都是整数
total = Content Band Score + Organization Band Score + Language Band Score。
以下是评分的要求:
Band 5:Content:一个高质量的5级作文要确保每一幅图的内容都被涉及,并且每幅图都至少有2-3个句子描述(如果使用从句,描述可以缩减到1-2句)。整体的写作内容需要与主题紧密相关,并确保信息得到充分的传达。
Organization:作文需要有一个完整的结构,包括明确的开头、经过和结尾,且第一句应清晰地交代时间、地点、人物和行为。句子之间的逻辑连接需要达到90%以上,使用的衔接手段需要多样,涵盖两到三种不同类型(如连词、指代和特定的副词)。而且,衔接手段的使用不能少于5次,且不能都是同一种手段。
Language:复杂度:这要求作文中双音节和多音节词的占比为15-20%,至少有3-5个非日常生活词汇,包括2-3种不同类型的从句(如宾语从句、状语从句和定语从句),以及不少于3句的复合句式。
丰富度:作文中的单词和句子结构应该变化丰富,相同的单词和句式最多只能出现1次(人名除外)。
准确度:作文中的单词拼写和句式使用错误不能超过3个,并特别注意描述语言应使用过去时态,而对话部分则没有时态限制。
Band 4:Content:1.每幅图的内容都涉及 2.每幅图至少有1个句子 3.写作内容基本相关,1幅图细节不充分
Organization:1. 结构完整(明确的开头、经过和结尾)2.句与句之间有70%的衔接逻辑 3.包含两种衔接手段:连词、指代或副词 4.衔接手段数量为3-5个(涵盖两个种类)
Language:维度1:复杂度 1.双音节、多音节词占比为10-15% 2.有1-2个非日常生活词汇 3.1-2种从句:宾从、状从或定从 4.复合句式2-3句
维度2:丰富度 相同单词、句式结构最多复现2次
维度3:准确度 单词拼写错误、句式错误:3-5个(描述语言使用过去时态,对话语言不限时态)
Band 3:Content:1.每幅图内容基本涉及 2.每幅图至少有1个句子 3.写作内容稍偏离主题,至少有1幅图细节不充分
Organization:1.结构完整(明确的开头、经过和结尾)2.句与句之间有50%的衔接逻辑 3.包含一到两种衔接手段:连词、指代或副词4.衔接手段数量为2-4个(一个种类)
Language:维度1:复杂度 1. 双音节、多音节词占比为5-10% 2.单词均为日常生活词汇 3.1种从句:宾从、状从或定从 4.复合句式1句
维度2:丰富度 相同单词、句式复现2次以上
维度3:准确度 单词拼写错误、句式错误:6-8个(语言错误少于6个,可不写从句)
Band 2:Content:1.图片内容没有完全覆盖或图片信息有误 2.某一幅图片1个句子的描述都没有 3.写作内容稍偏离主题,至少2幅图片细节不充分
Organization:1. 结构不完整(有开头没经过或者有开头没结尾)2.句与句之间有一些逻辑(30%) 3.包含一种衔接手段:连词、指代或副词 4.衔接手段数量为1-2个
Language:维度1:复杂度 1.双音节、多音节词占比少于5% 2.单词均为日常生活词汇 3.无从句或复合句
维度2:准确度 单词拼写错误、句式错误:5-8个
Band 1:Content:1.图片信息缺漏、有误 2.两幅图片1个句子的描述都没有 3.写作内容与图片不相干
Organization:1.故事结构不完整 2.句与句之间几乎无逻辑(10%) 3.包含一种衔接手段:连词、指代或副词 4.衔接手段数量为1个
Language:维度1:复杂度 1.双音节、多音节词占比少于3% 2.单词均为日常生活词汇 3.无从句或复合句
维度2:准确度 单词拼写错误、句式错误:10个以上
Band 0:Content:写作内容与图片毫无关联
Organization:1. 缺乏故事结构 2.句与句之间毫无逻辑(0%) 3.无衔接手段
Language:1. 无双音节、多音节单词 2.无完整句型
把回答都整理成markdown格式。不要给出具体建议和修改文章。
这是一个具体的例子:{example}
这是另一个例子:{example_2}
'''
# Page title, rendered as raw HTML.
_title_html = """
<h2 style='color: black;'>
阅思乐KET作文批改 demo 0.0.2
</h2>
"""
st.markdown(_title_html, unsafe_allow_html=True)

# Style sheet that enlarges h1 headings inside `.streamlit-container`.
_heading_css = """
<style>
/* 修改标题的大小 */
.streamlit-container h1 {
font-size: 2.5em !important;
}
</style>
"""
st.markdown(_heading_css, unsafe_allow_html=True)
# Two equal-width columns: inputs in the first, grading output in the second.
col1, col2 = st.columns([3, 3])

with col1:
    # Picture prompt for the essay; previewed as soon as it is uploaded.
    uploaded_image = st.file_uploader("上传作文图片", type=["jpg", "png", "jpeg"])
    if uploaded_image:
        st.image(uploaded_image, caption="已上传的图片。", use_column_width=True)

    # Essay text typed by the user, kept in session state.
    st.session_state.essay = st.text_area(" 输入你的作文", height=400)

    # `push` records whether the grading button reported a click on this run.
    push = True if st.button("开始打分") else False
with col2:
    # CSS for a fixed-height, vertically scrollable result container.
    st.markdown(
        """
<style>
/* 创建一个固定大小的容器,并允许滚动 */
.scrollable-container {
max-height: 400px; /* 容器的高度 */
overflow-y: auto; /* 允许垂直滚动 */
}
</style>
""",
        unsafe_allow_html=True,
    )

    # Grade the essay the user actually typed. The previous guard tested the
    # hard-coded sample essay, which is always non-empty, so an empty
    # submission was still sent for grading.
    query = str(st.session_state.essay)
    if push and not uploaded_image:
        st.warning("请先上传作文图片。")
    elif push and not query.strip():
        st.warning("请先输入作文内容。")
    elif push:
        # Step 1: split the uploaded picture into strips and caption each one.
        with st.spinner('处理图片...'):
            image = Image.open(uploaded_image).convert("RGB")
            st.session_state.result = split_image(image, '.')

        my_bar = st.empty().progress(0, text="开始打分")

        # Step 2: grading call — essay plus captioning result go to the model
        # together with the rubric held in `system_message`.
        messages = [
            {'role': 'system',
             'content': system_message},
            {'role': 'user',
             'content': f"remember your settings student essay:【{query}】, image content:{st.session_state.result}"},
        ]
        response = get_completion_from_messages(messages)
        my_bar.progress(50, text="开始提供相关素材")

        # Step 3: second call asking for useful expressions for this essay.
        system_message_4 = '''
你是一个KET老师,根据学生提供的文章,提供一些素材和观点的地道英文表达及中文释义;
在每句英文的后面的括号里给出中文释义
并把回答都整理成markdown格式。
这是一个具体的例子:
返回:
❤ 此类文章可能会用到的高分素材和观点,提供给你参考 ❤
- it was a scorching summer day.
- ... along which there were a wide variety of shops.
- It was terrible to run on such a scorching/burning day.
- The moment he stopped for a short break, he had sweated heavily. (一般过去时与过去完成时连用)
- The reason why he went into a convenient store was that he wanted to get something icy to drink.
- ...sweated heavily; Jimmy asked the shopkeeper politely
- ...replied Jimmy in a cheerful manner.
学生:'''
        messages = [
            {'role': 'system',
             'content': system_message_4},
            {'role': 'user',
             'content': f"请记住你的设定【{query}】"},
        ]
        response3 = get_completion_from_messages(messages)
        my_bar.progress(100, text="打分完成")

        # Step 4: render the grading report and the material suggestions.
        # NOTE(review): each st.markdown call is rendered as its own element,
        # so this <div> probably does not wrap the output below — confirm in
        # the browser.
        st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
        with st.container():
            st.markdown(response)
        st.divider()
        with st.container():
            st.markdown("""
<h4 style='color: black;'>
素材建议
</h4>
""",
                unsafe_allow_html=True
            )
            st.write(response3)
        # Remove the progress bar once everything is displayed.
        my_bar.empty()
        st.markdown('</div>', unsafe_allow_html=True)