Spaces:

xhiroga
/

llm-exercise-report

Sleeping

File size: 5,409 Bytes

9ec8a57

import json

import google.generativeai as genai


PREREQUISITE_PROMPT = """\
あなたは採点者です。

問題, 採点基準, 回答 が与えられます。
回答を1,2,3,4,5の5段階で採点し、数字のみを出力してください。

# 採点基準
基本的な採点基準
- 1点: 誤っている、 指示に従えていない
- 2点: 誤っているが、方向性は合っている
- 3点: 部分的に誤っている、 部分的に合っている
- 4点: 合っている
- 5点: 役に立つ

基本的な減点項目
- 不自然な日本語: -1点
- 部分的に事実と異なる内容を述べている: -1点
"""


def evaluation_prompt(
    input: str, output: str, eval_aspect: str | None, target: str | None
) -> str:
    return f"""\
回答を1,2,3,4,5の5段階で採点し、数字のみを出力してください。

# 問題: {input}

{f"# 正解例: {target}" if target is not None else ""}

{f"# 採点基準: {eval_aspect}" if eval_aspect is not None else ""}

# 回答: {output}
"""


def evaluate(results: list[dict], api_key=str, batch_size: int = 10) -> list[dict]:
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel("gemini-1.5-pro-latest")

    evaluations = []
    for i in range(0, len(results), batch_size):
        batch_results = results[i : i + batch_size]

        prompts = [
            evaluation_prompt(
                result["input"],
                result["output"],
                result.get("eval_aspect"),
                result.get("target"),
            )
            for result in batch_results
        ]

        response = model.generate_content(
            [PREREQUISITE_PROMPT] + prompts,
            generation_config=genai.GenerationConfig(
                response_mime_type="application/json", response_schema=list[int]
            ),
        )
        scores = json.loads(response.parts[0].text)

        for result, score in zip(batch_results, scores):
            evaluations.append(
                {
                    **result,
                    "score": score,
                }
            )

    return evaluations


def report(tasks: list[dict]) -> str:
    return (
        """\
<!DOCTYPE html>
<html lang="ja">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>レポート</title>
    <style>
        body {
            background-color: #f8f9fa;
        }
        .container {
            width: 80%; /* 可変幅 */
            margin: 20px auto;
            background-color: #ffffff;
            border-radius: 8px;
        }
        .divider {
            position: relative;
            padding: 16px 0;
            align-items: center;
            justify-content: center;
        }
        .divider .line {
            height: 1px;
            background-color: #ddd;
        }
        .divider .taskId {
            position: absolute;
            margin: -8px;
            left: 50%;
            transform: translateX(-50%);
            padding: 0 10px;
            font-size: 14px;
            font-weight: 900;
            text-align: center;
            border: 1px solid #ddd;
            border-radius: 9999px;
            background-color: #ffffff;
            white-space: nowrap;
        }
        .message {
            padding: 8px;
        }
        .content {
            font-size: 14px;
            font-weight: 400;
        }
        .from {
            font-size: 14px;
            font-weight: 900;
        }
    </style>
</head>
<body>
    <div class="container" id="container"></div>
    <script>
        const messages = """
        + str(tasks)
        + """;

        // taskId: number
        const createDivider = (taskId) => {
            const divider = document.createElement('div');
            divider.classList.add('divider');

            const line = document.createElement('div');
            line.classList.add('line');

            const taskIdLabel = document.createElement('div');
            taskIdLabel.classList.add('taskId');
            taskIdLabel.textContent = `Task ${taskId}`;

            divider.appendChild(line);
            divider.appendChild(taskIdLabel);

            return divider;
        };

        // task: HTMLDivElement, from: 'input' | 'output' | str, text: string
        // return: HTMLDivElement
        const createMessage = (text, name) => {
            const message = document.createElement('div');
            message.classList.add('message');

            const from = document.createElement('div');
            from.classList.add('from');
            from.textContent = name;

            const content = document.createElement('div');
            content.classList.add('content');
            content.textContent = text;

            message.appendChild(from);
            message.appendChild(content);

            return message;
        };

        const container = document.getElementById('container');

        messages.forEach((message) => {
            const task = document.createElement('div');
            task.classList.add('task');

            task.appendChild(createDivider(message.task_id));
            task.appendChild(createMessage(message.input, 'input'));
            task.appendChild(createMessage(message.output, 'output'));

            container.appendChild(task);
        });
    </script>
</body>
</html>
"""
    )