|
import os |
|
from datetime import datetime |
|
import random |
|
from typing import List |
|
import gradio as gr |
|
from datasets import load_dataset, Dataset, DatasetDict |
|
from huggingface_hub import whoami, InferenceClient |
|
import black |
|
|
|
|
|
# Inference settings come from the environment. HF_API_URL is passed as the
# `model` argument of the chat-completion call made in check_code.
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_API_URL = os.environ.get("HF_API_URL", "Qwen/Qwen2.5-Coder-32B-Instruct")

# One shared client, created at import time and reused for every grading call.
client = InferenceClient(api_key=HF_TOKEN)
|
|
|
|
|
# Dataset that provides the quiz questions.
EXAM_DATASET_ID = "agents-course/smolagents-quiz-data"

# Cap on the number of questions served; a value of 0 disables the cap
# (see the truthiness check where quiz_data is truncated).
EXAM_MAX_QUESTIONS = int(os.environ.get("EXAM_MAX_QUESTIONS", 5))

# Fraction of correct answers (0.0 - 1.0) required to pass and submit.
EXAM_PASSING_SCORE = float(os.environ.get("EXAM_PASSING_SCORE", 0.8))
|
|
|
|
|
|
|
# Fetch the quiz questions at import time; "force_redownload" bypasses any
# locally cached copy of the dataset.
ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")

# Whether the dataset declares an "image" column at all.
HAS_IMAGE_FEATURE = "image" in ds.features

# Materialise the rows, then drop everything past the configured cap
# (a falsy cap, i.e. 0, keeps every question).
quiz_data = list(ds)
if EXAM_MAX_QUESTIONS:
    del quiz_data[EXAM_MAX_QUESTIONS:]
|
|
|
|
|
def format_python_code(code: str) -> str:
    """Return ``code`` reformatted by black, or unchanged if formatting fails.

    Any exception raised while formatting is reported to the user as a Gradio
    warning and the original text is handed back, so callers always get a
    string.
    """
    try:
        formatted = black.format_str(code, mode=black.Mode())
    except Exception as err:
        # Best effort only: unformattable input must not abort grading.
        gr.Warning(f"Code formatting failed: {str(err)}")
        return code
    return formatted
|
|
|
|
|
def check_code(
    user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
):
    """Ask the LLM whether the user's code solves the challenge.

    Both the submission and the reference solution are formatted with
    ``format_python_code`` and embedded in a grading prompt together with the
    challenge text and the numbered assessment criteria. The verdict is read
    from the start of the model's reply.

    Args:
        user_code: Code submitted by the user.
        solution: Reference solution for the challenge.
        challenge: Challenge text shown to the user.
        assessment_criteria: Criteria the model is asked to check; rendered
            as a numbered list in the prompt.

    Returns:
        True if the reply starts with "CORRECT" (case-insensitive), False
        otherwise. If the LLM request fails for any reason, the result is
        instead True only when the formatted submission equals the formatted
        reference solution.
    """
    formatted_user_code = format_python_code(user_code)
    formatted_solution = format_python_code(solution)

    assessment_criteria_str = "\n".join(
        f"{number}. {criterion}"
        for number, criterion in enumerate(assessment_criteria, start=1)
    )

    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{formatted_solution}

Student's Solution:
{formatted_user_code}

Assessment Criteria:
{assessment_criteria_str}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
4. Does it meet the assessment criteria?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]

    # Only the remote call and the extraction of its reply live in the try
    # block, so the handler never reads a variable that was not yet assigned.
    try:
        completion = client.chat.completions.create(
            model=HF_API_URL,
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content.strip()
    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")

        # The LLM is unavailable: fall back to an exact comparison of the
        # black-formatted submission and reference solution.
        is_correct = formatted_user_code.strip() == formatted_solution.strip()
        status = (
            "✅ Correct!" if is_correct else f"❌ Incorrect!\n\nError: {str(e)}"
        )
        gr.Info(f"{status} (Fallback comparison)")
        return is_correct

    # "INCORRECT" does not start with "CORRECT", so this test is unambiguous.
    is_correct = response.upper().startswith("CORRECT")

    # Drop the leading verdict word and any punctuation after it, keeping the
    # model's explanation in its original casing. If the reply does not start
    # with a verdict word, show it in full.
    explanation = response
    for verdict_word in ("INCORRECT", "CORRECT"):
        if response[: len(verdict_word)].upper() == verdict_word:
            explanation = response[len(verdict_word):].lstrip(" \t\r\n.:,;!-")
            break

    status = "✅ Correct!" if is_correct else "❌ Incorrect!"
    gr.Info(f"{status}\n\n{explanation}")

    return is_correct
|
|
|
|
|
def on_user_logged_in(token: gr.OAuthToken | None):
    """Reset the quiz UI according to whether a login token is present.

    With a token the login button is hidden and the Start button shown;
    without one it is the other way round. In both cases Next and Submit stay
    hidden and the question text, code editor, status line and image are
    cleared.

    Returns:
        An 8-tuple of updates, in the order of the ``outputs`` list wired to
        the login button: login button, start button, next button, submit
        button, question text, code input, status text, question image.
    """
    logged_in = token is not None
    return (
        gr.update(visible=not logged_in),  # login button
        gr.update(visible=logged_in),  # start button
        gr.update(visible=False),  # next button
        gr.update(visible=False),  # submit button
        "",  # question text
        gr.update(value="", visible=False),  # code input
        "",  # status text
        gr.update(value="", visible=False),  # question image
    )
|
|
|
|
|
def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Grade the collected answers and, if passing, upload them to the Hub.

    Args:
        user_answers: Answer records; each must carry an ``"is_correct"`` key.
        token: OAuth token of the logged-in user, or None when logged out.
        signed_in_message: Only printed for debugging.

    Returns:
        A status message: either the reason nothing was submitted (no
        answers, not logged in, score below ``EXAM_PASSING_SCORE``) or a
        confirmation containing the final grade.
    """
    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:
        message = "No answers to submit!"
        gr.Warning(message)
        return message

    if token is None:
        message = "Please log in to Hugging Face before pushing!"
        gr.Warning(message)
        return message

    # Fraction of answers marked correct.
    total_questions = len(user_answers)
    correct_count = sum(bool(answer["is_correct"]) for answer in user_answers)
    if total_questions > 0:
        grade = correct_count / total_questions
    else:
        grade = 0

    passing_score = float(EXAM_PASSING_SCORE)
    if grade < passing_score:
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {passing_score:.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {passing_score:.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    username = whoami(token=token.token)["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # One row per answer; the answer's own keys are merged in last.
    shared_fields = {
        "username": username,
        "datetime": submission_time,
        "grade": grade,
    }
    submission_data = [{**shared_fields, **answer} for answer in user_answers]

    # Start from whatever is already stored in the responses repo. Any failure
    # to load it is treated as "nothing stored yet".
    # NOTE(review): this also hides transient load errors - confirm that
    # pushing a DatasetDict holding only this user's split is safe then.
    try:
        hub_ds = load_dataset(repo_id)
        if not isinstance(hub_ds, dict):
            hub_ds = DatasetDict({"default": hub_ds})
    except Exception:
        hub_ds = DatasetDict()

    # Each user's submission is stored as a split named after the user,
    # replacing any earlier submission under the same name.
    # NOTE(review): confirm every Hub username is accepted as a split name.
    hub_ds[username] = Dataset.from_list(submission_data)

    hub_ds.push_to_hub(
        repo_id,
        private=True,
    )

    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
|
|
|
|
|
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Advance the quiz by one step and describe the resulting UI state.

    On start the quiz is reset to the first question with no recorded
    answers. Otherwise, if code was submitted for the current question, it is
    formatted, graded with ``check_code``, recorded, and the index moves on.
    A blank submission leaves the index unchanged, so the same question is
    shown again.

    Args:
        question_idx: Index of the question currently shown.
        user_answers: Answer records collected so far.
        submitted_code: Content of the code editor; may be empty or None.
        is_start: True when triggered by the Start button, False for Next.

    Returns:
        A 10-tuple matching the ``outputs`` wired to the Start/Next buttons:
        question text, code input, status text, question index, answers,
        start button, next button, submit button, final markdown, image.
    """
    if is_start:
        question_idx = 0
        # Drop answers left over from a previous run so a restart cannot
        # accumulate more answers than there are questions.
        user_answers = []
    elif question_idx < len(quiz_data) and (submitted_code or "").strip():
        current_q = quiz_data[question_idx]

        formatted_code = format_python_code(submitted_code)
        is_correct = check_code(
            formatted_code,
            current_q["solution"],
            current_q["challenge"],
            current_q["assessment_criteria"],
        )
        user_answers.append(
            {
                "challenge": current_q["challenge"],
                "submitted_code": formatted_code,
                "correct_solution": current_q["solution"],
                "assessment_criteria": current_q["assessment_criteria"],
                "is_correct": is_correct,
            }
        )
        question_idx += 1

    if question_idx >= len(quiz_data):
        # Quiz finished: show the summary and offer Submit.
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        # Without any answers (e.g. an empty quiz) the grade is 0 rather than
        # a ZeroDivisionError.
        grade = correct_count / len(user_answers) if user_answers else 0.0

        per_question = "".join(
            f"Question {number}: {'✅' if answer['is_correct'] else '❌'}\n"
            for number, answer in enumerate(user_answers, start=1)
        )
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
            f"Your answers:\n\n"
            f"{per_question}"
        )

        return (
            "",
            gr.update(value="", visible=False),
            f"{'✅ Passed!' if grade >= EXAM_PASSING_SCORE else '❌ Did not pass'}",
            question_idx,
            user_answers,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(value=results_text, visible=True),
            gr.update(visible=False),
        )

    # Quiz still running: render the current question.
    q = quiz_data[question_idx]
    challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"

    # Show the image only when the dataset has an image column and this
    # particular question actually provides one.
    show_image = HAS_IMAGE_FEATURE and q.get("image") is not None
    image_update = gr.update(
        value=q.get("image") if show_image else None, visible=show_image
    )

    return (
        challenge_text,
        gr.update(value=q["placeholder"], visible=True),
        "Submit your code solution and click 'Next' to continue.",
        question_idx,
        user_answers,
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        image_update,
    )
|
|
|
|
|
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # State carried between events: current question index and the answers
    # recorded so far.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    # Header and instructions.
    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    # Question (text + optional image) on the left, code editor on the right.
    with gr.Row(variant="panel"):
        with gr.Column():
            question_text = gr.Markdown("")
            question_image = gr.Image(
                label="Question Image",
                visible=bool(HAS_IMAGE_FEATURE),
                type="pil",
            )
        with gr.Column():
            code_input = gr.Code(
                language="python", label="Your Solution", visible=False
            )

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    # Navigation buttons; Next and Submit start hidden.
    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start")
        next_btn = gr.Button("Next ⏭️", visible=False)
        submit_btn = gr.Button("Submit ✅", visible=False)

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    # Order must match the tuple returned by on_user_logged_in.
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            code_input,
            status_text,
            question_image,
        ],
    )

    # Start and Next share one handler and one outputs list; the order must
    # match the tuple returned by handle_quiz. They differ only in the
    # constant is_start flag passed as the last input.
    quiz_outputs = [
        question_text,
        code_input,
        status_text,
        question_idx,
        user_answers,
        start_btn,
        next_btn,
        submit_btn,
        final_markdown,
        question_image,
    ]

    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=quiz_outputs,
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=quiz_outputs,
    )

    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()
|
|