# Hugging Face Space (status when captured: Sleeping)
import ast
import builtins
import importlib.util
import traceback

import astor
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Initialize Hugging Face model and tokenizer
MODEL_NAME = "microsoft/codebert-base"

# Streamlit re-executes this script on every widget interaction, so the
# (slow) model load is cached across reruns. `st.cache_resource` is not
# available on every Streamlit release; when it is missing we fall back to
# an identity decorator, i.e. the previous load-on-every-rerun behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and sequence-classification model for *model_name*.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): "microsoft/codebert-base" appears to be a base checkpoint
    # without a fine-tuned classification head, so predicted labels are
    # probably not meaningful until the model is fine-tuned - confirm.
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# Load the pre-trained CodeBERT model for understanding code
tokenizer, model = _load_tokenizer_and_model(MODEL_NAME)
# Helper function to analyze code
def analyze_code(code, max_length=512):
    """Classify *code* with the module-level model, one result per token window.

    The source is tokenized once and the token ids are cut into consecutive
    windows that fit the model's input size. (Splitting by *lines* and then
    truncating each piece to ``max_length`` tokens silently discards
    everything past the first ``max_length`` tokens of every piece.)

    Args:
        code: Source text to analyze.
        max_length: Maximum number of tokens per model input, including the
            special tokens the tokenizer adds around each window.

    Returns:
        A list with the arg-max class index of each window, in order. Empty
        input still yields a single result, for the empty window.
    """
    # Leave room for the special tokens that wrap every window.
    window_size = max(1, max_length - tokenizer.num_special_tokens_to_add(pair=False))
    # The tokenizer may log a "sequence longer than maximum" warning here;
    # that is expected because windowing happens after tokenization.
    token_ids = tokenizer(code, add_special_tokens=False)["input_ids"]

    results = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # max(..., 1) keeps one (empty) window for empty input.
        for start in range(0, max(len(token_ids), 1), window_size):
            window = token_ids[start:start + window_size]
            input_ids = torch.tensor(
                [tokenizer.build_inputs_with_special_tokens(window)],
                dtype=torch.long,
            )
            outputs = model(
                input_ids=input_ids,
                attention_mask=torch.ones_like(input_ids),
            )
            results.append(outputs.logits.argmax(dim=1).item())
    return results
# Function to detect and fix bugs, including logical errors
_NUMERIC_TYPES = (int, float, complex)  # bool is an int subclass, so it is covered
_REPEATABLE_TYPES = (str, bytes)
# Node classes (looked up by name because older Pythons lack them) whose
# ``name`` attribute binds an identifier.
_NAME_BINDING_NODES = ("MatchAs", "MatchStar", "TypeVar", "ParamSpec", "TypeVarTuple")


def _collect_bound_names(nodes):
    """Return ``(bound_names, has_star_import)`` for the given AST nodes.

    Scopes are deliberately flattened: a name bound anywhere in the module
    (assignment, parameter, def/class, import, ``except ... as``, global /
    nonlocal, pattern capture) or provided by ``builtins`` counts as defined.
    """
    bound = set(dir(builtins))
    bound.add("__file__")
    has_star_import = False
    for node in nodes:
        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                bound.add(node.id)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            bound.add(node.name)
        elif isinstance(node, ast.arg):
            bound.add(node.arg)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*":
                    has_star_import = True
                else:
                    # ``import a.b`` binds ``a``; ``import a.b as c`` binds ``c``.
                    bound.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                bound.add(node.name)
        elif isinstance(node, (ast.Global, ast.Nonlocal)):
            bound.update(node.names)
        elif type(node).__name__ in _NAME_BINDING_NODES:
            if node.name:
                bound.add(node.name)
        elif type(node).__name__ == "MatchMapping":
            if node.rest:
                bound.add(node.rest)
    return bound, has_star_import


def _undefined_names(nodes, bound):
    """Return each name that is read but never bound, once, in source order."""
    first_use = {}
    for node in nodes:
        if (
            isinstance(node, ast.Name)
            and isinstance(node.ctx, ast.Load)
            and node.id not in bound
        ):
            position = (node.lineno, node.col_offset)
            if node.id not in first_use or position < first_use[node.id]:
                first_use[node.id] = position
    return sorted(first_use, key=first_use.get)


def _is_importable(name):
    """Return True if a top-level module called *name* can be located."""
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError, AttributeError):
        return False


def _constants_mismatch(op, left, right):
    """Return True if ``left <op> right`` mixes incompatible literal types."""
    if type(left) is type(right):
        return False
    if isinstance(left, _NUMERIC_TYPES) and isinstance(right, _NUMERIC_TYPES):
        return False  # int / float / complex / bool mix freely
    if isinstance(op, ast.Mult):
        # Sequence repetition: "ab" * 3 and 3 * "ab" are valid.
        if isinstance(left, _REPEATABLE_TYPES) and isinstance(right, int):
            return False
        if isinstance(left, int) and isinstance(right, _REPEATABLE_TYPES):
            return False
    if isinstance(op, ast.Mod) and isinstance(left, _REPEATABLE_TYPES):
        return False  # printf-style formatting: "%d" % 3
    return True


def _prologue_length(body):
    """Return the index just past the module docstring and ``__future__`` imports.

    New statements must go after these, otherwise the docstring stops being
    a docstring and ``from __future__`` imports become a SyntaxError.
    """
    index = 0
    if (
        body
        and isinstance(body[0], ast.Expr)
        and isinstance(body[0].value, ast.Constant)
        and isinstance(body[0].value.value, str)
    ):
        index = 1
    while (
        index < len(body)
        and isinstance(body[index], ast.ImportFrom)
        and body[index].module == "__future__"
    ):
        index += 1
    return index


def _render_source(tree):
    """Turn *tree* back into source text ending with a newline."""
    # Prefer the stdlib unparser: it follows the running interpreter's
    # grammar. Fall back to astor where ``ast.unparse`` does not exist.
    unparse = getattr(ast, "unparse", None)
    if unparse is None:
        return astor.to_source(tree)
    return unparse(tree) + "\n"


def detect_and_fix_bugs(code):
    """Statically inspect Python *code* and patch what can be patched safely.

    Checks performed: names used but never defined, variables assigned but
    never read, literal operands of incompatible types, ``if`` statements
    with a constant ``True``/``False`` test, and duplicate literal dict keys.

    Undefined names are reported once each and handled by kind:
    a name that is called is only reported; a name used as ``name.attr``
    that matches an importable module gets an ``import``; anything else
    gets a ``name = None`` placeholder.

    Args:
        code: Python source text.

    Returns:
        ``(suggestions, fixed_code)``. ``suggestions`` is a list of messages.
        ``fixed_code`` is the regenerated source when a fix was inserted
        (comments are lost in that case), otherwise *code* unchanged. Any
        analysis failure is reported as a suggestion and *code* is returned
        as-is; this function does not raise.
    """
    suggestions = []
    fixed_code = code
    try:
        tree = ast.parse(code)
        nodes = list(ast.walk(tree))
        bound, has_star_import = _collect_bound_names(nodes)

        called = {
            n.func.id for n in nodes
            if isinstance(n, ast.Call) and isinstance(n.func, ast.Name)
        }
        attribute_bases = {
            n.value.id for n in nodes
            if isinstance(n, ast.Attribute) and isinstance(n.value, ast.Name)
        }

        # Detect undefined names (skipped when a star import hides what is bound)
        new_statements = []
        fix_notes = []
        if not has_star_import:
            for name in _undefined_names(nodes, bound):
                if name in called:
                    # A placeholder would only trade NameError for TypeError.
                    suggestions.append(f"Function '{name}' is called but not defined.")
                elif name in attribute_bases and _is_importable(name):
                    suggestions.append(f"Missing import for '{name}'.")
                    new_statements.append(
                        ast.Import(names=[ast.alias(name=name, asname=None)])
                    )
                    fix_notes.append(f"Added import for '{name}'.")
                else:
                    suggestions.append(f"Variable '{name}' is used but not defined.")
                    new_statements.append(
                        ast.Assign(
                            targets=[ast.Name(id=name, ctx=ast.Store())],
                            value=ast.Constant(value=None),
                        )
                    )
                    fix_notes.append(f"Added a definition for variable '{name}'.")
        suggestions.extend(fix_notes)

        # Detect unused variables (underscore-prefixed names are conventionally
        # throwaway or special, so they are not reported)
        stored = {
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Store)
        }
        loaded = {
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Load)
        }
        for name in sorted(stored - loaded):
            if not name.startswith("_"):
                suggestions.append(f"Variable '{name}' is defined but never used.")

        # Detect type mismatches (example: adding string to integer)
        for node in nodes:
            if (
                isinstance(node, ast.BinOp)
                and isinstance(node.left, ast.Constant)
                and isinstance(node.right, ast.Constant)
            ):
                left, right = node.left.value, node.right.value
                if _constants_mismatch(node.op, left, right):
                    suggestions.append(
                        f"Type mismatch in operation: '{left}' ({type(left).__name__}) "
                        f"and '{right}' ({type(right).__name__})."
                    )

        # Detect logical errors (example: unreachable code), at any nesting depth
        conditionals = sorted(
            (n for n in nodes if isinstance(n, ast.If)),
            key=lambda n: (n.lineno, n.col_offset),
        )
        for node in conditionals:
            if isinstance(node.test, ast.Constant):
                if node.test.value is False:
                    suggestions.append(f"Unreachable code detected at line {node.lineno}.")
                elif node.test.value is True:
                    suggestions.append(f"Redundant condition always True at line {node.lineno}.")

        # Detect duplicate keys in dictionaries
        for node in nodes:
            if isinstance(node, ast.Dict):
                keys = [k.value for k in node.keys if isinstance(k, ast.Constant)]
                if len(keys) != len(set(keys)):
                    suggestions.append("Duplicate keys detected in dictionary.")

        # Regenerate the source only when something was actually inserted, so
        # untouched input keeps its comments and formatting.
        if new_statements:
            index = _prologue_length(tree.body)
            tree.body[index:index] = new_statements
            ast.fix_missing_locations(tree)
            fixed_code = _render_source(tree)
    except SyntaxError as exc:
        suggestions.append(f"Error analyzing code: {exc.msg} (line {exc.lineno})")
    except Exception:
        # UI boundary: report the failure instead of crashing the app.
        suggestions.append(f"Error analyzing code: {traceback.format_exc()}")
    return suggestions, fixed_code
# Streamlit app UI
# Flow: take code from an uploaded .py file (preferred) or the text area,
# run the static checks, show suggestions and the patched code, then show
# the per-window model classification.
st.title("Code Quality, Bug Detection, and Auto-Correction Tool")
st.markdown("Analyze your code for syntax issues, quality, bugs, logical errors, and get suggested corrections.")

# File uploader
uploaded_file = st.file_uploader("Upload a Python code file", type=["py"])

# Code snippet input
code_snippet = st.text_area("Or paste your code snippet below:")

if st.button("Analyze and Fix Code"):
    if uploaded_file is not None:
        try:
            # getvalue() returns the whole buffer regardless of the current
            # read position; "utf-8-sig" drops a leading BOM, which would
            # otherwise make ast.parse reject the file.
            code = uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text.")
            st.stop()
    elif code_snippet.strip():
        code = code_snippet
    else:
        st.error("Please upload a file or paste code to analyze.")
        st.stop()

    # Perform code analysis and bug fixing
    st.subheader("Analysis Results")
    st.write("**Code Quality and Bug Suggestions:**")
    suggestions, fixed_code = detect_and_fix_bugs(code)
    if suggestions:
        for i, suggestion in enumerate(suggestions, 1):
            st.write(f"{i}. {suggestion}")
    else:
        st.write("No major issues detected. Your code looks good!")

    # Display corrected code
    st.subheader("Corrected Code:")
    st.code(fixed_code, language="python")

    # Simulated CodeBERT analysis (placeholder)
    st.write("**Model Analysis:**")
    model_results = analyze_code(code)
    for idx, result in enumerate(model_results, 1):
        st.write(f"Chunk {idx} classification result: {result}")

st.markdown("---")
st.markdown("*Powered by Hugging Face and Streamlit*")