code_reviewer / app.py
mzishan's picture
Update app.py
3116e29 verified
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import ast
import astor
import traceback
# Hugging Face checkpoint used for the (placeholder) model analysis.
# NOTE(review): this looks like a base encoder checkpoint, so the
# sequence-classification head is presumably freshly initialised and its
# predictions are not meaningful until fine-tuned -- confirm before relying
# on the reported class ids.
MODEL_NAME = "microsoft/codebert-base"

# Streamlit re-executes the whole script on every widget interaction, so the
# expensive checkpoint load is cached across reruns when the installed
# Streamlit provides ``st.cache_resource``.  On versions without it the
# decorator degrades to a no-op, i.e. the model is loaded on every run.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and the sequence-classification model for *model_name*.

    Returns:
        A ``(tokenizer, model)`` tuple created with ``from_pretrained``.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# Module-level handles used by ``analyze_code``.
tokenizer, model = _load_tokenizer_and_model(MODEL_NAME)
# Helper function to analyze code
def analyze_code(code):
    """Classify *code* with the module-level model, one prediction per chunk.

    The input is split into windows of at most 512 *tokens* (the limit passed
    to the tokenizer) so that no part of the input is silently truncated away.
    Each window is classified independently.

    Args:
        code: Source text to analyse.

    Returns:
        A list with the arg-max class index (int) of every token window, in
        input order.
    """
    # Local import: torch is only needed here and is already required by the
    # PyTorch model loaded at module level.
    import torch

    max_length = 512

    # ``return_overflowing_tokens`` makes the tokenizer emit every window of
    # the input instead of discarding everything past ``max_length``.
    encoding = tokenizer(
        code,
        truncation=True,
        max_length=max_length,
        return_overflowing_tokens=True,
    )
    id_rows = encoding["input_ids"]
    mask_rows = encoding["attention_mask"]
    # NOTE(review): a fast tokenizer returns one row per window; a slow
    # tokenizer presumably returns a single flat row -- normalise to rows.
    if id_rows and isinstance(id_rows[0], int):
        id_rows, mask_rows = [id_rows], [mask_rows]

    results = []
    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        for input_ids, attention_mask in zip(id_rows, mask_rows):
            outputs = model(
                input_ids=torch.tensor([input_ids]),
                attention_mask=torch.tensor([attention_mask]),
            )
            results.append(outputs.logits.argmax(dim=1).item())
    return results
# Function to detect and fix bugs, including logical errors
def _collect_bound_names(nodes):
    """Return ``(names, has_star_import)`` for every name bound in *nodes*.

    The scan is scope-insensitive: a name bound anywhere (assignment, def,
    class, import, argument, ``except ... as``, match capture) counts as
    defined everywhere.
    """
    names = set()
    has_star_import = False
    for node in nodes:
        kind = type(node).__name__
        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                names.add(node.id)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names.add(node.name)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*":
                    has_star_import = True
                else:
                    # ``import a.b`` binds ``a``; ``import a.b as c`` binds ``c``.
                    names.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.arg):
            names.add(node.arg)
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                names.add(node.name)
        elif kind in ("MatchAs", "MatchStar"):
            if node.name:
                names.add(node.name)
        elif kind == "MatchMapping":
            if node.rest:
                names.add(node.rest)
    return names, has_star_import


def _is_suspect_constant_binop(op, left, right):
    """Return True when ``left <op> right`` mixes constant types invalidly."""
    if type(left) is type(right):
        return False
    numeric = (int, float, complex)  # bool is a subclass of int
    if isinstance(left, numeric) and isinstance(right, numeric):
        return False
    if isinstance(op, ast.Mult):
        # Sequence repetition: "ab" * 3 and 3 * "ab" are valid.
        if isinstance(left, (str, bytes)) and isinstance(right, int):
            return False
        if isinstance(left, int) and isinstance(right, (str, bytes)):
            return False
    if isinstance(op, ast.Mod) and isinstance(left, (str, bytes)):
        # printf-style formatting: "%d items" % 3.
        return False
    return True


def _fix_insertion_index(tree):
    """Return the index in ``tree.body`` where generated statements may go.

    Skips a module docstring and any ``from __future__`` imports, which must
    stay at the top of the module.
    """
    body = tree.body
    index = 0
    if (
        body
        and isinstance(body[0], ast.Expr)
        and isinstance(body[0].value, ast.Constant)
        and isinstance(body[0].value.value, str)
    ):
        index = 1
    while (
        index < len(body)
        and isinstance(body[index], ast.ImportFrom)
        and body[index].module == "__future__"
    ):
        index += 1
    return index


def _tree_to_source(tree):
    """Render *tree* back to source text.

    Uses the standard library's ``ast.unparse`` when the interpreter provides
    it and falls back to ``astor.to_source`` otherwise.
    """
    if hasattr(ast, "unparse"):
        return ast.unparse(tree)
    return astor.to_source(tree)


def detect_and_fix_bugs(code):
    """Statically inspect *code* and return ``(suggestions, fixed_code)``.

    Checks performed (all scope-insensitive heuristics on the parsed AST):

    * names that are read but never bound, split into undefined variables
      (auto-fixed by inserting ``name = None``), likely missing imports
      (name used as ``name.attr``) and undefined functions (name is called);
      builtins, imports, defs, classes and arguments count as defined, and
      the check is skipped when a ``from x import *`` is present;
    * variables assigned but never read;
    * binary operations on two constants of incompatible types;
    * ``if`` statements whose test is the literal ``True`` or ``False``;
    * dictionary literals with duplicate constant keys.

    Args:
        code: Python source text to analyse.

    Returns:
        A tuple ``(suggestions, fixed_code)``.  ``suggestions`` is a list of
        human-readable messages.  ``fixed_code`` is the original text when no
        fix was inserted (so comments and formatting survive), otherwise the
        source regenerated from the modified AST.  If analysis fails (for
        example on a syntax error) the traceback is appended to
        ``suggestions`` and the original text is returned.
    """
    import builtins

    suggestions = []
    fixed_code = code
    try:
        tree = ast.parse(code)
        # Walk once; every check below reuses this list.
        nodes = list(ast.walk(tree))

        bound, has_star_import = _collect_bound_names(nodes)
        known = bound | set(dir(builtins)) | {"__file__"}

        loads = [
            n for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Load)
        ]
        # Report in source order rather than ast.walk's breadth-first order.
        loads.sort(key=lambda n: (n.lineno, n.col_offset))

        call_targets = {
            n.func.id for n in nodes
            if isinstance(n, ast.Call) and isinstance(n.func, ast.Name)
        }
        attribute_bases = {
            n.value.id for n in nodes
            if isinstance(n, ast.Attribute) and isinstance(n.value, ast.Name)
        }

        # A star import can bind arbitrary names, so nothing can be proven
        # undefined in that case.
        undefined = []
        if not has_star_import:
            undefined = list(
                dict.fromkeys(n.id for n in loads if n.id not in known)
            )
        undefined_functions = [n for n in undefined if n in call_targets]
        missing_modules = [
            n for n in undefined
            if n not in call_targets and n in attribute_bases
        ]
        undefined_variables = [
            n for n in undefined
            if n not in call_targets and n not in attribute_bases
        ]

        # Detect undefined variable usage (one report and one fix per name).
        for name in undefined_variables:
            suggestions.append(f"Variable '{name}' is used but not defined.")
        fixes = [
            ast.Assign(
                targets=[ast.Name(id=name, ctx=ast.Store())],
                value=ast.Constant(value=None),
            )
            for name in undefined_variables
        ]
        for name in undefined_variables:
            suggestions.append(f"Added a definition for variable '{name}'.")

        # Detect unused variables (sorted for a deterministic report).
        assigned = {
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Store)
        }
        used = {n.id for n in loads}
        for name in sorted(assigned - used):
            suggestions.append(f"Variable '{name}' is defined but never used.")

        # Detect missing imports: unbound names used as ``name.attribute``.
        for name in missing_modules:
            suggestions.append(f"Missing import for '{name}'.")

        # Detect invalid function calls: unbound names that are called.
        for name in undefined_functions:
            suggestions.append(f"Function '{name}' is called but not defined.")

        # Detect type mismatches (example: adding string to integer)
        for node in nodes:
            if not isinstance(node, ast.BinOp):
                continue
            left, right = node.left, node.right
            if isinstance(left, ast.Constant) and isinstance(right, ast.Constant):
                if _is_suspect_constant_binop(node.op, left.value, right.value):
                    suggestions.append(
                        f"Type mismatch in operation: '{left.value}' ({type(left.value).__name__}) and '{right.value}' ({type(right.value).__name__})."
                    )

        # Detect logical errors (example: unreachable code), at any nesting.
        constant_ifs = [
            n for n in nodes
            if isinstance(n, ast.If) and isinstance(n.test, ast.Constant)
        ]
        constant_ifs.sort(key=lambda n: (n.lineno, n.col_offset))
        for node in constant_ifs:
            if node.test.value is False:
                suggestions.append(f"Unreachable code detected at line {node.lineno}.")
            elif node.test.value is True:
                suggestions.append(f"Redundant condition always True at line {node.lineno}.")

        # Detect duplicate keys in dictionaries
        for node in nodes:
            if isinstance(node, ast.Dict):
                keys = [k.value for k in node.keys if isinstance(k, ast.Constant)]
                if len(keys) != len(set(keys)):
                    suggestions.append("Duplicate keys detected in dictionary.")

        # Regenerate source only when the tree was actually modified, so that
        # untouched input keeps its comments and formatting.
        if fixes:
            index = _fix_insertion_index(tree)
            tree.body[index:index] = fixes
            ast.fix_missing_locations(tree)
            fixed_code = _tree_to_source(tree)
    except Exception:
        # Top-level boundary: report the failure to the user instead of
        # crashing the app; the original code is returned unchanged.
        suggestions.append(f"Error analyzing code: {traceback.format_exc()}")
    return suggestions, fixed_code
# Streamlit app UI: collect code from an upload or a text box, run the static
# checks and the model, and render the results.
st.title("Code Quality, Bug Detection, and Auto-Correction Tool")
st.markdown("Analyze your code for syntax issues, quality, bugs, logical errors, and get suggested corrections.")

# File uploader
uploaded_file = st.file_uploader("Upload a Python code file", type=["py"])

# Code snippet input
code_snippet = st.text_area("Or paste your code snippet below:")

if st.button("Analyze and Fix Code"):
    # An uploaded file takes precedence over the pasted snippet.
    if uploaded_file is not None:
        try:
            # "utf-8-sig" also strips a leading byte-order mark, which would
            # otherwise reach ast.parse as an invalid U+FEFF character.
            code = uploaded_file.read().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text.")
            st.stop()
    elif code_snippet.strip():
        code = code_snippet
    else:
        st.error("Please upload a file or paste code to analyze.")
        st.stop()

    # Perform code analysis and bug fixing
    st.subheader("Analysis Results")
    st.write("**Code Quality and Bug Suggestions:**")
    suggestions, fixed_code = detect_and_fix_bugs(code)
    if suggestions:
        for i, suggestion in enumerate(suggestions, 1):
            st.write(f"{i}. {suggestion}")
    else:
        st.write("No major issues detected. Your code looks good!")

    # Display corrected code
    st.subheader("Corrected Code:")
    st.code(fixed_code, language="python")

    # Simulated CodeBERT analysis (placeholder)
    st.write("**Model Analysis:**")
    model_results = analyze_code(code)
    for idx, result in enumerate(model_results, 1):
        st.write(f"Chunk {idx} classification result: {result}")

# Footer, rendered on every run.
st.markdown("---")
st.markdown("*Powered by Hugging Face and Streamlit*")