# Scraped page header (not part of the program): "Spaces: Running"
import re

import pandas as pd
# Patterns are compiled once at import time so the per-line loop in
# parse_log_file does not repeat the pattern lookup for every entry.
_TIMESTAMP_RE = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})')
_LOG_LEVEL_RE = re.compile(r'- (\w+) -')
_POST_URL_RE = re.compile(r'HTTP Request: POST (.*?)"')
_STATUS_CODE_RE = re.compile(r'"HTTP/1\.1 (\d{3})')
_H1_CODE_RE = re.compile(r'<h1>(\d+)</h1>')

# Column order of the returned DataFrame; passed explicitly so an input with
# no matching entries still yields a frame with the expected columns.
_LOG_COLUMNS = ['Timestamp', 'Log Level', 'URL', 'Status Code', 'H1 Error Code']


# Function to parse the log file
def parse_log_file(file_path):
    """Extract HTTP 429 POST-request entries from a log file.

    A line is recorded only when all of the following hold:

    * it starts with a ``YYYY-MM-DD HH:MM:SS,mmm`` timestamp;
    * it contains a ``- WORD -`` token (the first such token on the line is
      reported as the log level);
    * it contains ``HTTP Request: POST <url> "HTTP/1.1 <status>``;
    * the status is ``429``.

    NOTE(review): the original indentation was lost, so whether rows were
    meant to be recorded for every status or only for 429 cannot be read
    from the source. This keeps the record step inside the 429 branch, the
    only reading in which ``h1_error_code`` is always defined -- confirm.

    Args:
        file_path: Path of the text log file to read.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``Timestamp``,
        ``Log Level``, ``URL``, ``Status Code`` and ``H1 Error Code``.
        ``H1 Error Code`` is the number inside an ``<h1>...</h1>`` tag on the
        same line, or ``'N/A'`` when no such tag is present. The frame has
        these columns even when no line matches.

    Raises:
        OSError: If the file cannot be opened.
    """
    log_data = []
    with open(file_path, 'r') as file:
        # Stream the file line by line instead of loading it all at once.
        for entry in file:
            timestamp_match = _TIMESTAMP_RE.match(entry)
            if not timestamp_match:
                continue

            level_match = _LOG_LEVEL_RE.search(entry)
            url_match = _POST_URL_RE.search(entry)
            status_match = _STATUS_CODE_RE.search(entry)
            if not (level_match and url_match and status_match):
                continue

            status_code = status_match.group(1)
            if status_code != '429':
                continue

            h1_match = _H1_CODE_RE.search(entry)
            log_data.append({
                'Timestamp': timestamp_match.group(1),
                'Log Level': level_match.group(1),
                # The lazy capture runs up to the opening quote of the
                # status part, so it includes the separating whitespace.
                'URL': url_match.group(1).strip(),
                'Status Code': status_code,
                'H1 Error Code': h1_match.group(1) if h1_match else 'N/A',
            })
    return pd.DataFrame(log_data, columns=_LOG_COLUMNS)
# Parse the provided log file
file_path = 'paste.txt'
parsed_log_df = parse_log_file(file_path)

# Display the DataFrame
# NOTE(review): a bare expression is only rendered by a notebook or REPL;
# when this file runs as a plain script nothing is shown -- confirm the
# intended runtime before replacing it with an explicit print/display call.
parsed_log_df