Update llm_handler.py
Browse files- llm_handler.py +21 -13
llm_handler.py
CHANGED
@@ -7,16 +7,19 @@ from dotenv import load_dotenv
|
|
7 |
|
8 |
class DDoSInference:
|
9 |
def __init__(self):
|
|
|
|
|
|
|
10 |
load_dotenv()
|
11 |
self.client = InferenceClient(api_key=os.getenv("HF_TOK_KEY"))
|
12 |
self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"
|
13 |
self.dataset_path = Path("~/.dataset/original.csv").expanduser()
|
14 |
-
self.results_path = Path("~/.dataset/
|
15 |
self.results_path.parent.mkdir(parents=True, exist_ok=True)
|
16 |
|
17 |
def process_dataset(self):
|
18 |
"""
|
19 |
-
|
20 |
"""
|
21 |
if not self.dataset_path.exists():
|
22 |
raise FileNotFoundError("The preprocessed dataset file does not exist. Ensure it is generated using the processor.")
|
@@ -30,6 +33,7 @@ class DDoSInference:
|
|
30 |
raise ValueError(f"Label column '{label_column}' not found in the dataset.")
|
31 |
|
32 |
ddos_data_without_label = ddos_data.drop([label_column], axis=1)
|
|
|
33 |
stats = {
|
34 |
'Max': ddos_data_without_label.max(),
|
35 |
'Min': ddos_data_without_label.min(),
|
@@ -42,26 +46,21 @@ class DDoSInference:
|
|
42 |
know_prompt = self.generate_knowledge_prompt(stats)
|
43 |
|
44 |
# Prepare results DataFrame
|
45 |
-
|
46 |
-
predict_df = pd.read_csv(self.results_path)
|
47 |
-
else:
|
48 |
-
predict_df = pd.DataFrame(columns=["index", "attack", "benign", "original"])
|
49 |
|
50 |
start_index = predict_df.shape[0]
|
51 |
print(f"Starting inference from row {start_index}")
|
52 |
|
53 |
-
# Process
|
54 |
for i in range(start_index, ddos_data.shape[0]):
|
55 |
row_prompt = self.generate_row_prompt(ddos_data.iloc[i])
|
56 |
probabilities = self.infer_row(know_prompt, row_prompt)
|
57 |
-
|
58 |
-
|
59 |
-
else
|
60 |
-
predict_df.loc[i] = [i, "None", "None", "No valid response"]
|
61 |
|
62 |
# Save after each row for resilience
|
63 |
predict_df.to_csv(self.results_path, index=False)
|
64 |
-
|
65 |
print(f"Processed row {i}: {predict_df.loc[i].to_dict()}")
|
66 |
|
67 |
print("Inference complete. Results saved at:", self.results_path)
|
@@ -152,8 +151,17 @@ class DDoSInference:
|
|
152 |
except Exception as e:
|
153 |
return f"Error: Unable to process your request due to {e}."
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
# Example usage
|
156 |
if __name__ == "__main__":
|
157 |
handler = DDoSInference()
|
158 |
handler.process_dataset()
|
159 |
-
print("You can now interact with the model for mitigation steps or download the results.")
|
|
|
7 |
|
8 |
class DDoSInference:
|
9 |
def __init__(self):
    """
    Set up the inference client, the model name, and the dataset/results paths.

    Environment variables are loaded with ``load_dotenv()`` before the
    ``HF_TOK_KEY`` value is read and handed to ``InferenceClient``. The
    directory that will contain the results file is created when missing.
    """
    load_dotenv()
    hf_token = os.getenv("HF_TOK_KEY")
    self.client = InferenceClient(api_key=hf_token)
    self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"

    source_csv = Path("~/.dataset/original.csv")
    self.dataset_path = source_csv.expanduser()

    output_csv = Path("~/.dataset/PROBABILITY_OF_EACH_ROW_DDOS_AND_BENIGN.csv")
    self.results_path = output_csv.expanduser()

    # Make sure the folder for the results file exists before anything is written to it.
    output_dir = self.results_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)
|
19 |
|
20 |
def process_dataset(self):
|
21 |
"""
|
22 |
+
Process the dataset row by row, performing inference using the LLM for each row.
|
23 |
"""
|
24 |
if not self.dataset_path.exists():
|
25 |
raise FileNotFoundError("The preprocessed dataset file does not exist. Ensure it is generated using the processor.")
|
|
|
33 |
raise ValueError(f"Label column '{label_column}' not found in the dataset.")
|
34 |
|
35 |
ddos_data_without_label = ddos_data.drop([label_column], axis=1)
|
36 |
+
|
37 |
stats = {
|
38 |
'Max': ddos_data_without_label.max(),
|
39 |
'Min': ddos_data_without_label.min(),
|
|
|
46 |
know_prompt = self.generate_knowledge_prompt(stats)
|
47 |
|
48 |
# Prepare results DataFrame
|
49 |
+
predict_df = self.load_or_create_results()
|
|
|
|
|
|
|
50 |
|
51 |
start_index = predict_df.shape[0]
|
52 |
print(f"Starting inference from row {start_index}")
|
53 |
|
54 |
+
# Process each row for inference
|
55 |
for i in range(start_index, ddos_data.shape[0]):
|
56 |
row_prompt = self.generate_row_prompt(ddos_data.iloc[i])
|
57 |
probabilities = self.infer_row(know_prompt, row_prompt)
|
58 |
+
|
59 |
+
# If no valid response, mark as "None"
|
60 |
+
predict_df.loc[i] = [i, *probabilities] if probabilities else [i, "None", "None", "No valid response"]
|
|
|
61 |
|
62 |
# Save after each row for resilience
|
63 |
predict_df.to_csv(self.results_path, index=False)
|
|
|
64 |
print(f"Processed row {i}: {predict_df.loc[i].to_dict()}")
|
65 |
|
66 |
print("Inference complete. Results saved at:", self.results_path)
|
|
|
151 |
except Exception as e:
|
152 |
return f"Error: Unable to process your request due to {e}."
|
153 |
|
154 |
+
def load_or_create_results(self):
    """
    Load previously saved results, or create an empty results DataFrame.

    Returns:
        pandas.DataFrame: The parsed contents of ``self.results_path`` when
        that file exists and contains data; otherwise an empty DataFrame with
        the columns ``index``, ``attack``, ``benign`` and ``original``.
    """
    result_columns = ["index", "attack", "benign", "original"]

    if not self.results_path.exists():
        return pd.DataFrame(columns=result_columns)

    try:
        return pd.read_csv(self.results_path)
    except pd.errors.EmptyDataError:
        # A zero-byte results file (for example one left behind by an
        # interrupted write) holds no rows, so treat it like a missing file
        # instead of letting the parser error abort the run.
        return pd.DataFrame(columns=result_columns)
|
162 |
+
|
163 |
# Example usage
|
164 |
if __name__ == "__main__":
    # Build the inference helper and run the row-by-row pass over the dataset.
    inference = DDoSInference()
    inference.process_dataset()

    # Tell the user what they can do once the run has finished.
    print("You can now interact with the model for mitigation steps or download the results.")
|