yash1506 committed
Commit 52798db · verified · Parent: 65fc544

Update llm_handler.py

Files changed (1)
  1. llm_handler.py +21 -13
llm_handler.py CHANGED
@@ -7,16 +7,19 @@ from dotenv import load_dotenv
 
 class DDoSInference:
     def __init__(self):
+        """
+        Initialize DDoSInference class, set up the API client, and paths for dataset and results.
+        """
         load_dotenv()
         self.client = InferenceClient(api_key=os.getenv("HF_TOK_KEY"))
         self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"
         self.dataset_path = Path("~/.dataset/original.csv").expanduser()
-        self.results_path = Path("~/.dataset/PROBABILITY_OF_EACH_ROW_DDOS_AND_BENGNIN.csv").expanduser()
+        self.results_path = Path("~/.dataset/PROBABILITY_OF_EACH_ROW_DDOS_AND_BENIGN.csv").expanduser()
         self.results_path.parent.mkdir(parents=True, exist_ok=True)
 
     def process_dataset(self):
         """
-        Processes the dataset row by row and performs inference using the LLM.
+        Process the dataset row by row, performing inference using the LLM for each row.
         """
         if not self.dataset_path.exists():
             raise FileNotFoundError("The preprocessed dataset file does not exist. Ensure it is generated using the processor.")
@@ -30,6 +33,7 @@ class DDoSInference:
             raise ValueError(f"Label column '{label_column}' not found in the dataset.")
 
         ddos_data_without_label = ddos_data.drop([label_column], axis=1)
+
         stats = {
             'Max': ddos_data_without_label.max(),
             'Min': ddos_data_without_label.min(),
@@ -42,26 +46,21 @@ class DDoSInference:
         know_prompt = self.generate_knowledge_prompt(stats)
 
         # Prepare results DataFrame
-        if self.results_path.exists():
-            predict_df = pd.read_csv(self.results_path)
-        else:
-            predict_df = pd.DataFrame(columns=["index", "attack", "benign", "original"])
+        predict_df = self.load_or_create_results()
 
         start_index = predict_df.shape[0]
        print(f"Starting inference from row {start_index}")
 
-        # Process rows for inference
+        # Process each row for inference
         for i in range(start_index, ddos_data.shape[0]):
             row_prompt = self.generate_row_prompt(ddos_data.iloc[i])
             probabilities = self.infer_row(know_prompt, row_prompt)
-            if probabilities:
-                predict_df.loc[i] = [i, *probabilities]
-            else:
-                predict_df.loc[i] = [i, "None", "None", "No valid response"]
+
+            # If no valid response, mark as "None"
+            predict_df.loc[i] = [i, *probabilities] if probabilities else [i, "None", "None", "No valid response"]
 
             # Save after each row for resilience
             predict_df.to_csv(self.results_path, index=False)
-
             print(f"Processed row {i}: {predict_df.loc[i].to_dict()}")
 
         print("Inference complete. Results saved at:", self.results_path)
@@ -152,8 +151,17 @@ class DDoSInference:
         except Exception as e:
             return f"Error: Unable to process your request due to {e}."
 
+    def load_or_create_results(self):
+        """
+        Loads the existing results or creates a new DataFrame if the results file doesn't exist.
+        """
+        if self.results_path.exists():
+            return pd.read_csv(self.results_path)
+        else:
+            return pd.DataFrame(columns=["index", "attack", "benign", "original"])
+
 # Example usage
 if __name__ == "__main__":
     handler = DDoSInference()
     handler.process_dataset()
-    print("You can now interact with the model for mitigation steps or download the results.")
+    print("You can now interact with the model for mitigation steps or download the results.")