Spaces:

yash1506
/

DRLLMTRY

Sleeping

App Files Community

yash1506 committed on Dec 8, 2024

Commit

52798db

verified ·

1 Parent(s): 65fc544

Update llm_handler.py

Browse files

Files changed (1) hide show

llm_handler.py +21 -13

llm_handler.py CHANGED Viewed

@@ -7,16 +7,19 @@ from dotenv import load_dotenv
 class DDoSInference:
     def __init__(self):
         load_dotenv()
         self.client = InferenceClient(api_key=os.getenv("HF_TOK_KEY"))
         self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"
         self.dataset_path = Path("~/.dataset/original.csv").expanduser()
-        self.results_path = Path("~/.dataset/PROBABILITY_OF_EACH_ROW_DDOS_AND_BENGNIN.csv").expanduser()
         self.results_path.parent.mkdir(parents=True, exist_ok=True)
     def process_dataset(self):
         """
-        Processes the dataset row by row and performs inference using the LLM.
         """
         if not self.dataset_path.exists():
             raise FileNotFoundError("The preprocessed dataset file does not exist. Ensure it is generated using the processor.")
@@ -30,6 +33,7 @@ class DDoSInference:
                 raise ValueError(f"Label column '{label_column}' not found in the dataset.")
         ddos_data_without_label = ddos_data.drop([label_column], axis=1)
         stats = {
             'Max': ddos_data_without_label.max(),
             'Min': ddos_data_without_label.min(),
@@ -42,26 +46,21 @@ class DDoSInference:
         know_prompt = self.generate_knowledge_prompt(stats)
         # Prepare results DataFrame
-        if self.results_path.exists():
-            predict_df = pd.read_csv(self.results_path)
-        else:
-            predict_df = pd.DataFrame(columns=["index", "attack", "benign", "original"])
         start_index = predict_df.shape[0]
         print(f"Starting inference from row {start_index}")
-        # Process rows for inference
         for i in range(start_index, ddos_data.shape[0]):
             row_prompt = self.generate_row_prompt(ddos_data.iloc[i])
             probabilities = self.infer_row(know_prompt, row_prompt)
-            if probabilities:
-                predict_df.loc[i] = [i, *probabilities]
-            else:
-                predict_df.loc[i] = [i, "None", "None", "No valid response"]
             # Save after each row for resilience
             predict_df.to_csv(self.results_path, index=False)
             print(f"Processed row {i}: {predict_df.loc[i].to_dict()}")
         print("Inference complete. Results saved at:", self.results_path)
@@ -152,8 +151,17 @@ class DDoSInference:
         except Exception as e:
             return f"Error: Unable to process your request due to {e}."
 # Example usage
 if __name__ == "__main__":
     handler = DDoSInference()
     handler.process_dataset()
-    print("You can now interact with the model for mitigation steps or download the results.")

 class DDoSInference:
     def __init__(self):
+        """
+        Initialize DDoSInference class, set up the API client, and paths for dataset and results.
+        """
         load_dotenv()
         self.client = InferenceClient(api_key=os.getenv("HF_TOK_KEY"))
         self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"
         self.dataset_path = Path("~/.dataset/original.csv").expanduser()
+        self.results_path = Path("~/.dataset/PROBABILITY_OF_EACH_ROW_DDOS_AND_BENIGN.csv").expanduser()
         self.results_path.parent.mkdir(parents=True, exist_ok=True)
     def process_dataset(self):
         """
+        Process the dataset row by row, performing inference using the LLM for each row.
         """
         if not self.dataset_path.exists():
             raise FileNotFoundError("The preprocessed dataset file does not exist. Ensure it is generated using the processor.")
                 raise ValueError(f"Label column '{label_column}' not found in the dataset.")
         ddos_data_without_label = ddos_data.drop([label_column], axis=1)
         stats = {
             'Max': ddos_data_without_label.max(),
             'Min': ddos_data_without_label.min(),
         know_prompt = self.generate_knowledge_prompt(stats)
         # Prepare results DataFrame
+        predict_df = self.load_or_create_results()
         start_index = predict_df.shape[0]
         print(f"Starting inference from row {start_index}")
+        # Process each row for inference
         for i in range(start_index, ddos_data.shape[0]):
             row_prompt = self.generate_row_prompt(ddos_data.iloc[i])
             probabilities = self.infer_row(know_prompt, row_prompt)
+            # If no valid response, mark as "None"
+            predict_df.loc[i] = [i, *probabilities] if probabilities else [i, "None", "None", "No valid response"]
             # Save after each row for resilience
             predict_df.to_csv(self.results_path, index=False)
             print(f"Processed row {i}: {predict_df.loc[i].to_dict()}")
         print("Inference complete. Results saved at:", self.results_path)
         except Exception as e:
             return f"Error: Unable to process your request due to {e}."
+    def load_or_create_results(self):
+        """
+        Loads the existing results or creates a new DataFrame if the results file doesn't exist.
+        """
+        if self.results_path.exists():
+            return pd.read_csv(self.results_path)
+        else:
+            return pd.DataFrame(columns=["index", "attack", "benign", "original"])
 # Example usage
 if __name__ == "__main__":
     handler = DDoSInference()
     handler.process_dataset()
+    print("You can now interact with the model for mitigation steps or download the results.")