{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install pandas\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the intents CSV. Kept in one constant so the notebook can be\n",
    "# pointed at another copy of the file by editing a single line.\n",
    "DATA_PATH = r'C:\\Users\\serban.tica\\Documents\\tobi_llm_intent_recognition\\data\\Pager_Intents.csv'\n",
    "\n",
    "# Load the first two columns of the CSV file into a pandas DataFrame.\n",
    "# header=0 marks the first row of the file as the header row, so `names`\n",
    "# replaces it; without it that row is loaded as a data row\n",
    "# ('utterance', 'intent') and shows up as a bogus 'intent' class.\n",
    "df = pd.read_csv(filepath_or_buffer=DATA_PATH, usecols=[0, 1],\n",
    "                 names=['utterance', 'intent'], header=0)\n",
    "\n",
    "# Remove the special character \" from the 'utterance' column\n",
    "# (regex=False: treat the quote as a literal, whatever the pandas default is)\n",
    "df['utterance'] = df['utterance'].str.replace('\"', '', regex=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
utteranceintent
0utteranceintent
1Pot sa i scriu specialistului de contaccount.Specialist
2Contact specialist de contaccount.Specialist
3Ce numar de telefon are specialistul de contaccount.Specialist
4Ce numar de telefon are specialistul de contaccount.Specialist
\n", "
" ], "text/plain": [ " utterance intent\n", "0 utterance intent\n", "1 Pot sa i scriu specialistului de cont account.Specialist\n", "2 Contact specialist de cont account.Specialist\n", "3 Ce numar de telefon are specialistul de cont account.Specialist\n", "4 Ce numar de telefon are specialistul de cont account.Specialist" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "intent\n", "offer.Request 1084\n", "tshooting 1065\n", "reziliere 829\n", "activate.Service 664\n", "info.req.MyPlan 560\n", "switch.To.Agent 413\n", "invoice.Explanation 318\n", "invoice.PC 298\n", "offer.Request.Existing 278\n", "deactivate.Service 264\n", "info.Dez.Roaming 244\n", "invoice.Details 241\n", "info.req.Service 239\n", "info.req.Cost.Control 230\n", "info.faq.Contact 228\n", "info.faq.Download.DOC 186\n", "invoice.Payment 182\n", "info.reziliere 180\n", "info.faq.IMEI.PUK 178\n", "offer.Request.Tarife 167\n", "info.faq.Roaming.Status 146\n", "offer.Request.Port.In 145\n", "info.faq.Order 136\n", "info.faq.Price.Up 129\n", "byebye 117\n", "info.faq.Recharge 114\n", "invoice.PA 113\n", "info.faq.Revolut 107\n", "info.faq.VFClub 105\n", "info.faq.Deblocare 102\n", "info.faq.MVA.Asociere.Servicii 88\n", "info.faq.Schimbare.SIM 75\n", "welcome 74\n", "info.faq.CashBack 74\n", "activate.Service.Electronic.Bill 73\n", "engleza 72\n", "info.faq.VTV.login 67\n", "info.faq.Settings 64\n", "info.faq.Speedtest 64\n", "info.req.Gratie 55\n", "info.faq.MVA.istoric.apeluri 54\n", "UPC_pay_in_advance 53\n", "info.faq.Own.MSISDN 53\n", "info.faq.Telefon.furat 50\n", "info.faq.DueDate 50\n", "info.faq.Gamification 49\n", "UPC_account_no 48\n", "info.faq.Account.Creation 48\n", "info.faq.Credit.Transfer 48\n", "offer.Request.Port.Out 47\n", "offer.Request.Portare 47\n", "info.faq.MVA.Credentiale 45\n", 
"info.faq.HBOMax 44\n", "info.faq.Cont.Bancar 41\n", "info.faq.Reclamatie 41\n", "info.faq.Job.Vodafone 40\n", "info.faq.ID.TV 40\n", "info.faq.TeslaLucky 38\n", "UPC_personal_data 38\n", "info.faq.Lista.Canale 37\n", "info.faq.Service.Warranty 37\n", "info.faq.WIFI.Connect 36\n", "info.faq.Tombola 36\n", "info.faq.Swap 36\n", "account.Specialist 35\n", "info.faq.Spam 35\n", "info.faq.PIN.TV 34\n", "info.faq.TikTok 33\n", "UPC_no_bill 33\n", "info.faq.PreOrder 33\n", "info.faq.Port.Forwarding 33\n", "UPC_suspend 33\n", "info.faq.WIFI.Booster 32\n", "info.faq.5G 32\n", "info.faq.Reincarcare.Gresita 32\n", "invoice.DueDate.Update 32\n", "info.faq.Sincronizare.Telecomanda 32\n", "info.faq.Acte.Necesare 32\n", "info.faq.Service.Insurance 32\n", "info.faq.Campanii 32\n", "info.faq.Configurare.Modem 32\n", "info.faq.Acoperire.roaming 31\n", "info.faq.Adresa.Magazine 31\n", "info.faq.Port.Cancel 31\n", "info.faq.FUP 31\n", "info.faq.EECCT 31\n", "info.faq.Curs.Valutar 31\n", "UPC_modify_services 30\n", "Welcome2Tobi 30\n", "info.faq.WIFI.Password 30\n", "info.faq.Netflix 30\n", "info.faq.WIFI.Disable 30\n", "info.faq.Software.Update 30\n", "info.faq.Service.Not.Warranty 30\n", "info.faq.Port.Usage 30\n", "info.faq.Port.Duration 30\n", "info.faq.Cable.Connection 30\n", "info.faq.CAW 30\n", "info.faq.Evalueaza.Experienta 29\n", "info.faq.MVA.Dwld 29\n", "info.faq.Ordine.Consum.Beneficii 29\n", "info.faq.Nr.WhatsApp 28\n", "info.faq.Aviz.Tehnic 28\n", "info.faq.Auto.Reincarcare 26\n", "rejection 25\n", "info.faq.WIFI.Discover 24\n", "confirmation 23\n", "chitchat.CeFaci 22\n", "chitchat.Weather 21\n", "TestJourney 2\n", "intent 1\n", "Name: count, dtype: int64\n" ] } ], "source": [ "# Set max rows to None to display all rows\n", "pd.set_option('display.max_rows', None)\n", "\n", "# Print the count of unique 'intent', and the count of each 'intent'\n", "print(df['intent'].value_counts()) # show all the unique values and their counts\n", "\n", "# Set the default max rows to 
20\n", "pd.set_option('display.max_rows', 20)" ] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Get the count of each unique 'intent'\n",
    "intent_counts = df['intent'].value_counts()\n",
    "\n",
    "def plot_intent_counts(intent_counts, figsize=(10, 6)):\n",
    "    \"\"\"Plot a bar chart of intent counts with average and median reference lines.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    intent_counts : pandas.Series\n",
    "        Number of rows per intent, indexed by intent label (for example the\n",
    "        result of ``df['intent'].value_counts()``).\n",
    "    figsize : tuple, optional\n",
    "        Figure size in inches handed to ``plt.subplots``. Defaults to (10, 6).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The figure is displayed with ``plt.show()``.\n",
    "    \"\"\"\n",
    "    # Calculate the average and median count\n",
    "    average_count = intent_counts.mean()\n",
    "    median_count = intent_counts.median()\n",
    "\n",
    "    # Create a bar plot on an explicit Axes instead of pyplot's implicit\n",
    "    # current figure, so the function does not depend on global plotting state\n",
    "    fig, ax = plt.subplots(figsize=figsize)\n",
    "    intent_counts.plot(kind='bar', ax=ax)\n",
    "    ax.set_title('Count of each Intent')\n",
    "    ax.set_xlabel('Intent')\n",
    "    ax.set_ylabel('Count')\n",
    "\n",
    "    # Add a horizontal line for the average count\n",
    "    ax.axhline(y=average_count, color='r', linestyle='--',\n",
    "               label=f'Average count: {average_count:.2f}')\n",
    "    # Add a horizontal line for the median count\n",
    "    ax.axhline(y=median_count, color='g', linestyle='--',\n",
    "               label=f'Median count: {median_count:.2f}')\n",
    "    ax.legend()\n",
    "\n",
    "    # Keep the rotated intent labels inside the figure area\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "plot_intent_counts(intent_counts)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}