{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "leJQ1K4ElUhb"
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "SsSHzoDnolqt"
},
"source": [
"Use Whisper model to generate a transcription for a meeting"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NR6digJYor4k"
},
"source": [
"Step1 - Load the pre-trained speaker"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rjptG4D9ox7q",
"outputId": "81f2c836-131a-4f70-c0ec-fbc966d9969a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pyannote.audio in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (3.3.2)\n",
"Requirement already satisfied: asteroid-filterbanks>=0.4 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (0.4.0)\n",
"Requirement already satisfied: einops>=0.6.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (0.8.0)\n",
"Requirement already satisfied: huggingface-hub>=0.13.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (0.26.1)\n",
"Requirement already satisfied: lightning>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.5.0.post0)\n",
"Requirement already satisfied: omegaconf<3.0,>=2.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.3.0)\n",
"Requirement already satisfied: pyannote.core>=5.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (5.0.0)\n",
"Requirement already satisfied: pyannote.database>=5.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (5.1.0)\n",
"Requirement already satisfied: pyannote.metrics>=3.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (3.2.1)\n",
"Requirement already satisfied: pyannote.pipeline>=3.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (3.0.1)\n",
"Requirement already satisfied: pytorch-metric-learning>=2.1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.8.1)\n",
"Requirement already satisfied: rich>=12.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (13.9.2)\n",
"Requirement already satisfied: semver>=3.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (3.0.2)\n",
"Requirement already satisfied: soundfile>=0.12.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (0.13.0)\n",
"Requirement already satisfied: speechbrain>=1.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (1.0.2)\n",
"Requirement already satisfied: tensorboardX>=2.6 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.6.2.2)\n",
"Requirement already satisfied: torch>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.4.1)\n",
"Requirement already satisfied: torch-audiomentations>=0.11.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (0.11.1)\n",
"Requirement already satisfied: torchaudio>=2.2.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (2.4.1)\n",
"Requirement already satisfied: torchmetrics>=0.11.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.audio) (1.6.1)\n",
"Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from asteroid-filterbanks>=0.4->pyannote.audio) (1.26.4)\n",
"Requirement already satisfied: typing-extensions in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from asteroid-filterbanks>=0.4->pyannote.audio) (4.12.2)\n",
"Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (3.16.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (2024.6.1)\n",
"Requirement already satisfied: packaging>=20.9 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (24.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (6.0.1)\n",
"Requirement already satisfied: requests in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (2.32.3)\n",
"Requirement already satisfied: tqdm>=4.42.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.13.0->pyannote.audio) (4.66.4)\n",
"Requirement already satisfied: lightning-utilities<2.0,>=0.10.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from lightning>=2.0.1->pyannote.audio) (0.11.9)\n",
"Requirement already satisfied: pytorch-lightning in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from lightning>=2.0.1->pyannote.audio) (2.5.0.post0)\n",
"Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from omegaconf<3.0,>=2.1->pyannote.audio) (4.9.3)\n",
"Requirement already satisfied: sortedcontainers>=2.0.4 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.core>=5.0.0->pyannote.audio) (2.4.0)\n",
"Requirement already satisfied: scipy>=1.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.core>=5.0.0->pyannote.audio) (1.14.1)\n",
"Requirement already satisfied: pandas>=0.19 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.database>=5.0.1->pyannote.audio) (2.2.2)\n",
"Requirement already satisfied: typer>=0.12.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.database>=5.0.1->pyannote.audio) (0.15.1)\n",
"Requirement already satisfied: scikit-learn>=0.17.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.metrics>=3.2->pyannote.audio) (1.5.2)\n",
"Requirement already satisfied: docopt>=0.6.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.metrics>=3.2->pyannote.audio) (0.6.2)\n",
"Requirement already satisfied: tabulate>=0.7.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.metrics>=3.2->pyannote.audio) (0.9.0)\n",
"Requirement already satisfied: matplotlib>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.metrics>=3.2->pyannote.audio) (3.9.1)\n",
"Requirement already satisfied: sympy>=1.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.metrics>=3.2->pyannote.audio) (1.13.3)\n",
"Requirement already satisfied: optuna>=3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pyannote.pipeline>=3.0.1->pyannote.audio) (4.1.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from rich>=12.0.0->pyannote.audio) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from rich>=12.0.0->pyannote.audio) (2.18.0)\n",
"Requirement already satisfied: cffi>=1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from soundfile>=0.12.1->pyannote.audio) (1.17.1)\n",
"Requirement already satisfied: hyperpyyaml in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbrain>=1.0.0->pyannote.audio) (1.2.2)\n",
"Requirement already satisfied: joblib in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbrain>=1.0.0->pyannote.audio) (1.4.2)\n",
"Requirement already satisfied: sentencepiece in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbrain>=1.0.0->pyannote.audio) (0.2.0)\n",
"Requirement already satisfied: protobuf>=3.20 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from tensorboardX>=2.6->pyannote.audio) (4.25.5)\n",
"Requirement already satisfied: networkx in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=2.0.0->pyannote.audio) (3.3)\n",
"Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=2.0.0->pyannote.audio) (3.1.4)\n",
"Requirement already satisfied: julius<0.3,>=0.2.3 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch-audiomentations>=0.11.0->pyannote.audio) (0.2.7)\n",
"Requirement already satisfied: librosa>=0.6.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch-audiomentations>=0.11.0->pyannote.audio) (0.10.2.post1)\n",
"Requirement already satisfied: torch-pitch-shift>=1.2.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch-audiomentations>=0.11.0->pyannote.audio) (1.2.5)\n",
"Requirement already satisfied: pycparser in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from cffi>=1.0->soundfile>=0.12.1->pyannote.audio) (2.22)\n",
"Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (3.9.5)\n",
"Requirement already satisfied: audioread>=2.1.9 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (3.0.1)\n",
"Requirement already satisfied: decorator>=4.3.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (5.1.1)\n",
"Requirement already satisfied: numba>=0.51.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (0.60.0)\n",
"Requirement already satisfied: pooch>=1.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (1.8.2)\n",
"Requirement already satisfied: soxr>=0.3.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (0.5.0.post1)\n",
"Requirement already satisfied: lazy-loader>=0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (0.4)\n",
"Requirement already satisfied: msgpack>=1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (1.1.0)\n",
"Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from lightning-utilities<2.0,>=0.10.0->lightning>=2.0.1->pyannote.audio) (65.5.0)\n",
"Requirement already satisfied: mdurl~=0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=12.0.0->pyannote.audio) (0.1.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (1.2.1)\n",
"Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (4.53.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (1.4.5)\n",
"Requirement already satisfied: pillow>=8 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (10.4.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (3.1.2)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (2.9.0.post0)\n",
"Requirement already satisfied: alembic>=1.5.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from optuna>=3.1->pyannote.pipeline>=3.0.1->pyannote.audio) (1.14.0)\n",
"Requirement already satisfied: colorlog in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from optuna>=3.1->pyannote.pipeline>=3.0.1->pyannote.audio) (6.9.0)\n",
"Requirement already satisfied: sqlalchemy>=1.4.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from optuna>=3.1->pyannote.pipeline>=3.0.1->pyannote.audio) (2.0.31)\n",
"Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas>=0.19->pyannote.database>=5.0.1->pyannote.audio) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas>=0.19->pyannote.database>=5.0.1->pyannote.audio) (2024.1)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from scikit-learn>=0.17.1->pyannote.metrics>=3.2->pyannote.audio) (3.5.0)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from sympy>=1.1->pyannote.metrics>=3.2->pyannote.audio) (1.3.0)\n",
"Requirement already satisfied: primePy>=1.3 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch-pitch-shift>=1.2.2->torch-audiomentations>=0.11.0->pyannote.audio) (1.3)\n",
"Requirement already satisfied: click>=8.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from typer>=0.12.1->pyannote.database>=5.0.1->pyannote.audio) (8.1.7)\n",
"Requirement already satisfied: shellingham>=1.3.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from typer>=0.12.1->pyannote.database>=5.0.1->pyannote.audio) (1.5.4)\n",
"Requirement already satisfied: ruamel.yaml>=0.17.28 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from hyperpyyaml->speechbrain>=1.0.0->pyannote.audio) (0.18.10)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from jinja2->torch>=2.0.0->pyannote.audio) (2.1.5)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests->huggingface-hub>=0.13.0->pyannote.audio) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests->huggingface-hub>=0.13.0->pyannote.audio) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests->huggingface-hub>=0.13.0->pyannote.audio) (2.2.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests->huggingface-hub>=0.13.0->pyannote.audio) (2023.7.22)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (23.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (1.4.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (6.0.5)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning>=2.0.1->pyannote.audio) (1.9.4)\n",
"Requirement already satisfied: Mako in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from alembic>=1.5.0->optuna>=3.1->pyannote.pipeline>=3.0.1->pyannote.audio) (1.3.8)\n",
"Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from numba>=0.51.0->librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (0.43.0)\n",
"Requirement already satisfied: platformdirs>=2.5.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pooch>=1.1->librosa>=0.6.0->torch-audiomentations>=0.11.0->pyannote.audio) (4.2.2)\n",
"Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib>=2.0.0->pyannote.metrics>=3.2->pyannote.audio) (1.16.0)\n",
"Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from ruamel.yaml>=0.17.28->hyperpyyaml->speechbrain>=1.0.0->pyannote.audio) (0.2.12)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install --upgrade pyannote.audio"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 561
},
"id": "ZfAH8To3pVvu",
"outputId": "d376d60d-5891-4453-c0a3-0ccff3e3f1c7"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:speechbrain.utils.quirks:Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]\n",
"INFO:speechbrain.utils.quirks:Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []\n",
"Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.5.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../../../../.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`\n",
"INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n",
"INFO:speechbrain.utils.fetching:Fetch custom.py: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.\n",
"Model was trained with torch 1.10.0+cu102, yours is 2.4.1. Bad things might happen unless you revert torch to 1.x.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/speechbrain/utils/autocast.py:68: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
" wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)\n",
"INFO:speechbrain.utils.fetching:Fetch embedding_model.ckpt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n",
"INFO:speechbrain.utils.fetching:Fetch mean_var_norm_emb.ckpt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n",
"INFO:speechbrain.utils.fetching:Fetch classifier.ckpt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n",
"INFO:speechbrain.utils.fetching:Fetch label_encoder.txt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached\n",
"INFO:speechbrain.utils.parameter_transfer:Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder\n",
"/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/speechbrain/utils/checkpoints.py:200: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
" state_dict = torch.load(path, map_location=device)\n",
"/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/speechbrain/processing/features.py:1311: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
" stats = torch.load(path, map_location=device)\n"
]
}
],
"source": [
"from pyannote.audio import Pipeline\n",
"from huggingface_hub import login\n",
"\n",
"login(token=\"xxxx\") \n",
"\n",
"diarization_pipeline = Pipeline.from_pretrained(\n",
" \"pyannote/speaker-diarization@2.1\",\n",
" use_auth_token=True # Force fresh download\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xhPqb6K_pYYu",
"outputId": "11c51870-2447-4c91-d663-6379d6a0f431"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: datasets in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (3.0.2)\n",
"Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (3.16.1)\n",
"Requirement already satisfied: numpy>=1.17 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (1.26.4)\n",
"Requirement already satisfied: pyarrow>=15.0.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (17.0.0)\n",
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (0.3.8)\n",
"Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (2.2.2)\n",
"Requirement already satisfied: requests>=2.32.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (2.32.3)\n",
"Requirement already satisfied: tqdm>=4.66.3 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (4.66.4)\n",
"Requirement already satisfied: xxhash in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (3.5.0)\n",
"Requirement already satisfied: multiprocess<0.70.17 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (0.70.16)\n",
"Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.6.1)\n",
"Requirement already satisfied: aiohttp in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (3.9.5)\n",
"Requirement already satisfied: huggingface-hub>=0.23.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (0.26.1)\n",
"Requirement already satisfied: packaging in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (24.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from datasets) (6.0.1)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp->datasets) (23.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp->datasets) (1.4.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp->datasets) (6.0.5)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from aiohttp->datasets) (1.9.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (2.2.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from requests>=2.32.2->datasets) (2023.7.22)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas->datasets) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas->datasets) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas->datasets) (2024.1)\n",
"Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install datasets"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"a0c4cc4941cb4d049ce1071ec9595a82",
"6e1573e793e9444d82df6a1b0cbc9831",
"02ac619e1b834ca9af360895c79482e9",
"0f8dd53356be42ec957a5e4c3ffe02e8",
"85f6e558110d477c9867d6af68635adc",
"1cefe150f569491abfb1f986421e2be0",
"a25b1c26cc8d468b918acf982931d70f",
"5ca25749f3564c8b80ad9929f0f721af",
"a0d449eaeb494a488a0ca46a045ed8bd",
"5303bb131bc4411ea9a27442cf7cbfd3",
"732e1d92c4bf427ab5687ae84d5cd3dc"
]
},
"id": "dOnMRgwIpqVq",
"outputId": "d5ddfb9e-cab4-4f45-f383-5d8d38706507"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:datasets:PyTorch version 2.4.1 available.\n",
"INFO:datasets:JAX version 0.4.34 available.\n"
]
}
],
"source": [
"from datasets import load_dataset\n",
"\n",
"concatenated_librispeech = load_dataset(\n",
" \"sanchit-gandhi/concatenated_librispeech\", split=\"train\", streaming=True\n",
")\n",
"sample = next(iter(concatenated_librispeech))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mE9WseFGCDyc"
},
"source": [
"Test it and it worked, two people is talking we need identify it."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import Audio\n",
"\n",
"Audio(sample[\"audio\"][\"array\"], rate=sample[\"audio\"][\"sampling_rate\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"content\": [\n",
" {\n",
" \"start\": 0.03096875,\n",
" \"end\": 14.543468750000002,\n",
" \"label\": \"SPEAKER_01\"\n",
" },\n",
" {\n",
" \"start\": 15.387218750000002,\n",
" \"end\": 21.25971875,\n",
" \"label\": \"SPEAKER_00\"\n",
" }\n",
" ]\n",
"}\n"
]
}
],
"source": [
"from pyannote.core import Segment\n",
"\n",
"# 提取 Annotation 数据\n",
"result = []\n",
"for segment, _, label in outputs.itertracks(yield_label=True):\n",
" result.append({\n",
" \"start\": segment.start,\n",
" \"end\": segment.end,\n",
" \"label\": label\n",
" })\n",
"\n",
"import json\n",
"json_output = json.dumps({\"content\": result}, indent=2)\n",
"print(json_output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next: use the Whisper model for our speech transcription system"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
]
}
],
"source": [
"from transformers import pipeline\n",
"\n",
"asr_pipeline = pipeline(\n",
" \"automatic-speech-recognition\",\n",
" model=\"openai/whisper-base\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pass the argument return_timestamps=True to activate the timestamp prediction task for Whisper"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/transformers/models/whisper/generation_whisper.py:509: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead.\n",
" warnings.warn(\n",
"Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.\n",
"Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n"
]
},
{
"data": {
"text/plain": [
"{'text': \" The second and importance is as follows. Sovereignty may be defined to be the right of making laws. In France, the king really exercises a portion of the sovereign power, since the laws have no weight. He was in a favored state of mind, owing to the blight his wife's action threatened to cast upon his entire future.\",\n",
" 'chunks': [{'timestamp': (0.0, 3.56),\n",
" 'text': ' The second and importance is as follows.'},\n",
" {'timestamp': (3.56, 7.84),\n",
" 'text': ' Sovereignty may be defined to be the right of making laws.'},\n",
" {'timestamp': (7.84, 13.88),\n",
" 'text': ' In France, the king really exercises a portion of the sovereign power, since the laws have'},\n",
" {'timestamp': (13.88, 15.48), 'text': ' no weight.'},\n",
" {'timestamp': (15.48, 19.44),\n",
" 'text': \" He was in a favored state of mind, owing to the blight his wife's action threatened to\"},\n",
" {'timestamp': (19.44, 21.28), 'text': ' cast upon his entire future.'}]}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"asr_pipeline(\n",
" sample[\"audio\"].copy(),\n",
" generate_kwargs={\"max_new_tokens\": 256},\n",
" return_timestamps=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Speech box - "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting git+https://github.com/huggingface/speechbox\n",
" Cloning https://github.com/huggingface/speechbox to /private/var/folders/rp/s3c7_7cn6sg2s70tp08w3xwm0000gn/T/pip-req-build-uu9jusc2\n",
" Running command git clone --filter=blob:none --quiet https://github.com/huggingface/speechbox /private/var/folders/rp/s3c7_7cn6sg2s70tp08w3xwm0000gn/T/pip-req-build-uu9jusc2\n",
" Resolved https://github.com/huggingface/speechbox to commit e7339dc021c8aa3047f824fb5c24b5b2c8197a76\n",
" Installing build dependencies ... \u001b[?25ldone\n",
"\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
"\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25hRequirement already satisfied: torch>=1.9 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbox==0.2.1) (2.4.1)\n",
"Requirement already satisfied: importlib_metadata in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbox==0.2.1) (8.5.0)\n",
"Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbox==0.2.1) (3.16.1)\n",
"Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from speechbox==0.2.1) (1.26.4)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=1.9->speechbox==0.2.1) (4.12.2)\n",
"Requirement already satisfied: sympy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=1.9->speechbox==0.2.1) (1.13.3)\n",
"Requirement already satisfied: networkx in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=1.9->speechbox==0.2.1) (3.3)\n",
"Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=1.9->speechbox==0.2.1) (3.1.4)\n",
"Requirement already satisfied: fsspec in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from torch>=1.9->speechbox==0.2.1) (2024.6.1)\n",
"Requirement already satisfied: zipp>=3.20 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from importlib_metadata->speechbox==0.2.1) (3.20.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from jinja2->torch>=1.9->speechbox==0.2.1) (2.1.5)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from sympy->torch>=1.9->speechbox==0.2.1) (1.3.0)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install git+https://github.com/huggingface/speechbox"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"instantiate our combined diarization plus transcription pipeline"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from speechbox import ASRDiarizationPipeline\n",
"\n",
"pipeline = ASRDiarizationPipeline(\n",
" asr_pipeline=asr_pipeline, diarization_pipeline=diarization_pipeline\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The first speaker is segmented as speaking from 0 to 15.48 seconds, and the second speaker from 15.48 to 21.28 seconds"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'speaker': 'SPEAKER_01',\n",
" 'text': ' The second and importance is as follows. Sovereignty may be defined to be the right of making laws. In France, the king really exercises a portion of the sovereign power, since the laws have no weight.',\n",
" 'timestamp': (0.0, 15.48)},\n",
" {'speaker': 'SPEAKER_00',\n",
" 'text': \" He was in a favored state of mind, owing to the blight his wife's action threatened to cast upon his entire future.\",\n",
" 'timestamp': (15.48, 21.28)}]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline(sample[\"audio\"].copy())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def tuple_to_string(start_end_tuple, ndigits=1):\n",
" return str((round(start_end_tuple[0], ndigits), round(start_end_tuple[1], ndigits)))\n",
"\n",
"\n",
"def format_as_transcription(raw_segments):\n",
" return \"\\n\\n\".join(\n",
" [\n",
" chunk[\"speaker\"] + \" \" + tuple_to_string(chunk[\"timestamp\"]) + chunk[\"text\"]\n",
" for chunk in raw_segments\n",
" ]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"SPEAKER_01 (0.0, 15.5) The second and importance is as follows. Sovereignty may be defined to be the right of making laws. In France, the king really exercises a portion of the sovereign power, since the laws have no weight.\\n\\nSPEAKER_00 (15.5, 21.3) He was in a favored state of mind, owing to the blight his wife's action threatened to cast upon his entire future.\""
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outputs = pipeline(sample[\"audio\"].copy())\n",
"\n",
"format_as_transcription(outputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"02ac619e1b834ca9af360895c79482e9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5ca25749f3564c8b80ad9929f0f721af",
"max": 359,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a0d449eaeb494a488a0ca46a045ed8bd",
"value": 359
}
},
"0f8dd53356be42ec957a5e4c3ffe02e8": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5303bb131bc4411ea9a27442cf7cbfd3",
"placeholder": "",
"style": "IPY_MODEL_732e1d92c4bf427ab5687ae84d5cd3dc",
"value": " 359/359 [00:00<00:00, 5.05kB/s]"
}
},
"1cefe150f569491abfb1f986421e2be0": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5303bb131bc4411ea9a27442cf7cbfd3": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5ca25749f3564c8b80ad9929f0f721af": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e1573e793e9444d82df6a1b0cbc9831": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1cefe150f569491abfb1f986421e2be0",
"placeholder": "",
"style": "IPY_MODEL_a25b1c26cc8d468b918acf982931d70f",
"value": "README.md: 100%"
}
},
"732e1d92c4bf427ab5687ae84d5cd3dc": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"85f6e558110d477c9867d6af68635adc": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a0c4cc4941cb4d049ce1071ec9595a82": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_6e1573e793e9444d82df6a1b0cbc9831",
"IPY_MODEL_02ac619e1b834ca9af360895c79482e9",
"IPY_MODEL_0f8dd53356be42ec957a5e4c3ffe02e8"
],
"layout": "IPY_MODEL_85f6e558110d477c9867d6af68635adc"
}
},
"a0d449eaeb494a488a0ca46a045ed8bd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"a25b1c26cc8d468b918acf982931d70f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}