{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Multi-Accent and Multi-Lingual Voice Clone Demo with MeloTTS" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import os\n", "import torch\n", "from openvoice import se_extractor\n", "from openvoice.api import ToneColorConverter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Initialization\n", "\n", "In this example, we will use the checkpoints from OpenVoiceV2. OpenVoiceV2 is trained with more aggressive augmentations and thus demonstrate better robustness in some cases." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/russell/miniconda3/envs/openvoice/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n", " warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Loaded checkpoint 'checkpoints_v2/converter/checkpoint.pth'\n", "missing/unexpected keys: [] []\n" ] } ], "source": [ "ckpt_converter = 'checkpoints_v2/converter'\n", "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", "output_dir = 'outputs_v2'\n", "\n", "tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)\n", "tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')\n", "\n", "os.makedirs(output_dir, exist_ok=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Tone Color Embedding\n", "We only extract the tone color embedding for the target speaker. The source tone color embeddings can be directly loaded from `checkpoints_v2/ses` folder." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OpenVoice version: v2\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dc253b8bc6d34915bec3fa5b526b0348", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading vocabulary.txt: 0%| | 0.00/460k [00:00 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtqdm_class\u001b[49m\u001b[43m(\u001b[49m\u001b[43mex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43miterables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/tqdm/notebook.py:250\u001b[0m, in \u001b[0;36mtqdm_notebook.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 249\u001b[0m it \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__iter__\u001b[39m()\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m 251\u001b[0m \u001b[38;5;66;03m# return super(tqdm...) will not catch exception\u001b[39;00m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m obj\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/tqdm/std.py:1169\u001b[0m, in \u001b[0;36mtqdm.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1168\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdisable:\n\u001b[0;32m-> 1169\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m iterable:\n\u001b[1;32m 1170\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m obj\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/concurrent/futures/_base.py:609\u001b[0m, in \u001b[0;36mExecutor.map..result_iterator\u001b[0;34m()\u001b[0m\n\u001b[1;32m 608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 609\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/concurrent/futures/_base.py:441\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__get_result()\n\u001b[0;32m--> 441\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_condition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/threading.py:312\u001b[0m, in \u001b[0;36mCondition.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 312\u001b[0m \u001b[43mwaiter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 313\u001b[0m gotit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m reference_speaker \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/example_reference.mp3\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;66;03m# This is the voice you want to clone\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m target_se, audio_name \u001b[38;5;241m=\u001b[39m \u001b[43mse_extractor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_se\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreference_speaker\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtone_color_converter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Desktop/seamless_communication_test/OpenVoice/openvoice/se_extractor.py:146\u001b[0m, in \u001b[0;36mget_se\u001b[0;34m(audio_path, vc_model, target_dir, vad)\u001b[0m\n\u001b[1;32m 144\u001b[0m wavs_folder \u001b[38;5;241m=\u001b[39m split_audio_vad(audio_path, target_dir\u001b[38;5;241m=\u001b[39mtarget_dir, audio_name\u001b[38;5;241m=\u001b[39maudio_name)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 146\u001b[0m wavs_folder \u001b[38;5;241m=\u001b[39m \u001b[43msplit_audio_whisper\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtarget_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maudio_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudio_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 148\u001b[0m audio_segs \u001b[38;5;241m=\u001b[39m glob(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mwavs_folder\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/*.wav\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(audio_segs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", "File \u001b[0;32m~/Desktop/seamless_communication_test/OpenVoice/openvoice/se_extractor.py:22\u001b[0m, in \u001b[0;36msplit_audio_whisper\u001b[0;34m(audio_path, audio_name, target_dir)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mglobal\u001b[39;00m model\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m model \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 22\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mWhisperModel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcuda\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompute_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfloat16\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m audio \u001b[38;5;241m=\u001b[39m AudioSegment\u001b[38;5;241m.\u001b[39mfrom_file(audio_path)\n\u001b[1;32m 24\u001b[0m max_len \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(audio)\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/faster_whisper/transcribe.py:122\u001b[0m, in \u001b[0;36mWhisperModel.__init__\u001b[0;34m(self, model_size_or_path, device, device_index, compute_type, cpu_threads, num_workers, download_root, local_files_only)\u001b[0m\n\u001b[1;32m 120\u001b[0m model_path \u001b[38;5;241m=\u001b[39m model_size_or_path\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 122\u001b[0m model_path \u001b[38;5;241m=\u001b[39m \u001b[43mdownload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_size_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_root\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m ctranslate2\u001b[38;5;241m.\u001b[39mmodels\u001b[38;5;241m.\u001b[39mWhisper(\n\u001b[1;32m 129\u001b[0m model_path,\n\u001b[1;32m 130\u001b[0m device\u001b[38;5;241m=\u001b[39mdevice,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 134\u001b[0m inter_threads\u001b[38;5;241m=\u001b[39mnum_workers,\n\u001b[1;32m 135\u001b[0m )\n\u001b[1;32m 137\u001b[0m tokenizer_file \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(model_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtokenizer.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/faster_whisper/utils.py:98\u001b[0m, in \u001b[0;36mdownload_model\u001b[0;34m(size_or_id, output_dir, local_files_only, cache_dir)\u001b[0m\n\u001b[1;32m 95\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m cache_dir\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mhuggingface_hub\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msnapshot_download\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m 100\u001b[0m huggingface_hub\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39mHfHubHTTPError,\n\u001b[1;32m 101\u001b[0m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mConnectionError,\n\u001b[1;32m 102\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[1;32m 103\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/huggingface_hub/_snapshot_download.py:239\u001b[0m, in \u001b[0;36msnapshot_download\u001b[0;34m(repo_id, repo_type, revision, endpoint, cache_dir, local_dir, local_dir_use_symlinks, library_name, library_version, user_agent, proxies, etag_timeout, resume_download, force_download, token, local_files_only, allow_patterns, ignore_patterns, max_workers, tqdm_class)\u001b[0m\n\u001b[1;32m 237\u001b[0m _inner_hf_hub_download(file)\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 239\u001b[0m \u001b[43mthread_map\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43m_inner_hf_hub_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiltered_repo_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mdesc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mFetching \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfiltered_repo_files\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m files\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_workers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# User can use its own tqdm class or the default one from `huggingface_hub.utils`\u001b[39;49;00m\n\u001b[1;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mtqdm_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtqdm_class\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mhf_tqdm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 246\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m local_dir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mrealpath(local_dir))\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/tqdm/contrib/concurrent.py:69\u001b[0m, in \u001b[0;36mthread_map\u001b[0;34m(fn, *iterables, **tqdm_kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;124;03mEquivalent of `list(map(fn, *iterables))`\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;124;03mdriven by `concurrent.futures.ThreadPoolExecutor`.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m [default: max(32, cpu_count() + 4)].\u001b[39;00m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconcurrent\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfutures\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ThreadPoolExecutor\n\u001b[0;32m---> 69\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_executor_map\u001b[49m\u001b[43m(\u001b[49m\u001b[43mThreadPoolExecutor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43miterables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtqdm_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/site-packages/tqdm/contrib/concurrent.py:51\u001b[0m, in \u001b[0;36m_executor_map\u001b[0;34m(PoolExecutor, fn, *iterables, **tqdm_kwargs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ensure_lock(tqdm_class, lock_name\u001b[38;5;241m=\u001b[39mlock_name) \u001b[38;5;28;01mas\u001b[39;00m lk:\n\u001b[1;32m 48\u001b[0m \u001b[38;5;66;03m# share lock in case workers are already using `tqdm`\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m PoolExecutor(max_workers\u001b[38;5;241m=\u001b[39mmax_workers, initializer\u001b[38;5;241m=\u001b[39mtqdm_class\u001b[38;5;241m.\u001b[39mset_lock,\n\u001b[1;32m 50\u001b[0m initargs\u001b[38;5;241m=\u001b[39m(lk,)) \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(tqdm_class(ex\u001b[38;5;241m.\u001b[39mmap(fn, \u001b[38;5;241m*\u001b[39miterables, chunksize\u001b[38;5;241m=\u001b[39mchunksize), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/concurrent/futures/_base.py:637\u001b[0m, in \u001b[0;36mExecutor.__exit__\u001b[0;34m(self, exc_type, exc_val, exc_tb)\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__exit__\u001b[39m(\u001b[38;5;28mself\u001b[39m, exc_type, exc_val, exc_tb):\n\u001b[0;32m--> 637\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshutdown\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwait\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/concurrent/futures/thread.py:235\u001b[0m, in \u001b[0;36mThreadPoolExecutor.shutdown\u001b[0;34m(self, wait, cancel_futures)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m wait:\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_threads:\n\u001b[0;32m--> 235\u001b[0m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/threading.py:1060\u001b[0m, in \u001b[0;36mThread.join\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1057\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot join current thread\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1059\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1060\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_wait_for_tstate_lock\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1061\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1062\u001b[0m \u001b[38;5;66;03m# the behavior of a negative timeout isn't documented, but\u001b[39;00m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;66;03m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wait_for_tstate_lock(timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mmax\u001b[39m(timeout, \u001b[38;5;241m0\u001b[39m))\n", "File \u001b[0;32m~/miniconda3/envs/openvoice/lib/python3.9/threading.py:1080\u001b[0m, in \u001b[0;36mThread._wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1080\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 1081\u001b[0m lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 1082\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stop()\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "\n", "reference_speaker = 'resources/example_reference.mp3' # This is the voice you want to clone\n", "target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Use MeloTTS as Base Speakers\n", "\n", "MeloTTS is a high-quality multi-lingual text-to-speech library by @MyShell.ai, supporting languages including English (American, British, Indian, Australian, Default), Spanish, French, Chinese, Japanese, Korean. In the following example, we will use the models in MeloTTS as the base speakers. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from melo.api import TTS\n", "\n", "texts = {\n", " 'EN_NEWEST': \"Did you ever hear a folk tale about a giant turtle?\", # The newest English base speaker model\n", " 'EN': \"Did you ever hear a folk tale about a giant turtle?\",\n", " 'ES': \"El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante.\",\n", " 'FR': \"La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante.\",\n", " 'ZH': \"在这次vacation中,我们计划去Paris欣赏埃菲尔铁塔和卢浮宫的美景。\",\n", " 'JP': \"彼は毎朝ジョギングをして体を健康に保っています。\",\n", " 'KR': \"안녕하세요! 오늘은 날씨가 정말 좋네요.\",\n", "}\n", "\n", "\n", "src_path = f'{output_dir}/tmp.wav'\n", "\n", "# Speed is adjustable\n", "speed = 1.0\n", "\n", "for language, text in texts.items():\n", " model = TTS(language=language, device=device)\n", " speaker_ids = model.hps.data.spk2id\n", " \n", " for speaker_key in speaker_ids.keys():\n", " speaker_id = speaker_ids[speaker_key]\n", " speaker_key = speaker_key.lower().replace('_', '-')\n", " \n", " source_se = torch.load(f'checkpoints_v2/base_speakers/ses/{speaker_key}.pth', map_location=device)\n", " model.tts_to_file(text, speaker_id, src_path, speed=speed)\n", " save_path = f'{output_dir}/output_v2_{speaker_key}.wav'\n", "\n", " # Run the tone color converter\n", " encode_message = \"@MyShell\"\n", " tone_color_converter.convert(\n", " audio_src_path=src_path, \n", " src_se=source_se, \n", " tgt_se=target_se, \n", " output_path=save_path,\n", " message=encode_message)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 4 }