Spaces:

KevinSmith94624
/

Text-to-Any

Runtime error

App Files Files Community

KevinSmith94624 committed on Dec 25, 2024

Commit

bf19bc1

verified ·

1 Parent(s): 321d9f9

Delete gradio-based-text-to-any (1).ipynb

Browse files

Files changed (1) hide show

gradio-based-text-to-any (1).ipynb +0 -1

gradio-based-text-to-any (1).ipynb DELETED Viewed

@@ -1 +0,0 @@

- {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# pip install torch_xla -q","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:06.028570Z","iopub.execute_input":"2024-12-23T07:29:06.028897Z","iopub.status.idle":"2024-12-23T07:29:06.040164Z","shell.execute_reply.started":"2024-12-23T07:29:06.028804Z","shell.execute_reply":"2024-12-23T07:29:06.039321Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# ! nvidia-smi -L","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:06.046744Z","iopub.execute_input":"2024-12-23T07:29:06.047018Z","iopub.status.idle":"2024-12-23T07:29:06.051114Z","shell.execute_reply.started":"2024-12-23T07:29:06.046989Z","shell.execute_reply":"2024-12-23T07:29:06.050374Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"!pip install gradio diffusers gTTS together -q","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:17:36.027884Z","iopub.execute_input":"2024-12-25T00:17:36.028181Z","iopub.status.idle":"2024-12-25T00:17:50.860673Z","shell.execute_reply.started":"2024-12-25T00:17:36.028159Z","shell.execute_reply":"2024-12-25T00:17:50.859768Z"}},"outputs":[{"name":"stdout","text":"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.8/41.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.2/57.2 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.4/320.4 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m82.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.6/70.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.5/73.5 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.6/78.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m450.5/450.5 kB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m131.3/131.3 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.2/11.2 MB\u001b[0m \u001b[31m106.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.2/73.2 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m 
\u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"# import torch_xla.core.xla_model as xm\n# tpu = xm.xla_device()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:20.075258Z","iopub.execute_input":"2024-12-23T07:29:20.075501Z","iopub.status.idle":"2024-12-23T07:29:20.078665Z","shell.execute_reply.started":"2024-12-23T07:29:20.075480Z","shell.execute_reply":"2024-12-23T07:29:20.078023Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import torch\nfrom diffusers import TextToVideoSDPipeline, DiffusionPipeline\nfrom diffusers.utils import export_to_video\nimport gradio as gr\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\nimport PIL\nfrom io import BytesIO\nfrom gtts import gTTS\nimport time\nfrom pydub import AudioSegment\nimport nltk\nfrom together import Together\nimport base64\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:17:53.042072Z","iopub.execute_input":"2024-12-25T00:17:53.042422Z","iopub.status.idle":"2024-12-25T00:18:10.158571Z","shell.execute_reply.started":"2024-12-25T00:17:53.042362Z","shell.execute_reply":"2024-12-25T00:18:10.157888Z"}},"outputs":[{"name":"stderr","text":"The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. 
You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"af4ba4ed88764d96a965454ca9f53a61"}},"metadata":{}}],"execution_count":2},{"cell_type":"code","source":"# tokenizer = AutoTokenizer.from_pretrained(\"MBZUAI/LaMini-GPT-774M\")\n# model0 = AutoModelForCausalLM.from_pretrained(\"MBZUAI/LaMini-GPT-774M\")\n\ntokenizer = AutoTokenizer.from_pretrained(\"ParisNeo/LLama-3.2-3B-Lollms-Finetuned-GGUF\")\nmodel0 = AutoModelForCausalLM.from_pretrained(\"ParisNeo/LLama-3.2-3B-Lollms-Finetuned-GGUF\", ignore_mismatched_sizes=True)\n\n# tokenizer = AutoTokenizer.from_pretrained(\"gokaygokay/tiny_llama_chat_description_to_prompt\", cache_dir = '/kaggle/working')\n# model0 = AutoModelForCausalLM.from_pretrained(\"gokaygokay/tiny_llama_chat_description_to_prompt\", ignore_mismatched_sizes=True, cache_dir = '/kaggle/working')\n# model0 = AutoModelForCausalLM.from_pretrained(\"MJ199999/gpt3_model\",ignore_mismatched_sizes=True, from_tf=True)\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:18:12.707855Z","iopub.execute_input":"2024-12-25T00:18:12.708546Z","iopub.status.idle":"2024-12-25T00:21:21.315758Z","shell.execute_reply.started":"2024-12-25T00:18:12.708513Z","shell.execute_reply":"2024-12-25T00:21:21.314829Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/55.4k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7235efc1f1b148c3b7e26d17f2142e12"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/9.09M [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b79e61717a4b4370b36d9eca861fcf6e"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json: 0%| | 0.00/454 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d654e5a8f22b42f1a07005ad6b59e9a6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/996 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f34bfdcbb15b49428684c0aa736dc5af"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model.bin.index.json: 0%| | 0.00/20.9k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c20f6bbdae684e8bbe45670fd13e1e1f"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ebcef1a01874b588ca94e1a8850b371"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model-00001-of-00002.bin: 0%| | 0.00/4.97G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ceedcb97ee54916811dfbf0c6769884"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model-00002-of-00002.bin: 0%| | 0.00/1.46G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d224f4f9a18d42439e6ffd092666604c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6eae4086663a4eb28e3032a12d71ac22"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json: 0%| 
| 0.00/234 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"60e9176e1c044beaae7bd7cbcd2abc92"}},"metadata":{}}],"execution_count":3},{"cell_type":"code","source":"device = torch.device(\"cuda:1\" if torch.cuda.is_available() else \"cpu\")\nmodel0 = model0.to(device)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:21.316973Z","iopub.execute_input":"2024-12-25T00:21:21.317277Z","iopub.status.idle":"2024-12-25T00:21:25.573305Z","shell.execute_reply.started":"2024-12-25T00:21:21.317253Z","shell.execute_reply":"2024-12-25T00:21:25.572655Z"}},"outputs":[],"execution_count":4},{"cell_type":"code","source":"from transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Initialize Chat History\ndef chat_with_llama(user_input, chat_history):\n # Prepare formatted prompt\n prompt = \"You are a helpful, respectful and honest general-purpose assistant.\"\n for user_content, assist_content in chat_history:\n prompt += f\"user: {user_content}\\n\"\n prompt += f\"assistant: {assist_content}\\n\"\n prompt += f\"user: {user_input}\\n'assistant:\"\n\n # Tokenize and generate response\n inputs = tokenizer(prompt, return_tensors=\"pt\").to(\"cuda:1\")\n output = model0.generate(inputs[\"input_ids\"], max_length=4096, max_new_tokens = 1024, temperature=0.7, max_time = 10.0, repetition_penalty = 1.0)\n response = tokenizer.decode(output[0], skip_special_tokens=True)\n\n # Extract and append assistant's response\n assistant_reply = response.split(\"assistant:\")[-1].split('user:')[0].strip()\n chat_history.append((user_input, assistant_reply))\n\n return assistant_reply, 
chat_history\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.574627Z","iopub.execute_input":"2024-12-25T00:21:25.574948Z","iopub.status.idle":"2024-12-25T00:21:25.580502Z","shell.execute_reply.started":"2024-12-25T00:21:25.574919Z","shell.execute_reply":"2024-12-25T00:21:25.579626Z"}},"outputs":[],"execution_count":5},{"cell_type":"code","source":"# chat_history = []\n# answer0, chat_history = chat_with_llama('Hi. My name is Smith', [])\n# print(answer0)\n# answer1, chat_history = chat_with_llama('What is my name?', chat_history)\n# print(answer1)\n# answer2, chat_history = chat_with_llama('Can you guess my wife\\'s name?', chat_history)\n# print(answer2)\n# print(chat_history)","metadata":{"trusted":true,"execution":{"execution_failed":"2024-12-23T07:33:54.513Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"api_key='ac2619935e6a25d4bae2890260822fa0379ec7d8726114ff9744a38127bf8525'\nclient = Together(api_key=api_key)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.581779Z","iopub.execute_input":"2024-12-25T00:21:25.582063Z","iopub.status.idle":"2024-12-25T00:21:25.599681Z","shell.execute_reply.started":"2024-12-25T00:21:25.582042Z","shell.execute_reply":"2024-12-25T00:21:25.598921Z"}},"outputs":[],"execution_count":6},{"cell_type":"code","source":"def chat_api(user_input, chat_history):\n messages = []\n for user_content, assist_content in chat_history:\n messages += [\n {\"role\":\"user\", \"content\":user_content},\n {\"role\":\"assistant\", \"content\":assist_content}\n ]\n messages += [{\"role\":\"user\", \"content\":user_input}]\n \n response = client.chat.completions.create(\n model=\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\n messages=messages,\n )\n reply = response.choices[0].message.content\n chat_history.append((user_input, reply))\n return reply, 
chat_history","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.600510Z","iopub.execute_input":"2024-12-25T00:21:25.600797Z","iopub.status.idle":"2024-12-25T00:21:25.615159Z","shell.execute_reply.started":"2024-12-25T00:21:25.600769Z","shell.execute_reply":"2024-12-25T00:21:25.614457Z"}},"outputs":[],"execution_count":7},{"cell_type":"code","source":"# chat_history = []\n# answer0, chat_history = chat_api('Hi. My name is Smith', [])\n# print(answer0, '\\n-----------------------------------------\\n')\n# answer1, chat_history = chat_api('What is my name?', chat_history)\n# print(answer1, '\\n-----------------------------------------\\n')\n# answer2, chat_history = chat_api('Can you guess my wife\\'s name?', chat_history)\n\n# # Chat Example\n# print(answer2, '\\n-----------------------------------------\\n')\n# print(chat_history)","metadata":{"trusted":true,"execution":{"execution_failed":"2024-12-23T07:33:54.513Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"def tti_api(prompt, num_steps = 25, width = 512, heights = 512):\n response = client.images.generate(\n prompt=prompt,\n model=\"black-forest-labs/FLUX.1-dev\",\n width=width,\n height=heights,\n steps=num_steps,\n n=1,\n response_format=\"b64_json\"\n )\n \n image_data = base64.b64decode(response.data[0].b64_json)\n return image_data","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.615864Z","iopub.execute_input":"2024-12-25T00:21:25.616087Z","iopub.status.idle":"2024-12-25T00:21:25.629523Z","shell.execute_reply.started":"2024-12-25T00:21:25.616067Z","shell.execute_reply":"2024-12-25T00:21:25.628707Z"}},"outputs":[],"execution_count":8},{"cell_type":"code","source":"prompt = 'A nice black lexus 570 car running on the snowy road.'\nimage = tti_api(prompt, num_steps = 25)\nimage = 
PIL.Image.open(BytesIO(image))\nimage.save('result.png')\nimage.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.630369Z","iopub.execute_input":"2024-12-25T00:21:25.630605Z","iopub.status.idle":"2024-12-25T00:21:27.106671Z","shell.execute_reply.started":"2024-12-25T00:21:25.630586Z","shell.execute_reply":"2024-12-25T00:21:27.105616Z"}},"outputs":[],"execution_count":9},{"cell_type":"code","source":"def ttv(prompt, num_steps = 50):\n # Load the text-to-video model from Hugging Face\n model_id = \"damo-vilab/text-to-video-ms-1.7b\" # ModelScope Text-to-Video model\n #model_id = \"guoyww/animatediff-motion-adapter-v1-5-2\" # ModelScope Text-to-Video \n \n pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant=\"fp16\")\n pipe.to(\"cuda:0\") # Use GPU if available\n \n # Generate video frames\n print(\"Generating video... This may take some time.\")\n with torch.no_grad():\n video_frames = pipe(prompt, num_frames=32, height=256, width=256, num_inference_steps=num_steps).frames[0]\n # Save the generated video\n video_path = export_to_video(video_frames, output_video_path=\"output_video.mp4\")\n return video_path\ntest_video = ttv('An awesome lexus 570 car running on the snowy road, high quality', num_steps = 50)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:27.109018Z","iopub.execute_input":"2024-12-25T00:21:27.109275Z","iopub.status.idle":"2024-12-25T00:22:58.858457Z","shell.execute_reply.started":"2024-12-25T00:21:27.109252Z","shell.execute_reply":"2024-12-25T00:22:58.857305Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"model_index.json: 0%| | 0.00/384 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a0b27eacce0a4d7c98b7046ac0823f3b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Fetching 12 files: 0%| | 0/12 [00:00<?, 
?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e950f3d637f544e3bc06117c0757049a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"text_encoder/config.json: 0%| | 0.00/644 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3d672c53342640d0825b8999a03da969"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"scheduler/scheduler_config.json: 0%| | 0.00/465 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a64ba6a5a03f4882a63799c7cd8fca73"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"unet/config.json: 0%| | 0.00/787 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"be96cdd3a8544e3b96e8e5e15d5ffb30"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/tokenizer_config.json: 0%| | 0.00/755 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ceadd9718d0f44ef837ea787213019c4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.fp16.safetensors: 0%| | 0.00/681M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5c50cc639f074f5fbc4f69cc7042ce21"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/vocab.json: 0%| | 0.00/1.06M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6845746769ce42a7baaacdf4122f6ddf"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/merges.txt: 0%| | 0.00/525k [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"439b3f3d79434ba097a91c8890e08b55"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/special_tokens_map.json: 0%| | 0.00/460 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8a6c73d18ba34fe3b94ee026735a6cb8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vae/config.json: 0%| | 0.00/657 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"246245042f9f4dcf8c516b1e22098b9d"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.fp16.safetensors: 0%| | 0.00/167M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9ebd3903ed6e4b658f8bac11173bcdaa"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.fp16.safetensors: 0%| | 0.00/2.82G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b1f7d772727a4dc99571bce356ef8f53"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"baf0f1b569fa43ea99bb417301b3a80f"}},"metadata":{}},{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n warnings.warn(\n","output_type":"stream"},{"name":"stdout","text":"Generating video... 
This may take some time.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/50 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3248592582c44362aac5fa52be5853e6"}},"metadata":{}},{"name":"stderr","text":"/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n self.pid = _posixsubprocess.fork_exec(\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"# Ensure the sentence tokenizer is downloaded (if not already)\nnltk.download('punkt')\n\n# Function to convert text to speech and generate SRT content\ndef tts(text):\n # Initialize the Google TTS engine with language (e.g., 'en' for English)\n tts = gTTS(text=text, lang='en', slow=False)\n \n # Save to an audio file\n audio_path = \"output.mp3\"\n tts.save(audio_path)\n \n # Load the audio file with pydub to get the duration\n audio = AudioSegment.from_mp3(audio_path)\n duration_ms = len(audio) # Duration in milliseconds\n \n # Split the text into sentences using NLTK\n sentences = nltk.sent_tokenize(text)\n \n # Estimate the duration per sentence\n chunk_duration_ms = duration_ms // len(sentences) # Estimated duration per sentence\n \n # Generate SRT content\n srt_content = \"\"\n start_time = 0 # Start time of the first subtitle\n for idx, sentence in enumerate(sentences):\n end_time = start_time + chunk_duration_ms\n start_time_formatted = time.strftime('%H:%M:%S', time.gmtime(start_time / 1000)) + ',' + f'{start_time % 1000:03d}'\n end_time_formatted = time.strftime('%H:%M:%S', time.gmtime(end_time / 1000)) + ',' + f'{end_time % 1000:03d}'\n \n srt_content += f\"{idx + 1}\\n\"\n srt_content += f\"{start_time_formatted} --> {end_time_formatted}\\n\"\n srt_content += f\"{sentence}\\n\\n\"\n \n start_time = end_time # Update start time for the next 
sentence\n \n return audio_path, srt_content","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:58.860290Z","iopub.execute_input":"2024-12-25T00:22:58.860668Z","iopub.status.idle":"2024-12-25T00:22:59.006324Z","shell.execute_reply.started":"2024-12-25T00:22:58.860633Z","shell.execute_reply":"2024-12-25T00:22:59.005656Z"}},"outputs":[{"name":"stdout","text":"[nltk_data] Downloading package punkt to /usr/share/nltk_data...\n[nltk_data] Package punkt is already up-to-date!\n","output_type":"stream"}],"execution_count":11},{"cell_type":"code","source":"def tti(prompt, num_steps = 50, width = 512, heights = 512):\n # Load the pre-trained Stable Diffusion pipeline from Hugging Face\n pipe = DiffusionPipeline.from_pretrained(\"stabilityai/stable-diffusion-2-1\")\n #pipe.load_lora_weights(\"FradigmaDangerYT/dalle-e-mini\")\n \n # Move the pipeline to GPU (you can select the GPU with cuda:1 for the second GPU)\n device0 = torch.device(\"cuda:0\") # Use \"cuda:0\" for the first GPU, \"cuda:1\" for the second GPU\n pipe.to(device0)\n print(heights)\n # Generate an image\n image = pipe(prompt, num_inference_steps = num_steps, width = width, height = heights).images[0] # Generate image from the prompt\n return image\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:59.007053Z","iopub.execute_input":"2024-12-25T00:22:59.007324Z","iopub.status.idle":"2024-12-25T00:22:59.011814Z","shell.execute_reply.started":"2024-12-25T00:22:59.007302Z","shell.execute_reply":"2024-12-25T00:22:59.010754Z"}},"outputs":[],"execution_count":12},{"cell_type":"code","source":"prompt = 'A nice black lexus 570 car running on the snowy road.'\nimage = tti(prompt, num_steps = 25, width = 320, heights = 240)\n# image = 
PIL.Image.open(BytesIO(image))\nimage.save('result.png')\nimage.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:59.012694Z","iopub.execute_input":"2024-12-25T00:22:59.012942Z","iopub.status.idle":"2024-12-25T00:23:23.213670Z","shell.execute_reply.started":"2024-12-25T00:22:59.012911Z","shell.execute_reply":"2024-12-25T00:23:23.212714Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"model_index.json: 0%| | 0.00/537 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"00315ece5482462094ef53e3ccc0ad69"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Fetching 13 files: 0%| | 0/13 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"624b98d12a21487293ade164658da7d4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"text_encoder/config.json: 0%| | 0.00/633 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7e263911d57945308978e63ad218c197"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/1.36G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1fdce8f0fabd4f92b1a48f1fe144b4ed"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/merges.txt: 0%| | 0.00/525k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ff7371dec38c49118ac6ec9a87e38db3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"scheduler/scheduler_config.json: 0%| | 0.00/345 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1c11753bf15a4cea8782bea421fc5978"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"(…)ature_extractor/preprocessor_config.json: 0%| | 0.00/342 
[00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"688221b8e00c48598659f4a5fa999b21"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/tokenizer_config.json: 0%| | 0.00/824 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"96f56565644346d2b4ae667ee6cccccd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/special_tokens_map.json: 0%| | 0.00/460 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"823d1d5a1e8e4330916b66263db32ed5"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/vocab.json: 0%| | 0.00/1.06M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"312ef1679c8a4af7ab15f634c4b67dcd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"unet/config.json: 0%| | 0.00/939 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6ec60a454ba347958279d82fd21955f7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.safetensors: 0%| | 0.00/3.46G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"09536bea3884449c85a44a2793133c8a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.safetensors: 0%| | 0.00/335M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f4dd6779748945309137f21c1d23bb33"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vae/config.json: 0%| | 0.00/611 [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"021d7c9db3024591b1990d3d116abcf4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading pipeline components...: 0%| | 0/6 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3beb2daec308468c894d5ba06e9feacb"}},"metadata":{}},{"name":"stdout","text":"240\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":" 0%| | 0/25 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8a38b7b1101c4325bd50bf5974274d15"}},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"\n# If demo is on, turn off demo\ntry:\n demo.close()\nexcept:\n pass\n\nwith gr.Blocks() as demo:\n gr.Markdown(\"\"\"\n # Gradio based Text-to-Any Project\n \"\"\")\n with gr.Tab(label=\"Llama-Chat\"):\n radios0 = gr.Radio(['use api', 'use loaded model'], value=\"use api\", show_label = False)\n gptDialog = gr.Chatbot(label = \"Llama-Chat\", max_height=512, min_height=512,\n autoscroll= True)\n with gr.Row(equal_height=True):\n prompt0 = gr.Textbox(label = 'Prompt Input', lines = 1, scale = 9, max_lines=2,\n autofocus=True, autoscroll=True, placeholder='Type your message here...')\n with gr.Column(scale = 1):\n generate_btn0 = gr.Button('generate')\n clear_btn0 = gr.Button('clear')\n \n with gr.Tab(label=\"Text-to-Image/Video\"):\n with gr.Row():\n radios1 = gr.Radio(['use api', 'use loaded model'], value=\"use api\", show_label = False)\n steps = gr.Slider(value = 50, minimum = 20, maximum = 100, step = 1, label = 'num_steps')\n width = gr.Slider(value = 1024, minimum = 240, maximum = 1792, step = 16, label = 'width')\n heights = gr.Slider(value = 512, minimum = 160, maximum = 1792, step = 16, label = 'heights')\n \n with gr.Row():\n outputImg = gr.Image(type='pil',height= 512, width=512, label=\"Output Image\", interactive=False)\n 
outputVideo = gr.Video(width=512, height=512, label = \"Output Video\", interactive=False)\n with gr.Row(equal_height=True):\n prompt1 = gr.Textbox(label = 'Prompt Input', lines = 1, scale = 9, max_lines=2,\n autofocus=True, autoscroll=True, placeholder='Type your message here...')\n with gr.Column(scale = 1):\n generate_btn1 = gr.Button('generate image')\n generate_btn11 = gr.Button('generate video')\n\n with gr.Tab(label = \"Text-to-Speech\"):\n outputAudio = gr.Audio(label=\"Audio Output\", interactive = False)\n outputSrt = gr.Textbox(label = 'Script Output', lines = 10, max_lines = 5, placeholder = 'Script output here')\n with gr.Row(equal_height=False):\n prompt2 = gr.Textbox(label = 'Prompt Input', lines = 5, scale = 9, max_lines=5,\n autofocus=True, autoscroll=True, placeholder='Type your message here...')\n with gr.Column(scale = 1):\n generate_btn2 = gr.Button('generate')\n clear_btn2 = gr.Button('clear')\n\n with gr.Tab(label = 'About'):\n pass\n\n def generate_txt(prompt, check, history):\n if check == 'use api':\n response, history = chat_api(prompt, history)\n if response == None:\n gr.Warning('Can not reach api.')\n else:\n response, history = chat_with_llama(prompt, history)\n if response == None:\n gr.Warning('Failed to load model.')\n return '', history\n \n def clear_chat():\n history = []\n gr.Info('Cleaned successfully!')\n return history\n\n def generate_img(prompt, check, num_steps, width, heights):\n if check == 'use api':\n image = tti_api(prompt, num_steps = num_steps, width = width, heights = heights)\n image = PIL.Image.open(BytesIO(image))\n if not image:\n gr.Warning('Can not reach api')\n gr.Info('Generated Image Successfully!')\n else:\n image = tti(prompt, num_steps = num_steps, width = width, heights = heights)\n gr.Info('Generated Image Successfully!')\n return image\n \n def generate_video(prompt, num_steps):\n video = ttv(prompt, num_steps)\n gr.Info('Generated Video Successfully!')\n return video\n \n def 
generate_speech(prompt):\n audio, script = tts(prompt)\n gr.Info('Generated Speech Successfully!') \n return audio, script\n \n def clear_speech():\n gr.Info('Cleaned Successfully!')\n return None, ''\n \n prompt0.submit(generate_txt, [prompt0, radios0, gptDialog], [prompt0, gptDialog])\n prompt1.submit(generate_img, [prompt1, radios1], [outputImg])\n\n # generate button click event\n generate_btn0.click(generate_txt, [prompt0, radios0, gptDialog], [prompt0, gptDialog])\n generate_btn1.click(generate_img, [prompt1, radios1, steps, width, heights], [outputImg])\n generate_btn11.click(generate_video, [prompt1, steps], [outputVideo])\n generate_btn2.click(generate_speech, [prompt2], [outputAudio, outputSrt])\n \n # clear button click event\n clear_btn0.click(clear_chat, [], [gptDialog])\n clear_btn2.click(clear_speech, [], [outputAudio, outputSrt])\ndemo.launch()\n ","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:23:23.214982Z","iopub.execute_input":"2024-12-25T00:23:23.215347Z","iopub.status.idle":"2024-12-25T00:23:24.396633Z","shell.execute_reply.started":"2024-12-25T00:23:23.215306Z","shell.execute_reply":"2024-12-25T00:23:24.395829Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/gradio/components/chatbot.py:242: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. 
Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.\n warnings.warn(\n/usr/local/lib/python3.10/dist-packages/gradio/utils.py:1003: UserWarning: Expected 5 arguments for function <function generate_img at 0x799439ab0dc0>, received 2.\n warnings.warn(\n/usr/local/lib/python3.10/dist-packages/gradio/utils.py:1007: UserWarning: Expected at least 5 arguments for function <function generate_img at 0x799439ab0dc0>, received 2.\n warnings.warn(\n","output_type":"stream"},{"name":"stdout","text":"* Running on local URL: http://127.0.0.1:7860\nKaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n\n* Running on public URL: https://d2c6c018093abcee72.gradio.live\n\nThis share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"<div><iframe src=\"https://d2c6c018093abcee72.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"},"metadata":{}},{"execution_count":14,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}],"execution_count":14},{"cell_type":"code","source":"demo.close()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T08:38:27.489912Z","iopub.execute_input":"2024-12-23T08:38:27.490274Z","iopub.status.idle":"2024-12-23T08:38:27.609053Z","shell.execute_reply.started":"2024-12-23T08:38:27.490243Z","shell.execute_reply":"2024-12-23T08:38:27.607832Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}