gradiotest / app.py
Abrahamau's picture
Update app.py
5a9f58c verified
raw
history blame
3.55 kB
import torch
import os
import random
import gradio as gr
from TTS.api import TTS
from transformers import pipeline
import base64
from datasets import load_dataset
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np
import spaces
import time
@spaces.GPU
def guessanImage(model, image):
imgclassifier = pipeline("image-classification", model=model)
if image is not None:
description = imgclassifier(image)
return description
@spaces.GPU
def guessanAge(model, image):
imgclassifier = pipeline("image-classification", model=model)
if image is not None:
description = imgclassifier(image)
return description
@spaces.GPU(duration=120)
def text2speech(text, no, sample):
device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["COQUI_TOS_AGREED"] = "1"
if sample is None:
sample = "sampleaudio/abraham.wav"
if len(text) > 0:
epoch_time = str(int(time.time()))
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
wav = tts.tts_to_file(text=text, file_path="output-"+epoch_time+".wav", speaker_wav=sample, language="en")
return wav
@spaces.GPU
def ImageGenFromText(text, model):
api_key = os.getenv("fluxauth")
login(token=api_key)
if len(text) > 0:
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max
seed = random.randint(0, MAX_SEED)
pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
generator = torch.Generator().manual_seed(seed)
image = pipe(
prompt = text,
width = 512,
height = 512,
num_inference_steps = 4,
generator = generator,
guidance_scale=0.0
).images[0]
print(image)
return image
radio1 = gr.Radio(["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"], value="microsoft/resnet-50", label="Select a Classifier", info="Image Classifier")
tab1 = gr.Interface(
fn=guessanImage,
inputs=[radio1, gr.Image(type="pil")],
outputs=["text"],
)
radio2 = gr.Radio(["nateraw/vit-age-classifier"], value="nateraw/vit-age-classifier", label="Select an Age Classifier", info="Age Classifier")
tab2 = gr.Interface(
fn=guessanAge,
inputs=[radio2, gr.Image(type="pil")],
outputs=["text"],
)
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!", label="Type text to convert to your voice:")
sampletext = gr.HTML("""
<h3>If you do not sample your voice my voice will be used as input:<h3>
<audio controls autoplay>
<source src="https://huggingface.co./spaces/Abrahamau/gradiotest/resolve/main/sampleaudio/abraham.wav" type="audio/wav">
Your browser does not support the audio element.
</audio>
""")
micinput = gr.Audio(sources=['microphone'], type="filepath", format="wav")
outaudio = gr.Audio(show_download_button=True, show_share_button=True)
tab3 = gr.Interface(
fn=text2speech,
inputs=[textbox, sampletext, micinput],
outputs=[outaudio],
)
radio4 = gr.Radio(["black-forest-labs/FLUX.1-schnell"], value="black-forest-labs/FLUX.1-schnell", label="Select", info="text to image")
tab4 = gr.Interface(
fn=ImageGenFromText,
inputs=["text", radio4],
outputs=["image"],
)
demo = gr.TabbedInterface([tab1, tab2, tab3, tab4], ["Describe", "Estimage Age", "Speak", "Generate Image"])
demo.launch()