File size: 4,281 Bytes
cf49c4b
 
 
801ca00
33da63d
5a9f58c
cf49c4b
 
 
 
 
fdd85c1
27c643e
801ca00
fdd85c1
a1ff6d2
cf49c4b
 
5d7c9cd
cf49c4b
801ca00
fdd85c1
5614a83
313d70a
 
 
 
2997b62
9f9ad3d
823e565
27c643e
78384ac
8392bfa
 
ba6a087
81ac3b6
33da63d
6aab036
33da63d
2997b62
fdd85c1
abc4ec6
e8f96e7
abc4ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c34b1da
b1c01e9
 
b47e444
b1c01e9
a948a63
b1c01e9
c34b1da
8fb404e
c3f5319
cf49c4b
b3fa9dd
e02c7a0
 
 
8fb404e
c3f5319
313d70a
b3fa9dd
c3f5319
 
69c8b67
 
a804c80
a1fdb32
5a9f58c
2c1af14
d20b560
a1fdb32
 
 
823e565
 
cc0255f
b35d9ab
 
823e565
cc0255f
b35d9ab
ba6a087
7ea039d
abc4ec6
 
7ea039d
abc4ec6
 
 
2749bcc
367b09f
b1c01e9
c34b1da
2749bcc
 
 
 
b1c01e9
367b09f
2749bcc
 
 
e02c7a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import torch
import os
import random
import gradio as gr
from TTS.api import TTS
from transformers import pipeline
import base64
from datasets import load_dataset
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np
import spaces
import time

@spaces.GPU
def guessanImage(model, image):
    """Run an image-classification pipeline over an uploaded image.

    Args:
        model: Hugging Face model id selected in the UI radio
            (e.g. "microsoft/resnet-50").
        image: PIL image from the Gradio input, or None when nothing
            was uploaded.

    Returns:
        The pipeline output (list of {"label", "score"} dicts), or
        None when no image was provided.
    """
    # Bug fix: the original fell through to `return description` when
    # image was None, raising UnboundLocalError. Return early instead.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)

@spaces.GPU
def guessanAge(model, image):
    """Estimate the age bracket of the person in an uploaded image.

    Args:
        model: Hugging Face model id selected in the UI radio
            (e.g. "nateraw/vit-age-classifier").
        image: PIL image from the Gradio input, or None when nothing
            was uploaded.

    Returns:
        The pipeline output (list of {"label", "score"} dicts), or
        None when no image was provided.
    """
    # Bug fix: the original returned the unbound name `description`
    # when image was None (UnboundLocalError). Guard and return early.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)

@spaces.GPU(duration=120)
def text2speech(text, no0, sample):
    """Synthesize *text* with XTTS v2, cloning the voice in *sample*.

    Args:
        text: Text to speak.
        no0: Unused placeholder for the static HTML component wired as
            the second Gradio input; kept so the interface matches.
        sample: Path to a reference WAV recorded via the microphone, or
            None to fall back to the bundled sample voice.

    Returns:
        Path of the generated WAV file, or None when *text* is empty.
    """
    # Accept the Coqui model license non-interactively.
    os.environ["COQUI_TOS_AGREED"] = "1"
    if sample is None:
        sample = "sampleaudio/abraham.wav"
    if not text:
        # Original implicitly returned None here; make it explicit.
        return None
    # Timestamp the output file so repeated runs don't overwrite each other.
    epoch_time = str(int(time.time()))
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    # Removed an unused `device` local the original computed but never used.
    return tts.tts_to_file(
        text=text,
        file_path="output-" + epoch_time + ".wav",
        speaker_wav=sample,
        language="en",
    )

@spaces.GPU
def ImageGenFromText(text, model):
    """Generate a 512x512 image from a text prompt with a diffusion model.

    Args:
        text: The text prompt.
        model: Hugging Face model id selected in the UI radio
            (e.g. "black-forest-labs/FLUX.1-schnell").

    Returns:
        A PIL image, or None when *text* is empty.
    """
    # The FLUX weights are gated; authenticate with the token stored in
    # the "fluxauth" environment variable / Space secret.
    api_key = os.getenv("fluxauth")
    login(token=api_key)

    if not text:
        # Original implicitly returned None here; make it explicit.
        return None
    dtype = torch.bfloat16
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Pick a fresh random seed per call so repeated prompts vary.
    MAX_SEED = np.iinfo(np.int32).max
    seed = random.randint(0, MAX_SEED)
    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
    generator = torch.Generator().manual_seed(seed)
    # guidance_scale=0.0 and 4 steps match FLUX.1-schnell's recommended
    # fast-inference settings.
    image = pipe(
        prompt=text,
        width=512,
        height=512,
        num_inference_steps=4,
        generator=generator,
        guidance_scale=0.0,
    ).images[0]
    # Removed a leftover debug print(image) from the original.
    return image

@spaces.GPU
def RunLegalModel(text, model):
    """Continue a legal prompt with a text-generation model.

    Args:
        text: Prompt text to continue, or None/empty when nothing typed.
        model: Hugging Face model id selected in the UI radio
            (e.g. "umarbutler/open-australian-legal-llm").

    Returns:
        The generated text string, or None when no prompt was provided.
    """
    # Bug fix: when text was None the original still dereferenced the
    # unbound `shoddyadvice` (UnboundLocalError). Guard and return early.
    # Also covers the empty-string case, which would only echo the prompt.
    if not text:
        return None
    pettyfogger = pipeline("text-generation", model=model)
    shoddyadvice = pettyfogger(text)
    # Removed a leftover debug print(shoddyadvice) from the original.
    return shoddyadvice[0]['generated_text']
    
# ---------------------------------------------------------------------------
# Gradio UI wiring: one gr.Interface per tab, combined in a TabbedInterface.
# ---------------------------------------------------------------------------

# Tab 1: general image classification — choice of three backbone models.
radio1 = gr.Radio(["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"], value="microsoft/resnet-50", label="Select a Classifier", info="Image Classifier")
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)

# Tab 2: age estimation — single model, radio kept for UI consistency.
radio2 = gr.Radio(["nateraw/vit-age-classifier"], value="nateraw/vit-age-classifier", label="Select an Age Classifier", info="Age Classifier")
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)


# Tab 3: text-to-speech with voice cloning.
# The HTML component only displays the fallback sample voice; its value is
# passed to text2speech as the unused second argument (`no0`).
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!", label="Type text to convert to your voice:")
sampletext = gr.HTML("""
<h3>If you do not sample your voice my voice will be used as input:<h3>
 <audio controls>
    <source src="https://huggingface.co./spaces/Abrahamau/gradiotest/resolve/main/sampleaudio/abraham.wav" type="audio/wav">
    Your browser does not support the audio element.
</audio>
""")

# Microphone input recorded straight to a WAV file path for the cloner.
micinput = gr.Audio(sources=['microphone'], type="filepath", format="wav", label="Please Provide a Sample Voice for the Model to Mimic")
outaudio = gr.Audio(show_download_button=True, show_share_button=True)
tab3 = gr.Interface(
    fn=text2speech,
    inputs=[textbox, sampletext, micinput],
    outputs=[outaudio],
)

# Tab 4: text-to-image generation (FLUX.1-schnell).
radio4 = gr.Radio(["black-forest-labs/FLUX.1-schnell"], value="black-forest-labs/FLUX.1-schnell", label="Select", info="text to image")
tab4 = gr.Interface(
    fn=ImageGenFromText,
    inputs=["text", radio4],
    outputs=["image"],
)


# Tab 5: Australian legal-text continuation.
classifiertypes = ["umarbutler/open-australian-legal-llm"]
radio5 = gr.Radio(classifiertypes, value="umarbutler/open-australian-legal-llm", label="Select", info="Legal Model")
textinput5 = gr.Textbox(value="Under the purposes of Part 6 Division 2 of the Act, regulations may confer power on an applicant for")


tab5 = gr.Interface(
    fn=RunLegalModel,
    inputs=[textinput5, radio5],
    outputs=["text"],
)

# Assemble the tabs and start the app (module-level side effect: launches
# the Gradio server when this file is executed).
demo = gr.TabbedInterface([tab1, tab2, tab3, tab4, tab5], ["Describe", "Estimage Age", "Speak", "Generate Image", "Aus. Legal"])
demo.launch()