import streamlit as st from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline from gtts import gTTS import io from PIL import Image # Install PyTorch try: import torch except ImportError: st.warning("PyTorch is not installed. Installing PyTorch...") import subprocess subprocess.run(["pip", "install", "torch"]) st.success("PyTorch has been successfully installed!") import torch # Install Optimum library try: import optimum except ImportError: st.warning("Optimum library is not installed. Installing optimum...") import subprocess subprocess.run(["pip", "install", "optimum"]) st.success("Optimum library has been successfully installed!") import optimum # Load the image captioning model caption_model = pipeline("image-to-text", model="unography/blip-large-long-cap") story_generator = pipeline("text-generation", model="TheBloke/storytime-13B-GPTQ") def generate_caption(image): # Generate the caption for the uploaded image caption = caption_model(image)[0]["generated_text"] return caption def generate_story(caption): # Generate the story based on the caption using the GPT-2 model prompt = f"Write a short, simple children's story inspired by the image of {caption}. Here's the story:\n\n" story = story_generator(prompt, max_length=500, num_return_sequences=1)[0]["generated_text"] # Extract the story text from the generated output story = story.strip() # Post-process the story (example: remove inappropriate words) inappropriate_words = ["violence", "horror", "scary", "adult", "death", "gun", "shoot"] for word in inappropriate_words: story = story.replace(word, "") # Limit the story to approximately 100 words words = story.split() if len(words) > 100: story = " ".join(words[:100]) + "..." return story def convert_to_audio(story): # Convert the story to audio using gTTS tts = gTTS(text=story, lang="en") audio_bytes = io.BytesIO() tts.write_to_fp(audio_bytes) audio_bytes.seek(0) return audio_bytes def main(): st.title("Storytelling Application") # File uploader for the image (restricted to JPG) uploaded_image = st.file_uploader("Upload an image", type=["jpg"]) if uploaded_image is not None: # Convert the uploaded image to PIL image image = Image.open(uploaded_image) # Display the uploaded image st.image(image, caption="Uploaded Image", use_container_width=True) # Generate the caption for the image caption = generate_caption(image) st.subheader("Generated Caption:") st.write(caption) # Generate the story based on the caption using the GPT-2 model story = generate_story(caption) st.subheader("Generated Story:") st.write(story) # Convert the story to audio audio_bytes = convert_to_audio(story) # Display the audio player st.audio(audio_bytes, format="audio/mp3") if __name__ == "__main__": main()