import gradio as gr from transformers import pipeline # Load the JinaAI ReaderLM-v2 model model_name = "jinaai/ReaderLM-v2" html_converter = pipeline("text-generation", model=model_name) # Function to convert HTML to Markdown or JSON def convert_html(html_input, output_format): # Prepare the prompt for the model prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}" # Generate the output using the model response = html_converter(prompt, max_length=500, num_return_sequences=1) converted_output = response[0]['generated_text'] # Extract the relevant part of the output (remove the prompt) converted_output = converted_output.replace(prompt, "").strip() return converted_output # Gradio Interface with NoCrypt/miku theme interface = gr.Interface( fn=convert_html, inputs=[ gr.Textbox(lines=10, placeholder="Paste your raw HTML here...", label="Raw HTML Input"), gr.Radio(["Markdown", "JSON"], label="Output Format", value="Markdown") ], outputs=gr.Textbox(lines=10, label="Converted Output"), title="HTML to Markdown/JSON Converter", description="Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.", theme="NoCrypt/miku", # Apply the NoCrypt/miku theme examples=[ ["
This is a test.
", "Markdown"], ["