{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HTML to Markdown/JSON Converter\n",
    "\n",
    "Loads the `jinaai/ReaderLM-v2` text-generation model and wraps it in a Gradio interface that converts pasted raw HTML into Markdown or JSON."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install Dependencies\n",
    "# Each requirement is quoted: an unquoted `>=` is treated by the shell as an output\n",
    "# redirection, which drops the version constraint and writes pip's output to a file.\n",
    "%pip install \"gradio>=3.0\" \"transformers>=4.25.0\" \"torch>=1.12.0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import Libraries\n",
    "import gradio as gr\n",
    "from transformers import pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the JinaAI ReaderLM-v2 Model\n",
    "model_name = \"jinaai/ReaderLM-v2\"\n",
    "html_converter = pipeline(\"text-generation\", model=model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to Convert HTML to Markdown or JSON\n",
    "MAX_NEW_TOKENS = 500  # Upper bound on tokens generated per conversion\n",
    "\n",
    "\n",
    "def convert_html(html_input, output_format):\n",
    "    \"\"\"Convert raw HTML into the requested format using the loaded pipeline.\n",
    "\n",
    "    Args:\n",
    "        html_input: Raw HTML text to convert.\n",
    "        output_format: Name of the target format, inserted verbatim into the\n",
    "            prompt (the interface offers \"Markdown\" and \"JSON\").\n",
    "\n",
    "    Returns:\n",
    "        The generated text with surrounding whitespace stripped, or an empty\n",
    "        string when `html_input` is empty or whitespace-only.\n",
    "    \"\"\"\n",
    "    # Nothing to convert: skip the model call entirely.\n",
    "    if not html_input or not html_input.strip():\n",
    "        return \"\"\n",
    "\n",
    "    # Prepare the prompt for the model\n",
    "    prompt = f\"Convert the following HTML into {output_format}:\\n\\n{html_input}\"\n",
    "\n",
    "    # `max_new_tokens` budgets only the generated text; `max_length` also counts\n",
    "    # the prompt tokens, so a long HTML input would leave no room for output.\n",
    "    # `return_full_text=False` makes the pipeline return just the completion, so\n",
    "    # the prompt does not have to be removed afterwards by string replacement.\n",
    "    response = html_converter(\n",
    "        prompt,\n",
    "        max_new_tokens=MAX_NEW_TOKENS,\n",
    "        num_return_sequences=1,\n",
    "        return_full_text=False,\n",
    "    )\n",
    "    return response[0][\"generated_text\"].strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the Gradio Interface\n",
    "iface = gr.Interface(\n",
    "    fn=convert_html,  # Function to call\n",
    "    inputs=[\n",
    "        gr.Textbox(lines=10, placeholder=\"Paste your raw HTML here...\", label=\"Raw HTML Input\"),\n",
    "        gr.Radio([\"Markdown\", \"JSON\"], label=\"Output Format\", value=\"Markdown\")\n",
    "    ],\n",
    "    outputs=gr.Textbox(lines=10, label=\"Converted Output\"),\n",
    "    title=\"HTML to Markdown/JSON Converter\",\n",
    "    description=\"Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.\",\n",
    "    theme=\"NoCrypt/miku\",  # Apply the NoCrypt/miku theme\n",
    "    examples=[\n",
    "        [\"<h1>Hello World</h1><p>This is a test.</p>\", \"Markdown\"],\n",
    "        [\"<ul><li>Item 1</li><li>Item 2</li></ul>\", \"JSON\"]\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch the Interface\n",
    "iface.launch(inline=True)  # Embed the interface in the notebook"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}