Felguk committed on
Commit
28def44
·
verified ·
1 Parent(s): 153b6ee

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
# Text-generation pipeline backed by the JinaAI ReaderLM-v2 checkpoint.
# Built once at import time so every request reuses the same loaded model.
model_name = "jinaai/ReaderLM-v2"
html_converter = pipeline(task="text-generation", model=model_name)
7
+
8
def convert_html(html_input, output_format):
    """Convert raw HTML into the requested text format using the model pipeline.

    Args:
        html_input: Raw HTML markup to convert. ``None``, empty, or
            whitespace-only input short-circuits to an empty string.
        output_format: Name of the target format, interpolated verbatim into
            the prompt (the UI offers "Markdown" and "JSON").

    Returns:
        The generated text with surrounding whitespace stripped, or ``""``
        when there is nothing to convert.
    """
    # Nothing to convert: skip the (expensive) model call rather than asking
    # the model to complete a prompt that contains no HTML.
    if html_input is None or not html_input.strip():
        return ""

    prompt = f"Convert the following HTML into {output_format}:\n\n{html_input}"

    response = html_converter(
        prompt,
        # Budget the *generated* tokens only. `max_length` also counts the
        # prompt, so any HTML longer than the limit left no room for output.
        max_new_tokens=500,
        num_return_sequences=1,
        # Let the pipeline drop the prompt itself; stripping it afterwards
        # with str.replace relies on the echoed prompt matching byte-for-byte
        # and would also delete any later repetition of that text.
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
20
+
21
# --- Gradio UI -------------------------------------------------------------
# Components are created up front (inputs first, then the output) and then
# wired to convert_html by a single gr.Interface call.
html_source_box = gr.Textbox(
    lines=10,
    placeholder="Paste your raw HTML here...",
    label="Raw HTML Input",
)
format_selector = gr.Radio(
    ["Markdown", "JSON"],
    label="Output Format",
    value="Markdown",
)
converted_box = gr.Textbox(lines=10, label="Converted Output")

# Each row supplies one value per input component: (HTML snippet, format).
example_rows = [
    ["<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>", "Markdown"],
    ["<ul><li>Item 1</li><li>Item 2</li></ul>", "JSON"],
]

interface = gr.Interface(
    fn=convert_html,
    inputs=[html_source_box, format_selector],
    outputs=converted_box,
    title="HTML to Markdown/JSON Converter",
    description="Convert raw HTML into beautifully formatted Markdown or JSON using JinaAI ReaderLM-v2.",
    theme="NoCrypt/miku",  # community theme, referenced by its Hub id
    examples=example_rows,
)

# Start serving the app.
interface.launch()