"""Gradio app that generates an ``unstructured-ingest`` pipeline script.

The user picks a source connector, a destination connector, whether to chunk,
and an embedding provider; the app renders a ready-to-run Python script plus
links to the documentation of the two selected connectors.
"""

import json
import os
from typing import Optional, Tuple

import gradio as gr

# Indentation applied to connector config snippets so they line up with the
# other keyword arguments inside ``Pipeline.from_configs(`` in the template.
_CONFIG_INDENT = " " * 8


def _load_connectors(path: str) -> dict:
    """Load a connector catalogue from the JSON file at *path*."""
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# Load source_connectors and destination_connectors from JSON files.
# Each catalogue maps a display name to a dict; this module reads the keys
# 'imports', 'configs', 'docs' and 'source_connector' / 'destination_connector'.
source_connectors = _load_connectors('source_connectors.json')
destination_connectors = _load_connectors('destination_connectors.json')


def _indent_block(text: str, prefix: str = _CONFIG_INDENT) -> str:
    """Strip *text* and prefix every one of its lines with *prefix*."""
    return "\n".join(prefix + line for line in text.strip().splitlines())


def generate_documentation_link(source: dict, destination: dict) -> str:
    """Return a Markdown line linking to both connectors' documentation.

    Args:
        source: Source connector entry; reads 'source_connector' and 'docs'.
        destination: Destination connector entry; reads
            'destination_connector' and 'docs'.
    """
    source_link = f"[{source['source_connector']} documentation]({source['docs']})"
    destination_link = (
        f"[{destination['destination_connector']} documentation]"
        f"({destination['docs']})"
    )
    return f"{source_link} | {destination_link}"


def generate_code(
    source: str,
    destination: str,
    chunking: bool,
    embedding: Optional[str],
) -> Tuple[str, str]:
    """Render the pipeline script for the selected options.

    Args:
        source: Key into ``source_connectors``.
        destination: Key into ``destination_connectors``.
        chunking: When truthy, a ``ChunkerConfig`` line is emitted; otherwise
            a comment noting that chunking is disabled.
        embedding: Embedding provider name. A falsy value (``None`` or empty
            string) emits a comment noting that embedding is disabled.

    Returns:
        A ``(code, doc_link)`` tuple: the generated Python source and the
        Markdown documentation links for the two connectors.

    Raises:
        gr.Error: If *source* or *destination* is not a known connector
            (for example, when nothing has been selected in the dropdown).
    """
    # Surface a readable message in the UI instead of a bare KeyError.
    if source not in source_connectors:
        raise gr.Error("Please select a source connector.")
    if destination not in destination_connectors:
        raise gr.Error("Please select a destination connector.")

    source_connector = source_connectors[source]
    destination_connector = destination_connectors[destination]

    source_imports = source_connector['imports']
    destination_imports = destination_connector['imports']

    # Ensure proper indentation for source and destination configs
    indented_source_configs = _indent_block(source_connector['configs'])
    indented_destination_configs = _indent_block(destination_connector['configs'])

    if chunking:
        chunker_line = 'chunker_config=ChunkerConfig(chunking_strategy="by_title"),'
    else:
        chunker_line = '# Chunking is disabled'

    if embedding:
        # The trailing comma is required: the destination config keyword
        # arguments follow this line inside the same call.
        embedder_line = (
            'embedder_config=EmbedderConfig(embedding_provider="' + embedding + '"),'
        )
    else:
        embedder_line = '# Embedding is disabled'

    code = f'''
import os
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
{source_imports}
{destination_imports}
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig

if __name__ == "__main__":
    Pipeline.from_configs(
        context=ProcessorConfig(),
{indented_source_configs}
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
            strategy="hi_res",
        ),
        {chunker_line}
        {embedder_line}
{indented_destination_configs}
    ).run()
'''
    doc_link = generate_documentation_link(source_connector, destination_connector)
    return code, doc_link


demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Dropdown(
            list(source_connectors.keys()),
            label="Get unstructured documents from:",
        ),
        gr.Dropdown(
            list(destination_connectors.keys()),
            label="Upload RAG-ready documents to:",
        ),
        gr.Checkbox(label="Check to enable chunking"),
        gr.Dropdown(
            ["langchain-openai", "langchain-huggingface"],
            label="Embedding provider:",
        ),
    ],
    outputs=[
        gr.Code(language="python", label="Generated Code"),
        gr.Markdown(label="Documentation Links"),
    ],
    title="Unstructured-Ingest Code Generator",
    description="Generate code for the unstructured-ingest library based on your inputs.",
)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse generate_code) does not launch the UI.
if __name__ == "__main__":
    demo.launch()