File size: 11,633 Bytes
dd4ae81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
    "Azure": {
        "source_connector": "AzureBlobStorage",
        "imports": "from unstructured_ingest.v2.processes.connectors.azure import (AzureBlobStorageIndexerConfig, AzureBlobStorageDownloaderConfig, AzureBlobStorageConnectionConfig)",
        "configs": "indexer_config=AzureBlobStorageIndexerConfig(container_name=os.getenv(\"AZURE_CONTAINER_NAME\")),\ndownloader_config=AzureBlobStorageDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=AzureBlobStorageConnectionConfig(\n    connection_string=os.getenv(\"AZURE_CONNECTION_STRING\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/astradb"
    },
    "Box": {
        "source_connector": "Box",
        "imports": "from unstructured_ingest.v2.processes.connectors.fsspec.box import (BoxAccessConfig, BoxConnectionConfig, BoxIndexerConfig, BoxDownloaderConfig)",
        "configs": "indexer_config=BoxIndexerConfig(remote_url=os.getenv(\"BOX_REMOTE_URL\")),\n        downloader_config=BoxDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=BoxConnectionConfig(\n    access_config=BoxAccessConfig(\n        box_app_config=os.getenv(\"BOX_APP_CONFIG_PATH\")\n    )\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/box"
    },
    "Couchbase": {
        "source_connector": "Couchbase",
        "imports": "from unstructured_ingest.v2.processes.connectors.couchbase import (CouchbaseIndexerConfig, CouchbaseDownloaderConfig, CouchbaseConnectionConfig, CouchbaseAccessConfig)",
        "configs": "indexer_config=CouchbaseIndexerConfig(batch_size=100),\n        downloader_config=CouchbaseDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=CouchbaseConnectionConfig(\n    access_config=CouchbaseAccessConfig(\n        password=os.getenv(\"CB_PASSWORD\")\n    ),\n    username=os.getenv(\"CB_USERNAME\"),\n    connection_string=os.getenv(\"CB_CONN_STR\"),\n    bucket=os.getenv(\"CB_BUCKET\"),\n    scope=os.getenv(\"CB_SCOPE\"),\n    collection=os.getenv(\"CB_COLLECTION\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/couchbase"
    },
    "Dropbox": {
        "source_connector": "Dropbox",
        "imports": "from unstructured_ingest.v2.processes.connectors.fsspec.dropbox import (DropboxIndexerConfig, DropboxDownloaderConfig, DropboxAccessConfig, DropboxConnectionConfig)",
        "configs": "indexer_config=DropboxIndexerConfig(remote_url=os.getenv(\"DROPBOX_REMOTE_URL\")),\ndownloader_config=DropboxDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=DropboxConnectionConfig(\n    access_config=DropboxAccessConfig(\n        token=os.getenv(\"DROPBOX_ACCESS_TOKEN\")\n    )\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/dropbox"
    },
    "Elasticsearch": {
        "source_connector": "Elasticsearch",
        "imports": "from unstructured_ingest.v2.processes.connectors.elasticsearch import (ElasticsearchIndexerConfig, ElasticsearchDownloaderConfig, ElasticsearchConnectionConfig, ElasticsearchAccessConfig, ElasticsearchUploadStagerConfig, ElasticsearchUploaderConfig)",
        "configs": "indexer_config=ElasticsearchIndexerConfig(index_name=os.getenv(\"ELASTICSEARCH_INDEX_NAME\")),\ndownloader_config=ElasticsearchDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=ElasticsearchConnectionConfig(\n    access_config=ElasticsearchAccessConfig(\n        password=os.getenv(\"ELASTICSEARCH_PASSWORD\"),\n        ssl_assert_fingerprint=os.getenv(\"ELASTICSEARCH_SSL_ASSERT_FINGERPRINT\")\n    ),\n    hosts=[os.getenv(\"ELASTICSEARCH_HOST\")],\n    username=os.getenv(\"ELASTICSEARCH_USERNAME\"),\n    ca_certs=os.getenv(\"ELASTICSEARCH_CA_CERTS\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/elastic-search"
    },
    "Google Cloud Storage": {
        "source_connector": "Google Cloud Storage",
        "imports": "from unstructured_ingest.v2.processes.connectors.gcs import (GCSIndexerConfig, GCSDownloaderConfig, GCSConnectionConfig)",
        "configs": "indexer_config=GCSIndexerConfig(bucket_name=os.getenv(\"GCS_BUCKET_NAME\")),\ndownloader_config=GCSDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=GCSConnectionConfig(\n    project_id=os.getenv(\"GCS_PROJECT_ID\"),\n    credentials_path=os.getenv(\"GCS_CREDENTIALS_PATH\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/google-cloud-storage"
    },
    "Google Drive": {
        "source_connector": "Google Drive",
        "imports": "from unstructured_ingest.v2.processes.connectors.google_drive import (GoogleDriveConnectionConfig, GoogleDriveAccessConfig, GoogleDriveIndexerConfig, GoogleDriveDownloaderConfig)",
        "configs": "indexer_config=GoogleDriveIndexerConfig(),\ndownloader_config=GoogleDriveDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=GoogleDriveConnectionConfig(\n    access_config=GoogleDriveAccessConfig(\n        service_account_key_path=os.getenv(\"GCP_SERVICE_ACCOUNT_KEY_FILEPATH\"), # Or\n        service_account_key=os.getenv(\"GCP_SERVICE_ACCOUNT_KEY_STRING\")\n        ),\n    drive_id=os.getenv(\"GOOGLE_DRIVE_FOLDER_ID\"),),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/google-drive"
    },
    "Local directory": {
        "source_connector": "Local directory",
        "imports": "from unstructured_ingest.v2.processes.connectors.local import (LocalIndexerConfig, LocalDownloaderConfig, LocalConnectionConfig, LocalUploaderConfig)",
        "configs": "indexer_config=LocalIndexerConfig(input_path=os.getenv(\"LOCAL_FILE_INPUT_DIR\")),\ndownloader_config=LocalDownloaderConfig(),\nsource_connection_config=LocalConnectionConfig(),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/local"
    },
    "OneDrive": {
        "source_connector": "OneDrive",
        "imports": "from unstructured_ingest.v2.processes.connectors.onedrive import (OnedriveIndexerConfig, OnedriveDownloaderConfig, OnedriveConnectionConfig, OnedriveAccessConfig)",
        "configs": "indexer_config=OnedriveIndexerConfig(path=os.getenv(\"ONEDRIVE_PATH\")),\ndownloader_config=OnedriveDownloaderConfig(\n    download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")\n),\nsource_connection_config=OnedriveConnectionConfig(\n    access_config=OnedriveAccessConfig(\n        client_cred=os.getenv(\"ONEDRIVE_CLIENT_CRED\")\n    ),\n    client_id=os.getenv(\"ONEDRIVE_CLIENT_ID\"),\n    tenant=os.getenv(\"ONEDRIVE_TENANT\"),\n    user_pname=os.getenv(\"ONEDRIVE_USER_PNAME\"),\n    authority_url=os.getenv(\"ONEDRIVE_AUTHORITY_URL\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/one-drive"
    },
    "OpenSearch": {
        "source_connector": "OpenSearch",
        "imports": "from unstructured_ingest.v2.processes.connectors.opensearch import (OpensearchIndexerConfig, OpensearchDownloaderConfig, OpenSearchConnectionConfig, OpenSearchAccessConfig)",
        "configs": "indexer_config=OpensearchIndexerConfig(index_name=os.getenv(\"OPENSEARCH_INDEX_NAME\")),\ndownloader_config=OpensearchDownloaderConfig(\n    download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\"),\n    fields=[\"director\", \"plot\"]\n),\nsource_connection_config=OpenSearchConnectionConfig(\n    access_config=OpenSearchAccessConfig(\n        password=os.getenv(\"OPENSEARCH_PASSWORD\"),\n        use_ssl=True\n    ),\n    hosts=[os.getenv(\"OPENSEARCH_HOST\")],\n    username=os.getenv(\"OPENSEARCH_USERNAME\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/opensearch"
    },
    "S3": {
        "source_connector": "S3",
        "imports": "from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (S3IndexerConfig, S3DownloaderConfig, S3ConnectionConfig, S3AccessConfig)",
        "configs": "indexer_config=S3IndexerConfig(remote_url=os.getenv(\"AWS_S3_URL\")),\ndownloader_config=S3DownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=S3ConnectionConfig(\n    access_config=S3AccessConfig(\n        key=os.getenv(\"AWS_ACCESS_KEY_ID\"),\n        secret=os.getenv(\"AWS_SECRET_ACCESS_KEY\")\n    )\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/s3"
    },
    "Salesforce": {
        "source_connector": "Salesforce",
        "imports": "from unstructured_ingest.v2.processes.connectors.salesforce import (SalesforceIndexerConfig, SalesforceDownloaderConfig, SalesforceConnectionConfig, SalesforceAccessConfig)",
        "configs": "indexer_config=SalesforceIndexerConfig(categories=[\"EmailMessage\", \"Account\", \"Lead\", \"Case\", \"Campaign\"]),\ndownloader_config=SalesforceDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=SalesforceConnectionConfig(\n    access_config=SalesforceAccessConfig(\n        consumer_key=os.getenv(\"SALESFORCE_CONSUMER_KEY\"),\n        private_key=os.getenv(\"SALESFORCE_PRIVATE_KEY\")\n    ),\n    username=os.getenv(\"SALESFORCE_USERNAME\")\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/salesforce"
    },
    "SFTP": {
        "source_connector": "SFTP",
        "imports": "from unstructured_ingest.v2.processes.connectors.fsspec.sftp import (SftpIndexerConfig, SftpDownloaderConfig, SftpConnectionConfig, SftpAccessConfig)",
        "configs": "indexer_config=SftpIndexerConfig(remote_url=os.getenv(\"SFTP_REMOTE_URL\")),\ndownloader_config=SftpDownloaderConfig(\n    download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\"),\n    remote_url=os.getenv(\"SFTP_REMOTE_URL\")\n),\nsource_connection_config=SftpConnectionConfig(\n    access_config=SftpAccessConfig(password=os.getenv(\"SFTP_PASSWORD\")),\n    host=os.getenv(\"SFTP_HOST\"),\n    port=os.getenv(\"SFTP_PORT\"),\n    username=os.getenv(\"SFTP_USERNAME\"),\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/sftp"
    },
    "Sharepoint": {
        "source_connector": "Sharepoint",
        "imports": "from unstructured_ingest.v2.processes.connectors.sharepoint import (SharepointIndexerConfig, SharepointDownloaderConfig, SharepointConnectionConfig, SharepointAccessConfig, SharepointPermissionsConfig)",
        "configs": "indexer_config=SharepointIndexerConfig(\n    path=os.getenv(\"SHAREPOINT_PATH\"),\n    recursive=False,\n    omit_lists=True,\n    omit_pages=True,\n    omit_files=False\n),\ndownloader_config=SharepointDownloaderConfig(download_dir=os.getenv(\"LOCAL_FILE_DOWNLOAD_DIR\")),\nsource_connection_config=SharepointConnectionConfig(\n    access_config=SharepointAccessConfig(client_cred=os.getenv(\"SHAREPOINT_APP_CLIENT_SECRET\")),\n    client_id=os.getenv(\"SHAREPOINT_APP_CLIENT_ID\"),\n    site=os.getenv(\"SHAREPOINT_SITE\"),\n    permissions_config=SharepointPermissionsConfig(\n        permissions_application_id=os.getenv(\"SHAREPOINT_APP_PERMISSIONS_CLIENT_ID\"),\n        permissions_tenant=os.getenv(\"SHAREPOINT_APP_TENANT_ID\"),\n        permissions_client_cred=os.getenv(\"SHAREPOINT_APP_PERMISSIONS_CLIENT_SECRET\"),\n        authority_url=\"https://login.microsoftonline.com\"\n    )\n),",
        "docs": "https://docs.unstructured.io/api-reference/ingest/source-connectors/sharepoint"
    }
}