File size: 1,125 Bytes
5ddcfe5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""
Hugging Face Data Upload Script

Purpose:
This script uploads a local folder to a Hugging Face dataset repository. It's designed to
update or create a dataset on the Hugging Face Hub by uploading the contents of a specified
local folder.

Usage:
- Run the script: python data/scraping_scripts/upload_dbs_to_hf.py

The script will:
- Upload the contents of the 'data' folder to the specified Hugging Face dataset repository.
- https://huggingface.co./datasets/towardsai-buster/ai-tutor-vector-db

Configuration:
- The script is set to upload to the "towardsai-buster/test-data" dataset repository. 
- It deletes all existing files in the repository before uploading (due to delete_patterns=["*"]).
"""

import os

from dotenv import load_dotenv
from huggingface_hub import HfApi

load_dotenv()

api = HfApi(token=os.getenv("HF_TOKEN"))

api.upload_folder(
    folder_path="data",
    repo_id="towardsai-tutors/ai-tutor-vector-db",
    repo_type="dataset",
    # multi_commits=True,
    # multi_commits_verbose=True,
    delete_patterns=["*"],
    ignore_patterns=["*.jsonl", "*.py", "*.txt", "*.ipynb", "*.md", "*.pyc"],
)