File size: 1,648 Bytes
03c0888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from crawl4ai.llmtxt import AsyncLLMTextManager  # Changed to AsyncLLMTextManager
from crawl4ai.async_logger import AsyncLogger
from pathlib import Path
import asyncio

async def main():
    """Exercise AsyncLLMTextManager against the local llm.txt docs folder.

    Steps, in order: make sure the docs directory exists, list the ``*.md``
    files found directly in it, ask the manager to generate its index files,
    then run each sample query through ``manager.search`` and print a short
    preview of whatever comes back.
    """
    # The docs folder is located relative to this script: two levels up,
    # then "local/_docs/llm.txt".
    # Alternative target: the "test_docs" subfolder of that path.
    script_path = Path(__file__).resolve()
    docs_dir = script_path.parent.parent / "local/_docs/llm.txt"

    # Safe to call repeatedly; missing parent folders are created as well.
    docs_dir.mkdir(parents=True, exist_ok=True)

    # Only batch_size is overridden here; max_concurrent_calls is not passed,
    # so whatever default the manager defines applies.
    logger = AsyncLogger()
    manager = AsyncLLMTextManager(docs_dir, logger, batch_size=2)

    # Show which markdown files sit directly inside the docs folder.
    print("\nAvailable files:")
    for md_name in (md_path.name for md_path in docs_dir.glob("*.md")):
        print(f"- {md_name}")

    print("\nGenerating index files...")
    await manager.generate_index_files(force_generate_facts=False, clear_bm25_cache=False)

    # Sample queries about Crawl4AI to run against the manager.
    sample_queries = [
        "How is using the `arun_many` method?",
    ]

    print("\nTesting search functionality:")
    for question in sample_queries:
        print(f"\nQuery: {question}")
        hits = manager.search(question, top_k=2)
        print(f"Results length: {len(hits)} characters")
        if not hits:
            print("No results found")
            continue
        # Flatten newlines so the preview stays on a single console line.
        preview = hits[:200].replace('\n', ' ')
        print("First 200 chars of results:", preview, "...")

# Script entry point: when executed directly (not imported), run the async
# main() coroutine to completion via asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())