|
import mimetypes
import os
import secrets
import time
from typing import Dict, List, Union

import requests
from langchain_core.documents.base import Document


def parse_file(api_key: str, file_path: str) -> str:
    """Upload a file to the LlamaParse API and poll until its parsed text is ready."""
    headers = {"Authorization": f"Bearer {api_key}"}
    base_url = "https://api.cloud.llamaindex.ai/api/parsing"

    # Upload while the file handle is still open. guess_type may return None
    # for unknown extensions, which requests treats as an unspecified type.
    with open(file_path, "rb") as f:
        mime_type = mimetypes.guess_type(file_path)[0]
        files = {"file": (f.name, f, mime_type)}
        response = requests.post(f"{base_url}/upload", headers=headers, files=files)

    response.raise_for_status()
    job_id = response.json()["id"]

    result_type = "text"
    result_url = f"{base_url}/job/{job_id}/result/{result_type}"

    # Poll until the result endpoint returns 200, which signals the job is
    # done. Cap the wait so a failed or stuck job cannot spin forever.
    deadline = time.time() + 300
    while True:
        response = requests.get(result_url, headers=headers)
        if response.status_code == 200:
            break
        if time.time() > deadline:
            raise TimeoutError(f"Parsing job {job_id} did not complete within 5 minutes")
        time.sleep(1)

    return response.json()[result_type]


def get_paged_text(text: str, separator: str = "\n---\n") -> List[str]:
    """Split parsed document text into per-page chunks on the separator,
    dropping empty chunks."""
    return [page for page in text.split(separator) if page]


def auth_user(username: str, password: str) -> bool:
    """Constant-time credential check against the USERNAME/PASSWORD env vars."""
    # compare_digest avoids leaking match information through comparison timing.
    user_ok = secrets.compare_digest(username, os.environ["USERNAME"])
    pass_ok = secrets.compare_digest(password, os.environ["PASSWORD"])
    return user_ok and pass_ok


def convert_to_docs(
    docs_str: List[str], metadata: Dict[str, Union[str, int, float]]
) -> List[Document]:
    """Wrap each page string in a LangChain Document with its own metadata copy."""
    # Copy the metadata per Document so later mutation of one page's metadata
    # does not leak into the others.
    return [Document(page_content=doc, metadata=metadata.copy()) for doc in docs_str]
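

# --- Usage sketch (illustrative, not part of the original module) ---
# Wires the helpers together end to end. The LLAMA_CLOUD_API_KEY variable and
# the "sample.pdf" path are assumed placeholders for this example.
if __name__ == "__main__":
    api_key = os.environ["LLAMA_CLOUD_API_KEY"]  # assumed env var name
    raw_text = parse_file(api_key, "sample.pdf")  # hypothetical input file
    pages = get_paged_text(raw_text)
    docs = convert_to_docs(pages, {"source": "sample.pdf"})
    print(f"Parsed {len(docs)} page(s)")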