"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "display(retrieved_examples[\"image\"][0])"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "machine_shape": "hm",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "name": "python"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/src/notebooks/rag_evaluation.qmd b/src/notebooks/rag_evaluation.qmd
deleted file mode 100644
index 2c29f702f273046fba8fee0d10aebb2480f92d5f..0000000000000000000000000000000000000000
--- a/src/notebooks/rag_evaluation.qmd
+++ /dev/null
@@ -1,786 +0,0 @@
----
-title: RAG Evaluation
-jupyter: python3
-eval: false
----
-
-```{python}
-!pip install -q torch transformers datasets langchain sentence-transformers faiss-gpu openpyxl openai ragatouille plotly
-```
-
-```{python}
-%reload_ext autoreload
-%autoreload 2
-%reload_ext dotenv
-%dotenv
-```
-
-```{python}
-from tqdm.notebook import tqdm
-import pandas as pd
-from typing import Optional, List, Tuple
-from langchain_core.language_models import BaseChatModel
-import json
-import datasets
-
-pd.set_option("display.max_colwidth", None)
-```
-
-### Load your knowledge base
-
-```{python}
-ds = datasets.load_dataset("m-ric/huggingface_doc", split="train")
-```
-
-# 1. Build a synthetic dataset for evaluation
-We first build a synthetic dataset of questions and associated contexts. The method is to get elements from our knowledge base, and ask an LLM to generate questions based on these documents.
-
-Then we set up other LLM agents to act as quality filters for the generated QA couples: each of them will act as the filter for a specific flaw.
-
-### 1.1. Prepare source documents
-
-```{python}
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.docstore.document import Document as LangchainDocument
-
-langchain_docs = [
- LangchainDocument(page_content=doc["text"], metadata={"source": doc["source"]})
- for doc in tqdm(ds)
-]
-
-
-text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=2000,
- chunk_overlap=200,
- add_start_index=True,
- separators=["\n\n", "\n", ".", " ", ""],
-)
-
-docs_processed = []
-for doc in langchain_docs:
- docs_processed += text_splitter.split_documents([doc])
-```
-
-### 1.2. Setup agents for question generation
-
-We use [Mixtral](https://huggingface.co./mistralai/Mixtral-8x7B-Instruct-v0.1) for QA couple generation because it has excellent performance in leaderboards such as [Chatbot Arena](https://huggingface.co./spaces/lmsys/chatbot-arena-leaderboard).
-
-```{python}
-from langchain_community.llms import HuggingFaceHub
-
-repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-
-llm = HuggingFaceHub(
- repo_id=repo_id,
- task="text-generation",
- model_kwargs={
- "max_new_tokens": 512,
- "top_k": 30,
- "temperature": 0.1,
- "repetition_penalty": 1.03,
- },
-)
-```
-
-```{python}
-from langchain_community.chat_models import ChatHuggingFace
-
-chat_model = ChatHuggingFace(llm=llm)
-```
-
-```{python}
-from langchain.prompts import ChatPromptTemplate
-
-QA_generation_prompt = """
-Your task is to write a factoid question and an answer given a context.
-Your factoid question should be answerable with a specific, concise piece of factual information from the context.
-Your factoid question should be formulated in the same style as questions users could ask in a search engine.
-This means that your factoid question MUST NOT mention something like "according to the passage" or "context".
-
-Provide your answer as follows:
-
-Output:::
-Factoid question: (your factoid question)
-Answer: (your answer to the factoid question)
-
-Now here is the context.
-
-Context: {context}\n
-Output:::"""
-
-QA_generation_prompt = ChatPromptTemplate.from_template(QA_generation_prompt)
-QA_generation_agent = QA_generation_prompt | chat_model
-```
-
-Now let's generate our QA couples.
-For this example, we generate only 10 QA couples and will load the rest from the Hub.
-
-But for your own knowledge base, given that you want at least ~100 test samples, and accounting for the fact that our critique agents will later filter out around half of them, you should generate many more: well over 200 samples.
-
-```{python}
-import random
-
-N_GENERATIONS = (
- 10 # We intentionally generate only 10 QA couples here for cost and time considerations
-)
-
-print(f"Generating {N_GENERATIONS} QA couples...")
-outputs = []
-for context in tqdm(random.sample(langchain_docs, N_GENERATIONS)):
- # Generate QA couple
- output_QA_couple = QA_generation_agent.invoke({"context": context.page_content}).content
- try:
- question = output_QA_couple.split("Factoid question: ")[1].split("Answer: ")[0]
- answer = output_QA_couple.split("Answer: ")[1]
- outputs.append(
- {
- "context": context.page_content,
- "question": question,
- "answer": answer,
- "source_doc": context.metadata["source"],
- }
- )
- except:
- continue
-```
-
-```{python}
-display(pd.DataFrame(outputs).head(1))
-```
-
-### 1.3. Setup critique agents
-
-The questions generated by the previous agent can have many flaws: we should do a quality check before validating these questions.
-
-We thus build critique agents that will rate each question on several criteria, given in [this paper](https://huggingface.co./papers/2312.10003):
-- **Groundedness:** can the question be answered from the given context?
-- **Relevance:** is the question relevant to users? For instance, `"What is the date when transformers 4.29.1 was released?"` is not relevant for ML practitioners.
-
-One last failure case we've noticed is when a question is tailored to the particular setting where it was generated, but is undecipherable by itself, like `"What is the name of the function used in this guide?"`.
-We also build a critique agent for this criterion:
-- **Stand-alone**: is the question understandable free of any context, for someone with domain knowledge/Internet access? The opposite of this would be `What is the function used in this article?` for a question generated from a specific blog article.
-
-We systematically score questions with all these agents, and whenever the score from any one of the agents is too low, we eliminate the question from our eval dataset.
-
-💡 ___When asking the agents to output a score, we first ask them to produce their rationale. This helps us verify the scores, but most importantly, asking for the rationale first gives the model more tokens to think and elaborate an answer before summarizing it into a single score token.___
-
-We now build and run these critique agents.
-
-```{python}
-question_groundedness_critique_prompt = """
-You will be given a context and a question.
-Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.
-Give your answer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.
-
-Provide your answer as follows:
-
-Answer:::
-Evaluation: (your rationale for the rating)
-Total rating: (your rating)
-
-Now here are the question and context.
-
-Question: {question}\n
-Context: {context}\n
-Answer::: """
-
-question_relevance_critique_prompt = """
-You will be given a question.
-Your task is to provide a 'total rating' representing how useful this question can be to machine learning developers building NLP applications with the Hugging Face ecosystem.
-Give your answer on a scale of 1 to 5, where 1 means that the question is not useful at all, and 5 means that the question is extremely useful.
-
-Provide your answer as follows:
-
-Answer:::
-Evaluation: (your rationale for the rating)
-Total rating: (your rating)
-
-Now here is the question.
-
-Question: {question}\n
-Answer::: """
-
-question_standalone_critique_prompt = """
-You will be given a question.
-Your task is to provide a 'total rating' representing how context-independent this question is.
-Give your answer on a scale of 1 to 5, where 1 means that the question only makes sense in a specific context, and 5 means that the question makes sense by itself.
-For instance, if the question refers to a particular setting, like 'in the context' or 'in the document', the rating must be 1.
-The questions can contain obscure technical nouns or acronyms like Gradio, Hub, Hugging Face or Space and still be a 5: it must simply be clear to an operator with access to documentation what the question is about.
-
-Provide your answer as follows:
-
-Answer:::
-Evaluation: (your rationale for the rating)
-Total rating: (your rating)
-
-Now here is the question.
-
-Question: {question}\n
-Answer::: """
-
-question_groundedness_critique_prompt = ChatPromptTemplate.from_template(
- question_groundedness_critique_prompt
-)
-question_groundedness_critique_agent = question_groundedness_critique_prompt | chat_model
-
-question_relevance_critique_prompt = ChatPromptTemplate.from_template(
- question_relevance_critique_prompt
-)
-question_relevance_critique_agent = question_relevance_critique_prompt | chat_model
-
-question_standalone_critique_prompt = ChatPromptTemplate.from_template(
- question_standalone_critique_prompt
-)
-question_standalone_critique_agent = question_standalone_critique_prompt | chat_model
-```
-
-```{python}
-print("Generating critique for each QA couple...")
-for output in tqdm(outputs):
- # Critique the generated QA couple
- question_groundedness_evaluation = question_groundedness_critique_agent.invoke(
- {"context": output["context"], "question": output["question"]}
- ).content
- question_relevance_evaluation = question_relevance_critique_agent.invoke(
- {"question": output["question"]}
- ).content
- question_standalone_evaluation = question_standalone_critique_agent.invoke(
- {"question": output["question"]}
- ).content
-
- try:
- groundedness_score = int(question_groundedness_evaluation.split("Total rating: ")[1][0])
- groundedness_eval = question_groundedness_evaluation.split("Total rating: ")[0].split(
- "Evaluation: "
- )[1]
- relevance_score = int(question_relevance_evaluation.split("Total rating: ")[1][0])
- relevance_eval = question_relevance_evaluation.split("Total rating: ")[0].split(
- "Evaluation: "
- )[1]
- standalone_score = int(question_standalone_evaluation.split("Total rating: ")[1][0])
- standalone_eval = question_standalone_evaluation.split("Total rating: ")[0].split(
- "Evaluation: "
- )[1]
- output.update(
- {
- "groundedness_score": groundedness_score,
- "groundedness_eval": groundedness_eval,
- "relevance_score": relevance_score,
- "relevance_eval": relevance_eval,
- "standalone_score": standalone_score,
- "standalone_eval": standalone_eval,
- }
- )
- except:
- continue
-```
-
-Now let us filter out bad questions based on our critique agent scores:
-
-```{python}
-import pandas as pd
-
-pd.set_option("display.max_colwidth", None)
-
-generated_questions = pd.DataFrame.from_dict(outputs)
-
-print("Evaluation dataset before filtering:")
-display(
- generated_questions[
- ["question", "answer", "groundedness_score", "relevance_score", "standalone_score"]
- ]
-)
-generated_questions = generated_questions.loc[
- (generated_questions["groundedness_score"] >= 4)
- & (generated_questions["relevance_score"] >= 4)
- & (generated_questions["standalone_score"] >= 4)
-]
-print("============================================")
-print("Final evaluation dataset:")
-display(
- generated_questions[
- ["question", "answer", "groundedness_score", "relevance_score", "standalone_score"]
- ]
-)
-
-eval_dataset = datasets.Dataset.from_pandas(
- generated_questions, split="train", preserve_index=False
-)
-```
-
-Now our synthetic evaluation dataset is complete! We can evaluate different RAG systems on this evaluation dataset.
-
-We have generated only a few QA couples here to reduce time and cost. But let's kick start the next part by loading a pre-generated dataset:
-
-```{python}
-eval_dataset = datasets.load_dataset("m-ric/huggingface_doc_qa_eval", split="train")
-```
-
-# 2. Build our RAG System
-
-### 2.1. Preprocessing documents to build our vector database
-
-- In this part, __we split the documents from our knowledge base into smaller chunks__: these will be the snippets that are picked by the Retriever, to then be ingested by the Reader LLM as supporting elements for its answer.
-- The goal is to build semantically relevant snippets: not so small that they cannot support an answer on their own, and not so large that they dilute individual ideas.
-
-Many options exist for text splitting:
-- split every `n` words / characters, but this risks cutting paragraphs or even sentences in half
-- split after `n` words / characters, but only on sentence boundaries
-- **recursive split** tries to preserve even more of the document structure, by processing it in a tree-like way: splitting first on the largest units (chapters), then recursively splitting on smaller units (paragraphs, sentences).
-
-To learn more about chunking, I recommend you read [this great notebook](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb) by Greg Kamradt.
-
-[This space](https://huggingface.co./spaces/m-ric/chunk_visualizer) lets you visualize how different splitting options affect the chunks you get.
-
-> In the following, we use Langchain's `RecursiveCharacterTextSplitter`.
-
-💡 _To measure chunk length in our Text Splitter, our length function will not count characters but tokens in the tokenized text: since the embedder that comes next processes tokens, measuring length in tokens is more relevant and empirically performs better._
-
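-Concretely, this means the splitter's length function calls a tokenizer instead of `len()` on the raw string. The snippet below is a minimal sketch of that idea (the variable names are illustrative); the `split_documents` helper defined further down achieves the same thing through `RecursiveCharacterTextSplitter.from_huggingface_tokenizer`.
-
-```{python}
-from transformers import AutoTokenizer
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-# Sketch: a splitter whose chunk size is counted in tokens of the embedding model's tokenizer
-length_tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
-
-token_based_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=512,  # measured in tokens, not characters
-    chunk_overlap=50,
-    length_function=lambda text: len(length_tokenizer.encode(text)),
-    separators=["\n\n", "\n", ".", " ", ""],
-)
-```
-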
-```{python}
-from langchain.docstore.document import Document as LangchainDocument
-
-RAW_KNOWLEDGE_BASE = [
- LangchainDocument(page_content=doc["text"], metadata={"source": doc["source"]})
- for doc in tqdm(ds)
-]
-```
-
-```{python}
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from transformers import AutoTokenizer
-
-
-def split_documents(
- chunk_size: int,
- knowledge_base: List[LangchainDocument],
- tokenizer_name: str,
-) -> List[LangchainDocument]:
- """
- Split documents into chunks of size `chunk_size` characters and return a list of documents.
- """
- text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
- AutoTokenizer.from_pretrained(tokenizer_name),
- chunk_size=chunk_size,
- chunk_overlap=int(chunk_size / 10),
- add_start_index=True,
- strip_whitespace=True,
- separators=["\n\n", "\n", ".", " ", ""],
- )
-
- docs_processed = []
- for doc in knowledge_base:
- docs_processed += text_splitter.split_documents([doc])
-
- # Remove duplicates
- unique_texts = {}
- docs_processed_unique = []
- for doc in docs_processed:
- if doc.page_content not in unique_texts:
- unique_texts[doc.page_content] = True
- docs_processed_unique.append(doc)
-
- return docs_processed_unique
-```
-
-### 2.2. Retriever - embeddings 🗂️
-The __retriever acts like an internal search engine__: given the user query, it returns the most relevant documents from your knowledge base.
-
-> For the knowledge base, we use a Langchain vector database since __it offers a convenient [FAISS](https://github.com/facebookresearch/faiss) index and allows us to keep document metadata throughout the processing__.
-
-🛠️ __Options included:__
-
-- Tune the chunking method:
- - Size of the chunks
- - Method: split on different separators, use [semantic chunking](https://python.langchain.com/docs/modules/data_connection/document_transformers/semantic-chunker)...
-- Change the embedding model
-
-```{python}
-from langchain.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores.utils import DistanceStrategy
-import os
-
-
-def load_embeddings(
- langchain_docs: List[LangchainDocument],
- chunk_size: int,
- embedding_model_name: Optional[str] = "thenlper/gte-small",
-) -> FAISS:
- """
- Creates a FAISS index from the given embedding model and documents. Loads the index directly if it already exists.
-
- Args:
- langchain_docs: list of documents
- chunk_size: size of the chunks to split the documents into
- embedding_model_name: name of the embedding model to use
-
- Returns:
- FAISS index
- """
- # load embedding_model
- embedding_model = HuggingFaceEmbeddings(
- model_name=embedding_model_name,
- multi_process=True,
- model_kwargs={"device": "cuda"},
- encode_kwargs={"normalize_embeddings": True}, # set True to compute cosine similarity
- )
-
- # Check if embeddings already exist on disk
- index_name = f"index_chunk:{chunk_size}_embeddings:{embedding_model_name.replace('/', '~')}"
- index_folder_path = f"./data/indexes/{index_name}/"
- if os.path.isdir(index_folder_path):
- return FAISS.load_local(
- index_folder_path,
- embedding_model,
- distance_strategy=DistanceStrategy.COSINE,
- )
-
- else:
- print("Index not found, generating it...")
- docs_processed = split_documents(
- chunk_size,
- langchain_docs,
- embedding_model_name,
- )
- knowledge_index = FAISS.from_documents(
- docs_processed, embedding_model, distance_strategy=DistanceStrategy.COSINE
- )
- knowledge_index.save_local(index_folder_path)
- return knowledge_index
-```
-
-### 2.3. Reader - LLM 💬
-
-In this part, the __LLM Reader reads the retrieved documents to formulate its answer.__
-
-🛠️ Here we tried the following options to improve results:
-- Switch reranking on/off
-- Change the reader model
-
-```{python}
-RAG_PROMPT_TEMPLATE = """
-<|system|>
-Using the information contained in the context,
-give a comprehensive answer to the question.
-Respond only to the question asked, response should be concise and relevant to the question.
-Provide the number of the source document when relevant.
-If the answer cannot be deduced from the context, do not give an answer.
-<|user|>
-Context:
-{context}
----
-Now here is the question you need to answer.
-
-Question: {question}
-
-<|assistant|>
-"""
-```
-
-```{python}
-from langchain_community.llms import HuggingFaceHub
-
-repo_id = "HuggingFaceH4/zephyr-7b-beta"
-READER_MODEL_NAME = "zephyr-7b-beta"
-
-READER_LLM = HuggingFaceHub(
- repo_id=repo_id,
- task="text-generation",
- model_kwargs={
- "max_new_tokens": 512,
- "top_k": 30,
- "temperature": 0.1,
- "repetition_penalty": 1.03,
- },
-)
-```
-
-```{python}
-from ragatouille import RAGPretrainedModel
-from langchain_core.vectorstores import VectorStore
-from langchain_core.language_models.llms import LLM
-
-
-def answer_with_rag(
- question: str,
- llm: LLM,
- knowledge_index: VectorStore,
- reranker: Optional[RAGPretrainedModel] = None,
- num_retrieved_docs: int = 30,
- num_docs_final: int = 7,
-) -> Tuple[str, List[LangchainDocument]]:
- """Answer a question using RAG with the given knowledge index."""
- # Gather documents with retriever
- relevant_docs = knowledge_index.similarity_search(query=question, k=num_retrieved_docs)
- relevant_docs = [doc.page_content for doc in relevant_docs] # keep only the text
-
- # Optionally rerank results
- if reranker:
- relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)
- relevant_docs = [doc["content"] for doc in relevant_docs]
-
- relevant_docs = relevant_docs[:num_docs_final]
-
- # Build the final prompt
- context = "\nExtracted documents:\n"
- context += "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
-
- final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)
-
-    # Generate an answer
- answer = llm(final_prompt)
-
- return answer, relevant_docs
-```
-
-# 3. Benchmarking the RAG system
-
-The RAG system and the evaluation dataset are now ready. The last step is to judge the RAG system's output on this evaluation dataset.
-
-To this end, __we setup a judge agent__. ⚖️🤖
-
-Out of [the different RAG evaluation metrics](https://docs.ragas.io/en/latest/concepts/metrics/index.html), we choose to focus only on faithfulness since it is the best end-to-end metric of our system's performance.
-
-> We use GPT4 as a judge for its empirically good performance, but you could try with other models such as [kaist-ai/prometheus-13b-v1.0](https://huggingface.co./kaist-ai/prometheus-13b-v1.0) or [BAAI/JudgeLM-33B-v1.0](https://huggingface.co./BAAI/JudgeLM-33B-v1.0).
-
-💡 _In the evaluation prompt, we give a detailed description of each metric on the 1-5 scale, as is done in [Prometheus's prompt template](https://huggingface.co./kaist-ai/prometheus-13b-v1.0): this helps the model ground its ratings precisely. If instead you give the judge LLM a vague scale to work with, the outputs will not be consistent enough between different examples._
-
-💡 _Again, prompting the LLM to output rationale before giving its final score gives it more tokens to help it formalize and elaborate a judgement._
-
-```{python}
-def run_rag_tests(
- eval_dataset: datasets.Dataset,
- llm: BaseChatModel,
- knowledge_index: VectorStore,
- output_file: str,
- reranker: Optional[RAGPretrainedModel] = None,
- verbose: Optional[bool] = True,
- test_settings: Optional[str] = None, # To document the test settings used
-):
- """Runs RAG tests on the given dataset and saves the results to the given output file."""
- try: # load previous generations if they exist
- with open(output_file, "r") as f:
- outputs = json.load(f)
- except:
- outputs = []
-
- for example in tqdm(eval_dataset):
- question = example["question"]
- if question in [output["question"] for output in outputs]:
- continue
-
- answer, relevant_docs = answer_with_rag(question, llm, knowledge_index, reranker=reranker)
- if verbose:
- print("=======================================================")
- print(f"Question: {question}")
- print(f"Answer: {answer}")
- print(f'True answer: {example["answer"]}')
- result = {
- "question": question,
- "true_answer": example["answer"],
- "source_doc": example["source_doc"],
- "generated_answer": answer,
- "retrieved_docs": [doc for doc in relevant_docs],
- }
- if test_settings:
- result["test_settings"] = test_settings
- outputs.append(result)
-
- with open(output_file, "w") as f:
- json.dump(outputs, f)
-```
-
-```{python}
-EVALUATION_PROMPT = """###Task Description:
-An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.
-1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.
-2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
-3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\"
-4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.
-
-###The instruction to evaluate:
-{instruction}
-
-###Response to evaluate:
-{response}
-
-###Reference Answer (Score 5):
-{reference_answer}
-
-###Score Rubrics:
-[Is the response correct, accurate, and factual based on the reference answer?]
-Score 1: The response is completely incorrect, inaccurate, and/or not factual.
-Score 2: The response is mostly incorrect, inaccurate, and/or not factual.
-Score 3: The response is somewhat correct, accurate, and/or factual.
-Score 4: The response is mostly correct, accurate, and factual.
-Score 5: The response is completely correct, accurate, and factual.
-
-###Feedback:"""
-
-from langchain.prompts.chat import (
- ChatPromptTemplate,
- HumanMessagePromptTemplate,
-)
-from langchain.schema import SystemMessage
-
-
-evaluation_prompt_template = ChatPromptTemplate.from_messages(
- [
- SystemMessage(content="You are a fair evaluator language model."),
- HumanMessagePromptTemplate.from_template(EVALUATION_PROMPT),
- ]
-)
-```
-
-```{python}
-from langchain.chat_models import ChatOpenAI
-
-eval_chat_model = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
-evaluator_name = "GPT4"
-
-
-def evaluate_answers(
- answer_path: str,
- eval_chat_model: BaseChatModel,
- evaluator_name: str,
- evaluation_prompt_template: ChatPromptTemplate,
-) -> None:
- """Evaluates generated answers. Modifies the given answer file in place for better checkpointing."""
- answers = []
- if os.path.isfile(answer_path): # load previous generations if they exist
- answers = json.load(open(answer_path, "r"))
-
- for experiment in tqdm(answers):
- if f"eval_score_{evaluator_name}" in experiment:
- continue
-
- eval_prompt = evaluation_prompt_template.format_messages(
- instruction=experiment["question"],
- response=experiment["generated_answer"],
- reference_answer=experiment["true_answer"],
- )
- eval_result = eval_chat_model.invoke(eval_prompt)
- feedback, score = [item.strip() for item in eval_result.content.split("[RESULT]")]
- experiment[f"eval_score_{evaluator_name}"] = score
- experiment[f"eval_feedback_{evaluator_name}"] = feedback
-
- with open(answer_path, "w") as f:
- json.dump(answers, f)
-```
-
-🚀 Let's run the tests and evaluate answers!👇
-
-```{python}
-if not os.path.exists("./output"):
- os.mkdir("./output")
-
-for chunk_size in [200]: # Add other chunk sizes (in tokens) as needed
- for embeddings in ["thenlper/gte-small"]: # Add other embeddings as needed
- for rerank in [True, False]:
- settings_name = f"chunk:{chunk_size}_embeddings:{embeddings.replace('/', '~')}_rerank:{rerank}_reader-model:{READER_MODEL_NAME}"
- output_file_name = f"./output/rag_{settings_name}.json"
-
- print(f"Running evaluation for {settings_name}:")
-
- print("Loading knowledge base embeddings...")
- knowledge_index = load_embeddings(
- RAW_KNOWLEDGE_BASE,
- chunk_size=chunk_size,
- embedding_model_name=embeddings,
- )
-
- print("Running RAG...")
- reranker = (
- RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0") if rerank else None
- )
- run_rag_tests(
- eval_dataset=eval_dataset,
- llm=READER_LLM,
- knowledge_index=knowledge_index,
- output_file=output_file_name,
- reranker=reranker,
- verbose=False,
- test_settings=settings_name,
- )
-
- print("Running evaluation...")
- evaluate_answers(
- output_file_name,
- eval_chat_model,
- evaluator_name,
- evaluation_prompt_template,
- )
-```
-
-### Inspect results
-
-```{python}
-import glob
-
-outputs = []
-for file in glob.glob("./output/*.json"):
- output = pd.DataFrame(json.load(open(file, "r")))
- output["settings"] = file
- outputs.append(output)
-result = pd.concat(outputs)
-```
-
-```{python}
-result["eval_score_GPT4"] = result["eval_score_GPT4"].apply(
- lambda x: int(x) if isinstance(x, str) else 1
-)
-result["eval_score_GPT4"] = (result["eval_score_GPT4"] - 1) / 4
-```
-
-```{python}
-average_scores = result.groupby("settings")["eval_score_GPT4"].mean()
-average_scores.sort_values()
-```
-
-## Example results
-
-Let us load the results that I obtained by tweaking the different options available in this notebook.
-For more detail on why these options may or may not work, see the notebook on [advanced_RAG](advanced_rag).
-
-As you can see in the graph below, some tweaks bring no improvement at all, while others give huge performance boosts.
-
-➡️ ___There is no single good recipe: you should try several different directions when tuning your RAG systems.___
-
-```{python}
-import plotly.express as px
-
-scores = datasets.load_dataset("m-ric/rag_scores_cookbook", split="train")
-scores = pd.Series(scores["score"], index=scores["settings"])
-```
-
-```{python}
-fig = px.bar(
- scores,
- color=scores,
- labels={
- "value": "Accuracy",
- "settings": "Configuration",
- },
- color_continuous_scale="bluered",
-)
-fig.update_layout(
- width=1000,
- height=600,
- barmode="group",
- yaxis_range=[0, 100],
- title="Accuracy of different RAG configurations",
- xaxis_title="RAG settings",
- font=dict(size=15),
-)
-fig.layout.yaxis.ticksuffix = "%"
-fig.update_coloraxes(showscale=False)
-fig.update_traces(texttemplate="%{y:.1f}", textposition="outside")
-fig.show()
-```
-
-
-
-As you can see, these changes had varying impact on performance. In particular, tuning the chunk size is both easy and very impactful.
-
-But this is just our case: your results could be very different. Now that you have a robust evaluation pipeline, you can set out to explore other options! 🗺️
-
diff --git a/src/notebooks/rag_zephyr_langchain.qmd b/src/notebooks/rag_zephyr_langchain.qmd
deleted file mode 100644
index 8db9bf70750043f834b3a9c18391ed1189889c27..0000000000000000000000000000000000000000
--- a/src/notebooks/rag_zephyr_langchain.qmd
+++ /dev/null
@@ -1,232 +0,0 @@
----
-title: Simple RAG
-jupyter: python3
-eval: false
-code-annotations: hover
-
----
-
-```{python}
-!pip install -q torch transformers accelerate bitsandbytes sentence-transformers faiss-gpu
-```
-
-```{python}
-!pip install -q langchain
-```
-
-::: callout-note
-If running in Google Colab, you may need to run this cell to make sure you're using a UTF-8 locale to install LangChain:
-```{python}
-import locale
-locale.getpreferredencoding = lambda: "UTF-8"
-```
-:::
-
-
-## Prepare the data
-
-In this example, we'll load all of the issues (both open and closed) from [PEFT library's repo](https://github.com/huggingface/peft).
-
-First, you need to acquire a [GitHub personal access token](https://github.com/settings/tokens?type=beta) to access the GitHub API.
-
-```{python}
-from getpass import getpass
-
-ACCESS_TOKEN = getpass("YOUR_GITHUB_PERSONAL_TOKEN") # <1>
-```
-1. You can also use an environment variable to store your token.
-
-Next, we'll load all of the issues in the [huggingface/peft](https://github.com/huggingface/peft) repo:
-- By default, pull requests are considered issues as well; here we chose to exclude them from the data by setting `include_prs=False`.
-- Setting `state = "all"` means we will load both open and closed issues.
-
-```{python}
-from langchain.document_loaders import GitHubIssuesLoader
-
-loader = GitHubIssuesLoader(
- repo="huggingface/peft",
- access_token=ACCESS_TOKEN,
- include_prs=False,
- state="all"
-)
-
-docs = loader.load()
-```
-
-The content of individual GitHub issues may be longer than what an embedding model can take as input. If we want to embed all of the available content, we need to chunk the documents into appropriately sized pieces.
-
-The most common and straightforward approach to chunking is to define a fixed size of chunks and whether there should be any overlap between them. Keeping some overlap between chunks allows us to preserve some semantic context between the chunks.
-
-Other approaches are typically more involved and take into account the documents' structure and context. For example, one may want to split a document based on sentences or paragraphs, or create chunks that follow the document's structure, such as its sections.
-
-The fixed-size chunking, however, works well for most common cases, so that is what we'll do here.
-
-```{python}
-from langchain.text_splitter import CharacterTextSplitter
-
-splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=30)
-
-chunked_docs = splitter.split_documents(docs)
-```
-
-## Create the embeddings + retriever
-
-Now that the docs are all of the appropriate size, we can create a database with their embeddings.
-
-To create document chunk embeddings we'll use the `HuggingFaceEmbeddings` and the [`BAAI/bge-base-en-v1.5`](https://huggingface.co./BAAI/bge-base-en-v1.5) embeddings model. To create the vector database, we'll use `FAISS`, a library developed by Facebook AI. This library offers efficient similarity search and clustering of dense vectors, which is what we need here. FAISS is currently one of the most used libraries for NN search in massive datasets.
-
-::: callout-tip
-There are many other embeddings models available on the Hub, and you can keep an eye on the best performing ones by checking the [Massive Text Embedding Benchmark (MTEB) Leaderboard](https://huggingface.co./spaces/mteb/leaderboard).
-:::
-
-We'll access both the embeddings model and FAISS via the LangChain API.
-
-```{python}
-from langchain.vectorstores import FAISS
-from langchain.embeddings import HuggingFaceEmbeddings
-
-db = FAISS.from_documents(chunked_docs,
- HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'))
-```
-
-We need a way to return (retrieve) the documents given an unstructured query. For that, we'll use the `as_retriever` method with the `db` as its backbone:
-- `search_type="similarity"` means we want to perform similarity search between the query and documents
-- `search_kwargs={'k': 4}` instructs the retriever to return the top 4 results.
-
-```{python}
-retriever = db.as_retriever(
- search_type="similarity", # <1>
- search_kwargs={'k': 4} # <1>
-)
-```
-1. The ideal search type is context dependent, and you should experiment to find the best one for your data.
-
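-As noted in the annotation above, it is worth experimenting with the search type. As a minimal sketch (not part of the original notebook), LangChain retrievers also support maximal marginal relevance (MMR) search, which trades off relevance against diversity among the returned chunks:
-
-```{python}
-# Sketch of an alternative retriever configuration using MMR search:
-# `fetch_k` candidates are fetched first, then `k` diverse ones are kept.
-mmr_retriever = db.as_retriever(
-    search_type="mmr",
-    search_kwargs={'k': 4, 'fetch_k': 20}
-)
-```
-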
-The vector database and retriever are now set up; next, we need the remaining piece of the chain: the model.
-
-## Load quantized model
-
-For this example, we chose [`HuggingFaceH4/zephyr-7b-beta`](https://huggingface.co./HuggingFaceH4/zephyr-7b-beta), a small but powerful model.
-To make inference faster, we will load the quantized version of the model:
-
-::: {.callout-tip}
-With many models being released every week, you may want to substitute this model with the latest and greatest. The best way to keep track of open-source LLMs is to check the [Open LLM Leaderboard](https://huggingface.co./spaces/HuggingFaceH4/open_llm_leaderboard).
-:::
-
-```{python}
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
-model_name = 'HuggingFaceH4/zephyr-7b-beta'
-
-bnb_config = BitsAndBytesConfig(
- load_in_4bit=True,
- bnb_4bit_use_double_quant=True,
- bnb_4bit_quant_type="nf4",
- bnb_4bit_compute_dtype=torch.bfloat16
-)
-
-model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-```
-
-## Setup the LLM chain
-
-Finally, we have all the pieces we need to set up the LLM chain.
-
-First, create a text_generation pipeline using the loaded model and its tokenizer.
-
-Next, create a prompt template - this should follow the format of the model, so if you substitute the model checkpoint, make sure to use the appropriate formatting.
-
-```{python}
-from langchain.llms import HuggingFacePipeline
-from langchain.prompts import PromptTemplate
-from transformers import pipeline
-from langchain_core.output_parsers import StrOutputParser
-
-text_generation_pipeline = pipeline(
- model=model, # <1>
- tokenizer=tokenizer, # <2>
- task="text-generation", # <3>
- temperature=0.2, # <4>
- do_sample=True, # <5>
- repetition_penalty=1.1, # <6>
- return_full_text=True, # <7>
- max_new_tokens=400, # <8>
-)
-
-llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
-
-prompt_template = """
-<|system|>
-Answer the question based on your knowledge. Use the following context to help:
-
-{context}
-
-
-<|user|>
-{question}
-
-<|assistant|>
-
- """
-
-prompt = PromptTemplate(
- input_variables=["context", "question"],
- template=prompt_template,
-)
-
-llm_chain = prompt | llm | StrOutputParser()
-```
-
-1. The pre-trained model for text generation.
-2. Tokenizer to preprocess input text and postprocess generated output.
-3. Specifies the task as text generation.
-4. Controls the randomness in the output generation. Lower values make the output more deterministic.
-5. Enables sampling to introduce randomness in the output generation.
-6. Penalizes repetition in the output to encourage diversity.
-7. Returns the full generated text including the input prompt.
-8. Limits the maximum number of new tokens generated.
-
-Note: _You can also use `tokenizer.apply_chat_template` to convert a list of messages (as dicts: `{'role': 'user', 'content': '(...)'}`) into a string with the appropriate chat format._
-
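-As a minimal sketch (not part of the original chain), here is roughly how `apply_chat_template` could build the same kind of prompt; the `{context}` and `{question}` placeholders are kept literal so the resulting string can still be used as a `PromptTemplate`:
-
-```{python}
-# Sketch: let the tokenizer's chat template produce the model-specific formatting
-messages = [
-    {
-        "role": "system",
-        "content": "Answer the question based on your knowledge. Use the following context to help:\n\n{context}",
-    },
-    {"role": "user", "content": "{question}"},
-]
-chat_prompt_template = tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
-)
-print(chat_prompt_template)
-```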
-
-Finally, we need to combine the `llm_chain` with the retriever to create a RAG chain. We pass the original question through to the final generation step, as well as the retrieved context docs:
-
-```{python}
-from langchain_core.runnables import RunnablePassthrough
-
-retriever = db.as_retriever()
-
-rag_chain = (
- {"context": retriever, "question": RunnablePassthrough()}
- | llm_chain
-)
-```
-
-## Compare the results
-
-Let's see the difference RAG makes in generating answers to the library-specific questions.
-
-```{python}
-question = "How do you combine multiple adapters?"
-```
-
-First, let's see what kind of answer we can get with just the model itself, no context added:
-
-```{python}
-#| colab: {base_uri: 'https://localhost:8080/', height: 125}
-llm_chain.invoke({"context":"", "question": question})
-```
-
-As you can see, the model interpreted the question as one about physical computer adapters, while in the context of PEFT, "adapters" refer to LoRA adapters.
-Let's see if adding context from GitHub issues helps the model give a more relevant answer:
-
-```{python}
-#| colab: {base_uri: 'https://localhost:8080/', height: 125}
-rag_chain.invoke(question)
-```
-
-As we can see, the added context really helps the exact same model provide a much more relevant and informed answer to the library-specific question.
-
-Notably, combining multiple adapters for inference has been added to the library, and one can find this information in the documentation, so for the next iteration of this RAG it may be worth including documentation embeddings.
-
diff --git a/src/notebooks/single_gpu.ipynb b/src/notebooks/single_gpu.ipynb
deleted file mode 100644
index f59a9ad16e3388b316c89121ab19a4126a02e35a..0000000000000000000000000000000000000000
--- a/src/notebooks/single_gpu.ipynb
+++ /dev/null
@@ -1,1129 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "FNdZ-kD0l78P"
- },
- "source": [
- "---\n",
- "title: Single GPU Fine-tuning\n",
- "---\n",
- "\n",
- "# Fine-tuning a Code LLM on Custom Code on a single GPU\n",
- "\n",
- "_Authored by: [Maria Khalusova](https://github.com/MKhalusova)_\n",
- "\n",
- "Publicly available code LLMs such as Codex, StarCoder, and Code Llama are great at generating code that adheres to general programming principles and syntax, but they may not align with an organization's internal conventions, or be aware of proprietary libraries.\n",
- "\n",
- "In this notebook, we'll show how you can fine-tune a code LLM on private code bases to enhance its contextual awareness and improve the model's usefulness for your organization's needs. Since code LLMs are quite large, fine-tuning them in the traditional manner can be resource-draining. Worry not! We will show how you can optimize fine-tuning to fit on a single GPU.\n",
- "\n",
- "\n",
- "## Dataset\n",
- "\n",
- "For this example, we picked the top 10 Hugging Face public repositories on GitHub. We have excluded non-code files from the data, such as images, audio files, presentations, and so on. For Jupyter notebooks, we've kept only cells containing code. The resulting code is stored as a dataset that you can find on the Hugging Face Hub under [`smangrul/hf-stack-v1`](https://huggingface.co./datasets/smangrul/hf-stack-v1). It contains repo id, file path, and file content.\n",
- "\n",
- "\n",
- "## Model\n",
- "\n",
- "We'll finetune [`bigcode/starcoderbase-1b`](https://huggingface.co./bigcode/starcoderbase-1b), which is a 1B parameter model trained on 80+ programming languages. This is a gated model, so if you plan to run this notebook with this exact model, you'll need to gain access to it on the model's page. Log in to your Hugging Face account to do so:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "bPlCJYDK6vrF"
- },
- "outputs": [],
- "source": [
- "from huggingface_hub import notebook_login\n",
- "\n",
- "notebook_login()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "WMVe_c8q43Qo"
- },
- "source": [
- "To get started, let's install all the necessary libraries. As you can see, in addition to `transformers` and `datasets`, we'll be using `peft`, `bitsandbytes`, and `flash-attn` to optimize the training.\n",
- "\n",
- "By employing parameter-efficient training techniques, we can run this notebook on a single A100 High-RAM GPU."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Fp7i8WMCjKJG"
- },
- "outputs": [],
- "source": [
- "!pip install -q transformers datasets peft bitsandbytes flash-attn"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "16EdABzt3_Ig"
- },
- "source": [
- "Let's define some variables now. Feel free to play with these."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "hru3G-CLmqis"
- },
- "outputs": [],
- "source": [
- "MODEL=\"bigcode/starcoderbase-1b\" # Model checkpoint on the Hugging Face Hub\n",
- "DATASET=\"smangrul/hf-stack-v1\" # Dataset on the Hugging Face Hub\n",
- "DATA_COLUMN=\"content\" # Column name containing the code content\n",
- "\n",
- "SEQ_LENGTH=2048 # Sequence length\n",
- "\n",
- "# Training arguments\n",
- "MAX_STEPS=2000 # max_steps\n",
- "BATCH_SIZE=16 # batch_size\n",
- "GR_ACC_STEPS=1 # gradient_accumulation_steps\n",
- "LR=5e-4 # learning_rate\n",
- "LR_SCHEDULER_TYPE=\"cosine\" # lr_scheduler_type\n",
- "WEIGHT_DECAY=0.01 # weight_decay\n",
- "NUM_WARMUP_STEPS=30 # num_warmup_steps\n",
- "EVAL_FREQ=100 # eval_freq\n",
- "SAVE_FREQ=100 # save_freq\n",
- "LOG_FREQ=25 # log_freq\n",
- "OUTPUT_DIR=\"peft-starcoder-lora-a100\" # output_dir\n",
- "BF16=True # bf16\n",
- "FP16=False # no_fp16\n",
- "\n",
- "# FIM transformations arguments\n",
- "FIM_RATE=0.5 # fim_rate\n",
- "FIM_SPM_RATE=0.5 # fim_spm_rate\n",
- "\n",
- "# LORA\n",
- "LORA_R=8 # lora_r\n",
- "LORA_ALPHA=32 # lora_alpha\n",
- "LORA_DROPOUT=0.0 # lora_dropout\n",
- "LORA_TARGET_MODULES=\"c_proj,c_attn,q_attn,c_fc,c_proj\" # lora_target_modules\n",
- "\n",
- "# bitsandbytes config\n",
- "USE_NESTED_QUANT=True # use_nested_quant\n",
- "BNB_4BIT_COMPUTE_DTYPE=\"bfloat16\"# bnb_4bit_compute_dtype\n",
- "\n",
- "SEED=0"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "FyZSXTbJrcnC"
- },
- "outputs": [],
- "source": [
- "from transformers import (\n",
- " AutoModelForCausalLM,\n",
- " AutoTokenizer,\n",
- " Trainer,\n",
- " TrainingArguments,\n",
- " logging,\n",
- " set_seed,\n",
- " BitsAndBytesConfig,\n",
- ")\n",
- "\n",
- "set_seed(SEED)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "pO7F5L5AtKo1"
- },
- "source": [
- "## Prepare the data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "1LmrIZqP0oUE"
- },
- "source": [
- "Begin by loading the data. As the dataset is likely to be quite large, make sure to enable the streaming mode. Streaming allows us to load the data progressively as we iterate over the dataset instead of downloading the whole dataset at once.\n",
- "\n",
- "We'll reserve the first 4000 examples as the validation set, and everything else will be the training data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4oJZvZb-1J88"
- },
- "outputs": [],
- "source": [
- "from datasets import load_dataset\n",
- "import torch\n",
- "from tqdm import tqdm\n",
- "\n",
- "\n",
- "dataset = load_dataset(\n",
- " DATASET,\n",
- " data_dir=\"data\",\n",
- " split=\"train\",\n",
- " streaming=True,\n",
- ")\n",
- "\n",
- "valid_data = dataset.take(4000)\n",
- "train_data = dataset.skip(4000)\n",
- "train_data = train_data.shuffle(buffer_size=5000, seed=SEED)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sLQ8t0LM2GR6"
- },
- "source": [
- "At this step, the dataset still contains raw data with code of arbitrary length. For training, we need inputs of fixed length. Let's create an Iterable dataset that will return constant-length chunks of tokens from a stream of text files.\n",
- "\n",
- "First, let's estimate the average number of characters per token in the dataset, which will help us later estimate the number of tokens in the text buffer. By default, we'll only take 400 examples (`nb_examples`) from the dataset. Using only a subset of the entire dataset will reduce computational cost while still providing a reasonable estimate of the overall character-to-token ratio."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "KCiAvydztNsu",
- "outputId": "cabf7fd0-a922-4371-cbc6-60ee99ef7469"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 400/400 [00:10<00:00, 39.87it/s] "
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The character to token ratio of the dataset is: 2.43\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)\n",
- "\n",
- "def chars_token_ratio(dataset, tokenizer, data_column, nb_examples=400):\n",
- " \"\"\"\n",
- " Estimate the average number of characters per token in the dataset.\n",
- " \"\"\"\n",
- "\n",
- " total_characters, total_tokens = 0, 0\n",
- " for _, example in tqdm(zip(range(nb_examples), iter(dataset)), total=nb_examples):\n",
- " total_characters += len(example[data_column])\n",
- " total_tokens += len(tokenizer(example[data_column]).tokens())\n",
- "\n",
- " return total_characters / total_tokens\n",
- "\n",
- "\n",
- "chars_per_token = chars_token_ratio(train_data, tokenizer, DATA_COLUMN)\n",
- "print(f\"The character to token ratio of the dataset is: {chars_per_token:.2f}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "6F13VGobB3Ma"
- },
- "source": [
- "The character-to-token ratio can also be used as an indicator of the quality of text tokenization. For instance, a character-to-token ratio of 1.0 would mean that each character is represented with a token, which is not very meaningful. This would indicate poor tokenization. In standard English text, one token is typically equivalent to approximately four characters, meaning the character-to-token ratio is around 4.0. We can expect a lower ratio in the code dataset, but generally speaking, a number between 2.0 and 3.5 can be considered good enough."
- ]
- },
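- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As a quick, illustrative sketch (not part of the original notebook), you can compare the ratio for a plain English sentence and a short code snippet using the tokenizer loaded above; the sample strings are arbitrary."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Illustrative sketch: character-to-token ratio of plain English text vs. code\n",
- "english_text = \"The quick brown fox jumps over the lazy dog.\"\n",
- "code_text = \"def add(a, b):\\n    return a + b\"\n",
- "\n",
- "for name, text in [(\"english\", english_text), (\"code\", code_text)]:\n",
- "    n_tokens = len(tokenizer(text).tokens())\n",
- "    print(f\"{name}: {len(text) / n_tokens:.2f} characters per token\")"
- ]
- },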
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rcwYFRPpwxea"
- },
- "source": [
- "**Optional FIM transformations**\n",
- "\n",
- "\n",
- "Autoregressive language models typically generate sequences from left to right. By applying the FIM transformations, the model can also learn to infill text. Check out [\"Efficient Training of Language Models to Fill in the Middle\" paper](https://arxiv.org/pdf/2207.14255.pdf) to learn more about the technique.\n",
- "We'll define the FIM transformations here and will use them when creating the Iterable Dataset. However, if you want to omit transformations, feel free to set `fim_rate` to 0."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "zmejYvEKw1E-"
- },
- "outputs": [],
- "source": [
- "import functools\n",
- "import numpy as np\n",
- "\n",
- "\n",
- "# Helper function to get token ids of the special tokens for prefix, suffix and middle for FIM transformations.\n",
- "@functools.lru_cache(maxsize=None)\n",
- "def get_fim_token_ids(tokenizer):\n",
- " try:\n",
- " FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD = tokenizer.special_tokens_map[\"additional_special_tokens\"][1:5]\n",
- " suffix_tok_id, prefix_tok_id, middle_tok_id, pad_tok_id = (\n",
- " tokenizer.vocab[tok] for tok in [FIM_SUFFIX, FIM_PREFIX, FIM_MIDDLE, FIM_PAD]\n",
- " )\n",
- " except KeyError:\n",
- " suffix_tok_id, prefix_tok_id, middle_tok_id, pad_tok_id = None, None, None, None\n",
- " return suffix_tok_id, prefix_tok_id, middle_tok_id, pad_tok_id\n",
- "\n",
- "\n",
- "## Adapted from https://github.com/bigcode-project/Megatron-LM/blob/6c4bf908df8fd86b4977f54bf5b8bd4b521003d1/megatron/data/gpt_dataset.py\n",
- "def permute(\n",
- " sample,\n",
- " np_rng,\n",
- " suffix_tok_id,\n",
- " prefix_tok_id,\n",
- " middle_tok_id,\n",
- " pad_tok_id,\n",
- " fim_rate=0.5,\n",
- " fim_spm_rate=0.5,\n",
- " truncate_or_pad=False,\n",
- "):\n",
- " \"\"\"\n",
- " Take in a sample (list of tokens) and perform a FIM transformation on it with a probability of fim_rate, using two FIM modes:\n",
- " PSM and SPM (with a probability of fim_spm_rate).\n",
- " \"\"\"\n",
- "\n",
- " # The if condition will trigger with the probability of fim_rate\n",
- " # This means FIM transformations will apply to samples with a probability of fim_rate\n",
- " if np_rng.binomial(1, fim_rate):\n",
- "\n",
- " # Split the sample into prefix, middle, and suffix, based on randomly generated indices stored in the boundaries list.\n",
- " boundaries = list(np_rng.randint(low=0, high=len(sample) + 1, size=2))\n",
- " boundaries.sort()\n",
- "\n",
- " prefix = np.array(sample[: boundaries[0]], dtype=np.int64)\n",
- " middle = np.array(sample[boundaries[0] : boundaries[1]], dtype=np.int64)\n",
- " suffix = np.array(sample[boundaries[1] :], dtype=np.int64)\n",
- "\n",
- " if truncate_or_pad:\n",
- " # calculate the new total length of the sample, taking into account tokens indicating prefix, middle, and suffix\n",
- " new_length = suffix.shape[0] + prefix.shape[0] + middle.shape[0] + 3\n",
- " diff = new_length - len(sample)\n",
- "\n",
- " # truncate or pad if there's a difference in length between the new length and the original\n",
- " if diff > 0:\n",
- " if suffix.shape[0] <= diff:\n",
- " return sample, np_rng\n",
- " suffix = suffix[: suffix.shape[0] - diff]\n",
- " elif diff < 0:\n",
- " suffix = np.concatenate([suffix, np.full((-1 * diff), pad_tok_id)])\n",
- "\n",
- " # With the probability of fim_spm_rate, apply the SPM variant of FIM transformations\n",
- " # SPM: suffix, prefix, middle\n",
- " if np_rng.binomial(1, fim_spm_rate):\n",
- " new_sample = np.concatenate(\n",
- " [\n",
- " [prefix_tok_id, suffix_tok_id],\n",
- " suffix,\n",
- " [middle_tok_id],\n",
- " prefix,\n",
- " middle,\n",
- " ]\n",
- " )\n",
- " # Otherwise, apply the PSM variant of FIM transformations\n",
- " # PSM: prefix, suffix, middle\n",
- " else:\n",
- "\n",
- " new_sample = np.concatenate(\n",
- " [\n",
- " [prefix_tok_id],\n",
- " prefix,\n",
- " [suffix_tok_id],\n",
- " suffix,\n",
- " [middle_tok_id],\n",
- " middle,\n",
- " ]\n",
- " )\n",
- " else:\n",
- " # don't apply FIM transformations\n",
- " new_sample = sample\n",
- "\n",
- " return list(new_sample), np_rng\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "AwW5FviD9xBH"
- },
- "source": [
- "Let's define the `ConstantLengthDataset`, an Iterable dataset that will return constant-length chunks of tokens. To do so, we'll read a buffer of text from the original dataset until we hit the size limits and then apply tokenizer to convert the raw text into tokenized inputs. Optionally, we'll perform FIM transformations on some sequences (the proportion of sequences affected is controlled by `fim_rate`).\n",
- "\n",
- "Once defined, we can create instances of the `ConstantLengthDataset` from both training and validation data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "AgDW-692wzOl"
- },
- "outputs": [],
- "source": [
- "from torch.utils.data import IterableDataset\n",
- "from torch.utils.data.dataloader import DataLoader\n",
- "import random\n",
- "\n",
- "# Create an Iterable dataset that returns constant-length chunks of tokens from a stream of text files.\n",
- "\n",
- "class ConstantLengthDataset(IterableDataset):\n",
- " \"\"\"\n",
- " Iterable dataset that returns constant length chunks of tokens from stream of text files.\n",
- " Args:\n",
- " tokenizer (Tokenizer): The processor used for processing the data.\n",
- " dataset (dataset.Dataset): Dataset with text files.\n",
- " infinite (bool): If True the iterator is reset after dataset reaches end else stops.\n",
- " seq_length (int): Length of token sequences to return.\n",
- " num_of_sequences (int): Number of token sequences to keep in buffer.\n",
- " chars_per_token (int): Number of characters per token used to estimate number of tokens in text buffer.\n",
- " fim_rate (float): Rate (0.0 to 1.0) that sample will be permuted with FIM.\n",
- " fim_spm_rate (float): Rate (0.0 to 1.0) of FIM permutations that will use SPM.\n",
- " seed (int): Seed for random number generator.\n",
- " \"\"\"\n",
- "\n",
- " def __init__(\n",
- " self,\n",
- " tokenizer,\n",
- " dataset,\n",
- " infinite=False,\n",
- " seq_length=1024,\n",
- " num_of_sequences=1024,\n",
- " chars_per_token=3.6,\n",
- " content_field=\"content\",\n",
- " fim_rate=0.5,\n",
- " fim_spm_rate=0.5,\n",
- " seed=0,\n",
- " ):\n",
- " self.tokenizer = tokenizer\n",
- " self.concat_token_id = tokenizer.eos_token_id\n",
- " self.dataset = dataset\n",
- " self.seq_length = seq_length\n",
- " self.infinite = infinite\n",
- " self.current_size = 0\n",
- " self.max_buffer_size = seq_length * chars_per_token * num_of_sequences\n",
- " self.content_field = content_field\n",
- " self.fim_rate = fim_rate\n",
- " self.fim_spm_rate = fim_spm_rate\n",
- " self.seed = seed\n",
- "\n",
- " (\n",
- " self.suffix_tok_id,\n",
- " self.prefix_tok_id,\n",
- " self.middle_tok_id,\n",
- " self.pad_tok_id,\n",
- " ) = get_fim_token_ids(self.tokenizer)\n",
- " if not self.suffix_tok_id and self.fim_rate > 0:\n",
- " print(\"FIM is not supported by tokenizer, disabling FIM\")\n",
- " self.fim_rate = 0\n",
- "\n",
- " def __iter__(self):\n",
- " iterator = iter(self.dataset)\n",
- " more_examples = True\n",
- " np_rng = np.random.RandomState(seed=self.seed)\n",
- " while more_examples:\n",
- " buffer, buffer_len = [], 0\n",
- " while True:\n",
- " if buffer_len >= self.max_buffer_size:\n",
- " break\n",
- " try:\n",
- " buffer.append(next(iterator)[self.content_field])\n",
- " buffer_len += len(buffer[-1])\n",
- " except StopIteration:\n",
- " if self.infinite:\n",
- " iterator = iter(self.dataset)\n",
- " else:\n",
- " more_examples = False\n",
- " break\n",
- " tokenized_inputs = self.tokenizer(buffer, truncation=False)[\"input_ids\"]\n",
- " all_token_ids = []\n",
- "\n",
- " for tokenized_input in tokenized_inputs:\n",
- " # optionally do FIM permutations\n",
- " if self.fim_rate > 0:\n",
- " tokenized_input, np_rng = permute(\n",
- " tokenized_input,\n",
- " np_rng,\n",
- " self.suffix_tok_id,\n",
- " self.prefix_tok_id,\n",
- " self.middle_tok_id,\n",
- " self.pad_tok_id,\n",
- " fim_rate=self.fim_rate,\n",
- " fim_spm_rate=self.fim_spm_rate,\n",
- " truncate_or_pad=False,\n",
- " )\n",
- "\n",
- " all_token_ids.extend(tokenized_input + [self.concat_token_id])\n",
- " examples = []\n",
- " for i in range(0, len(all_token_ids), self.seq_length):\n",
- " input_ids = all_token_ids[i : i + self.seq_length]\n",
- " if len(input_ids) == self.seq_length:\n",
- " examples.append(input_ids)\n",
- " random.shuffle(examples)\n",
- " for example in examples:\n",
- " self.current_size += 1\n",
- " yield {\n",
- " \"input_ids\": torch.LongTensor(example),\n",
- " \"labels\": torch.LongTensor(example),\n",
- " }\n",
- "\n",
- "\n",
- "train_dataset = ConstantLengthDataset(\n",
- " tokenizer,\n",
- " train_data,\n",
- " infinite=True,\n",
- " seq_length=SEQ_LENGTH,\n",
- " chars_per_token=chars_per_token,\n",
- " content_field=DATA_COLUMN,\n",
- " fim_rate=FIM_RATE,\n",
- " fim_spm_rate=FIM_SPM_RATE,\n",
- " seed=SEED,\n",
- ")\n",
- "eval_dataset = ConstantLengthDataset(\n",
- " tokenizer,\n",
- " valid_data,\n",
- " infinite=False,\n",
- " seq_length=SEQ_LENGTH,\n",
- " chars_per_token=chars_per_token,\n",
- " content_field=DATA_COLUMN,\n",
- " fim_rate=FIM_RATE,\n",
- " fim_spm_rate=FIM_SPM_RATE,\n",
- " seed=SEED,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rxev1sk6tRW9"
- },
- "source": [
- "## Prepare the model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "UCtWV-U42Eq_"
- },
- "source": [
- "Now that the data is prepared, it's time to load the model! We're going to load the quantized version of the model.\n",
- "\n",
- "This will allow us to reduce memory usage, as quantization represents data with fewer bits. We'll use the `bitsandbytes` library to quantize the model, as it has a nice integration with `transformers`. All we need to do is define a `bitsandbytes` config, and then use it when loading the model.\n",
- "\n",
- "There are different variants of 4bit quantization, but generally, we recommend using NF4 quantization for better performance (`bnb_4bit_quant_type=\"nf4\"`).\n",
- "\n",
- "The `bnb_4bit_use_double_quant` option adds a second quantization after the first one to save an additional 0.4 bits per parameter.\n",
- "\n",
- "To learn more about quantization, check out the [\"Making LLMs even more accessible with bitsandbytes, 4-bit quantization and QLoRA\" blog post](https://huggingface.co./blog/4bit-transformers-bitsandbytes).\n",
- "\n",
- "Once defined, pass the config to the `from_pretrained` method to load the quantized version of the model."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "XuwoX6U2DUvK"
- },
- "outputs": [],
- "source": [
- "from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training\n",
- "from peft.tuners.lora import LoraLayer\n",
- "\n",
- "load_in_8bit = False\n",
- "\n",
- "# 4-bit quantization\n",
- "compute_dtype = getattr(torch, BNB_4BIT_COMPUTE_DTYPE)\n",
- "\n",
- "bnb_config = BitsAndBytesConfig(\n",
- " load_in_4bit=True,\n",
- " bnb_4bit_quant_type=\"nf4\",\n",
- " bnb_4bit_compute_dtype=compute_dtype,\n",
- " bnb_4bit_use_double_quant=USE_NESTED_QUANT,\n",
- ")\n",
- "\n",
- "device_map = {\"\": 0}\n",
- "\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- " MODEL,\n",
- " load_in_8bit=load_in_8bit,\n",
- " quantization_config=bnb_config,\n",
- " device_map=device_map,\n",
- " use_cache=False, # We will be using gradient checkpointing\n",
- " trust_remote_code=True,\n",
- " use_flash_attention_2=True,\n",
- ")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "bO9e2FV8D8ZF"
- },
- "source": [
- "When using a quantized model for training, you need to call the `prepare_model_for_kbit_training()` function to preprocess the quantized model for training."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Qb_eB4xzEDBk"
- },
- "outputs": [],
- "source": [
- "model = prepare_model_for_kbit_training(model)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "lmnLjPZpDVtg"
- },
- "source": [
- "Now that the quantized model is ready, we can set up a LoRA configuration. LoRA makes fine-tuning more efficient by drastically reducing the number of trainable parameters.\n",
- "\n",
- "To train a model using the LoRA technique, we need to wrap the base model as a `PeftModel`. This involves defining a LoRA configuration with `LoraConfig` and wrapping the original model with `get_peft_model()` using that configuration.\n",
- "\n",
- "To learn more about LoRA and its parameters, refer to [PEFT documentation](https://huggingface.co./docs/peft/conceptual_guides/lora)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "_pAUU2FR2Gey",
- "outputId": "63328c2b-e693-49b1-ce0a-3ca8722f852a"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "trainable params: 5,554,176 || all params: 1,142,761,472 || trainable%: 0.4860310866343243\n"
- ]
- }
- ],
- "source": [
- "# Set up lora\n",
- "peft_config = LoraConfig(\n",
- " lora_alpha=LORA_ALPHA,\n",
- " lora_dropout=LORA_DROPOUT,\n",
- " r=LORA_R,\n",
- " bias=\"none\",\n",
- " task_type=\"CAUSAL_LM\",\n",
- " target_modules=LORA_TARGET_MODULES.split(\",\"),\n",
- ")\n",
- "\n",
- "model = get_peft_model(model, peft_config)\n",
- "model.print_trainable_parameters()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "tHe7AElXzXVV"
- },
- "source": [
- "As you can see, by applying the LoRA technique we now need to train less than 1% of the parameters."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "T_CqVydc40IM"
- },
- "source": [
- "## Train the model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Q_iN2khjrbD3"
- },
- "source": [
- "Now that we have prepared the data, and optimized the model, we are ready to bring everything together to start the training.\n",
- "\n",
- "To instantiate a `Trainer`, you need to define the training configuration. The most important part is `TrainingArguments`, a class that contains all the attributes used to configure the training.\n",
- "\n",
- "These are similar to any other kind of model training you may run, so we won't go into detail here."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "65QHS8l1tKQe"
- },
- "outputs": [],
- "source": [
- "train_data.start_iteration = 0\n",
- "\n",
- "\n",
- "training_args = TrainingArguments(\n",
- " output_dir=f\"Your_HF_username/{OUTPUT_DIR}\",\n",
- " dataloader_drop_last=True,\n",
- " evaluation_strategy=\"steps\",\n",
- " save_strategy=\"steps\",\n",
- " max_steps=MAX_STEPS,\n",
- " eval_steps=EVAL_FREQ,\n",
- " save_steps=SAVE_FREQ,\n",
- " logging_steps=LOG_FREQ,\n",
- " per_device_train_batch_size=BATCH_SIZE,\n",
- " per_device_eval_batch_size=BATCH_SIZE,\n",
- " learning_rate=LR,\n",
- " lr_scheduler_type=LR_SCHEDULER_TYPE,\n",
- " warmup_steps=NUM_WARMUP_STEPS,\n",
- " gradient_accumulation_steps=GR_ACC_STEPS,\n",
- " gradient_checkpointing=True,\n",
- " fp16=FP16,\n",
- " bf16=BF16,\n",
- " weight_decay=WEIGHT_DECAY,\n",
- " push_to_hub=True,\n",
- " include_tokens_per_second=True,\n",
- ")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "kB_fLRex09ut"
- },
- "source": [
- "As a final step, instantiate the `Trainer` and call the `train` method. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "id": "rS3nVwhUC69O",
- "outputId": "61a5bdb2-b7d0-4aed-8290-4bf20c2ccd38"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training...\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[2000/2000 4:16:10, Epoch 1/9223372036854775807]\n",
- "Step   Training Loss   Validation Loss\n",
- "100    5.524600        7.456872\n",
- "200    5.617800        7.262190\n",
- "300    5.129100        6.410039\n",
- "400    5.052200        6.306774\n",
- "500    5.202900        6.117062\n",
- "600    4.654100        6.018349\n",
- "700    5.100200        6.000355\n",
- "800    5.049800        5.889457\n",
- "900    4.541200        5.813823\n",
- "1000   5.000700        5.834208\n",
- "1100   5.026500        5.781939\n",
- "1200   4.411800        5.720596\n",
- "1300   4.782500        5.736376\n",
- "1400   4.980200        5.712276\n",
- "1500   4.368700        5.689637\n",
- "1600   4.884700        5.675920\n",
- "1700   4.914400        5.662421\n",
- "1800   4.248700        5.660122\n",
- "1900   4.798400        5.664026\n",
- "2000   4.704200        5.655665\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "TrainOutput(global_step=2000, training_loss=4.885598585128784, metrics={'train_runtime': 15380.3075, 'train_samples_per_second': 2.081, 'train_steps_per_second': 0.13, 'train_tokens_per_second': 4261.033, 'total_flos': 4.0317260660736e+17, 'train_loss': 4.885598585128784, 'epoch': 1.0})"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "trainer = Trainer(\n",
- " model=model, args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset\n",
- ")\n",
- "\n",
- "print(\"Training...\")\n",
- "trainer.train()\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "aAERlCnt1PEW"
- },
- "source": [
- "Finally, you can push the fine-tuned model to your Hub repository to share with your team."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1h7_AUTTDwE1"
- },
- "outputs": [],
- "source": [
- "trainer.push_to_hub()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "KBVH7uFOM_UF"
- },
- "source": [
- "## Inference\n",
- "\n",
- "Once the model is uploaded to the Hub, we can use it for inference. To do so, we first initialize the original base model and its tokenizer. Next, we merge the fine-tuned weights with the base model."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "jtL37piINBFe"
- },
- "outputs": [],
- "source": [
- "from peft import PeftModel\n",
- "import torch\n",
- "\n",
- "# load the original model first\n",
- "tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)\n",
- "base_model = AutoModelForCausalLM.from_pretrained(\n",
- " MODEL,\n",
- " quantization_config=None,\n",
- " device_map=None,\n",
- " trust_remote_code=True,\n",
- " torch_dtype=torch.bfloat16,\n",
- ").cuda()\n",
- "\n",
- "# merge fine-tuned weights with the base model\n",
- "peft_model_id = f\"Your_HF_username/{OUTPUT_DIR}\"\n",
- "model = PeftModel.from_pretrained(base_model, peft_model_id)\n",
- "model.merge_and_unload()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3USQ2suvDi9M"
- },
- "source": [
- "Now we can use the merged model for inference. For convenience, we'll define a `get_code_completion` function; feel free to experiment with the text generation parameters!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "RoTGpNbjDeWI"
- },
- "outputs": [],
- "source": [
- "def get_code_completion(prefix, suffix):\n",
- " text = prompt = f\"\"\"{prefix}{suffix}\"\"\"\n",
- " model.eval()\n",
- " outputs = model.generate(\n",
- " input_ids=tokenizer(text, return_tensors=\"pt\").input_ids.cuda(),\n",
- " max_new_tokens=128,\n",
- " temperature=0.2,\n",
- " top_k=50,\n",
- " top_p=0.95,\n",
- " do_sample=True,\n",
- " repetition_penalty=1.0,\n",
- " )\n",
- " return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0kMJiGDfDrBf"
- },
- "source": [
- "Now all we need to do to get a code completion is call the `get_code_completion` function, passing the first few lines that we want completed as the prefix and an empty string as the suffix."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nXlco2_-YcvM",
- "outputId": "41c411ad-b7dc-4277-f975-c173888234bb"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "from peft import LoraConfig, TaskType, get_peft_model\n",
- "from transformers import AutoModelForCausalLM\n",
- "peft_config = LoraConfig(\n",
- " task_type=TaskType.CAUSAL_LM,\n",
- " r=8,\n",
- " lora_alpha=32,\n",
- " target_modules=[\"q_proj\", \"v_proj\"],\n",
- " lora_dropout=0.1,\n",
- " bias=\"none\",\n",
- " modules_to_save=[\"q_proj\", \"v_proj\"],\n",
- " inference_mode=False,\n",
- ")\n",
- "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
- "model = get_peft_model(model, peft_config)\n",
- "model.print_trainable_parameters()\n"
- ]
- }
- ],
- "source": [
- "prefix = \"\"\"from peft import LoraConfig, TaskType, get_peft_model\n",
- "from transformers import AutoModelForCausalLM\n",
- "peft_config = LoraConfig(\n",
- "\"\"\"\n",
- "suffix =\"\"\"\"\"\"\n",
- "\n",
- "print(get_code_completion(prefix, suffix))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Ql2563kGlnmu"
- },
- "source": [
- "Having used the PEFT library earlier in this notebook, you can see that the generated result for creating a `LoraConfig` is rather good!\n",
- "\n",
- "If you go back to the cell where we instantiate the model for inference, and comment out the lines where we merge the fine-tuned weights, you can see what the original model would've generated for the exact same prefix:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "29xxp1eHTgJ9",
- "outputId": "c6d597a2-01da-4d25-a32f-3a551212c5b4"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "from peft import LoraConfig, TaskType, get_peft_model\n",
- "from transformers import AutoModelForCausalLM\n",
- "peft_config = LoraConfig(\n",
- " model_name_or_path=\"facebook/wav2vec2-base-960h\",\n",
- " num_labels=1,\n",
- " num_features=1,\n",
- " num_hidden_layers=1,\n",
- " num_attention_heads=1,\n",
- " num_hidden_layers_per_attention_head=1,\n",
- " num_attention_heads_per_hidden_layer=1,\n",
- " hidden_size=1024,\n",
- " hidden_dropout_prob=0.1,\n",
- " hidden_act=\"gelu\",\n",
- " hidden_act_dropout_prob=0.1,\n",
- " hidden\n"
- ]
- }
- ],
- "source": [
- "prefix = \"\"\"from peft import LoraConfig, TaskType, get_peft_model\n",
- "from transformers import AutoModelForCausalLM\n",
- "peft_config = LoraConfig(\n",
- "\"\"\"\n",
- "suffix =\"\"\"\"\"\"\n",
- "\n",
- "print(get_code_completion(prefix, suffix))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Pwy2ZC7U8Ema"
- },
- "source": [
- "While the output is valid Python syntax, you can see that the original model has no understanding of what a `LoraConfig` should do."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "CATYE8pp2drQ"
- },
- "source": [
- "To learn how this kind of fine-tuning compares to full fine-tuning, and how to use a model like this as your copilot in VS Code via Inference Endpoints, or locally, check out the [\"Personal Copilot: Train Your Own Coding Assistant\" blog post](https://huggingface.co./blog/personal-copilot). This notebook complements the original blog post.\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "A100",
- "machine_shape": "hm",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "name": "python"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/src/ref.bib b/src/ref.bib
new file mode 100644
index 0000000000000000000000000000000000000000..71bb1f66dd2f45854015e709c8b72f2cb82cf5f0
--- /dev/null
+++ b/src/ref.bib
@@ -0,0 +1,29 @@
+
+@misc{kojima_large_2023,
+ title = {Large {Language} {Models} are {Zero}-{Shot} {Reasoners}},
+ url = {http://arxiv.org/abs/2205.11916},
+ abstract = {Pretrained large language models (LLMs) are widely used in many sub-fields of natural language processing (NLP) and generally known as excellent few-shot learners with task-specific exemplars. Notably, chain of thought (CoT) prompting, a recent technique for eliciting complex multi-step reasoning through step-by-step answer examples, achieved the state-of-the-art performances in arithmetics and symbolic reasoning, difficult system-2 tasks that do not follow the standard scaling laws for LLMs. While these successes are often attributed to LLMs' ability for few-shot learning, we show that LLMs are decent zero-shot reasoners by simply adding "Let's think step by step" before each answer. Experimental results demonstrate that our Zero-shot-CoT, using the same single prompt template, significantly outperforms zero-shot LLM performances on diverse benchmark reasoning tasks including arithmetics (MultiArith, GSM8K, AQUA-RAT, SVAMP), symbolic reasoning (Last Letter, Coin Flip), and other logical reasoning tasks (Date Understanding, Tracking Shuffled Objects), without any hand-crafted few-shot examples, e.g. increasing the accuracy on MultiArith from 17.7\% to 78.7\% and GSM8K from 10.4\% to 40.7\% with large InstructGPT model (text-davinci-002), as well as similar magnitudes of improvements with another off-the-shelf large model, 540B parameter PaLM. The versatility of this single prompt across very diverse reasoning tasks hints at untapped and understudied fundamental zero-shot capabilities of LLMs, suggesting high-level, multi-task broad cognitive capabilities may be extracted by simple prompting. We hope our work not only serves as the minimal strongest zero-shot baseline for the challenging reasoning benchmarks, but also highlights the importance of carefully exploring and analyzing the enormous zero-shot knowledge hidden inside LLMs before crafting finetuning datasets or few-shot exemplars.},
+ urldate = {2023-06-07},
+ publisher = {arXiv},
+ author = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
+ month = jan,
+ year = {2023},
+ note = {arXiv:2205.11916 [cs]},
+ keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning},
+}
+
+@misc{hoffmann_training_2022,
+ title = {Training {Compute}-{Optimal} {Large} {Language} {Models}},
+ url = {http://arxiv.org/abs/2203.15556},
+ doi = {10.48550/arXiv.2203.15556},
+ abstract = {We investigate the optimal model size and number of tokens for training a transformer language model under a given compute budget. We find that current large language models are significantly undertrained, a consequence of the recent focus on scaling language models whilst keeping the amount of training data constant. By training over 400 language models ranging from 70 million to over 16 billion parameters on 5 to 500 billion tokens, we find that for compute-optimal training, the model size and the number of training tokens should be scaled equally: for every doubling of model size the number of training tokens should also be doubled. We test this hypothesis by training a predicted compute-optimal model, Chinchilla, that uses the same compute budget as Gopher but with 70B parameters and 4\${\textbackslash}times\$ more more data. Chinchilla uniformly and significantly outperforms Gopher (280B), GPT-3 (175B), Jurassic-1 (178B), and Megatron-Turing NLG (530B) on a large range of downstream evaluation tasks. This also means that Chinchilla uses substantially less compute for fine-tuning and inference, greatly facilitating downstream usage. As a highlight, Chinchilla reaches a state-of-the-art average accuracy of 67.5\% on the MMLU benchmark, greater than a 7\% improvement over Gopher.},
+ urldate = {2023-06-08},
+ publisher = {arXiv},
+ author = {Hoffmann, Jordan and Borgeaud, Sebastian and Mensch, Arthur and Buchatskaya, Elena and Cai, Trevor and Rutherford, Eliza and Casas, Diego de Las and Hendricks, Lisa Anne and Welbl, Johannes and Clark, Aidan and Hennigan, Tom and Noland, Eric and Millican, Katie and Driessche, George van den and Damoc, Bogdan and Guy, Aurelia and Osindero, Simon and Simonyan, Karen and Elsen, Erich and Rae, Jack W. and Vinyals, Oriol and Sifre, Laurent},
+ month = mar,
+ year = {2022},
+ note = {arXiv:2203.15556 [cs]},
+ keywords = {Computer Science - Machine Learning, Computer Science - Computation and Language},
+}
diff --git a/src/scripts/hf.py b/src/scripts/hf.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c22136f7ddc30b77c559bcbae4e3be02ecd6861
--- /dev/null
+++ b/src/scripts/hf.py
@@ -0,0 +1,16 @@
+
+import datasets
+from transformers import AutoTokenizer
+
+dataset = datasets.load_dataset( # <1>
+ "rotten_tomatoes", # <1>
+ split="train", # <1>
+) # <1>
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+
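+# Tokenize the "text" column in batches with the BERT tokenizer.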
+dataset = dataset.map( # <2>
+ lambda examples: tokenizer(examples["text"]), # <2>
+ batched=True, # <2>
+) # <2>
+
+... # <3>
diff --git a/src/scripts/llm.csv b/src/scripts/llm.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b88275da82476e58896ccf670adf0b51611904d9
--- /dev/null
+++ b/src/scripts/llm.csv
@@ -0,0 +1,11 @@
+dataset,year,size,task
+gpt-3,2018,750000.0,text-generation
+gpt-neox,2020,800000.0,text-generation
+chinchilla,2022,10500000.0,text-generation
+Gopher,2023,10000000.0,text-generation
+MT-NLG,2023,1200000.0,text-generation
+MT-NLG,2023,150000.0,text-generation
+LaMDA,2020,12600000.0,text-generation
+Stability,2022,50000000.0,text-generation
+RedPajama,2023,40000000.0,text-generation
+Llama 3,2024,400000000.0,text-generation
diff --git a/src/scripts/nlp_datas.csv b/src/scripts/nlp_datas.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4a0263fa5d5c4b67c819385957b6d937af347b5a
--- /dev/null
+++ b/src/scripts/nlp_datas.csv
@@ -0,0 +1,213 @@
+dataset,year,size,task
+acronym_identification,2020,8.556464,token-classification
+aeslc,2019,11.643743,summarization
+ag_news,2015,31.327765,text-classification
+ajgt_twitter_ar,2017,0.107395,text-classification
+alt,2016,47.849734,translation
+amazon_polarity,2013,688.339454,text-classification
+ambig_qa,2020,39.4018,question-answering
+amttl,2018,0.685534,token-classification
+app_reviews,2017,42.592679,text-classification
+aqua_rat,2017,99.837029,question-answering
+arsentd_lev,2018,0.392666,text-classification
+arxiv_dataset,2019,0.0,translation
+aslg_pc12,2012,12.773431,translation
+assin,2016,2.249205,text-classification
+assin2,2020,2.113646,text-classification
+atomic,2019,19.083782,text2text-generation
+autshumato,2010,32.124009,translation
+bc2gm_corpus,2008,4.636753,token-classification
+best2009,2009,13.89126,token-classification
+billsum,2019,67.260676,summarization
+biosses,2017,0.036324,text-classification
+blimp,2019,29.576684,text-classification
+blog_authorship_corpus,2006,632.898892,text-classification
+bn_hate_speech,2020,0.974312,text-classification
+break_data,2020,79.85539,text2text-generation
+c3,2020,9.834177,question-answering
+capes,2018,162.229298,translation
+cdt,2019,0.375476,text-classification
+cedr,2021,2.515548,text-classification
+clickbait_news_bg,2017,8.569575,text-classification
+climate_fever,2020,0.687133,text-classification
+cmu_hinglish_dog,2018,8.749685,translation
+cnn_dailymail,2015,1756.318416,summarization
+coarse_discourse,2017,4.636201,text-classification
+codah,2019,2.91078,question-answering
+code_x_glue_cc_clone_detection_big_clone_bench,2014,47.955874,text-classification
+code_x_glue_cc_clone_detection_poj104,2016,8.658581,text-retrieval
+code_x_glue_cc_defect_detection,2019,61.685715,text-classification
+code_x_glue_ct_code_to_text,2019,5191.751695,translation
+code_x_glue_tc_nl_code_search_adv,2019,966.025624,text-retrieval
+code_x_glue_tc_text_to_code,2018,100.769638,translation
+competition_math,2021,20.327424,text2text-generation
+conllpp,2019,4.8596,token-classification
+cos_e,2019,10.830854,question-answering
+covid_qa_castorini,2020,4.520993,question-answering
+covid_qa_deepset,2020,4.418117,question-answering
+covid_qa_ucsd,2020,0.0,question-answering
+cryptonite,2021,46.98957,question-answering
+cuad,2021,18.309308,question-answering
+datacommons_factcheck,2019,1.343792,text-classification
+dbpedia_14,2015,68.341743,text-classification
+definite_pronoun_resolution,2012,0.227452,token-classification
+dengue_filipino,2018,0.156014,text-classification
+docred,2019,458.040413,text-retrieval
+dream,2019,5.55819,question-answering
+drop,2019,8.308692,question-answering
+dyk,2013,0.685462,question-answering
+emo,2019,3.362556,text-classification
+ethos,2020,0.186755,text-classification
+europa_eac_tm,2014,84.513984,translation
+europa_ecdc_tm,2014,102.879264,translation
+event2Mind,2018,1.30077,text2text-generation
+exams,2020,4583.119779,question-answering
+fake_news_filipino,2020,1.313458,text-classification
+financial_phrasebank,2014,2.72756,text-classification
+finer,2019,3.733127,token-classification
+flores,2019,3.085562,translation
+flue,2019,867.236435,text-classification
+freebase_qa,2019,33.204999,question-answering
+generated_reviews_enth,2020,59.490601,translation
+germaner,2015,4.363657,token-classification
+gigaword,2003,578.402958,summarization
+glue,2019,1001.736261,text-classification
+gooaq,2021,2111.358901,question-answering
+google_wellformed_query,2018,1.157019,text-classification
+grail_qa,2020,17.636773,question-answering
+guardian_authorship,2017,49.611984,text-classification
+gutenberg_time,2020,35.853781,text-classification
+hard,2018,8.508677,text-classification
+harem,2006,3.603154,token-classification
+has_part,2020,7.437382,text-classification
+hate_speech_filipino,2019,0.822927,text-classification
+hatexplain,2020,12.848091,text-classification
+hindi_discourse,2020,4.176677,text-classification
+hlgd,2021,1.858948,text-classification
+hotpot_qa,2018,1272.841016,question-answering
+hover,2020,12.257835,text-retrieval
+humicroedit,2019,3.242912,text-classification
+hybrid_qa,2020,217.436855,question-answering
+hyperpartisan_news_detection,2019,1004.195772,text-classification
+igbo_english_machine_translation,2020,2.580255,translation
+igbo_ner,2020,4.443355,token-classification
+jnlpba,2004,3.171072,token-classification
+journalists_questions,2016,0.271039,text-classification
+kan_hope,2021,0.568972,text-classification
+kinnews_kirnews,2020,65.127732,text-classification
+kor_3i4k,2018,2.956114,text-classification
+kor_nli,2020,126.339696,text-classification
+kor_sae,2019,2.545926,text-classification
+labr,2013,39.953712,text-classification
+lama,2019,298.569546,text-retrieval
+lc_quad,2019,3.959901,question-answering
+lex_glue,2021,343.07123,question-answering
+linnaeus,2010,18.204624,token-classification
+lst20,2020,0.0,token-classification
+mac_morpho,2015,2.463485,token-classification
+masakhaner,2021,5.387138,token-classification
+mbpp,2021,0.818796,text2text-generation
+med_hop,2018,679.686122,question-answering
+medical_dialog,2020,2082.878369,question-answering
+medical_questions_pairs,2020,0.665688,text-classification
+metooma,2020,0.408889,text-classification
+metrec,2020,2.267882,text-classification
+mlqa,2019,4150.871116,question-answering
+mlsum,2020,6020.125939,summarization
+mocha,2020,14.452311,question-answering
+mrqa,2019,1479.518355,question-answering
+msr_sqa,2017,4.796932,question-answering
+msr_text_compression,2016,0.0,summarization
+msr_zhen_translation_parity,2018,0.0,translation
+multi_news,2019,756.785627,summarization
+multi_re_qa,2020,75.245778,question-answering
+multi_x_science_sum,2020,61.329304,summarization
+multidoc2dial,2021,19.353432,question-answering
+narrativeqa_manual,2018,22.638273,text2text-generation
+ncbi_disease,2014,1.546492,token-classification
+nchlt,2014,238.450416,token-classification
+ncslgr,2007,4119.164501,translation
+newsph_nli,2020,76.565287,text-classification
+newspop,2018,30.338277,text-classification
+newsqa,2017,0.0,question-answering
+nkjp-ner,2012,0.821629,token-classification
+norne,2020,246.710964,token-classification
+norwegian_ner,2019,36.365354,token-classification
+oclar,2019,0.382976,text-classification
+offcombr,2017,0.185171,text-classification
+offenseval_dravidian,2021,7.99568,text-classification
+openai_humaneval,2021,0.044877,text2text-generation
+openbookqa,2018,2.892196,question-answering
+opinosis,2010,0.757398,summarization
+opus100,2020,2610.517142,translation
+opus_elhuyar,2012,44.468751,translation
+orange_sum,2020,50.379977,summarization
+parsinlu_reading_comprehension,2020,4.117863,question-answering
+per_sent,2020,23.117196,text-classification
+pn_summary,2020,89.591141,summarization
+poem_sentiment,2020,0.04987,text-classification
+pragmeval,2019,106.61448,text-classification
+proto_qa,2020,7.493391,question-answering
+pubmed_qa,2019,2063.6481,question-answering
+qa_srl,2015,1.087729,question-answering
+qanta,2019,170.754918,question-answering
+qed,2020,14.083968,question-answering
+reasoning_bg,2019,8.768975,question-answering
+reddit_tifu,2018,1341.215712,summarization
+riddle_sense,2021,2.083122,question-answering
+ro_sent,2020,14.700057,text-classification
+ro_sts,2021,1.267607,text-classification
+ro_sts_parallel,2021,4.503388,translation
+ronec,2019,14.675943,token-classification
+ropes,2019,3.516917,question-answering
+samsum,2019,2.9441,summarization
+sberquad,2020,66.047276,question-answering
+scan,2018,45.159884,text2text-generation
+scb_mt_enth_2020,2020,276.831118,translation
+scielo,2018,391.247854,translation
+scientific_papers,2018,9009.292694,summarization
+sciq,2017,2.821345,question-answering
+sede,2021,6.318959,token-classification
+selqa,2016,137.518059,question-answering
+sem_eval_2020_task_11,2020,0.0,text-classification
+sharc,2018,5.230207,question-answering
+simple_questions_v2,2015,1270.30677,question-answering
+sms_spam,2011,0.203415,text-classification
+social_bias_frames,2020,9.464583,text2text-generation
+species_800,2013,18.204624,token-classification
+spider,2018,99.736136,text2text-generation
+squad_it,2018,8.776531,question-answering
+squad_kor_v1,2019,42.408533,question-answering
+squad_kor_v2,2020,1373.763305,question-answering
+stereoset,2020,25.00449,text-classification
+stsb_mt_sv,2020,0.383047,text-classification
+stsb_multi_mt,2021,12.992041,text-classification
+super_glue,2019,58.368572,text-classification
+swag,2018,84.49243,text-classification
+swedish_medical_ner,2016,156.818136,token-classification
+tep_en_fa_para,2011,16.353318,translation
+text2log,2021,9.746473,translation
+thai_toxicity_tweet,2019,0.19474,text-classification
+thainer,2019,5.456461,token-classification
+turkic_xwmt,2021,1157.61564,translation
+tweet_eval,2020,18.982053,text-classification
+tweet_qa,2019,1.57398,question-answering
+tweets_ar_en_parallel,2020,8.812878,translation
+tweets_hate_speech_detection,2018,4.738708,text-classification
+universal_morphologies,2016,467.757708,token-classification
+urdu_fake_news,2020,1.042653,text-classification
+urdu_sentiment_corpus,2020,0.051583,text-classification
+wiki_hop,2018,679.686122,question-answering
+wiki_movies,2016,57.070041,question-answering
+xcopa,2020,2.715704,question-answering
+xed_en_fi,2020,9.68494,text-classification
+xglue,2020,9634.964581,question-answering
+xor_tydi_qa,2020,17.720586,question-answering
+xquad_r,2020,196.497587,question-answering
+xsum,2018,257.302866,summarization
+yelp_review_full,2015,196.146755,text-classification
+roman_urdu_hate_speech,2020,2.44736,text-classification
+adv_glue,2021,0.243972,text-classification
+gsm8k,2021,11.290661,text2text-generation
+sst2,2014,7.439277,text-classification
+gpt-3,2018,750000.0,text2text-generation
diff --git a/src/scripts/plot.ju.py b/src/scripts/plot.ju.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa20cdb0520914952dc44227d8bdf433fe31d7e1
--- /dev/null
+++ b/src/scripts/plot.ju.py
@@ -0,0 +1,461 @@
+# %%
+
+# %cd ~/docs/0425-ml_summit/scripts/
+import plotly.express as px
+from plotly.graph_objs import Figure, FigureWidget
+import datasets
+import pandas as pd
+import huggingface_hub
+import plotly.graph_objs as go
+import numpy as np
+from PIL import Image
+
+FIGURES: dict[str, Figure] = {}
+# %%
+
+df = pd.read_csv("nlp_datas.csv")
+fig = px.treemap(
+ df,
+ path=[px.Constant("nlp-datasets"), "task", "dataset"],
+ values="size",
+ # color="dataset",
+ # hover_data=["iso_alpha"],
+ # color_continuous_scale="RdBu",
+)
+
+FIGURES["nlp"] = fig
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ # plot_bgcolor='rgba(0,0,0,0)',
+)
+# fig.update_traces(marker=dict(pattern=dict(shape=["|"], solidity=0.80)))
+# fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
+# figs.append(fig)
+fig
+# %%
+df = pd.read_csv("llm.csv")
+fig = px.treemap(
+ df,
+ path=[px.Constant("LLM"), "dataset"],
+ values="size",
+ # color="dataset",
+ # hover_data=["iso_alpha"],
+ # color_continuous_scale="RdBu",
+)
+FIGURES["gpt"] = fig
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ # plot_bgcolor='rgba(0,0,0,0)',
+)
+# fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
+fig
+# %%
+
+df = pd.read_csv("./seq-time.csv", index_col=0)
+df.index = df.index.map(lambda x: eval(x.replace("k", "*1024")))
+df["platformers"] = df["platformers"] / 7
+df.drop([df.columns[-1]], axis=1, inplace=True)
+df = df.reset_index(names="sequence length").melt(
+ id_vars="sequence length", var_name="model", value_name="time"
+)
+fig = px.line(df, x="sequence length", y="time", color="model")
+FIGURES["seq-time"] = fig
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ plot_bgcolor="rgba(0,0,0,0)",
+ legend_font=dict(color="white"),
+)
+fig.update_xaxes(
+ color="white",
+)
+fig.update_yaxes(
+ # showticklabels=False,
+ # zeroline=False,
+ # showline=False,
+ # griddash="4px",
+ # gridcolor="rgba(255,255,255,0.3)",
+ # title="Loss",
+ color="white",
+)
+fig
+# %%
+
+df = pd.read_csv("seq-tflops.csv", index_col=0)
+# df['sequence length']
+# df.index = df.index.map(lambda x: eval(x.replace("K", "*1024")))
+df = df.reset_index(names="sequence length").melt(
+ id_vars="sequence length", var_name="model", value_name="tflops"
+)
+fig = px.bar(df, x="sequence length", y="tflops", color="model", barmode="group")
+FIGURES["seq-tflops"] = fig
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ plot_bgcolor="rgba(0,0,0,0)",
+ legend_font=dict(color="white"),
+)
+
+fig.update_xaxes(
+ color="white",
+)
+fig.update_yaxes(
+ # showticklabels=False,
+ # zeroline=False,
+ # showline=False,
+ # griddash="4px",
+ # gridcolor="rgba(255,255,255,0.3)",
+ # title="Loss",
+ color="white",
+)
+fig
+# %%
+
+
+df = datasets.load_dataset("SUSTech/webvid", split="train[:100]").to_pandas()
+
+df = df.drop(["duration"], axis=1)
+
+
+fig = go.Figure(
+ data=[
+ go.Table(
+ header=dict(
+ values=list(df.columns), fill_color="paleturquoise", align="left"
+ ),
+ cells=dict(
+ values=[df[col] for col in df.columns],
+ fill_color="lavender",
+ align="left",
+ # alignsrc="center",
+ ),
+ )
+ ]
+)
+
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ # plot_bgcolor='rgba(0,0,0,0)',
+)
+# fig.show()
+FIGURES["webvid"] = fig
+# %%
+
+fig = go.Figure()
+
+data = {
+ "402-page transcripts from Apollo 11’s mission to the moon": 326914,
+ "44-minute silent Buster Keaton movie": 696417,
+ "more than 100,000 lines of code": 816767,
+ "Generate 1min video": 1000000,
+}
+
+df = pd.Series(data, name="token").to_frame().reset_index(names="task")
+
+
+# df = px.data.gapminder().query("continent == 'Europe' and year == 2007 and pop > 2.e6")
+fig = px.bar(
+ df,
+ y="token",
+ x="task",
+ text_auto=".2s",
+ # template="ggplot2",
+ # color="white",
+ # orientation="h",
+)
+FIGURES["token-bar"] = fig
+
+fig.update_traces(
+ textfont_size=12,
+ textangle=0,
+ textposition="outside",
+ cliponaxis=False,
+ textfont_color="white",
+)
+fig.update_layout(
+ paper_bgcolor="rgba(0,0,0,0)",
+ # autosize=True,
+ margin=dict(t=0, l=0, r=0, b=0),
+ plot_bgcolor="rgba(0,0,0,0)",
+ legend_font=dict(color="white"),
+)
+
+fig.update_xaxes(
+ color="white",
+ # showticklabels=False,
+ zeroline=False,
+ showline=False,
+ showgrid=False,
+ title="",
+)
+fig.update_yaxes(
+ # showticklabels=False,
+ showline=False,
+ showgrid=False,
+ zeroline=False,
+ # griddash="4px",
+ # gridcolor="rgba(255,255,255,0.3)",
+ # title="Loss",
+ color="white",
+)
+fig
+
+
+# %%
+def generate_loss(steps, initial_loss, decay_rate, noise_factor):
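+    # Synthetic training-loss curve: exponential decay plus noise proportional to the loss (illustration only).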
+ loss = initial_loss * np.exp(-decay_rate * steps)
+ noise = noise_factor * loss * np.random.randn(*steps.shape)
+ return loss + noise
+
+
+def splitpoints(total, split):
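+    # Yield `split` contiguous slices covering range(total); the last slice absorbs any remainder.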
+ step = total // split
+ for i in range(split - 1):
+ yield slice(i * step, (i + 1) * step)
+ yield slice((i + 1) * step, None)
+
+
+meta = [
+ {
+ "name": "2xDGX on aws",
+ "color": "red",
+ "icon": "../figures/gc.png",
+ },
+ {
+ "name": "16xDGX on aliyun",
+ "color": "orange",
+ "icon": "../figures/aws-white.png",
+ },
+ {
+ "name": "128xDGX on ucloud",
+ "color": "blue",
+ "icon": "../figures/aliyun.png",
+ },
+]
+
+
+steps = np.linspace(0, 1, 1000)
+loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)
+fig = go.Figure()
+# fig.update_layout(
+# title="Training Loss by Steps", xaxis_title="Steps", yaxis_title="Loss"
+# )
+
+FIGURES["cloud-switch"] = fig
+for i, idx in enumerate(splitpoints(1000, len(meta))):
+ fig.add_trace(
+ go.Scatter(
+ x=steps[idx],
+ y=loss[idx],
+ mode="lines",
+ name=meta[i]["name"],
+ line=dict(color=meta[i]["color"]),
+ )
+ )
+fig.add_layout_image(
+ x=0.8,
+ sizex=0.2,
+ y=0.2,
+ sizey=0.2,
+ xref="paper",
+ yref="paper",
+ opacity=1.0,
+ layer="above",
+ source=Image.open("../figures/logo/ucloud.png"),
+)
+fig.add_layout_image(
+ x=0.17,
+ sizex=0.15,
+ y=0.7,
+ sizey=0.15,
+ xref="paper",
+ yref="paper",
+ opacity=1.0,
+ layer="above",
+ source=Image.open("../figures/aws-white.png"),
+)
+fig.add_layout_image(
+ x=0.43,
+ sizex=0.15,
+ y=0.3,
+ sizey=0.15,
+ xref="paper",
+ yref="paper",
+ opacity=1.0,
+ layer="above",
+ source=Image.open("../figures/aliyun.png"),
+)
+
+fig.update_layout(
+ showlegend=False,
+ paper_bgcolor="rgba(0,0,0,0)",
+ plot_bgcolor="rgba(255,255,255,0)",
+ # plot_bgcolor="rgba(255,255,0)",
+ # width=1120,
+)
+fig.update_xaxes(
+ showticklabels=False,
+ # ticklabelposition="inside left",
+ showline=False,
+ zeroline=False,
+ showgrid=False,
+ # title=dict(text="Steps", standoff=250),
+ automargin=True,
+)
+fig.update_yaxes(
+ showticklabels=False,
+ zeroline=False,
+ showline=False,
+ griddash="4px",
+ gridcolor="rgba(255,255,255,0.3)",
+ title="Loss",
+ color="white",
+)
+fig
+
+
+# %%
+def plot_gantt(df):
+ fig = px.timeline(df, x_start="Start", x_end="End", y="Task", color="Task")
+
+ fig.update_layout(xaxis_tickformat="%H:%M")
+
+ fig.update_layout(
+ showlegend=False,
+ paper_bgcolor="rgba(0,0,0,0)",
+ # plot_bgcolor="rgba(255,255,255,0.3)",
+ plot_bgcolor="rgba(255,255,255,0)",
+ # plot_bgcolor="rgba(255,255,0)",
+ # width=1120,
+ )
+ fig.update_xaxes(
+ showticklabels=False,
+ # ticklabelposition="inside left",
+ showline=False,
+ zeroline=False,
+ showgrid=False,
+ # title=dict(text="Steps", standoff=250),
+ automargin=True,
+ )
+ fig.update_yaxes(
+ # showticklabels=False,
+ zeroline=False,
+ showline=False,
+ griddash="4px",
+ gridcolor="rgba(0,0,0,0.3)",
+ title="",
+ color="white",
+ tickfont=dict(size=20),
+ )
+
+ return fig
+
+
+# for each 4-hour slot, randomly assign a task
+num_rows = 1000
+download_prop = 0.65
+df = pd.DataFrame(
+ {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
+).assign(
+ End=lambda d: d.Start + pd.Timedelta(hours=1),
+ Task=np.random.choice(
+ ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
+ ),
+)
+
+df.loc[0, "Task"] = "Read"
+df.loc[len(df) - 1, "Task"] = "Transform"
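+# Merge consecutive rows with the same Task into a single interval (first Start, last End).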
+df = df.groupby(df.Task.ne(df.Task.shift()).cumsum()).agg(
+ {"Start": "min", "End": "max", "Task": "first"}
+)
+
+timeline = df.copy()
+# %%
+
+df = timeline.copy()
+ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=10)
+for start, end in zip(ddi[2:-1:3], ddi[3::3]):
+ df.loc[df["Start"].between(start, end), "Task"] = "Train"
+ df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})
+
+FIGURES["profile-naive"] = plot_gantt(df)
+FIGURES["profile-naive"]
+# %%
+
+df = timeline.copy()
+prop = 10
+ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=(prop + 1) * 10)
+for start, end in zip(ddi[1 : -1 : prop + 1], ddi[prop :: prop + 1]):
+ df.loc[df["Start"].between(start, end), "Task"] = "Train"
+ df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})
+FIGURES["profile-old"] = plot_gantt(df)
+FIGURES["profile-old"]
+# %%
+
+df = timeline.copy()
+
+df.loc[len(df) + 1] = pd.Series(
+ {"Start": df.iloc[0].Start, "End": df.iloc[-1].Start, "Task": "Train"}
+)
+FIGURES["profile-stream"] = plot_gantt(df)
+FIGURES["profile-stream"]
+
+# %%
+
+for k, v in FIGURES.items():
+ print(k)
+ v.write_html(
+ f"../components/{k}.qmd",
+ full_html=False,
+ include_plotlyjs="cdn",
+ )
+
+# for i in range(100):
+# print(i)
+# %%
+import qrcode
+from qrcode.image.styledpil import StyledPilImage
+from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer
+from qrcode.image.styles.colormasks import RadialGradiantColorMask
+
+qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
+qr.add_data("https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw")
+
+# img_1 = qr.make_image(image_factory=StyledPilImage, module_drawer=RoundedModuleDrawer())
+img_2 = qr.make_image(
+ # image_factory=StyledPilImage,
+ # color_mask=RadialGradiantColorMask(),
+ fill_color="white",
+ back_color="transparent",
+)
+# img_3 = qr.make_image(
+# image_factory=StyledPilImage, embeded_image_path="../figures/qr/code.png"
+# )
+img_2.save("../figures/qr/jing.png")
+# %%
+
+
+qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
+qr.add_data("mailto:data@sustech.edu.cn?subject=Hello&body=")
+
+# img_1 = qr.make_image(image_factory=StyledPilImage, module_drawer=RoundedModuleDrawer())
+img_2 = qr.make_image(
+ # image_factory=StyledPilImage,
+ # color_mask=RadialGradiantColorMask(),
+ fill_color="white",
+ back_color="transparent",
+)
+# img_3 = qr.make_image(
+# image_factory=StyledPilImage, embeded_image_path="../figures/qr/code.png"
+# )
+img_2.save("../figures/qr/mail-data.png")
+
+
diff --git a/src/scripts/plot.ju1.py b/src/scripts/plot.ju1.py
new file mode 100644
index 0000000000000000000000000000000000000000..8968ae719d5b68f1d096c539358b0be3a00fa2d8
--- /dev/null
+++ b/src/scripts/plot.ju1.py
@@ -0,0 +1,160 @@
+# %%
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import datasets
+import plotly.graph_objects as go
+import numpy as np
+import polars as pl
+
+
+tokenizer = AutoTokenizer.from_pretrained("01-ai/Yi-34B", trust_remote_code=True)
+alpaca = datasets.load_dataset("tatsu-lab/alpaca", split="train").map(
+    lambda ex: {"tokens": len(tokenizer(ex["text"])["input_ids"])}, num_proc=4
+)
+
+
+pdf = pl.DataFrame(alpaca.to_pandas()).with_columns(index=pl.int_range(0, pl.count()))
+tokens = pdf["tokens"].to_numpy()
+
+# %%
+
+
+def plot_batch(batch_size):
+    # Data
+ data = pdf["tokens"].to_numpy().copy()
+ # np.random.shuffle(data)
+ data = data[:batch_size]
+    # Compute the maximum value
+ max_value = max(data)
+
+    # Create a horizontal bar chart
+ fig = go.Figure()
+
+    # For each data point, add two bars: one for the original value and one for the gap to the maximum
+ for i, value in enumerate(data):
+ fig.add_trace(
+ go.Bar(
+ x=[value],
+ y=[i + 1],
+                # name='original value',
+ orientation="h",
+ marker_color="blue",
+ )
+ )
+ fig.add_trace(
+ go.Bar(
+ x=[max_value - value],
+ y=[i + 1],
+                # name='gap to the maximum',
+ orientation="h",
+ marker_color="red",
+ )
+ )
+
+    # Update the chart layout
+ fig.update_layout(
+        barmode="stack",  # stacked mode
+        # title="Horizontal bars: blue = original value, red = gap to the maximum",
+        # xaxis_title="value",
+        # yaxis_title="data point",
+ showlegend=False,
+ xaxis=dict(range=[0, max_value]),
+ )
+
+    # Return the figure
+ return fig
+
+
+def packing(pocket=8192):
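+    # Greedily fill fixed-size "pockets" with whole sequences and return the ratio of
+    # allocated tokens (num_pocket * pocket) to actual tokens, i.e. the packing overhead.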
+ num_pocket = 0
+ buffers = 0
+
+ for token in tokens:
+ tmp_len = buffers + token
+ if tmp_len > pocket:
+ num_pocket += 1
+ buffers = token
+ else:
+ buffers = tmp_len
+ if buffers:
+ num_pocket += 1
+ return num_pocket * pocket / tokens.sum()
+
+
+# %%
+
+plot_batch(30)
+
+# %%
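+# For each batch size, pad every example to its batch's max length and record the mean
+# number of real tokens per batch (x) and the mean padded-to-actual ratio (y).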
+arrs = []
+# for batch_size in np.linspace(1, len(pdf), 100, dtype=int):
+for batch_size in range(1, 100):
+ arr = (
+ pdf.with_columns(
+ batch=pl.col("tokens").max().over(pl.col("index") // batch_size)
+ )
+ .select(
+ pl.col("tokens").sum().over(pl.col("index") // batch_size).mean(),
+ ((pl.col("batch")) / pl.col("tokens")).mean(),
+ )
+ .to_numpy()
+ )
+ arrs.append(arr)
+x_values, y_values = np.concatenate(arrs).transpose()
+pxs = np.linspace(tokens.max(), x_values[-1], 100)
+pys = [packing(pocket) for pocket in pxs]
+
+
+fig = go.Figure()
+# Adding the batching curve
+fig.add_trace(go.Scatter(x=x_values, y=y_values, mode="lines", name="Batching"))
+
+
+# Adding the packing curve
+fig.add_trace(
+ go.Scatter(
+ x=pxs,
+ y=pys,
+ mode="lines",
+ name="Packing",
+ # marker=dict(color="red", size=10),
+ )
+)
+
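+# Worst case: every example padded to the longest sequence in the dataset.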
+worst = tokens.max() / tokens.mean()
+fig.add_trace(
+ go.Scatter(
+ x=x_values,
+ y=[worst] * len(x_values),
+ mode="lines",
+ name="Worst",
+ line=dict(dash="dash"),
+ )
+)
+fig.add_trace(
+ go.Scatter(
+ x=[8192],
+ y=[packing(8192)],
+ mode="markers",
+ name="Chosen",
+ # marker=dict(color="green", size=10),
+ )
+)
+# fig.add_hline(
+# y=worst,
+# # mode="markers",
+# line_dash="dash",
+# annotation_text="Worst",
+# # marker=dict(color="green", size=10),
+# )
+# Updating the layout
+fig.update_layout(
+ # title="Sample Function Plot with a Special Point",
+ xaxis_title="throughput(tokens)",
+ yaxis_title="computational cost(ratio)",
+ yaxis=dict(range=[0, worst + 1]),
+)
+
+# The plot is ready to be shown
+
+# fig.write_image("../../docs/1227-moda/figures/packing.png")
+fig.show()
diff --git a/src/scripts/seq-tflops.csv b/src/scripts/seq-tflops.csv
new file mode 100644
index 0000000000000000000000000000000000000000..be3914b3c3a087a6b1231d24c33643e3630dd5db
--- /dev/null
+++ b/src/scripts/seq-tflops.csv
@@ -0,0 +1,9 @@
+,Ulysses,Megatron LM,ColAI-SP,Platformers
+8K,165,44,77,175
+16K,158,67,82,154
+32K,152,81,91,157
+64K,148,105,,151
+128K,140,,,145
+256K,134,,,152
+512K,,,,140
+768K,,,,132
diff --git a/src/scripts/seq-time.csv b/src/scripts/seq-time.csv
new file mode 100644
index 0000000000000000000000000000000000000000..95dc9db36b9348d4417deab8de008e702fa1bb2e
--- /dev/null
+++ b/src/scripts/seq-time.csv
@@ -0,0 +1,16 @@
+,FlashAttention 2,platformers,temp
+7k,0.000920748,0.0133,0.0019
+14k,0.0028996,0.036113,0.005159
+21k,0.0060077,0.027916,0.003988
+27k,0.0101806,0.034447,0.004921
+42k,0.018362879,0.040026,0.005718
+56k,0.0320755,0.059668,0.008524
+70k,0.053443,0.082418,0.011774
+84k,0.0764627,0.095704,0.013672
+98k,0.1013102,0.122332,0.017476
+112k,0.134225,0.152481,0.021783
+224k,0.537223,0.593285,0.084755
+336k,1.22278,1.192632,0.170376
+448k,2.18487,2.09349,0.29907
+896k,8.8227,8.23515,1.17645
+1792k,35.625787,32.9273,4.7039
diff --git a/src/styles.css b/src/styles.css
deleted file mode 100644
index 2ddf50c7b4236e4b67c3e9fc369f6a7a562cd27d..0000000000000000000000000000000000000000
--- a/src/styles.css
+++ /dev/null
@@ -1 +0,0 @@
-/* css styles */
diff --git a/src/sustech.scss b/src/sustech.scss
new file mode 100644
index 0000000000000000000000000000000000000000..85d6f989b99435ea2f1f188b357644d3ee6f58c4
--- /dev/null
+++ b/src/sustech.scss
@@ -0,0 +1,270 @@
+/*-- scss:defaults --*/
+
+@import url('https://fonts.googleapis.com/css2?family=Crimson+Text:ital@0;1&family=Sail&display=swap');
+@import url('https://fonts.googleapis.com/css2?family=Source+Code+Pro&display=swap');
+@import url('https://fonts.googleapis.com/css2?family=Jersey+10&family=Waiting+for+the+Sunrise&display=swap');
+
+$font-family-sans-serif: "Crimson Text";
+$font-family-monospace: 'Source Code Pro', monospace;
+
+$theme-purple: #BF65C5;
+$theme-blue: #76AADB;
+$theme-teal: #50847B;
+$theme-cream: #F5F5F5;
+$theme-dark-purple: #1A1626;
+$theme-white: #ffffff;
+$theme-martinique: #312745;
+$theme-yellow: #FFD571;
+$theme-brown: #a37100;
+$theme-pink: #FED7E1;
+$theme-orange: #ff8831;
+$theme-red: #e31c54;
+$theme-green: #3faf72;
+$theme-citron: #87ad25;
+$theme-lemon: #f9f991;
+$theme-ml: #10144C;
+
+$body-bg: white;
+$body-color: black;
+// $link-color: darken($theme-cream, 20%);
+$selection-bg: $theme-blue;
+
+
+// $body-bg: $theme-martinique;
+$body-bg: $theme-ml;
+$link-color: $theme-yellow;
+$code-color: $theme-orange;
+$body-color: $theme-white;
+$presentation-heading-color: $theme-white;
+// $presentation-heading-font: 'Amatic SC', sans-serif;
+// $font-family-sans-serif: 'ABeeZee', sans-serif;
+
+
+/*-- scss:rules --*/
+
+.adlery {
+ font-family: "Waiting for the Sunrise";
+ font-weight: 400;
+ font-style: normal;
+}
+
+.waiting-for-the-sunrise-regular {
+ font-family: "Waiting for the Sunrise", cursive;
+ font-weight: 400;
+ font-style: normal;
+}
+
+.titlebox {
+ padding: 0.2em 0.4em;
+ background-color: rgba(255, 255, 255, .15);
+ backdrop-filter: blur(10px);
+ box-shadow: 0 0 3rem 0 rgba(0, 0, 0, .2);
+ border-radius: 5px;
+}
+
+
+@mixin center-text {
+ position: absolute;
+ transform: translate(-50%, -50%);
+}
+
+@mixin background-full {
+ background-size: cover;
+ background-position: center;
+ background-repeat: no-repeat;
+}
+
+
+.theme-content {
+ &:is(.slide-background) {
+ background-image: url('../../../../../background/content.png');
+ @include background-full;
+ }
+
+}
+
+.theme-title {
+ &:is(.slide-background) {
+ background-image: url('../../../../../background/title.png');
+ @include background-full;
+ }
+
+ h2 {
+ text-align: center;
+ font-size: 3em;
+ }
+ h3 {
+ text-align: center;
+ font-size: 2em;
+ }
+}
+
+.theme-section {
+ &:is(.slide-background) {
+ background-image: url('../../../../../background/section.png');
+ @include background-full;
+ }
+
+}
+
+.theme-end {
+ &:is(.slide-background) {
+ background-image: url('../../../../../background/end.png');
+ @include background-full;
+ }
+
+ h1 {
+ margin-top: 200px;
+ text-align: center;
+ font-size: 5em;
+ }
+ h2 {
+ text-align: center;
+ font-size: 2em;
+ }
+}
+
+// Added SUSTech color classes
+
+.sustech-bg-orange {
+ background-color: #ed6c00 !important;
+}
+.sustech-bg-green {
+ background-color: #003f43 !important;
+}
+.sustech-bg-cyan {
+ background-color: #2bb7b3 !important;
+}
+
+.sustech-orange,
+.sustech-orange code,
+.sustech-orange a {
+ color: #ed6c00 !important;
+}
+.sustech-green,
+.sustech-green code,
+.sustech-green a {
+ color: #003f43 !important;
+}
+.sustech-cyan,
+.sustech-cyan code,
+.sustech-cyan a {
+ color: #2bb7b3 !important;
+}
+
+.red,
+.red code,
+.red a {
+ color: $theme-red !important;
+}
+
+.sustech-green,
+.sustech-green code,
+.sustech-green a {
+ color: #5f5 !important;
+}
+
+.sustech-border-orange {
+ border-color: #ed6c00 !important;
+}
+.sustech-border-green {
+ border-color: #003f43 !important;
+}
+.sustech-border-cyan {
+ border-color: #2bb7b3 !important;
+}
+
+
+.flow {
+ background-image: url(https://images.unsplash.com/photo-1532153975070-2e9ab71f1b14?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1740&q=80);
+ background-attachment: fixed;
+ -webkit-text-fill-color: transparent;
+ -webkit-background-clip: text;
+ animation: animate 40s linear infinite;
+ font-weight: 900;
+}
+
+@keyframes animate {
+ 0% {
+ background-position: left 0px top 10px;
+ }
+ 40% {
+ background-position: left 800px top 10px;
+ }
+}
+
+
+
+// Mixin that defines the shadow effect
+
+
+.title {
+ text-align: center;
+ width: 65%;
+ height: 150px;
+ margin: auto;
+ position: absolute;
+ top: 0;
+ left: 0;
+ right: 0;
+ bottom: 0;
+ user-select: none;
+}
+
+.title b {
+ font: 400 10vh "Vibur";
+ color: #fee;
+ text-shadow: 0 -40px 100px, 0 0 2px, 0 0 1em $theme-purple, 0 0 0.5em $theme-purple, 0 0 0.1em $theme-purple, 0 10px 3px #000;
+}
+.title b span{
+ animation: blink linear infinite 2s;
+}
+.title b span:nth-of-type(2){
+ animation: blink linear infinite 3s;
+}
+@keyframes blink {
+ 78% {
+ color: inherit;
+ text-shadow: inherit;
+ }
+ 79%{
+ color: #333;
+ }
+ 80% {
+
+ text-shadow: none;
+ }
+ 81% {
+ color: inherit;
+ text-shadow: inherit;
+ }
+ 82% {
+ color: #333;
+ text-shadow: none;
+ }
+ 83% {
+ color: inherit;
+ text-shadow: inherit;
+ }
+ 92% {
+ color: #333;
+ text-shadow: none;
+ }
+ 92.5% {
+ color: inherit;
+ text-shadow: inherit;
+ }
+}
+
+
+.slide-deck {
+ width: 100%;
+ height: 400px;
+ border-radius: 5px;
+ margin-bottom: 10px !important;
+ box-shadow: 0 15px 30px rgba($theme-dark-purple, 0.5);
+}
+
+// svg.main-svg > :not(g):not(path):not(circle):not(rect):not(text) {
+// style: "background: transparent;"
+// }