"""Gradio chatbot demo traced with Langfuse.

Each chat turn is sent to an OpenAI-compatible endpoint through the
Langfuse-instrumented OpenAI client, recorded as a Langfuse trace grouped by
a session id, and can be rated by the user through the chatbot's like/dislike
buttons.
"""

import json
import os
import uuid

import gradio as gr
from langfuse import Langfuse
from langfuse.decorators import observe, langfuse_context
from langfuse.openai import openai, OpenAI

# Langfuse API keys and host address, read from the environment.
# NOTE(review): these constants are not passed to `Langfuse()` below; the SDK
# is presumably picking the same environment variables up on its own -- confirm.
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST")

# Initialize the OpenAI client but point it to the TGI endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
    + "/v1/",  # replace with your endpoint url
    api_key=os.getenv("HUGGINGFACE_ACCESS_TOKEN"),  # replace with your token
)

langfuse = Langfuse()

# NOTE(review): `session_id` and `current_trace_id` are module-level globals,
# so every browser session served by this process shares them. That is fine
# for a single-user demo; a multi-user deployment would need per-session state.
session_id = None


def set_new_session_id():
    """Start a new Langfuse session by storing a fresh UUID in `session_id`."""
    global session_id
    session_id = str(uuid.uuid4())


# Initialize the first session.
set_new_session_id()

# Global reference to the current trace id, used later to attach user feedback.
current_trace_id = None

# Only these keys of a chat message are forwarded to the completion endpoint.
_CHAT_MESSAGE_KEYS = ("role", "content")


def _to_chat_messages(history):
    """Return copies of the messages in `history` reduced to role and content.

    Gradio message dicts can carry UI-only keys (for example ``metadata``)
    that are not part of the chat-completions message schema, so they are
    dropped before the history is sent to the model.
    """
    return [
        {key: message[key] for key in _CHAT_MESSAGE_KEYS if key in message}
        for message in history
    ]


# The decorator captures overall timings and input/output, and lets the body
# manipulate trace metadata via `langfuse_context`.
@observe()
async def create_response(
    prompt: str,
    history,
):
    """Generate the assistant reply for `prompt` and stream chat updates.

    Args:
        prompt: The user's new message.
        history: Previous chat messages as dicts with ``role`` and
            ``content`` keys, or an empty/None value for a new conversation.
            The passed list is not modified.

    Yields:
        First the history extended with the user message, then the history
        extended with the assistant reply as well.
    """
    # Save the trace id in a global so feedback can be attached later.
    global current_trace_id
    current_trace_id = langfuse_context.get_current_trace_id()

    # Add session_id to the Langfuse trace to enable session tracking.
    langfuse_context.update_current_trace(
        name="gradio_demo_chat",
        session_id=session_id,
        input=prompt,
    )

    # A new conversation starts with the system prompt.
    if not history:
        history = [{"role": "system", "content": "You are a friendly chatbot"}]
    # Build a new list instead of appending, so the caller's list stays intact.
    history = [*history, {"role": "user", "content": prompt}]
    yield history

    # Get the completion via the OpenAI SDK imported from `langfuse.openai`,
    # which instruments the call for Langfuse.
    # NOTE(review): this is a synchronous call inside an async function, so it
    # blocks the event loop while the model is generating.
    response = {"role": "assistant", "content": ""}
    oai_response = client.chat.completions.create(
        messages=_to_chat_messages(history),
        model="tgi",
    )
    response["content"] = oai_response.choices[0].message.content or ""

    # Customize the trace output for better readability in Langfuse Sessions.
    langfuse_context.update_current_trace(
        output=response["content"],
    )

    yield history + [response]


async def respond(prompt: str, history):
    """Gradio submit handler: relay every update from `create_response`."""
    async for message in create_response(prompt, history):
        yield message


def handle_like(data: gr.LikeData):
    """Record like/dislike feedback as a Langfuse score on the current trace.

    A truthy ``data.liked`` is stored as 1, anything else as 0. Nothing is
    sent when no trace id has been captured yet, because the score could not
    be attached to a trace.
    """
    if current_trace_id is None:
        return
    langfuse.score(
        value=1 if data.liked else 0,
        name="user-feedback",
        trace_id=current_trace_id,
    )


async def handle_retry(history, retry_data: gr.RetryData):
    """Regenerate the reply for the message at ``retry_data.index``.

    The history is cut just before that message and its content is submitted
    again as the prompt.
    """
    new_history = history[: retry_data.index]
    previous_prompt = history[retry_data.index]["content"]
    async for message in respond(previous_prompt, new_history):
        yield message


with gr.Blocks() as demo:
    gr.Markdown("# Chatbot using 🤗 Gradio + 🪢 Langfuse")
    chatbot = gr.Chatbot(
        label="Chat",
        type="messages",
        show_copy_button=True,
        avatar_images=(
            None,
            "https://static.langfuse.com/cookbooks/gradio/hf-logo.png",
        ),
    )
    prompt = gr.Textbox(max_lines=1, label="Chat Message")
    prompt.submit(respond, [prompt, chatbot], [chatbot])
    chatbot.retry(handle_retry, chatbot, [chatbot])
    chatbot.like(handle_like, None, None)
    # Clearing the chat starts a new Langfuse session.
    chatbot.clear(set_new_session_id)


if __name__ == "__main__":
    demo.launch(share=True, debug=True)