File size: 3,503 Bytes
bd91e63
 
 
 
24d947d
 
9fabe04
bd91e63
df09131
62f7f42
 
 
bd91e63
bd46d25
 
 
 
 
bd91e63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24d947d
bd91e63
24d947d
bd91e63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import json
import uuid
from langfuse import Langfuse
from langfuse.decorators import observe, langfuse_context
from langfuse.openai import openai, OpenAI
import os

# Langfuse API keys and host address, read from the environment
LANGFUSE_PUBLIC_KEY = os.environ.get('LANGFUSE_PUBLIC_KEY')
LANGFUSE_SECRET_KEY = os.environ.get('LANGFUSE_SECRET_KEY')
LANGFUSE_HOST = os.environ.get('LANGFUSE_HOST')

# OpenAI-style client aimed at the TGI endpoint instead of the OpenAI API
client = OpenAI(
    api_key=os.environ.get('HUGGINGFACE_ACCESS_TOKEN'),  # replace with your token
    base_url=(
        "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
        "/v1/"
    ),  # replace with your endpoint url
)

# No arguments are passed here; presumably the SDK picks up the LANGFUSE_*
# environment variables on its own — confirm against the Langfuse docs.
langfuse = Langfuse()

# Session id attached to traces; stays None only until the first id is generated below
session_id = None

# Global reference to the latest trace id, used later to attach user feedback
current_trace_id = None


def set_new_session_id():
    """Replace the global ``session_id`` with a freshly generated UUID4 string."""
    global session_id
    fresh_uuid = uuid.uuid4()
    session_id = str(fresh_uuid)


# Start the first session as soon as the module is loaded
set_new_session_id()

# Add decorator here to capture overall timings, input/output, and manipulate trace metadata via `langfuse_context`
@observe()
async def create_response(
    prompt: str,
    history,
):
    """Produce the chat history for one user turn and record it on the Langfuse trace.

    The history is yielded twice: first with the user's prompt appended, then
    again with the assistant's reply added after the completion call returns.

    Args:
        prompt: Text of the message the user submitted.
        history: Previous chat messages as a list of dicts with ``"role"`` and
            ``"content"`` keys (the format built in this function). When it is
            empty or ``None`` the conversation is seeded with a system message.
            The list passed in is never modified.

    Yields:
        The updated list of chat messages.
    """
    # Save trace id in global var to add feedback later
    global current_trace_id
    current_trace_id = langfuse_context.get_current_trace_id()

    # Add session_id to Langfuse Trace to enable session tracking
    langfuse_context.update_current_trace(
        name="gradio_demo_chat",
        session_id=session_id,
        input=prompt,
    )

    # Add prompt to history. A new list is built rather than appending in place,
    # so the caller's list is left untouched regardless of whether it was empty.
    if not history:
        history = [{"role": "system", "content": "You are a friendly chatbot"}]
    history = [*history, {"role": "user", "content": prompt}]
    yield history

    # Get completion via the OpenAI SDK client imported from `langfuse.openai`.
    # NOTE(review): this call is not awaited, so it runs synchronously inside
    # this async generator — confirm that blocking here is acceptable.
    oai_response = client.chat.completions.create(
        messages=history,
        model="tgi",
    )
    # Fall back to an empty string when the message carries no content
    response = {
        "role": "assistant",
        "content": oai_response.choices[0].message.content or "",
    }

    # Customize trace output for better readability in Langfuse Sessions
    langfuse_context.update_current_trace(
        output=response["content"],
    )

    yield history + [response]

async def respond(prompt: str, history):
    """Forward every history update produced by ``create_response`` unchanged."""
    updates = create_response(prompt, history)
    async for updated_history in updates:
        yield updated_history

def handle_like(data: gr.LikeData):
    """Send the user's like/dislike to Langfuse as a ``user-feedback`` score.

    The score value is 1 when ``data.liked`` is truthy and 0 otherwise, and it
    is attached to whatever trace id is currently stored in the global
    ``current_trace_id``.
    """
    feedback_value = 1 if data.liked else 0
    langfuse.score(value=feedback_value, name="user-feedback", trace_id=current_trace_id)


async def handle_retry(history, retry_data: gr.RetryData):
    """Re-run the chat turn found at ``retry_data.index``.

    The content of the message at that index is reused as the prompt, and only
    the messages before it are kept as history for the new call to ``respond``.
    """
    retry_index = retry_data.index
    previous_prompt = history[retry_index]["content"]
    earlier_messages = history[:retry_index]
    async for updated_history in respond(previous_prompt, earlier_messages):
        yield updated_history

# Build the chat UI and connect its events to the handlers defined in this file
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot using 🤗 Gradio + 🪢 Langfuse")

    # Only the second avatar slot is given an image; the first is left unset
    chatbot = gr.Chatbot(
        type="messages",
        label="Chat",
        avatar_images=(None, "https://static.langfuse.com/cookbooks/gradio/hf-logo.png"),
        show_copy_button=True,
    )
    prompt = gr.Textbox(label="Chat Message", max_lines=1)

    # Submitting the textbox streams history updates into the chatbot
    prompt.submit(fn=respond, inputs=[prompt, chatbot], outputs=[chatbot])
    # Retry regenerates a reply from the chatbot's current history
    chatbot.retry(fn=handle_retry, inputs=chatbot, outputs=[chatbot])
    # Like/dislike is forwarded to Langfuse; no components are read or updated
    chatbot.like(fn=handle_like, inputs=None, outputs=None)
    # Clearing the chat starts a new session id
    chatbot.clear(fn=set_new_session_id)


if __name__ == "__main__":
    demo.launch(debug=True, share=True)