Spaces:

microsoft
/

MInference

Running on Zero

iofu728 committed on Jun 17

Commit

ad9d4f6

•

1 Parent(s): 00e9e5c

Feature(MInference): add minference

Files changed (1) hide show

app.py CHANGED Viewed

@@ -81,20 +81,21 @@ def chat_llama3_8b(message: str,
         str: The generated response.
     """
     global model
-    subprocess.run(
-        "pip install pycuda==2023.1",
-        shell=True,
-    )
-    if "has_patch" not in model.__dict__:
-        from minference import MInference
-        minference_patch = MInference("minference", model_name)
-        model = minference_patch(model)
     conversation = []
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

         str: The generated response.
     """
     global model
     conversation = []
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+    print(model.device)
+    # subprocess.run(
+    #     "pip install pycuda==2023.1",
+    #     shell=True,
+    # )
+    # if "has_patch" not in model.__dict__:
+    #     from minference import MInference
+    #     minference_patch = MInference("minference", model_name)
+    #     model = minference_patch(model)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)