vilarin committed on
Commit
f55597f
·
verified ·
1 Parent(s): f0f4b7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -28
app.py CHANGED
@@ -18,14 +18,14 @@ system_prompt = '''
18
  You are an educational podcast generator. You have to create short conversations between Alice and Bob that gives an overview of the News given by the user.
19
  Please provide the script in the following JSON format directly:
20
  {
21
- "title": "strings",
22
  "content": {
23
- "Alice_0": "strings",
24
- "BOB_0": "strings",
25
  ...
26
  }
27
  }
28
- Please note that the strings you generate now must be based on the tone of people's daily life, and the punctuation marks only include commas and periods.
29
  No more than five rounds of conversation.
30
  '''
31
 
@@ -48,12 +48,15 @@ footer {
48
  }
49
  """
50
 
 
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
- "Qwen/Qwen1.5-1.8B-Chat",
53
- torch_dtype="auto",
54
  device_map="auto"
55
- )
56
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-1.8B-Chat")
 
57
 
58
 
59
  def validate_url(url):
@@ -121,25 +124,15 @@ async def gen_show(script):
121
 
122
  return output_filename
123
 
124
- @spaces.GPU(duration=100)
125
  def generator(messages):
126
- answer = tokenizer.apply_chat_template(
127
- messages,
128
- tokenize=False,
129
- add_generation_prompt=True
130
  )
131
- model_inputs = tokenizer([answer], return_tensors="pt").to(0)
132
-
133
- generated_ids = model.generate(
134
- model_inputs.input_ids,
135
- max_new_tokens=4096
136
- )
137
-
138
- generated_ids = [
139
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
140
- ]
141
-
142
- results = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
143
  return results
144
 
145
 
@@ -167,9 +160,6 @@ async def main(link):
167
  if not generated_script or not generated_script.strip().startswith('{'):
168
  raise ValueError("Failed to generate a valid script.")
169
 
170
-
171
-
172
-
173
  script_json = json.loads(generated_script) # Use the generated script as input
174
  output_filename = await gen_show(script_json)
175
  print("Output File:"+output_filename)
 
18
  You are an educational podcast generator. You have to create short conversations between Alice and Bob that gives an overview of the News given by the user.
19
  Please provide the script in the following JSON format directly:
20
  {
21
+ "title": "Strings",
22
  "content": {
23
+ "Alice_0": "Strings",
24
+ "BOB_0": "Strings",
25
  ...
26
  }
27
  }
28
+ Please note that the Strings you generate now must be based on the tone of people's daily life, and the punctuation marks only include commas and periods.
29
  No more than five rounds of conversation.
30
  '''
31
 
 
48
  }
49
  """
50
 
51
+ MODEL_ID = "01-ai/Yi-1.5-6B-Chat"
52
+
53
  model = AutoModelForCausalLM.from_pretrained(
54
+ MODEL_ID,
55
+ torch_dtype=torch.float16,
56
  device_map="auto"
57
+ ).eval()
58
+
59
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
60
 
61
 
62
  def validate_url(url):
 
124
 
125
  return output_filename
126
 
127
+ @spaces.GPU
128
  def generator(messages):
129
+ input_ids = tokenizer.apply_chat_template(
130
+ conversation=messages,
131
+ tokenize=True,
132
+ return_tensors='pt'
133
  )
134
+ output_ids = model.generate(input_ids.to('cuda'), eos_token_id=tokenizer.eos_token_id)
135
+ results = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
136
  return results
137
 
138
 
 
160
  if not generated_script or not generated_script.strip().startswith('{'):
161
  raise ValueError("Failed to generate a valid script.")
162
 
 
 
 
163
  script_json = json.loads(generated_script) # Use the generated script as input
164
  output_filename = await gen_show(script_json)
165
  print("Output File:"+output_filename)