jbilcke-hf committed
Commit 9902278 · verified · 1 Parent(s): 91a9d80

Update example.py

Files changed (1)
  1. example.py +84 -20
example.py CHANGED
@@ -3,12 +3,12 @@ import base64
 
 # Important: the NVIDIA L40S will only support small resolutions, short lengths, and no post-processing.
 # If you want those features, you might need to use the NVIDIA A100.
+
 # Use your own Inference Endpoint URL
 API_URL = "https://<use your own Inference Endpoint here>.endpoints.huggingface.cloud"
 
 # Use your own API token
 API_TOKEN = "hf_<replace by your own Hugging Face token>"
-
 def query(payload):
     response = requests.post(API_URL, headers={
         "Accept": "application/json",
@@ -17,7 +17,16 @@ def query(payload):
     }, json=payload)
     return response.json()
 
-def save_video(json_response):
+def save_video(json_response, filename):
+
+    try:
+        error = json_response["error"]
+        if error:
+            print(error)
+            return
+    except Exception as e:
+        pass
+
     video_data_uri = ""
     try:
         # Extract the video data URI from the response
@@ -35,32 +44,78 @@ def save_video(json_response):
         video_data = base64.b64decode(base64_data)
 
         # Write the binary data to an MP4 file
-        with open("video.mp4", "wb") as f:
+        with open(filename, "wb") as f:
             f.write(video_data)
 
-# Make the API call
-output = query({
+def encode_image(image_path):
+    """
+    Load and encode an image file to base64
+
+    Args:
+        image_path (str): Path to the image file
+
+    Returns:
+        str: Base64 encoded image data URI
+    """
+
+    with Image.open(image_path) as img:
+        # Convert to RGB if necessary
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+
+        # Save image to bytes
+        img_byte_arr = BytesIO()
+        img.save(img_byte_arr, format="JPEG")
+
+        # Encode to base64
+        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
+        return f"data:image/jpeg;base64,{base64_encoded}"
+
+# Example usage with image-to-video generation
+image_filename = "input.jpg"
+video_filename = "output.mp4"
+
+config = {
     "inputs": {
-        "prompt": "Portrait photo, selfie of a beautiful young caucasian woman called Charlotte, wearing a pastel-blue hoodie. She is livestreaming from NYC streets. She looks straight into the camera, looking serious, and she talks. The camera is fixed, static, a medium-shot centered on her face. 4K webcam footage. Intricate details, super resolution, sharp image, award winning."
+        #"prompt": "magnificent underwater footage, clownfishes swimming around coral inside the Caribbean sea, real gopro footage",
+        # OR
+        "image": encode_image(image_filename)
     },
+
     "parameters": {
+
         # ------------------- settings for LTX-Video -----------------------
-        # for a vertical video look
-        "width": 480,
-        "height": 768,
+
+        #"negative_prompt": "saturated, highlight, overexposed, highlighted, overlit, shaking, too bright, worst quality, inconsistent motion, blurry, jittery, distorted, cropped, watermarked, watermark, logo, subtitle, subtitles, lowres",
+
+        # note about resolution:
+        # we cannot use 720 since it cannot be divided by 32
+        #
+        # for a cinematic look:
+        "width": 768,
+        "height": 480,
+
+        # this is a hack to fool LTX-Video into believing our input image is an actual video frame with poor encoding quality
+        #"input_image_quality": 70,
+
+        # for a vertical video look:
+        #"width": 480,
+        #"height": 768,
 
         # LTX-Video requires a frame number divisible by 8, plus one frame
         # note: glitches might appear if you use more than 168 frames
-        "num_frames": (8 * 14) + 1,
+        "num_frames": (8 * 16) + 1,
 
         # using 30 steps seems to be enough for most cases, otherwise use 50 for best quality
         # I think using a large number of steps (> 30) might create some overexposure and saturation
-        "num_inference_steps": 40,
+        "num_inference_steps": 50,
 
         # values between 3.0 and 4.0 are nice
-        "guidance_scale": 3.5,
+        "guidance_scale": 4.0,
 
-        # seed: -1,
+        #"seed": 1209877,
+
+        # ----------------------------------------------------------------
 
         # ------------------- settings for Varnish -----------------------
         # This will double the number of frames.
@@ -83,15 +138,24 @@ output = query({
         # and if you do, adding more than 12% will start to negatively impact file size (video codecs aren't great at compressing film grain)
         # 0% = no grain
         # 10% = a bit of grain
-        "grain_amount": 10, # value between 0-100
+        "grain_amount": 12, # value between 0-100
+
 
-        # the following parameters are a work in progress
-        "enable_audio": False,
-        #"audio_prompt": "voices, voice, talking, speaking, speech",
-        #"audio_negative_prompt": "",
+        # The range of the CRF scale is 0–51, where:
+        # 0 is lossless (for 8-bit only; for 10-bit use -qp 0)
+        # 23 is the default
+        # 51 is worst quality possible
+        # A lower value generally leads to higher quality, and a subjectively sane range is 17–28.
+        # Consider 17 or 18 to be visually lossless or nearly so;
+        # it should look the same or nearly the same as the input, but it isn't technically lossless.
+        # The range is exponential, so increasing the CRF value by +6 results in roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
+        #"quality": 18,
 
     }
-})
+}
+
+# Make the API call
+output = query(config)
 
 # Save the video
-save_video(output)
+save_video(output, video_filename)
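
For context, the hunks above do not show the top of the updated file (its import block) or the request headers beyond "Accept" in query(). Below is a minimal, self-contained sketch of the request side of the updated example.py, runnable once the endpoint URL and token are filled in. The imports, the Authorization/Content-Type headers, and the final inspection of the response keys are assumptions for illustration; they are not confirmed by this commit.

# Minimal sketch (assumptions noted above) of the request side of the updated example.py
import base64
from io import BytesIO

import requests
from PIL import Image

API_URL = "https://<use your own Inference Endpoint here>.endpoints.huggingface.cloud"
API_TOKEN = "hf_<replace by your own Hugging Face token>"

def encode_image(image_path):
    # Same idea as the helper added in this commit: load, force RGB,
    # re-encode as JPEG, and wrap the base64 payload in a data URI.
    with Image.open(image_path) as img:
        if img.mode != "RGB":
            img = img.convert("RGB")
        buffer = BytesIO()
        img.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"

def query(payload):
    # Only the "Accept" header is visible in the diff; the Authorization and
    # Content-Type headers below are assumed.
    response = requests.post(
        API_URL,
        headers={
            "Accept": "application/json",
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        },
        json=payload,
    )
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    config = {
        "inputs": {"image": encode_image("input.jpg")},
        "parameters": {
            "width": 768,
            "height": 480,
            "num_frames": (8 * 16) + 1,  # 129 frames: divisible by 8, plus one
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
        },
    }
    output = query(config)
    # The response schema is not shown in the hunks above, so inspect the keys
    # before wiring the result into save_video().
    print(list(output.keys()))

Once the response shape has been checked, the returned dictionary can be handed to the commit's save_video(output, "output.mp4") to decode the data URI and write the MP4.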