Skip to content
代码片段 群组 项目
未验证 提交 67c7184b 编辑于 作者: Andrew Kvochko's avatar Andrew Kvochko 提交者: GitHub
浏览文件

ltxv: relax frame_idx divisibility for single frames. (#7146)


This commit relaxes divisibility constraint for single-frame
conditionings. For single frames, the index can be arbitrary, while
multi-frame conditionings (>= 9 frames) must still be aligned to 8
frames.

Co-authored-by: default avatarAndrew Kvochko <a.kvochko@lightricks.com>
上级 6f8e7665
No related branches found
No related tags found
无相关合并请求
...@@ -99,12 +99,13 @@ class LTXVAddGuide: ...@@ -99,12 +99,13 @@ class LTXVAddGuide:
"negative": ("CONDITIONING", ), "negative": ("CONDITIONING", ),
"vae": ("VAE",), "vae": ("VAE",),
"latent": ("LATENT",), "latent": ("LATENT",),
"image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames." \ "image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}), "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
"frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999, "frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
"tooltip": "Frame index to start the conditioning at. Must be divisible by 8. " \ "tooltip": "Frame index to start the conditioning at. For single-frame images or "
"If a frame is not divisible by 8, it will be rounded down to the nearest multiple of 8. " \ "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
"Negative values are counted from the end of the video."}), "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
"the nearest multiple of 8. Negative values are counted from the end of the video."}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
} }
} }
...@@ -127,12 +128,13 @@ class LTXVAddGuide: ...@@ -127,12 +128,13 @@ class LTXVAddGuide:
t = vae.encode(encode_pixels) t = vae.encode(encode_pixels)
return encode_pixels, t return encode_pixels, t
def get_latent_index(self, cond, latent_length, frame_idx, scale_factors): def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
time_scale_factor, _, _ = scale_factors time_scale_factor, _, _ = scale_factors
_, num_keyframes = get_keyframe_idxs(cond) _, num_keyframes = get_keyframe_idxs(cond)
latent_count = latent_length - num_keyframes latent_count = latent_length - num_keyframes
frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * 8 + 1 + frame_idx, 0) frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * time_scale_factor + 1 + frame_idx, 0)
frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8 if guide_length > 1:
frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8
latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor
...@@ -191,7 +193,7 @@ class LTXVAddGuide: ...@@ -191,7 +193,7 @@ class LTXVAddGuide:
_, _, latent_length, latent_height, latent_width = latent_image.shape _, _, latent_length, latent_height, latent_width = latent_image.shape
image, t = self.encode(vae, latent_width, latent_height, image, scale_factors) image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
frame_idx, latent_idx = self.get_latent_index(positive, latent_length, frame_idx, scale_factors) frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence." assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
num_prefix_frames = min(self._num_prefix_frames, t.shape[2]) num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
......
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册