SAM2 on Databricks#
To run this notebook successfully, first create a new Databricks Git folder configured with the SAM2 GitHub repository (facebookresearch/segment-anything-2), then import this notebook into the notebooks folder of that Git folder (segment-anything-2/notebooks). Paths in this notebook assume the notebook is at that location within the repository.
You can now attach this notebook to a GPU-enabled cluster and run it.
%sh pip install ../../segment-anything-2
Trigger sam2._C extension build#
%sh cd ../ && python setup.py build_ext --inplace
Download model checkpoints#
This will download all sizes. You can choose which checkpoint and configuration to use in the initialization later.
%sh cd ../checkpoints && ./download_ckpts.sh
%sh ls ../checkpoints
download_ckpts.sh
sam2_hiera_base_plus.pt
sam2_hiera_small.pt
sam2_hiera_tiny.pt
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
# Enter a CUDA bfloat16 autocast context for the cells that follow; the context
# manager is entered by hand here and is not exited anywhere in the visible cells.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

# GPUs with compute capability major version 8 or higher (Ampere and newer):
# allow TF32 for CUDA matmul and for cuDNN.
if torch.cuda.get_device_properties(0).major >= 8:
    torch.backends.cuda.matmul.allow_tf32 = torch.backends.cudnn.allow_tf32 = True
from sam2.build_sam import build_sam2_video_predictor
# Checkpoint file inside the repository's checkpoints folder (populated by
# download_ckpts.sh above); the path is relative to the notebooks folder.
checkpoint = "../checkpoints/sam2_hiera_base_plus.pt"
# Configuration name passed to the builder together with the checkpoint;
# presumably the base-plus config that matches the checkpoint above, so change both together.
model_cfg = "sam2_hiera_b+.yaml"
# Build the video predictor that the later cells call (init_state, add_new_points, propagate_in_video).
predictor = build_sam2_video_predictor(model_cfg, checkpoint)
/Workspace/Users/[email protected]/segment-anything-2/sam2/modeling/sam/transformer.py:23: UserWarning: Flash Attention is disabled as it requires a GPU with Ampere (8.0) CUDA capability.
OLD_GPU, USE_FLASH_ATTN, MATH_KERNEL_ON = get_sdpa_settings()
pyplot based helpers for visualizing#
def show_mask(mask, ax, obj_id=None, random_color=False):
    """Overlay a segmentation mask on ``ax`` as a semi-transparent colored image.

    Args:
        mask: array whose last two dimensions are (height, width); it is reshaped
            to (height, width, 1) before being multiplied by the color.
        ax: object providing ``imshow`` (a matplotlib axes in this notebook).
        obj_id: index into the "tab10" colormap; ``None`` selects index 0.
        random_color: when true, draw a random RGB color instead of using the colormap.

    Returns:
        None. The RGBA image is handed to ``ax.imshow``.
    """
    alpha = 0.6
    if not random_color:
        palette = plt.get_cmap("tab10")
        palette_slot = obj_id if obj_id is not None else 0
        rgba = np.array([*palette(palette_slot)[:3], alpha])
    else:
        rgba = np.concatenate([np.random.random(3), np.array([alpha])], axis=0)

    # Broadcast the (1, 1, 4) color over the (height, width, 1) mask.
    height, width = mask.shape[-2:]
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(coords, labels, ax, marker_size=200):
    """Scatter the click points on ``ax``: label 1 in green, label 0 in red.

    Args:
        coords: 2-D array of points; column 0 is plotted as x and column 1 as y.
        labels: array aligned with ``coords``; rows equal to 1 and rows equal to 0
            are selected separately.
        ax: object providing ``scatter`` (a matplotlib axes in this notebook).
        marker_size: value forwarded as the ``s`` argument of ``scatter``.

    Returns:
        None.
    """
    # Select both groups first, then draw positives followed by negatives.
    groups = [
        (coords[labels == flag], tint)
        for flag, tint in ((1, 'green'), (0, 'red'))
    ]
    for selected, tint in groups:
        ax.scatter(
            selected[:, 0],
            selected[:, 1],
            color=tint,
            marker='*',
            s=marker_size,
            edgecolor='white',
            linewidth=1.25,
        )
Downloading the keynote from YouTube#
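Note: YouTube may block the download. If it does, download the video manually and upload it to your Databricks environment instead. The `username` and `password` values in the command below are placeholders; replace them with your own credentials.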
%pip install yt-dlp
%sh yt-dlp -o ./videos/keynote/keynote.mp4 -f "bestvideo[height<=480]" -u "username" -p "password" "https://www.youtube.com/watch?v=-6dt7eJ3cMs"
%sh ffmpeg -ss 00:00:15 -i ./videos/keynote/keynote.mp4 -t 00:00:10 -q:v 2 -start_number 0 /Volumes/sam/default/frames/'%05d.jpg'
# `video_dir` is a directory of JPEG frames with filenames like `<frame_index>.jpg`
video_dir = "/Volumes/sam/default/frames"

# collect every JPEG frame name in that directory, ordered by its numeric frame index
# (a JPEG whose stem is not an integer makes the sort key raise ValueError)
frame_names = sorted(
    (
        entry
        for entry in os.listdir(video_dir)
        if os.path.splitext(entry)[-1] in (".jpg", ".jpeg", ".JPG", ".JPEG")
    ),
    key=lambda entry: int(os.path.splitext(entry)[0]),
)
# take a look at the first video frame
frame_idx = 0
plt.figure(figsize=(12, 8))
plt.title(f"frame {frame_idx}")
# frame_names is sorted by numeric frame index, so position 0 is the lowest-numbered frame
plt.imshow(Image.open(os.path.join(video_dir, frame_names[frame_idx])))
<matplotlib.image.AxesImage at 0x7efcec14ad10>

# Initialise the predictor's inference state from the frame directory; the
# progress bar below shows the JPEG frames being loaded as part of this call.
inference_state = predictor.init_state(video_path=video_dir)
frame loading (JPEG): 0%| | 0/300 [00:00<?, ?it/s]
frame loading (JPEG): 1%| | 2/300 [00:00<00:21, 13.61it/s]
frame loading (JPEG): 1%|▏ | 4/300 [00:00<00:29, 10.09it/s]
frame loading (JPEG): 2%|▏ | 6/300 [00:00<00:28, 10.43it/s]
frame loading (JPEG): 3%|▎ | 8/300 [00:00<00:26, 10.89it/s]
frame loading (JPEG): 3%|▎ | 10/300 [00:00<00:25, 11.58it/s]
frame loading (JPEG): 4%|▍ | 12/300 [00:01<00:26, 10.91it/s]
frame loading (JPEG): 5%|▍ | 14/300 [00:01<00:25, 11.03it/s]
frame loading (JPEG): 5%|▌ | 16/300 [00:01<00:26, 10.79it/s]
frame loading (JPEG): 6%|▌ | 18/300 [00:01<00:27, 10.28it/s]
frame loading (JPEG): 7%|▋ | 20/300 [00:01<00:25, 11.19it/s]
frame loading (JPEG): 7%|▋ | 22/300 [00:01<00:24, 11.38it/s]
frame loading (JPEG): 8%|▊ | 24/300 [00:02<00:23, 11.97it/s]
frame loading (JPEG): 9%|▊ | 26/300 [00:02<00:22, 11.95it/s]
frame loading (JPEG): 9%|▉ | 28/300 [00:02<00:24, 11.18it/s]
frame loading (JPEG): 10%|█ | 30/300 [00:02<00:23, 11.43it/s]
frame loading (JPEG): 11%|█ | 32/300 [00:02<00:25, 10.63it/s]
frame loading (JPEG): 11%|█▏ | 34/300 [00:03<00:22, 11.60it/s]
frame loading (JPEG): 12%|█▏ | 36/300 [00:03<00:21, 12.41it/s]
frame loading (JPEG): 13%|█▎ | 38/300 [00:03<00:21, 12.44it/s]
frame loading (JPEG): 13%|█▎ | 40/300 [00:03<00:24, 10.41it/s]
frame loading (JPEG): 14%|█▍ | 42/300 [00:03<00:23, 10.97it/s]
frame loading (JPEG): 15%|█▍ | 44/300 [00:03<00:21, 11.65it/s]
frame loading (JPEG): 15%|█▌ | 46/300 [00:04<00:21, 11.60it/s]
frame loading (JPEG): 16%|█▌ | 48/300 [00:04<00:21, 11.90it/s]
frame loading (JPEG): 17%|█▋ | 50/300 [00:04<00:20, 12.34it/s]
frame loading (JPEG): 17%|█▋ | 52/300 [00:04<00:21, 11.38it/s]
frame loading (JPEG): 18%|█▊ | 54/300 [00:04<00:21, 11.59it/s]
frame loading (JPEG): 19%|█▊ | 56/300 [00:04<00:20, 12.10it/s]
frame loading (JPEG): 19%|█▉ | 58/300 [00:05<00:21, 11.14it/s]
frame loading (JPEG): 20%|██ | 60/300 [00:05<00:23, 10.41it/s]
frame loading (JPEG): 21%|██ | 62/300 [00:05<00:22, 10.50it/s]
frame loading (JPEG): 21%|██▏ | 64/300 [00:05<00:23, 9.90it/s]
frame loading (JPEG): 22%|██▏ | 66/300 [00:06<00:26, 8.82it/s]
frame loading (JPEG): 23%|██▎ | 68/300 [00:06<00:24, 9.55it/s]
frame loading (JPEG): 23%|██▎ | 70/300 [00:06<00:22, 10.40it/s]
frame loading (JPEG): 24%|██▍ | 72/300 [00:06<00:22, 9.93it/s]
frame loading (JPEG): 25%|██▍ | 74/300 [00:06<00:22, 10.15it/s]
frame loading (JPEG): 25%|██▌ | 76/300 [00:07<00:23, 9.55it/s]
frame loading (JPEG): 26%|██▌ | 78/300 [00:07<00:21, 10.31it/s]
frame loading (JPEG): 27%|██▋ | 80/300 [00:07<00:19, 11.15it/s]
frame loading (JPEG): 27%|██▋ | 82/300 [00:07<00:19, 10.99it/s]
frame loading (JPEG): 28%|██▊ | 84/300 [00:07<00:19, 11.29it/s]
frame loading (JPEG): 29%|██▊ | 86/300 [00:07<00:17, 12.14it/s]
frame loading (JPEG): 29%|██▉ | 88/300 [00:07<00:17, 12.10it/s]
frame loading (JPEG): 30%|███ | 90/300 [00:08<00:19, 11.03it/s]
frame loading (JPEG): 31%|███ | 92/300 [00:08<00:18, 11.04it/s]
frame loading (JPEG): 31%|███▏ | 94/300 [00:08<00:18, 11.22it/s]
frame loading (JPEG): 32%|███▏ | 96/300 [00:08<00:18, 11.24it/s]
frame loading (JPEG): 33%|███▎ | 98/300 [00:08<00:17, 11.44it/s]
frame loading (JPEG): 33%|███▎ | 100/300 [00:09<00:17, 11.46it/s]
frame loading (JPEG): 34%|███▍ | 102/300 [00:09<00:19, 10.39it/s]
frame loading (JPEG): 35%|███▍ | 104/300 [00:09<00:20, 9.63it/s]
frame loading (JPEG): 35%|███▌ | 106/300 [00:09<00:18, 10.45it/s]
frame loading (JPEG): 36%|███▌ | 108/300 [00:09<00:19, 9.90it/s]
frame loading (JPEG): 37%|███▋ | 110/300 [00:10<00:17, 10.73it/s]
frame loading (JPEG): 37%|███▋ | 112/300 [00:10<00:20, 9.10it/s]
frame loading (JPEG): 38%|███▊ | 114/300 [00:10<00:19, 9.59it/s]
frame loading (JPEG): 39%|███▊ | 116/300 [00:10<00:18, 10.07it/s]
frame loading (JPEG): 39%|███▉ | 118/300 [00:10<00:17, 10.45it/s]
frame loading (JPEG): 40%|████ | 120/300 [00:11<00:16, 11.20it/s]
frame loading (JPEG): 41%|████ | 122/300 [00:11<00:15, 11.43it/s]
frame loading (JPEG): 41%|████▏ | 124/300 [00:11<00:19, 8.97it/s]
frame loading (JPEG): 42%|████▏ | 125/300 [00:11<00:19, 8.82it/s]
frame loading (JPEG): 42%|████▏ | 127/300 [00:11<00:17, 9.61it/s]
frame loading (JPEG): 43%|████▎ | 129/300 [00:11<00:16, 10.43it/s]
frame loading (JPEG): 44%|████▎ | 131/300 [00:12<00:15, 11.06it/s]
frame loading (JPEG): 44%|████▍ | 133/300 [00:12<00:17, 9.66it/s]
frame loading (JPEG): 45%|████▌ | 135/300 [00:12<00:15, 10.48it/s]
frame loading (JPEG): 46%|████▌ | 137/300 [00:12<00:15, 10.81it/s]
frame loading (JPEG): 46%|████▋ | 139/300 [00:12<00:14, 11.11it/s]
frame loading (JPEG): 47%|████▋ | 141/300 [00:13<00:14, 10.82it/s]
frame loading (JPEG): 48%|████▊ | 143/300 [00:13<00:14, 11.06it/s]
frame loading (JPEG): 48%|████▊ | 145/300 [00:13<00:16, 9.12it/s]
frame loading (JPEG): 49%|████▉ | 147/300 [00:13<00:15, 9.93it/s]
frame loading (JPEG): 50%|████▉ | 149/300 [00:13<00:14, 10.16it/s]
frame loading (JPEG): 50%|█████ | 151/300 [00:14<00:14, 10.28it/s]
frame loading (JPEG): 51%|█████ | 153/300 [00:14<00:13, 10.98it/s]
frame loading (JPEG): 52%|█████▏ | 155/300 [00:14<00:13, 10.83it/s]
frame loading (JPEG): 52%|█████▏ | 157/300 [00:14<00:12, 11.15it/s]
frame loading (JPEG): 53%|█████▎ | 159/300 [00:14<00:13, 10.46it/s]
frame loading (JPEG): 54%|█████▎ | 161/300 [00:15<00:12, 10.95it/s]
frame loading (JPEG): 54%|█████▍ | 163/300 [00:15<00:12, 10.57it/s]
frame loading (JPEG): 55%|█████▌ | 165/300 [00:15<00:15, 8.92it/s]
frame loading (JPEG): 56%|█████▌ | 167/300 [00:15<00:14, 9.41it/s]
frame loading (JPEG): 56%|█████▋ | 169/300 [00:15<00:13, 10.04it/s]
frame loading (JPEG): 57%|█████▋ | 171/300 [00:16<00:12, 10.29it/s]
frame loading (JPEG): 58%|█████▊ | 173/300 [00:16<00:12, 9.84it/s]
frame loading (JPEG): 58%|█████▊ | 175/300 [00:16<00:12, 9.82it/s]
frame loading (JPEG): 59%|█████▉ | 177/300 [00:16<00:12, 10.20it/s]
frame loading (JPEG): 60%|█████▉ | 179/300 [00:16<00:11, 10.41it/s]
frame loading (JPEG): 60%|██████ | 181/300 [00:17<00:12, 9.91it/s]
frame loading (JPEG): 61%|██████ | 183/300 [00:17<00:10, 10.86it/s]
frame loading (JPEG): 62%|██████▏ | 185/300 [00:17<00:11, 10.40it/s]
frame loading (JPEG): 62%|██████▏ | 187/300 [00:17<00:10, 10.58it/s]
frame loading (JPEG): 63%|██████▎ | 189/300 [00:17<00:10, 10.75it/s]
frame loading (JPEG): 64%|██████▎ | 191/300 [00:17<00:09, 11.11it/s]
frame loading (JPEG): 64%|██████▍ | 193/300 [00:18<00:09, 11.58it/s]
frame loading (JPEG): 65%|██████▌ | 195/300 [00:18<00:12, 8.67it/s]
frame loading (JPEG): 66%|██████▌ | 197/300 [00:18<00:11, 9.26it/s]
frame loading (JPEG): 66%|██████▋ | 199/300 [00:18<00:10, 9.54it/s]
frame loading (JPEG): 67%|██████▋ | 201/300 [00:19<00:10, 9.80it/s]
frame loading (JPEG): 68%|██████▊ | 203/300 [00:19<00:10, 8.84it/s]
frame loading (JPEG): 68%|██████▊ | 205/300 [00:19<00:09, 9.73it/s]
frame loading (JPEG): 69%|██████▉ | 207/300 [00:19<00:09, 10.21it/s]
frame loading (JPEG): 70%|██████▉ | 209/300 [00:19<00:09, 9.84it/s]
frame loading (JPEG): 70%|███████ | 211/300 [00:20<00:09, 9.45it/s]
frame loading (JPEG): 71%|███████ | 212/300 [00:20<00:11, 7.92it/s]
frame loading (JPEG): 71%|███████▏ | 214/300 [00:20<00:09, 9.07it/s]
frame loading (JPEG): 72%|███████▏ | 216/300 [00:20<00:09, 9.26it/s]
frame loading (JPEG): 73%|███████▎ | 218/300 [00:20<00:08, 10.05it/s]
frame loading (JPEG): 73%|███████▎ | 220/300 [00:21<00:08, 9.65it/s]
frame loading (JPEG): 74%|███████▍ | 222/300 [00:21<00:07, 10.56it/s]
frame loading (JPEG): 75%|███████▍ | 224/300 [00:21<00:07, 10.43it/s]
frame loading (JPEG): 75%|███████▌ | 226/300 [00:21<00:06, 11.14it/s]
frame loading (JPEG): 76%|███████▌ | 228/300 [00:21<00:07, 10.01it/s]
frame loading (JPEG): 77%|███████▋ | 230/300 [00:22<00:06, 10.26it/s]
frame loading (JPEG): 77%|███████▋ | 232/300 [00:22<00:06, 10.76it/s]
frame loading (JPEG): 78%|███████▊ | 234/300 [00:22<00:05, 11.58it/s]
frame loading (JPEG): 79%|███████▊ | 236/300 [00:22<00:05, 11.18it/s]
frame loading (JPEG): 79%|███████▉ | 238/300 [00:22<00:05, 11.18it/s]
frame loading (JPEG): 80%|████████ | 240/300 [00:22<00:05, 11.51it/s]
frame loading (JPEG): 81%|████████ | 242/300 [00:23<00:05, 11.31it/s]
frame loading (JPEG): 81%|████████▏ | 244/300 [00:23<00:05, 10.35it/s]
frame loading (JPEG): 82%|████████▏ | 246/300 [00:23<00:05, 10.32it/s]
frame loading (JPEG): 83%|████████▎ | 248/300 [00:23<00:04, 10.90it/s]
frame loading (JPEG): 83%|████████▎ | 250/300 [00:23<00:04, 10.87it/s]
frame loading (JPEG): 84%|████████▍ | 252/300 [00:23<00:04, 10.99it/s]
frame loading (JPEG): 85%|████████▍ | 254/300 [00:24<00:03, 11.52it/s]
frame loading (JPEG): 85%|████████▌ | 256/300 [00:24<00:03, 11.11it/s]
frame loading (JPEG): 86%|████████▌ | 258/300 [00:24<00:03, 11.56it/s]
frame loading (JPEG): 87%|████████▋ | 260/300 [00:24<00:03, 12.09it/s]
frame loading (JPEG): 87%|████████▋ | 262/300 [00:24<00:03, 11.27it/s]
frame loading (JPEG): 88%|████████▊ | 264/300 [00:25<00:03, 10.56it/s]
frame loading (JPEG): 89%|████████▊ | 266/300 [00:25<00:03, 10.56it/s]
frame loading (JPEG): 89%|████████▉ | 268/300 [00:25<00:03, 9.80it/s]
frame loading (JPEG): 90%|█████████ | 270/300 [00:25<00:02, 10.94it/s]
frame loading (JPEG): 91%|█████████ | 272/300 [00:25<00:02, 10.57it/s]
frame loading (JPEG): 91%|█████████▏| 274/300 [00:25<00:02, 11.05it/s]
frame loading (JPEG): 92%|█████████▏| 276/300 [00:26<00:02, 11.21it/s]
frame loading (JPEG): 93%|█████████▎| 278/300 [00:26<00:01, 11.66it/s]
frame loading (JPEG): 93%|█████████▎| 280/300 [00:26<00:01, 11.24it/s]
frame loading (JPEG): 94%|█████████▍| 282/300 [00:26<00:01, 11.43it/s]
frame loading (JPEG): 95%|█████████▍| 284/300 [00:26<00:01, 11.89it/s]
frame loading (JPEG): 95%|█████████▌| 286/300 [00:27<00:01, 11.02it/s]
frame loading (JPEG): 96%|█████████▌| 288/300 [00:27<00:01, 10.38it/s]
frame loading (JPEG): 97%|█████████▋| 290/300 [00:27<00:00, 10.84it/s]
frame loading (JPEG): 97%|█████████▋| 292/300 [00:27<00:00, 10.87it/s]
frame loading (JPEG): 98%|█████████▊| 294/300 [00:27<00:00, 10.33it/s]
frame loading (JPEG): 99%|█████████▊| 296/300 [00:27<00:00, 11.02it/s]
frame loading (JPEG): 99%|█████████▉| 298/300 [00:28<00:00, 10.32it/s]
frame loading (JPEG): 100%|██████████| 300/300 [00:28<00:00, 9.63it/s]
frame loading (JPEG): 100%|██████████| 300/300 [00:28<00:00, 10.55it/s]
ann_frame_idx = 0  # the frame index we interact with
ann_obj_id = 1  # a unique id for each object we interact with (any integer works)
# Let's add a positive click at (x, y) = (350, 100) to get started
points = np.array([[350, 100]], dtype=np.float32)
# for labels, `1` means positive click and `0` means negative click
labels = np.array([1], np.int32)
# register the click on the annotated frame; the first returned value is not used here
_, out_obj_ids, out_mask_logits = predictor.add_new_points(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    points=points,
    labels=labels,
)
# show the results on the current (interacted) frame
plt.figure(figsize=(12, 8))
plt.title(f"frame {ann_frame_idx}")
plt.imshow(Image.open(os.path.join(video_dir, frame_names[ann_frame_idx])))
show_points(points, labels, plt.gca())
# logits above 0 are drawn as mask; the tensor is moved to CPU and converted to NumPy first
show_mask((out_mask_logits[0] > 0.0).cpu().numpy(), plt.gca(), obj_id=out_obj_ids[0])

# propagate the prompt through the video, keeping one {object id: boolean mask} dict per frame
video_segments = {}  # frame index -> {object id: mask array}
for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(inference_state):
    # threshold the logits at 0 and move each mask to CPU/NumPy so it can be plotted later
    video_segments[out_frame_idx] = dict(
        (object_id, (out_mask_logits[slot] > 0.0).cpu().numpy())
        for slot, object_id in enumerate(out_obj_ids)
    )
propagate in video: 0%| | 0/300 [00:00<?, ?it/s]
propagate in video: 1%| | 2/300 [00:00<01:17, 3.82it/s]
propagate in video: 1%| | 3/300 [00:00<01:36, 3.07it/s]
propagate in video: 1%|▏ | 4/300 [00:01<01:50, 2.69it/s]
propagate in video: 2%|▏ | 5/300 [00:01<02:01, 2.44it/s]
propagate in video: 2%|▏ | 6/300 [00:02<02:10, 2.26it/s]
propagate in video: 2%|▏ | 7/300 [00:02<02:19, 2.11it/s]
propagate in video: 3%|▎ | 8/300 [00:03<02:27, 1.99it/s]
propagate in video: 3%|▎ | 9/300 [00:04<02:32, 1.91it/s]
propagate in video: 3%|▎ | 10/300 [00:04<02:35, 1.86it/s]
propagate in video: 4%|▎ | 11/300 [00:05<02:37, 1.83it/s]
propagate in video: 4%|▍ | 12/300 [00:05<02:38, 1.81it/s]
propagate in video: 4%|▍ | 13/300 [00:06<02:39, 1.80it/s]
propagate in video: 5%|▍ | 14/300 [00:06<02:39, 1.79it/s]
propagate in video: 5%|▌ | 15/300 [00:07<02:39, 1.78it/s]
propagate in video: 5%|▌ | 16/300 [00:08<02:40, 1.77it/s]
propagate in video: 6%|▌ | 17/300 [00:08<02:39, 1.77it/s]
propagate in video: 6%|▌ | 18/300 [00:09<02:39, 1.77it/s]
propagate in video: 6%|▋ | 19/300 [00:09<02:38, 1.77it/s]
propagate in video: 7%|▋ | 20/300 [00:10<02:38, 1.77it/s]
propagate in video: 7%|▋ | 21/300 [00:10<02:37, 1.77it/s]
propagate in video: 7%|▋ | 22/300 [00:11<02:37, 1.77it/s]
propagate in video: 8%|▊ | 23/300 [00:11<02:36, 1.77it/s]
propagate in video: 8%|▊ | 24/300 [00:12<02:36, 1.76it/s]
propagate in video: 8%|▊ | 25/300 [00:13<02:36, 1.75it/s]
propagate in video: 9%|▊ | 26/300 [00:13<02:36, 1.75it/s]
propagate in video: 9%|▉ | 27/300 [00:14<02:35, 1.75it/s]
propagate in video: 9%|▉ | 28/300 [00:14<02:35, 1.75it/s]
propagate in video: 10%|▉ | 29/300 [00:15<02:34, 1.75it/s]
propagate in video: 10%|█ | 30/300 [00:15<02:34, 1.75it/s]
propagate in video: 10%|█ | 31/300 [00:16<02:34, 1.75it/s]
propagate in video: 11%|█ | 32/300 [00:17<02:33, 1.75it/s]
propagate in video: 11%|█ | 33/300 [00:17<02:32, 1.75it/s]
propagate in video: 11%|█▏ | 34/300 [00:18<02:32, 1.74it/s]
propagate in video: 12%|█▏ | 35/300 [00:18<02:31, 1.75it/s]
propagate in video: 12%|█▏ | 36/300 [00:19<02:31, 1.74it/s]
propagate in video: 12%|█▏ | 37/300 [00:20<02:30, 1.74it/s]
propagate in video: 13%|█▎ | 38/300 [00:20<02:30, 1.74it/s]
propagate in video: 13%|█▎ | 39/300 [00:21<02:29, 1.74it/s]
propagate in video: 13%|█▎ | 40/300 [00:21<02:29, 1.74it/s]
propagate in video: 14%|█▎ | 41/300 [00:22<02:28, 1.75it/s]
propagate in video: 14%|█▍ | 42/300 [00:22<02:28, 1.74it/s]
propagate in video: 14%|█▍ | 43/300 [00:23<02:27, 1.74it/s]
propagate in video: 15%|█▍ | 44/300 [00:24<02:27, 1.74it/s]
propagate in video: 15%|█▌ | 45/300 [00:24<02:26, 1.74it/s]
propagate in video: 15%|█▌ | 46/300 [00:25<02:26, 1.74it/s]
propagate in video: 16%|█▌ | 47/300 [00:25<02:25, 1.74it/s]
propagate in video: 16%|█▌ | 48/300 [00:26<02:25, 1.74it/s]
propagate in video: 16%|█▋ | 49/300 [00:26<02:24, 1.74it/s]
propagate in video: 17%|█▋ | 50/300 [00:27<02:23, 1.74it/s]
propagate in video: 17%|█▋ | 51/300 [00:28<02:23, 1.74it/s]
propagate in video: 17%|█▋ | 52/300 [00:28<02:22, 1.74it/s]
propagate in video: 18%|█▊ | 53/300 [00:29<02:22, 1.73it/s]
propagate in video: 18%|█▊ | 54/300 [00:29<02:21, 1.74it/s]
propagate in video: 18%|█▊ | 55/300 [00:30<02:21, 1.73it/s]
propagate in video: 19%|█▊ | 56/300 [00:30<02:20, 1.74it/s]
propagate in video: 19%|█▉ | 57/300 [00:31<02:20, 1.73it/s]
propagate in video: 19%|█▉ | 58/300 [00:32<02:19, 1.74it/s]
propagate in video: 20%|█▉ | 59/300 [00:32<02:19, 1.73it/s]
propagate in video: 20%|██ | 60/300 [00:33<02:18, 1.73it/s]
propagate in video: 20%|██ | 61/300 [00:33<02:17, 1.73it/s]
propagate in video: 21%|██ | 62/300 [00:34<02:17, 1.74it/s]
propagate in video: 21%|██ | 63/300 [00:34<02:16, 1.73it/s]
propagate in video: 21%|██▏ | 64/300 [00:35<02:15, 1.74it/s]
propagate in video: 22%|██▏ | 65/300 [00:36<02:15, 1.73it/s]
propagate in video: 22%|██▏ | 66/300 [00:36<02:14, 1.74it/s]
propagate in video: 22%|██▏ | 67/300 [00:37<02:14, 1.73it/s]
propagate in video: 23%|██▎ | 68/300 [00:37<02:13, 1.74it/s]
propagate in video: 23%|██▎ | 69/300 [00:38<02:13, 1.73it/s]
propagate in video: 23%|██▎ | 70/300 [00:39<02:13, 1.73it/s]
propagate in video: 24%|██▎ | 71/300 [00:39<02:12, 1.73it/s]
propagate in video: 24%|██▍ | 72/300 [00:40<02:11, 1.73it/s]
propagate in video: 24%|██▍ | 73/300 [00:40<02:11, 1.73it/s]
propagate in video: 25%|██▍ | 74/300 [00:41<02:10, 1.73it/s]
propagate in video: 25%|██▌ | 75/300 [00:41<02:09, 1.73it/s]
propagate in video: 25%|██▌ | 76/300 [00:42<02:09, 1.73it/s]
propagate in video: 26%|██▌ | 77/300 [00:43<02:08, 1.73it/s]
propagate in video: 26%|██▌ | 78/300 [00:43<02:08, 1.73it/s]
propagate in video: 26%|██▋ | 79/300 [00:44<02:07, 1.73it/s]
propagate in video: 27%|██▋ | 80/300 [00:44<02:07, 1.73it/s]
propagate in video: 27%|██▋ | 81/300 [00:45<02:06, 1.73it/s]
propagate in video: 27%|██▋ | 82/300 [00:45<02:06, 1.73it/s]
propagate in video: 28%|██▊ | 83/300 [00:46<02:05, 1.73it/s]
propagate in video: 28%|██▊ | 84/300 [00:47<02:05, 1.73it/s]
propagate in video: 28%|██▊ | 85/300 [00:47<02:04, 1.73it/s]
propagate in video: 29%|██▊ | 86/300 [00:48<02:04, 1.72it/s]
propagate in video: 29%|██▉ | 87/300 [00:48<02:03, 1.72it/s]
propagate in video: 29%|██▉ | 88/300 [00:49<02:02, 1.73it/s]
propagate in video: 30%|██▉ | 89/300 [00:50<02:02, 1.72it/s]
propagate in video: 30%|███ | 90/300 [00:50<02:01, 1.72it/s]
propagate in video: 30%|███ | 91/300 [00:51<02:01, 1.73it/s]
propagate in video: 31%|███ | 92/300 [00:51<02:00, 1.72it/s]
propagate in video: 31%|███ | 93/300 [00:52<02:00, 1.72it/s]
propagate in video: 31%|███▏ | 94/300 [00:52<01:59, 1.72it/s]
propagate in video: 32%|███▏ | 95/300 [00:53<01:59, 1.72it/s]
propagate in video: 32%|███▏ | 96/300 [00:54<01:58, 1.72it/s]
propagate in video: 32%|███▏ | 97/300 [00:54<01:57, 1.72it/s]
propagate in video: 33%|███▎ | 98/300 [00:55<01:57, 1.73it/s]
propagate in video: 33%|███▎ | 99/300 [00:55<01:56, 1.72it/s]
propagate in video: 33%|███▎ | 100/300 [00:56<01:55, 1.72it/s]
propagate in video: 34%|███▎ | 101/300 [00:56<01:55, 1.73it/s]
propagate in video: 34%|███▍ | 102/300 [00:57<01:54, 1.72it/s]
propagate in video: 34%|███▍ | 103/300 [00:58<01:54, 1.73it/s]
propagate in video: 35%|███▍ | 104/300 [00:58<01:53, 1.72it/s]
propagate in video: 35%|███▌ | 105/300 [00:59<01:53, 1.72it/s]
propagate in video: 35%|███▌ | 106/300 [00:59<01:52, 1.72it/s]
propagate in video: 36%|███▌ | 107/300 [01:00<01:51, 1.73it/s]
propagate in video: 36%|███▌ | 108/300 [01:01<01:51, 1.72it/s]
propagate in video: 36%|███▋ | 109/300 [01:01<01:50, 1.73it/s]
propagate in video: 37%|███▋ | 110/300 [01:02<01:50, 1.72it/s]
propagate in video: 37%|███▋ | 111/300 [01:02<01:49, 1.72it/s]
propagate in video: 37%|███▋ | 112/300 [01:03<01:49, 1.72it/s]
propagate in video: 38%|███▊ | 113/300 [01:03<01:48, 1.72it/s]
propagate in video: 38%|███▊ | 114/300 [01:04<01:47, 1.72it/s]
propagate in video: 38%|███▊ | 115/300 [01:05<01:47, 1.72it/s]
propagate in video: 39%|███▊ | 116/300 [01:05<01:47, 1.72it/s]
propagate in video: 39%|███▉ | 117/300 [01:06<01:46, 1.72it/s]
propagate in video: 39%|███▉ | 118/300 [01:06<01:45, 1.72it/s]
propagate in video: 40%|███▉ | 119/300 [01:07<01:45, 1.72it/s]
propagate in video: 40%|████ | 120/300 [01:08<01:44, 1.72it/s]
propagate in video: 40%|████ | 121/300 [01:08<01:43, 1.72it/s]
propagate in video: 41%|████ | 122/300 [01:09<01:43, 1.72it/s]
propagate in video: 41%|████ | 123/300 [01:09<01:42, 1.72it/s]
propagate in video: 41%|████▏ | 124/300 [01:10<01:42, 1.72it/s]
propagate in video: 42%|████▏ | 125/300 [01:10<01:41, 1.72it/s]
propagate in video: 42%|████▏ | 126/300 [01:11<01:41, 1.72it/s]
propagate in video: 42%|████▏ | 127/300 [01:12<01:40, 1.72it/s]
propagate in video: 43%|████▎ | 128/300 [01:12<01:40, 1.72it/s]
propagate in video: 43%|████▎ | 129/300 [01:13<01:39, 1.72it/s]
propagate in video: 43%|████▎ | 130/300 [01:13<01:38, 1.72it/s]
propagate in video: 44%|████▎ | 131/300 [01:14<01:38, 1.72it/s]
propagate in video: 44%|████▍ | 132/300 [01:14<01:37, 1.72it/s]
propagate in video: 44%|████▍ | 133/300 [01:15<01:37, 1.72it/s]
propagate in video: 45%|████▍ | 134/300 [01:16<01:36, 1.72it/s]
propagate in video: 45%|████▌ | 135/300 [01:16<01:35, 1.72it/s]
propagate in video: 45%|████▌ | 136/300 [01:17<01:35, 1.72it/s]
propagate in video: 46%|████▌ | 137/300 [01:17<01:34, 1.72it/s]
propagate in video: 46%|████▌ | 138/300 [01:18<01:34, 1.72it/s]
propagate in video: 46%|████▋ | 139/300 [01:19<01:33, 1.71it/s]
propagate in video: 47%|████▋ | 140/300 [01:19<01:33, 1.72it/s]
propagate in video: 47%|████▋ | 141/300 [01:20<01:32, 1.72it/s]
propagate in video: 47%|████▋ | 142/300 [01:20<01:31, 1.72it/s]
propagate in video: 48%|████▊ | 143/300 [01:21<01:31, 1.72it/s]
propagate in video: 48%|████▊ | 144/300 [01:21<01:31, 1.71it/s]
propagate in video: 48%|████▊ | 145/300 [01:22<01:30, 1.72it/s]
propagate in video: 49%|████▊ | 146/300 [01:23<01:29, 1.72it/s]
propagate in video: 49%|████▉ | 147/300 [01:23<01:29, 1.72it/s]
propagate in video: 49%|████▉ | 148/300 [01:24<01:28, 1.72it/s]
propagate in video: 50%|████▉ | 149/300 [01:24<01:28, 1.71it/s]
propagate in video: 50%|█████ | 150/300 [01:25<01:27, 1.72it/s]
propagate in video: 50%|█████ | 151/300 [01:26<01:26, 1.72it/s]
propagate in video: 51%|█████ | 152/300 [01:26<01:26, 1.72it/s]
propagate in video: 51%|█████ | 153/300 [01:27<01:25, 1.71it/s]
propagate in video: 51%|█████▏ | 154/300 [01:27<01:25, 1.72it/s]
propagate in video: 52%|█████▏ | 155/300 [01:28<01:24, 1.72it/s]
propagate in video: 52%|█████▏ | 156/300 [01:28<01:23, 1.72it/s]
propagate in video: 52%|█████▏ | 157/300 [01:29<01:23, 1.71it/s]
propagate in video: 53%|█████▎ | 158/300 [01:30<01:22, 1.71it/s]
propagate in video: 53%|█████▎ | 159/300 [01:30<01:22, 1.72it/s]
propagate in video: 53%|█████▎ | 160/300 [01:31<01:21, 1.71it/s]
propagate in video: 54%|█████▎ | 161/300 [01:31<01:20, 1.72it/s]
propagate in video: 54%|█████▍ | 162/300 [01:32<01:20, 1.72it/s]
propagate in video: 54%|█████▍ | 163/300 [01:33<01:19, 1.71it/s]
propagate in video: 55%|█████▍ | 164/300 [01:33<01:19, 1.71it/s]
propagate in video: 55%|█████▌ | 165/300 [01:34<01:18, 1.72it/s]
propagate in video: 55%|█████▌ | 166/300 [01:34<01:18, 1.72it/s]
propagate in video: 56%|█████▌ | 167/300 [01:35<01:17, 1.72it/s]
propagate in video: 56%|█████▌ | 168/300 [01:35<01:17, 1.71it/s]
propagate in video: 56%|█████▋ | 169/300 [01:36<01:16, 1.71it/s]
propagate in video: 57%|█████▋ | 170/300 [01:37<01:15, 1.72it/s]
propagate in video: 57%|█████▋ | 171/300 [01:37<01:15, 1.72it/s]
propagate in video: 57%|█████▋ | 172/300 [01:38<01:14, 1.72it/s]
propagate in video: 58%|█████▊ | 173/300 [01:38<01:13, 1.72it/s]
propagate in video: 58%|█████▊ | 174/300 [01:39<01:13, 1.72it/s]
propagate in video: 58%|█████▊ | 175/300 [01:40<01:12, 1.71it/s]
propagate in video: 59%|█████▊ | 176/300 [01:40<01:12, 1.71it/s]
propagate in video: 59%|█████▉ | 177/300 [01:41<01:11, 1.71it/s]
propagate in video: 59%|█████▉ | 178/300 [01:41<01:11, 1.71it/s]
propagate in video: 60%|█████▉ | 179/300 [01:42<01:10, 1.71it/s]
propagate in video: 60%|██████ | 180/300 [01:42<01:10, 1.71it/s]
propagate in video: 60%|██████ | 181/300 [01:43<01:09, 1.71it/s]
propagate in video: 61%|██████ | 182/300 [01:44<01:08, 1.71it/s]
propagate in video: 61%|██████ | 183/300 [01:44<01:08, 1.71it/s]
propagate in video: 61%|██████▏ | 184/300 [01:45<01:07, 1.71it/s]
propagate in video: 62%|██████▏ | 185/300 [01:45<01:07, 1.71it/s]
propagate in video: 62%|██████▏ | 186/300 [01:46<01:08, 1.67it/s]
propagate in video: 62%|██████▏ | 187/300 [01:47<01:06, 1.69it/s]
propagate in video: 63%|██████▎ | 188/300 [01:47<01:05, 1.70it/s]
propagate in video: 63%|██████▎ | 189/300 [01:48<01:05, 1.70it/s]
propagate in video: 63%|██████▎ | 190/300 [01:48<01:04, 1.70it/s]
propagate in video: 64%|██████▎ | 191/300 [01:49<01:03, 1.71it/s]
propagate in video: 64%|██████▍ | 192/300 [01:50<01:03, 1.71it/s]
propagate in video: 64%|██████▍ | 193/300 [01:50<01:02, 1.71it/s]
propagate in video: 65%|██████▍ | 194/300 [01:51<01:02, 1.71it/s]
propagate in video: 65%|██████▌ | 195/300 [01:51<01:01, 1.71it/s]
propagate in video: 65%|██████▌ | 196/300 [01:52<01:00, 1.71it/s]
propagate in video: 66%|██████▌ | 197/300 [01:52<01:00, 1.71it/s]
propagate in video: 66%|██████▌ | 198/300 [01:53<00:59, 1.71it/s]
propagate in video: 66%|██████▋ | 199/300 [01:54<00:59, 1.71it/s]
propagate in video: 67%|██████▋ | 200/300 [01:54<00:58, 1.71it/s]
propagate in video: 67%|██████▋ | 201/300 [01:55<00:57, 1.72it/s]
propagate in video: 67%|██████▋ | 202/300 [01:55<00:57, 1.71it/s]
propagate in video: 68%|██████▊ | 203/300 [01:56<00:56, 1.71it/s]
propagate in video: 68%|██████▊ | 204/300 [01:57<00:56, 1.71it/s]
propagate in video: 68%|██████▊ | 205/300 [01:57<00:55, 1.71it/s]
propagate in video: 69%|██████▊ | 206/300 [01:58<00:54, 1.71it/s]
propagate in video: 69%|██████▉ | 207/300 [01:58<00:54, 1.71it/s]
propagate in video: 69%|██████▉ | 208/300 [01:59<00:53, 1.71it/s]
propagate in video: 70%|██████▉ | 209/300 [01:59<00:53, 1.71it/s]
propagate in video: 70%|███████ | 210/300 [02:00<00:52, 1.71it/s]
propagate in video: 70%|███████ | 211/300 [02:01<00:52, 1.71it/s]
propagate in video: 71%|███████ | 212/300 [02:01<00:51, 1.71it/s]
propagate in video: 71%|███████ | 213/300 [02:02<00:50, 1.71it/s]
propagate in video: 71%|███████▏ | 214/300 [02:02<00:50, 1.71it/s]
propagate in video: 72%|███████▏ | 215/300 [02:03<00:49, 1.72it/s]
propagate in video: 72%|███████▏ | 216/300 [02:04<00:49, 1.71it/s]
propagate in video: 72%|███████▏ | 217/300 [02:04<00:48, 1.71it/s]
propagate in video: 73%|███████▎ | 218/300 [02:05<00:47, 1.71it/s]
propagate in video: 73%|███████▎ | 219/300 [02:05<00:47, 1.71it/s]
propagate in video: 73%|███████▎ | 220/300 [02:06<00:46, 1.71it/s]
propagate in video: 74%|███████▎ | 221/300 [02:06<00:46, 1.71it/s]
propagate in video: 74%|███████▍ | 222/300 [02:07<00:45, 1.71it/s]
propagate in video: 74%|███████▍ | 223/300 [02:08<00:45, 1.71it/s]
propagate in video: 75%|███████▍ | 224/300 [02:08<00:44, 1.71it/s]
propagate in video: 75%|███████▌ | 225/300 [02:09<00:43, 1.71it/s]
propagate in video: 75%|███████▌ | 226/300 [02:09<00:43, 1.71it/s]
propagate in video: 76%|███████▌ | 227/300 [02:10<00:42, 1.71it/s]
propagate in video: 76%|███████▌ | 228/300 [02:11<00:42, 1.71it/s]
propagate in video: 76%|███████▋ | 229/300 [02:11<00:42, 1.65it/s]
propagate in video: 77%|███████▋ | 230/300 [02:12<00:41, 1.67it/s]
propagate in video: 77%|███████▋ | 231/300 [02:12<00:41, 1.68it/s]
propagate in video: 77%|███████▋ | 232/300 [02:13<00:41, 1.64it/s]
propagate in video: 78%|███████▊ | 233/300 [02:14<00:40, 1.66it/s]
propagate in video: 78%|███████▊ | 234/300 [02:14<00:39, 1.68it/s]
propagate in video: 78%|███████▊ | 235/300 [02:15<00:38, 1.69it/s]
propagate in video: 79%|███████▊ | 236/300 [02:15<00:37, 1.69it/s]
propagate in video: 79%|███████▉ | 237/300 [02:16<00:37, 1.70it/s]
propagate in video: 79%|███████▉ | 238/300 [02:17<00:36, 1.70it/s]
propagate in video: 80%|███████▉ | 239/300 [02:17<00:35, 1.70it/s]
propagate in video: 80%|████████ | 240/300 [02:18<00:35, 1.71it/s]
propagate in video: 80%|████████ | 241/300 [02:18<00:34, 1.71it/s]
propagate in video: 81%|████████ | 242/300 [02:19<00:33, 1.71it/s]
propagate in video: 81%|████████ | 243/300 [02:19<00:33, 1.71it/s]
propagate in video: 81%|████████▏ | 244/300 [02:20<00:32, 1.71it/s]
propagate in video: 82%|████████▏ | 245/300 [02:21<00:32, 1.71it/s]
propagate in video: 82%|████████▏ | 246/300 [02:21<00:31, 1.71it/s]
propagate in video: 82%|████████▏ | 247/300 [02:22<00:31, 1.66it/s]
propagate in video: 83%|████████▎ | 248/300 [02:22<00:30, 1.68it/s]
propagate in video: 83%|████████▎ | 249/300 [02:23<00:30, 1.69it/s]
propagate in video: 83%|████████▎ | 250/300 [02:24<00:29, 1.70it/s]
propagate in video: 84%|████████▎ | 251/300 [02:24<00:28, 1.70it/s]
propagate in video: 84%|████████▍ | 252/300 [02:25<00:28, 1.70it/s]
propagate in video: 84%|████████▍ | 253/300 [02:25<00:27, 1.70it/s]
propagate in video: 85%|████████▍ | 254/300 [02:26<00:26, 1.70it/s]
propagate in video: 85%|████████▌ | 255/300 [02:27<00:26, 1.71it/s]
propagate in video: 85%|████████▌ | 256/300 [02:27<00:25, 1.71it/s]
propagate in video: 86%|████████▌ | 257/300 [02:28<00:25, 1.71it/s]
propagate in video: 86%|████████▌ | 258/300 [02:28<00:24, 1.71it/s]
propagate in video: 86%|████████▋ | 259/300 [02:29<00:23, 1.71it/s]
propagate in video: 87%|████████▋ | 260/300 [02:30<00:25, 1.59it/s]
propagate in video: 87%|████████▋ | 261/300 [02:30<00:23, 1.63it/s]
propagate in video: 87%|████████▋ | 262/300 [02:31<00:22, 1.65it/s]
propagate in video: 88%|████████▊ | 263/300 [02:31<00:22, 1.67it/s]
propagate in video: 88%|████████▊ | 264/300 [02:32<00:21, 1.68it/s]
propagate in video: 88%|████████▊ | 265/300 [02:32<00:20, 1.69it/s]
propagate in video: 89%|████████▊ | 266/300 [02:33<00:20, 1.70it/s]
propagate in video: 89%|████████▉ | 267/300 [02:34<00:19, 1.70it/s]
propagate in video: 89%|████████▉ | 268/300 [02:34<00:18, 1.70it/s]
propagate in video: 90%|████████▉ | 269/300 [02:35<00:18, 1.71it/s]
propagate in video: 90%|█████████ | 270/300 [02:35<00:17, 1.71it/s]
propagate in video: 90%|█████████ | 271/300 [02:36<00:16, 1.71it/s]
propagate in video: 91%|█████████ | 272/300 [02:37<00:16, 1.71it/s]
propagate in video: 91%|█████████ | 273/300 [02:37<00:15, 1.71it/s]
propagate in video: 91%|█████████▏| 274/300 [02:38<00:15, 1.71it/s]
propagate in video: 92%|█████████▏| 275/300 [02:38<00:14, 1.71it/s]
propagate in video: 92%|█████████▏| 276/300 [02:39<00:14, 1.71it/s]
propagate in video: 92%|█████████▏| 277/300 [02:40<00:13, 1.71it/s]
propagate in video: 93%|█████████▎| 278/300 [02:40<00:13, 1.67it/s]
propagate in video: 93%|█████████▎| 279/300 [02:41<00:12, 1.68it/s]
propagate in video: 93%|█████████▎| 280/300 [02:41<00:11, 1.69it/s]
propagate in video: 94%|█████████▎| 281/300 [02:42<00:11, 1.70it/s]
propagate in video: 94%|█████████▍| 282/300 [02:42<00:10, 1.70it/s]
propagate in video: 94%|█████████▍| 283/300 [02:43<00:09, 1.70it/s]
propagate in video: 95%|█████████▍| 284/300 [02:44<00:09, 1.71it/s]
propagate in video: 95%|█████████▌| 285/300 [02:44<00:08, 1.71it/s]
propagate in video: 95%|█████████▌| 286/300 [02:45<00:08, 1.71it/s]
propagate in video: 96%|█████████▌| 287/300 [02:45<00:07, 1.71it/s]
propagate in video: 96%|█████████▌| 288/300 [02:46<00:07, 1.71it/s]
propagate in video: 96%|█████████▋| 289/300 [02:47<00:06, 1.71it/s]
propagate in video: 97%|█████████▋| 290/300 [02:47<00:05, 1.71it/s]
propagate in video: 97%|█████████▋| 291/300 [02:48<00:05, 1.71it/s]
propagate in video: 97%|█████████▋| 292/300 [02:48<00:04, 1.71it/s]
propagate in video: 98%|█████████▊| 293/300 [02:49<00:04, 1.71it/s]
propagate in video: 98%|█████████▊| 294/300 [02:49<00:03, 1.71it/s]
propagate in video: 98%|█████████▊| 295/300 [02:50<00:02, 1.71it/s]
propagate in video: 99%|█████████▊| 296/300 [02:51<00:02, 1.71it/s]
propagate in video: 99%|█████████▉| 297/300 [02:51<00:01, 1.71it/s]
propagate in video: 99%|█████████▉| 298/300 [02:52<00:01, 1.71it/s]
propagate in video: 100%|█████████▉| 299/300 [02:52<00:00, 1.71it/s]
propagate in video: 100%|██████████| 300/300 [02:53<00:00, 1.71it/s]
propagate in video: 100%|██████████| 300/300 [02:53<00:00, 1.73it/s]
# draw one figure per sampled frame, with every stored object mask overlaid on it
vis_frame_stride = 15
plt.close("all")  # start from a clean slate before opening one figure per sampled frame
for out_frame_idx in range(len(frame_names))[::vis_frame_stride]:
    plt.figure(figsize=(6, 4))
    plt.title(f"frame {out_frame_idx}")
    plt.imshow(Image.open(os.path.join(video_dir, frame_names[out_frame_idx])))
    # overlay each tracked object's mask, colored by its object id
    for out_obj_id, out_mask in video_segments[out_frame_idx].items():
        show_mask(out_mask, plt.gca(), obj_id=out_obj_id)




















IPython widget based slider#
import os
from PIL import Image
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
# Frame step used by the slider (`step=`) and by the Previous/Next button
# handlers defined below.
vis_frame_stride = 15
# Create an output widget to display the image
# (render_frame draws inside `with output:` so that clearing the previous
# frame does not clear the controls)
output = widgets.Output()
# Function to render a frame with segmentation results
def render_frame(out_frame_idx):
    """Draw one video frame and its segmentation masks inside `output`.

    Args:
        out_frame_idx: Index used to look up both the file name in
            `frame_names` and the per-object masks in `video_segments`.

    Everything runs inside the `output` widget context, so the figure
    replaces the previously shown one while the controls stay in place.
    """
    with output:
        # wait=True defers the clear until new output arrives
        clear_output(wait=True)
        plt.figure(figsize=(6, 4))
        plt.title(f"frame {out_frame_idx}")
        frame_path = os.path.join(video_dir, frame_names[out_frame_idx])
        plt.imshow(Image.open(frame_path))
        masks_by_object = video_segments[out_frame_idx]
        for object_id in masks_by_object:
            show_mask(masks_by_object[object_id], plt.gca(), obj_id=object_id)
        plt.axis('off')
        plt.show()
# Observer for the slider's `value` trait
def update_frame(change):
    """Render the frame the slider was just moved to.

    Args:
        change: Change notification passed by `observe`; only its 'new'
            entry (the updated slider value) is read.
    """
    selected_frame_idx = change['new']
    render_frame(selected_frame_idx)
# Frame shown when the cell first runs
out_frame_idx = 0
# Slider over the frame indices, moving in vis_frame_stride-sized steps
frame_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(frame_names) - 1,
    step=vis_frame_stride,
    description="Frame",
    continuous_update=False,
)
# Re-render through update_frame whenever the slider's value changes
frame_slider.observe(update_frame, names='value')
# Buttons for Previous and Next
def on_prev_clicked(b):
    """Move the slider back by one stride unless it is already at frame 0.

    Args:
        b: The clicked button (unused).

    NOTE(review): the subtraction can yield a negative number when the
    current value is smaller than the stride; this presumably relies on the
    IntSlider keeping its value within [min, max] -- confirm.
    """
    if frame_slider.value <= 0:
        return
    frame_slider.value -= vis_frame_stride
def on_next_clicked(b):
    """Move the slider forward by one stride unless it is on the last frame.

    Args:
        b: The clicked button (unused).

    NOTE(review): the sum can exceed the last frame index; this presumably
    relies on the IntSlider keeping its value within [min, max] -- confirm.
    """
    if frame_slider.value >= len(frame_names) - 1:
        return
    frame_slider.value += vis_frame_stride
# Create each navigation button and attach its click handler
prev_button = widgets.Button(description="Previous")
prev_button.on_click(on_prev_clicked)
next_button = widgets.Button(description="Next")
next_button.on_click(on_next_clicked)
# Put the controls in one row and show them together with the image output
controls = widgets.HBox([prev_button, frame_slider, next_button])
display(controls, output)
# Draw the starting frame so the output is populated before any interaction
render_frame(out_frame_idx)
Use Databricks displayHTML to build a custom image slider#
import os
import base64
def encode_image_to_base64(file_path):
    """Read a file in binary mode and return its contents as base64 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded to a `str`.
    """
    with open(file_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
def process_images(directory, stride=10):
    """Build the element list of a JavaScript array of base64 JPEG data URIs.

    Args:
        directory: Folder whose entries are listed (non-recursively); only
            names ending in '.jpg' are considered, in sorted order.
        stride: Keep every `stride`-th image, starting with the first one.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A string in which each selected image appears as a double-quoted
        ``data:image/jpeg;base64,...`` URI followed by a comma, ready to be
        placed between ``[`` and ``]`` in JavaScript. Empty when the
        directory has no '.jpg' files.

    Raises:
        ValueError: If `stride` is smaller than 1.
    """
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    images = sorted(f for f in os.listdir(directory) if f.endswith('.jpg'))
    # Each base64 payload can be large; a single join avoids re-copying the
    # accumulated string on every iteration as repeated `+=` would.
    return "".join(
        f'"data:image/jpeg;base64,{encode_image_to_base64(os.path.join(directory, images[i]))}",'
        for i in range(0, len(images), stride)
    )
# Comma-terminated, double-quoted base64 data URIs for every 10th .jpg in the
# folder (see process_images); this text is pasted into the JS array below.
# NOTE(review): the Volume path is hard-coded here -- presumably it holds the
# same frames as `video_dir` used by the cells above; confirm.
imgs = process_images("/Volumes/sam/default/frames/")
# Self-contained HTML page: an <img> with previous/next buttons that wrap
# around at either end, plus a label that shows the mouse position converted
# to the image's natural pixel size.
# This is an f-string: `{imgs}` is the only interpolated field; every doubled
# brace ({{ / }}) is an escaped literal brace for the CSS and JavaScript.
html = f'''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Image Slider with Coordinates</title>
<style>
#slider-container {{
width: 50%;
margin: auto;
text-align: center;
position: relative;
}}
#image {{
width: 100%;
display: block;
}}
.nav-button {{
position: absolute;
top: 50%;
transform: translateY(-50%);
background-color: rgba(255, 255, 255, 0.5);
border: none;
font-size: 2em;
cursor: pointer;
}}
#prev {{
left: 0;
}}
#next {{
right: 0;
}}
#coords {{
position: absolute;
top: 10px;
left: 10px;
background: white;
padding: 5px;
}}
</style>
</head>
<body>
<div id="slider-container">
<button id="prev" class="nav-button">◀</button>
<img id="image" src="" alt="Image Slider">
<button id="next" class="nav-button">▶</button>
<div id="coords">Coordinates: (x, y)</div>
</div>
<script>
let images = [
{imgs}
];
let currentIndex = 0;
let imgElement = document.getElementById('image');
let prevButton = document.getElementById('prev');
let nextButton = document.getElementById('next');
let coords = document.getElementById('coords');
function showImage(index) {{
imgElement.src = images[index];
}}
prevButton.addEventListener('click', () => {{
currentIndex = (currentIndex > 0) ? currentIndex - 1 : images.length - 1;
showImage(currentIndex);
}});
nextButton.addEventListener('click', () => {{
currentIndex = (currentIndex < images.length - 1) ? currentIndex + 1 : 0;
showImage(currentIndex);
}});
imgElement.addEventListener('mousemove', (e) => {{
let rect = e.target.getBoundingClientRect();
let scaleX = imgElement.naturalWidth / rect.width;
let scaleY = imgElement.naturalHeight / rect.height;
let x = (e.clientX - rect.left) * scaleX;
let y = (e.clientY - rect.top) * scaleY;
coords.textContent = `Coordinates: (${{Math.round(x)}}, ${{Math.round(y)}})`;
}});
// Initialize the slider with the first image
showImage(currentIndex);
</script>
</body>
</html>
'''
# Render the page in the cell output (displayHTML is the Databricks notebook
# helper named in this section's heading).
displayHTML(html)
Coordinates: (x, y)