在图像/视频中裁剪出人脸区域

1. 在图像中裁剪人脸区域

import face_alignment
import skimage.io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage.transform import resize
from tqdm import tqdm
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# =================================================================================================
# Kun(20250306): print the ffmpeg command that crops the face region out of an image
# =================================================================================================


def extract_bbox(frame, fa):
    """Detect faces in ``frame`` and return their boxes in original-frame coordinates.

    Frames whose longer side exceeds 640 px are downscaled before detection; the
    returned coordinates are multiplied by the same factor so they refer to the
    frame as it was passed in.

    Args:
        frame: Image array indexed as (row, column[, channel]).
        fa: Object exposing ``face_detector.detect_from_image``.

    Returns:
        ``[]`` when nothing is detected, otherwise an array with one row per
        detection. The last column of every detection is dropped (presumably the
        detector's confidence score -- confirm against the detector in use).
    """
    longest_side = max(frame.shape[0], frame.shape[1])
    if longest_side > 640:
        scale_factor = longest_side / 640.0
        frame = resize(frame, (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor)))
        frame = img_as_ubyte(frame)
    else:
        scale_factor = 1

    # A 2-D (grayscale) frame has no channel axis; without this expansion the
    # slices below would cut and reverse image columns instead of channels.
    if frame.ndim == 2:
        frame = np.stack((frame,) * 3, axis=-1)

    frame = frame[..., :3]  # keep only the first three channels (drops e.g. alpha)
    bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])  # reversed channel order for the detector
    if len(bboxes) == 0:
        return []
    return np.array(bboxes)[:, :-1] * scale_factor


def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Coordinates are treated as inclusive pixel indices, hence the ``+ 1`` in
    every extent.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    return interArea / float(boxAArea + boxBArea - interArea)


def join(tube_bbox, bbox):
    """Return the smallest ``(x1, y1, x2, y2)`` box that contains both boxes."""
    xA = min(tube_bbox[0], bbox[0])
    yA = min(tube_bbox[1], bbox[1])
    xB = max(tube_bbox[2], bbox[2])
    yB = max(tube_bbox[3], bbox[3])
    return (xA, yA, xB, yB)


def compute_bbox(tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Build the ffmpeg command that crops and rescales the face region of an image.

    The box is enlarged by at least ``increase_area`` on every side, with the
    shorter dimension enlarged further so that both sides reach
    ``(1 + 2 * increase_area) * max(width, height)``. The result is truncated to
    integers and clamped to the frame.

    Args:
        tube_bbox: ``(left, top, right, bottom)`` of the detected face.
        frame_shape: Shape of the source frame; index 0 bounds the bottom edge
            and index 1 bounds the right edge.
        inp: Path of the input file; shell-quoted in the returned command.
        image_shape: Pair inserted as ``scale=<image_shape[0]>:<image_shape[1]>``.
        increase_area: Minimum relative enlargement applied on each side.

    Returns:
        The ffmpeg command line as a string; the output file is ``crop.png``.

    Raises:
        ValueError: If the box has a non-positive width or height.
    """
    import shlex  # local import keeps this block self-contained

    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top
    if width <= 0 or height <= 0:
        # Would otherwise surface as a division by zero / NaN-to-int failure.
        raise ValueError(f"degenerate bounding box: {tuple(tube_bbox)}")

    # Computing aspect preserving bbox
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Clamp the enlarged box to the frame.
    top, bot = max(0, top), min(bot, frame_shape[0])
    left, right = max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    scale = f'{image_shape[0]}:{image_shape[1]}'
    # Quote the path so names containing spaces or shell metacharacters survive.
    source = shlex.quote(str(inp))
    return f'ffmpeg -i {source} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.png'


def process_image(args):
    """Detect a face in ``args.inp`` and return the ffmpeg crop command for it.

    Args:
        args: Parsed command-line namespace; reads ``cpu``, ``inp``,
            ``image_shape`` and ``increase``.

    Returns:
        A one-element list holding the command, or ``[]`` (after printing a
        notice) when no face is detected.
    """
    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=False, device=device)

    frame = skimage.io.imread(args.inp)
    bboxes = extract_bbox(frame, fa)
    if len(bboxes) == 0:
        print("No faces detected.")
        return []

    tube_bbox = bboxes[0]  # Assuming we take the first detected face
    command = compute_bbox(tube_bbox, frame.shape, inp=args.inp,
                           image_shape=args.image_shape, increase_area=args.increase)
    return [command]


def main():
    """Parse the command line and print the crop command(s) for the input image."""
    parser = ArgumentParser()
    parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),
                        help="Image shape")
    parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')
    # Kept for command-line compatibility with the video script; not used for single images.
    parser.add_argument("--iou_with_initial", type=float, default=0.25,
                        help="The minimal allowed iou with inital bbox")
    parser.add_argument("--inp", required=True, help='Input image')
    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
    args = parser.parse_args()

    for command in process_image(args):
        print(command)


if __name__ == "__main__":
    main()

2. 在视频中裁剪人脸区域

import face_alignment
import skimage.io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage.transform import resize
from tqdm import tqdm
import os
import imageio
import numpy as np
import warnings
warnings.filterwarnings("ignore")# =================================================================================================
# Kun(20250306): 输出裁剪视频的命令
# =================================================================================================def extract_bbox(frame, fa):if max(frame.shape[0], frame.shape[1]) > 640:scale_factor =  max(frame.shape[0], frame.shape[1]) / 640.0frame = resize(frame, (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor)))frame = img_as_ubyte(frame)else:scale_factor = 1frame = frame[..., :3]bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])if len(bboxes) == 0:return []return np.array(bboxes)[:, :-1] * scale_factordef bb_intersection_over_union(boxA, boxB):xA = max(boxA[0], boxB[0])yA = max(boxA[1], boxB[1])xB = min(boxA[2], boxB[2])yB = min(boxA[3], boxB[3])interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)iou = interArea / float(boxAArea + boxBArea - interArea)return ioudef join(tube_bbox, bbox):xA = min(tube_bbox[0], bbox[0])yA = min(tube_bbox[1], bbox[1])xB = max(tube_bbox[2], bbox[2])yB = max(tube_bbox[3], bbox[3])return (xA, yA, xB, yB)def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):left, top, right, bot = tube_bboxwidth = right - leftheight = bot - top#Computing aspect preserving bboxwidth_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))left = int(left - width_increase * width)top = int(top - height_increase * height)right = int(right + width_increase * width)bot = int(bot + height_increase * height)top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])h, w = bot - top, right - leftstart = start / fpsend = end / fpstime = end - startscale = f'{image_shape[0]}:{image_shape[1]}'return f'ffmpeg -i {inp} -ss {start} -t {time} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'def 
compute_bbox_trajectories(trajectories, fps, frame_shape, args):commands = []for i, (bbox, tube_bbox, start, end) in enumerate(trajectories):if (end - start) > args.min_frames:command = compute_bbox(start, end, fps, tube_bbox, frame_shape, inp=args.inp, image_shape=args.image_shape, increase_area=args.increase)commands.append(command)return commandsdef process_video(args):device = 'cpu' if args.cpu else 'cuda'fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=False, device=device)video = imageio.get_reader(args.inp)trajectories = []previous_frame = Nonefps = video.get_meta_data()['fps']commands = []try:for i, frame in tqdm(enumerate(video)):frame_shape = frame.shapebboxes =  extract_bbox(frame, fa)## For each trajectory check the criterionnot_valid_trajectories = []valid_trajectories = []for trajectory in trajectories:tube_bbox = trajectory[0]intersection = 0for bbox in bboxes:intersection = max(intersection, bb_intersection_over_union(tube_bbox, bbox))if intersection > args.iou_with_initial:valid_trajectories.append(trajectory)else:not_valid_trajectories.append(trajectory)commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)trajectories = valid_trajectories## Assign bbox to trajectories, create new trajectoriesfor bbox in bboxes:intersection = 0current_trajectory = Nonefor trajectory in trajectories:tube_bbox = trajectory[0]current_intersection = bb_intersection_over_union(tube_bbox, bbox)if intersection < current_intersection and current_intersection > args.iou_with_initial:intersection = bb_intersection_over_union(tube_bbox, bbox)current_trajectory = trajectory## Create new trajectoryif current_trajectory is None:trajectories.append([bbox, bbox, i, i])else:current_trajectory[3] = icurrent_trajectory[1] = join(current_trajectory[1], bbox)except IndexError as e:raise (e)commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)return commandsif __name__ == "__main__":parser = 
ArgumentParser()parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),help="Image shape")parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')parser.add_argument("--iou_with_initial", type=float, default=0.25, help="The minimal allowed iou with inital bbox")parser.add_argument("--inp", required=True, help='Input image or video')parser.add_argument("--min_frames", type=int, default=150,  help='Minimum number of frames')parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")args = parser.parse_args()commands = process_video(args)for command in commands:print (command)