1. 在图像中裁剪人脸区域
import face_alignment
import skimage. io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage. transform import resize
from tqdm import tqdm
import os
import numpy as np
import warnings
warnings. filterwarnings( "ignore" )
def extract_bbox(frame, fa):
    """Detect faces in ``frame`` and return their boxes in original-frame coordinates.

    Frames whose longest side exceeds 640 pixels are downscaled before
    detection; the returned coordinates are multiplied back by the same
    factor so they refer to the input frame.

    Args:
        frame: Image array indexed as ``(height, width, channels)``.
        fa: Object exposing ``face_detector.detect_from_image`` (here a
            ``face_alignment.FaceAlignment`` instance).

    Returns:
        An empty list when nothing is detected, otherwise an array with one
        row per detection. The last column of each raw detection is dropped
        (presumably the confidence score -- confirm against the detector).
    """
    longest_side = max(frame.shape[0], frame.shape[1])
    if longest_side > 640:
        scale_factor = longest_side / 640.0
        frame = resize(frame, (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor)))
        frame = img_as_ubyte(frame)
    else:
        scale_factor = 1
    # Keep the first three channels only (drops e.g. an alpha channel).
    frame = frame[..., :3]
    # The detector is fed the channels in reversed order.
    bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])
    if len(bboxes) == 0:
        return []
    return np.array(bboxes)[:, :-1] * scale_factor


def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Widths and heights are computed with a ``+ 1`` term, i.e. both corners
    are treated as inclusive.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    return interArea / float(boxAArea + boxBArea - interArea)


def join(tube_bbox, bbox):
    """Return the smallest ``(x1, y1, x2, y2)`` box enclosing both inputs."""
    xA = min(tube_bbox[0], bbox[0])
    yA = min(tube_bbox[1], bbox[1])
    xB = max(tube_bbox[2], bbox[2])
    yB = max(tube_bbox[3], bbox[3])
    return (xA, yA, xB, yB)


def compute_bbox(tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Build the ffmpeg command that crops and rescales one face box.

    The box is enlarged by at least ``increase_area`` of its size on every
    side; the shorter dimension is enlarged further so the region becomes
    square before it is clamped to the frame.

    Args:
        tube_bbox: ``(left, top, right, bot)`` box to crop.
        frame_shape: Shape of the source frame, ``(height, width, ...)``.
        inp: Input path inserted verbatim after ``ffmpeg -i``.
        image_shape: Two values inserted as the ffmpeg ``scale`` arguments.
        increase_area: Minimum relative enlargement per side.

    Returns:
        The ffmpeg command line as a string (output file ``crop.png``).

    Raises:
        ValueError: If the box has non-positive width or height, which would
            otherwise cause a division by zero.
    """
    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top
    if width <= 0 or height <= 0:
        raise ValueError(f'Degenerate bounding box: {tuple(tube_bbox)}')

    # Enlarge the shorter side more so that the padded region is square.
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Clamp the enlarged box to the frame.
    top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    scale = f'{image_shape[0]}:{image_shape[1]}'
    return f'ffmpeg -i {inp} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.png'


def process_image(args):
    """Detect a face in ``args.inp`` and return the ffmpeg crop command(s).

    Args:
        args: Parsed CLI arguments; reads ``cpu``, ``inp``, ``image_shape``
            and ``increase``.

    Returns:
        A one-element list holding the command for the first detected face,
        or an empty list when no face is detected.
    """
    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=False, device=device)
    frame = skimage.io.imread(args.inp)
    frame_shape = frame.shape
    bboxes = extract_bbox(frame, fa)
    if len(bboxes) == 0:
        print("No faces detected.")
        return []
    # Only the first detection is used.
    tube_bbox = bboxes[0]
    command = compute_bbox(tube_bbox, frame_shape, inp=args.inp, image_shape=args.image_shape,
                           increase_area=args.increase)
    return [command]


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),
                        help="Image shape")
    parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')
    parser.add_argument("--iou_with_initial", type=float, default=0.25,
                        help="The minimal allowed iou with inital bbox")
    parser.add_argument("--inp", required=True, help='Input image')
    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
    args = parser.parse_args()

    commands = process_image(args)
    for command in commands:
        print(command)
2. 在视频中裁剪人脸区域
import face_alignment
import skimage. io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage. transform import resize
from tqdm import tqdm
import os
import imageio
import numpy as np
import warnings
warnings. filterwarnings( "ignore" )
def extract_bbox(frame, fa):
    """Detect faces in ``frame`` and return their boxes in original-frame coordinates.

    Frames whose longest side exceeds 640 pixels are downscaled before
    detection; the returned coordinates are multiplied back by the same
    factor so they refer to the input frame.

    Args:
        frame: Image array indexed as ``(height, width, channels)``.
        fa: Object exposing ``face_detector.detect_from_image`` (here a
            ``face_alignment.FaceAlignment`` instance).

    Returns:
        An empty list when nothing is detected, otherwise an array with one
        row per detection. The last column of each raw detection is dropped
        (presumably the confidence score -- confirm against the detector).
    """
    longest_side = max(frame.shape[0], frame.shape[1])
    if longest_side > 640:
        scale_factor = longest_side / 640.0
        frame = resize(frame, (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor)))
        frame = img_as_ubyte(frame)
    else:
        scale_factor = 1
    # Keep the first three channels only (drops e.g. an alpha channel).
    frame = frame[..., :3]
    # The detector is fed the channels in reversed order.
    bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])
    if len(bboxes) == 0:
        return []
    return np.array(bboxes)[:, :-1] * scale_factor


def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Widths and heights are computed with a ``+ 1`` term, i.e. both corners
    are treated as inclusive.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    return interArea / float(boxAArea + boxBArea - interArea)


def join(tube_bbox, bbox):
    """Return the smallest ``(x1, y1, x2, y2)`` box enclosing both inputs."""
    xA = min(tube_bbox[0], bbox[0])
    yA = min(tube_bbox[1], bbox[1])
    xB = max(tube_bbox[2], bbox[2])
    yB = max(tube_bbox[3], bbox[3])
    return (xA, yA, xB, yB)


def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Build the ffmpeg command that cuts, crops and rescales one face track.

    The box is enlarged by at least ``increase_area`` of its size on every
    side; the shorter dimension is enlarged further so the region becomes
    square before it is clamped to the frame.

    Args:
        start: Index of the first frame of the track.
        end: Index of the last frame of the track.
        fps: Frame rate used to convert frame indices to seconds.
        tube_bbox: ``(left, top, right, bot)`` box to crop.
        frame_shape: Shape of a source frame, ``(height, width, ...)``.
        inp: Input path inserted verbatim after ``ffmpeg -i``.
        image_shape: Two values inserted as the ffmpeg ``scale`` arguments.
        increase_area: Minimum relative enlargement per side.

    Returns:
        The ffmpeg command line as a string (output file ``crop.mp4``).

    Raises:
        ValueError: If the box has non-positive width or height, which would
            otherwise cause a division by zero.
    """
    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top
    if width <= 0 or height <= 0:
        raise ValueError(f'Degenerate bounding box: {tuple(tube_bbox)}')

    # Enlarge the shorter side more so that the padded region is square.
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Clamp the enlarged box to the frame.
    top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    # Convert frame indices to seconds for ffmpeg's -ss / -t options.
    start = start / fps
    end = end / fps
    time = end - start

    scale = f'{image_shape[0]}:{image_shape[1]}'
    return f'ffmpeg -i {inp} -ss {start} -t {time} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'


def compute_bbox_trajectories(trajectories, fps, frame_shape, args):
    """Return ffmpeg commands for every trajectory longer than ``args.min_frames``.

    Args:
        trajectories: Iterable of ``[bbox, tube_bbox, start, end]`` entries.
        fps: Frame rate of the video.
        frame_shape: Shape of a source frame, ``(height, width, ...)``.
        args: Parsed CLI arguments; reads ``min_frames``, ``inp``,
            ``image_shape`` and ``increase``.

    Returns:
        A list of command strings, one per sufficiently long trajectory.
    """
    commands = []
    for _, tube_bbox, start, end in trajectories:
        if (end - start) > args.min_frames:
            command = compute_bbox(start, end, fps, tube_bbox, frame_shape, inp=args.inp,
                                   image_shape=args.image_shape, increase_area=args.increase)
            commands.append(command)
    return commands


def process_video(args):
    """Track faces through the video ``args.inp`` and return ffmpeg crop commands.

    Each trajectory is a list ``[initial_bbox, tube_bbox, start, end]``:
    the box that opened the track, the union of all boxes assigned to it,
    and the first/last frame index. A trajectory is closed as soon as no
    detection in the current frame overlaps its initial box with an IoU
    above ``args.iou_with_initial``.

    Args:
        args: Parsed CLI arguments; reads ``cpu``, ``inp``,
            ``iou_with_initial`` plus the fields used by
            ``compute_bbox_trajectories``.

    Returns:
        A list of ffmpeg command strings; empty if the video has no frames
        or no trajectory is long enough.
    """
    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=False, device=device)
    video = imageio.get_reader(args.inp)

    trajectories = []
    commands = []
    # Stays None when the reader yields no frames at all.
    frame_shape = None
    try:
        fps = video.get_meta_data()['fps']
        for i, frame in tqdm(enumerate(video)):
            frame_shape = frame.shape
            bboxes = extract_bbox(frame, fa)

            # Split open trajectories into those still matched by a detection
            # in this frame and those that have ended.
            not_valid_trajectories = []
            valid_trajectories = []
            for trajectory in trajectories:
                tube_bbox = trajectory[0]
                intersection = 0
                for bbox in bboxes:
                    intersection = max(intersection, bb_intersection_over_union(tube_bbox, bbox))
                if intersection > args.iou_with_initial:
                    valid_trajectories.append(trajectory)
                else:
                    not_valid_trajectories.append(trajectory)

            commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)
            trajectories = valid_trajectories

            # Assign each detection to its best-overlapping trajectory, or
            # open a new trajectory when none overlaps enough.
            for bbox in bboxes:
                intersection = 0
                current_trajectory = None
                for trajectory in trajectories:
                    tube_bbox = trajectory[0]
                    current_intersection = bb_intersection_over_union(tube_bbox, bbox)
                    if intersection < current_intersection and current_intersection > args.iou_with_initial:
                        intersection = current_intersection
                        current_trajectory = trajectory

                if current_trajectory is None:
                    trajectories.append([bbox, bbox, i, i])
                else:
                    current_trajectory[3] = i
                    current_trajectory[1] = join(current_trajectory[1], bbox)
    finally:
        # Release the underlying file handle / decoder even on failure.
        video.close()

    if frame_shape is None:
        return commands

    # Flush the trajectories that were still open at the end of the video.
    commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
    return commands


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),
                        help="Image shape")
    parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')
    parser.add_argument("--iou_with_initial", type=float, default=0.25,
                        help="The minimal allowed iou with inital bbox")
    parser.add_argument("--inp", required=True, help='Input image or video')
    parser.add_argument("--min_frames", type=int, default=150, help='Minimum number of frames')
    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
    args = parser.parse_args()

    commands = process_video(args)
    for command in commands:
        print(command)