import json
import math
import os
import random
import shutil
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Pool

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps
from tqdm import tqdm

# Character dictionary; every character in this file is a candidate glyph.
file_path = "/data/lh123/lh/ppocr_keys_v1.txt"
def rotate_rectangle(top_left, bottom_right, angle_degrees):
    """Return the four corners of an axis-aligned rectangle as a flat list.

    Args:
        top_left: ``(x, y)`` of the top-left corner.
        bottom_right: ``(x, y)`` of the bottom-right corner.
        angle_degrees: Accepted for interface compatibility but NOT applied.
            The previous implementation defined a rotation helper and never
            called it, so the corners were always returned unrotated; that
            behaviour is preserved here.
            NOTE(review): the visible caller passes the bounding box of a
            glyph that was already rotated with ``expand=True``, so rotating
            again here would presumably be wrong - confirm before changing.

    Returns:
        ``[x1, y1, x2, y2, x3, y3, x4, y4]`` in the order top-left, top-right,
        bottom-right, bottom-left.
    """
    top_right = (bottom_right[0], top_left[1])
    bottom_left = (top_left[0], bottom_right[1])
    corners = (top_left, top_right, bottom_right, bottom_left)
    return [coord for corner in corners for coord in corner]
def img_to_array(img, x1, y1, x2, y2, x3, y3, x4, y4):
    """Convert an image into a list of pixel rows.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and ``getdata()``
            returning the pixels in row-major order (e.g. a PIL image).
        x1, y1, x2, y2, x3, y3, x4, y4: Unused; kept so the signature matches
            the other region helpers in this file.

    Returns:
        A list with one entry per image row, each a list of ``width`` pixels.
    """
    width, height = img.size
    flat_pixels = list(img.getdata())
    rows = []
    for row_start in range(0, width * height, width):
        rows.append(flat_pixels[row_start:row_start + width])
    return rows
def calculate_average_color(img_array, x1, y1, x2, y2, x3, y3, x4, y4):
    """Return the integer mean of the first three channels inside a box.

    The box is the axis-aligned bounding box of the four corner points, with
    the maximum x and y excluded (half-open ranges).

    Args:
        img_array: Row-major image, ``img_array[y][x]`` being a pixel with at
            least three channels.
        x1, y1, x2, y2, x3, y3, x4, y4: Integer corner coordinates.

    Returns:
        ``[c0, c1, c2]`` floor-averaged channel values. ``[0, 0, 0]`` is
        returned when the box covers no pixel of the image (previously this
        raised ``ZeroDivisionError``).
    """
    height = len(img_array)
    width = len(img_array[0]) if height else 0

    # Clip to the image: negative indices would otherwise wrap around to the
    # opposite edge, and indices past the end would raise IndexError.
    x_start = max(min(x1, x2, x3, x4), 0)
    x_stop = min(max(x1, x2, x3, x4), width)
    y_start = max(min(y1, y2, y3, y4), 0)
    y_stop = min(max(y1, y2, y3, y4), height)

    totals = [0, 0, 0]
    count = 0
    for y in range(y_start, y_stop):
        row = img_array[y]
        for x in range(x_start, x_stop):
            pixel = row[x]
            # int() keeps the running sum a Python int, so narrow NumPy
            # pixel types cannot overflow it.
            totals[0] += int(pixel[0])
            totals[1] += int(pixel[1])
            totals[2] += int(pixel[2])
            count += 1

    if count == 0:
        return [0, 0, 0]
    return [total // count for total in totals]
def generate_distinct_colors(avg_color, num_colors, x1, y1, x2, y2, x3, y3, x4, y4):
    """Build a palette by shifting ``avg_color`` by random per-entry offsets.

    Entry ``i`` adds ``random.randint(100, 200) + 15 * i`` to each of the
    first three channels of ``avg_color`` and wraps the result modulo 256.

    Args:
        avg_color: Sequence with at least three channel values.
        num_colors: Number of palette entries to generate.
        x1, y1, x2, y2, x3, y3, x4, y4: Unused; kept for signature
            compatibility with the other region helpers.

    Returns:
        A list of ``num_colors`` 3-tuples.
    """
    colors = []
    for i in range(num_colors):
        random_shift = random.randint(100, 200) + i * 15
        colors.append(tuple((avg_color[j] + random_shift) % 256 for j in range(3)))
    return colors


def color_regions(img_array, color, colors, x1, y1, x2, y2, x3, y3, x4, y4):
    """Recolour each connected region of ``color`` inside a box, in place.

    Regions are 4-connected groups of pixels whose first three channels equal
    those of ``color``. Region ``k`` (in row-major discovery order) is painted
    with ``colors[k % len(colors)]``.

    Only pixels inside the bounding box of the four corner points (maxima
    included) are ever modified. Seeds are searched inside that box only:
    the earlier full-image scan could not paint anything outside the box, yet
    every matching pixel out there still advanced the palette index.

    Args:
        img_array: Row-major image supporting ``img_array[y][x]`` read/write.
        color: Target colour; only its first three channels are compared.
        colors: Non-empty palette of replacement colours.
        x1, y1, x2, y2, x3, y3, x4, y4: Integer corner coordinates.

    Returns:
        The same ``img_array`` object, modified in place.
    """
    height = len(img_array)
    width = len(img_array[0]) if height else 0

    # Box bounds are loop invariants; clip them to the image once.
    x_lo = max(min(x1, x2, x3, x4), 0)
    x_hi = min(max(x1, x2, x3, x4), width - 1)
    y_lo = max(min(y1, y2, y3, y4), 0)
    y_hi = min(max(y1, y2, y3, y4), height - 1)

    directions = [(0, 1), (0, -1), (1, 0), (-1, 0)]
    marked = set()
    region_count = 0
    threshold = 0  # exact colour match

    def color_distance(c1, c2):
        # int() avoids wrap-around when pixels are unsigned NumPy scalars.
        return sum((int(c1[k]) - int(c2[k])) ** 2 for k in range(3)) ** 0.5

    def matches(pixel):
        return color_distance(pixel, color) <= threshold

    def flood_fill(seed_x, seed_y, new_color):
        # Explicit stack instead of recursion: regions can be large.
        stack = [(seed_x, seed_y)]
        while stack:
            x, y = stack.pop()
            if not (x_lo <= x <= x_hi and y_lo <= y <= y_hi):
                continue
            if (x, y) in marked or not matches(img_array[y][x]):
                continue
            marked.add((x, y))
            img_array[y][x] = new_color
            for dx, dy in directions:
                stack.append((x + dx, y + dy))

    for y in range(y_lo, y_hi + 1):
        for x in range(x_lo, x_hi + 1):
            if (x, y) not in marked and matches(img_array[y][x]):
                flood_fill(x, y, colors[region_count % len(colors)])
                region_count += 1
    return img_array
def replace_color(img_array, target_color, replace_color, x1, y1, x2, y2, x3, y3, x4, y4):
    """Paint ``target_color`` pixels and grow them by one pixel inside a box.

    Every pixel in rows ``[y1, y3)`` and columns ``[x1, x2)`` that equals
    ``target_color`` is set to ``replace_color``, and so are its up/down/
    left/right neighbours that do not already equal ``target_color``. Pixels
    that were painted only because they were a neighbour do not spread
    further, so one call grows the matched area by exactly one pixel.

    Args:
        img_array: Image as nested sequences or an array, indexed ``[y][x]``.
        target_color: Colour to look for (compared with ``np.array_equal``).
        replace_color: Colour to write.
        x1, y1, x2, y2, x3, y3, x4, y4: Integer corner coordinates; only
            ``x1``, ``x2``, ``y1`` and ``y3`` are used.

    Returns:
        A new ``numpy.ndarray`` with the replacement applied; the input is
        not modified.
    """
    img_array = np.array(img_array)
    if img_array.ndim < 2:
        return img_array

    # Bounds come from the array itself rather than a hard-coded 1104x688.
    height, width = img_array.shape[:2]
    y_start, y_stop = max(y1, 0), min(y3, height)
    x_start, x_stop = max(x1, 0), min(x2, width)

    # True where a pixel was painted as a neighbour (and must not spread).
    grown = np.zeros((height, width), dtype=bool)

    for y in range(y_start, y_stop):
        for x in range(x_start, x_stop):
            if not np.array_equal(img_array[y][x], target_color):
                continue
            img_array[y][x] = replace_color
            if grown[y][x]:
                continue
            neighbours = []
            if y > y_start:
                neighbours.append((y - 1, x))
            if y + 1 < height:
                neighbours.append((y + 1, x))
            if x > x_start:
                neighbours.append((y, x - 1))
            if x + 1 < width:
                neighbours.append((y, x + 1))
            for ny, nx in neighbours:
                if not np.array_equal(img_array[ny][nx], target_color):
                    img_array[ny][nx] = replace_color
                    grown[ny][nx] = True
    return img_array


class CreateData:
    """Generate synthetic images of rotated characters pasted on backgrounds.

    Each instance produces ``create_num`` images. For every image it pastes
    4 or 5 randomly chosen characters with random font, size, rotation and
    position, optionally recolours some of them, saves the image and appends
    one annotation line to the module-level manifest handle ``f``.
    """

    _FONT_DIR = '/data/lh123/lh/verification_code/generate_data/fonts'
    _FONT_SUFFIXES = ('.ttf', '.otf', '.ttc', '.TTF', '.OTF', '.TTC')

    def __init__(self, file_num):
        """Load background paths and the character dictionary.

        Args:
            file_num: Batch index; used to offset output image numbers.
        """
        background_dir = '/data/lh123/lh/verification_code/generate_data/点选文字背景_压缩/'
        self.jay_img_paths = [background_dir + name for name in os.listdir(background_dir)]
        self.file_num = file_num
        self.img_save_path = '/data/lh123/lh/verification_code/generate_data/trian_多/'
        self.label_save_path = '/data/lh123/lh/verification_code/generate_data/labels/'
        self.test_path = '/data/lh123/lh/verification_code/generate_data/test/'
        with open(file_path, "r", encoding="utf-8") as key_file:
            content = key_file.read()
        # Line breaks are separators, not glyphs: drawing one yields an empty
        # image and would split the one-line-per-image manifest.
        self.songs = [ch for ch in content if ch not in "\r\n"]
        self.song2label = {song: i for i, song in enumerate(self.songs)}
        self.label2song = {i: song for i, song in enumerate(self.songs)}
        self.create_num = 1000
        self.image_w = 1104
        self.image_h = 688
        self.max_iou = 0.01

    def create_folder(self):
        """Recreate the image, label and test output directories (emptied)."""
        targets = (self.img_save_path, self.label_save_path, self.test_path)
        while True:
            try:
                for path in targets:
                    shutil.rmtree(path, ignore_errors=True)
                    os.makedirs(path, exist_ok=True)
            except OSError:
                # Retry on filesystem errors only; the former bare ``except``
                # also swallowed KeyboardInterrupt and made this unstoppable.
                continue
            break

    def bbox_iou(self, box2):
        """Return the IoU of ``box2`` with the first placed box it overlaps.

        Args:
            box2: ``(xmin, ymin, xmax, ymax)`` candidate box.

        Returns:
            The first IoU greater than ``self.max_iou`` against the boxes in
            ``self.tmp_boxes_boxs1``, or ``0`` if there is none.
        """
        for box1 in self.tmp_boxes_boxs1:
            inter_w = min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1
            inter_h = min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1
            if inter_w <= 0 or inter_h <= 0:
                # Disjoint. Without this guard two negative extents multiply
                # into a positive "intersection" and reject valid placements.
                continue
            inter_area = inter_w * inter_h
            box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
            box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
            iou = inter_area / (box1_area + box2_area - inter_area + 1e-16)
            if iou > self.max_iou:
                return iou
        return 0

    def _random_font_path(self):
        """Return the path of a randomly chosen font file in the font directory."""
        font_files = [
            name for name in os.listdir(self._FONT_DIR)
            if name.endswith(self._FONT_SUFFIXES)
        ]
        return os.path.join(self._FONT_DIR, random.choice(font_files))

    @staticmethod
    def _text_size(font, text):
        """Return the (width, height) canvas needed to draw ``text`` at (0, 0)."""
        if hasattr(font, "getbbox"):
            # ``getsize`` was removed in Pillow 10; the right/bottom edges of
            # the bounding box are the equivalent extent.
            _, _, right, bottom = font.getbbox(text)
            return math.ceil(right), math.ceil(bottom)
        return font.getsize(text)

    @staticmethod
    def _format_manifest_line(image_filename, songs, boxes_list):
        """Build one manifest line: path, a tab, then a JSON list of labels."""
        entries = []
        for song, box in zip(songs, boxes_list):
            points = ",".join(
                f"[{int(box[2 * c])},{int(box[2 * c + 1])}]" for c in range(4)
            )
            # json.dumps escapes quotes/backslashes that would break the JSON.
            text = json.dumps(str(song), ensure_ascii=False)
            entries.append(f'{{"transcription":{text},"points":[{points}]}}')
        return f'labels/{image_filename}\t[' + ",".join(entries) + ']\n'

    def draw_text(self, image, image_draw, song, font_path):
        """Paste one randomly styled character onto ``image``.

        Up to 3000 random placements are tried until the box overlaps the
        already placed boxes by at most ``self.max_iou``; the last attempt is
        pasted regardless. If the current font renders an empty mask for the
        character, another random font is selected and the attempt repeated.

        Args:
            image: Background image, modified in place.
            image_draw: Unused; kept for interface compatibility.
            song: The character to draw.
            font_path: Path of the font to try first.

        Returns:
            ``(image, boxes, label, boxes1, font_color, song)`` where
            ``boxes`` is the 8-value corner tuple, ``label`` the character's
            index and ``boxes1`` is ``(xmin, ymin, xmax, ymax)``.
        """
        self.font_path = font_path
        iou = np.inf
        num = 0
        while iou > self.max_iou:
            if num >= 3000:
                break
            random_font_size = np.random.randint(110, 240)
            random_rotate = np.random.randint(-60, 60)
            random_x = int(np.random.randint(1, self.image_w))
            random_y = int(np.random.randint(1, self.image_h))
            font = ImageFont.truetype(self.font_path, random_font_size)
            label = self.song2label[song]

            # Render the glyph as a greyscale mask, then rotate it.
            size_wh = self._text_size(font, song)
            img = Image.new('L', size_wh)
            img_draw = ImageDraw.Draw(img)
            img_draw.text((0, 0), song, font=font, fill=255)
            img_rotate = img.rotate(random_rotate, resample=2, expand=True)

            # Font colour is the local background shifted by half the range.
            background_color = image.getpixel((random_x, random_y))
            shifted = (np.array(background_color) + np.array([128, 128, 128])) % 256
            font_color = tuple(int(channel) for channel in shifted)
            img_color = ImageOps.colorize(img_rotate, (0, 0, 0), font_color)

            # Pull the glyph back inside the image if it would overflow.
            w, h = img_color.size
            xmin = random_x
            ymin = random_y
            if random_x + w > self.image_w:
                xmin = self.image_w - w - 2
            if random_y + h > self.image_h:
                ymin = self.image_h - h - 2
            xmax = xmin + w
            ymax = ymin + h

            boxes = tuple(rotate_rectangle((xmin, ymin), (xmax, ymax), random_rotate)[:8])
            boxes1 = (xmin, ymin, xmax, ymax)
            iou = self.bbox_iou(boxes1)

            fnt = ImageFont.truetype(self.font_path, 15)
            if not fnt.getmask(song):
                # Empty mask: switch font and force another attempt.
                self.font_path = self._random_font_path()
                iou = 1
            num += 1
        image.paste(img_color, box=(xmin, ymin), mask=img_rotate)
        return image, boxes, label, boxes1, font_color, song

    def process(self, boxes):
        """Convert ``(xmin, ymin, xmax, ymax)`` to normalised ``[x, y, w, h]``.

        ``x``/``y`` are the box centre and ``w``/``h`` its size, each divided
        by the image width or height.
        """
        x1, y1, x2, y2 = boxes
        x = ((x1 + x2) / 2) / self.image_w
        y = ((y1 + y2) / 2) / self.image_h
        w = (x2 - x1) / self.image_w
        h = (y2 - y1) / self.image_h
        return [x, y, w, h]

    def main(self):
        """Generate ``create_num`` images and their manifest lines."""
        num = 1
        for i in tqdm(range(self.create_num)):
            self.font_color_list = []
            random_song_num = np.random.randint(4, 6)  # 4 or 5 characters
            random_jay_img_path = np.random.choice(self.jay_img_paths)
            image = Image.open(random_jay_img_path).convert('RGB').resize(
                (self.image_w, self.image_h))
            image_draw = ImageDraw.Draw(image)
            boxes_list = []
            label_list = []
            self.tmp_boxes = []
            self.tmp_boxes_boxs1 = []
            self.song_list = []
            for _ in range(random_song_num):
                song = np.random.choice(self.songs)
                self.font_path = self._random_font_path()
                image, boxes, label, boxes1, font_color, self.song = self.draw_text(
                    image, image_draw, song, self.font_path)
                self.font_color_list.append(font_color)
                self.tmp_boxes.append(boxes)
                self.tmp_boxes_boxs1.append(boxes1)
                self.song_list.append(song)
                boxes_list.append(boxes)
                label_list.append(label)

            if i < self.create_num:
                image_filename = self.img_save_path + f'image{self.file_num * 1000 + num}.jpg'
                label_filename = self.label_save_path + f'image{num}.txt'
            else:
                image_filename = self.test_path + f'test{i}.png'
                label_filename = self.test_path + f'test{i}.txt'
            # NOTE(review): label_filename is computed but never used below;
            # all labels go to the shared manifest instead.
            num = num + 1

            # Recolour the first 0-2 characters region by region.
            random_num = random.randint(0, 2)
            for idx in range(random_num):
                corners = boxes_list[idx][:8]
                img_array = img_to_array(image, *corners)
                avg_color = calculate_average_color(img_array, *corners)
                colors = generate_distinct_colors(avg_color, 6, *corners)
                f_color = self.font_color_list[idx] + (255,)
                f_color_nup = np.array(list(self.font_color_list[idx]))
                # Two passes grow the font-coloured area by two pixels.
                img_array = replace_color(img_array, f_color_nup, f_color_nup, *corners)
                img_array = replace_color(img_array, f_color_nup, f_color_nup, *corners)
                new_img_array = color_regions(img_array, f_color, colors, *corners)
                image = Image.fromarray(np.uint8(new_img_array))

            image.save(image_filename, format='JPEG')

            # ``f`` is the module-level manifest handle shared by all worker
            # processes: emit the whole line in one write and flush at once so
            # lines from different workers are not interleaved or lost.
            f.write(self._format_manifest_line(image_filename, self.song_list, boxes_list))
            f.flush()


def im_process(file_num):
    """Worker entry point: generate one batch of images for ``file_num``."""
    # Forked workers inherit identical NumPy RNG state; reseed from OS
    # entropy so batches do not repeat the same random choices.
    np.random.seed()
    creator = CreateData(file_num)
    creator.main()


# Record the launching process id so the run can be located or stopped.
with open("process.pid", "w") as ij:
    ij.write(str(os.getpid()))
# Batch indices handed to the worker pool, one task per index.
my_list = list(range(0, 501))
# Manifest file that receives one annotation line per generated image.
txt_file = '/data/lh123/lh/verification_code/generate_data/train_多.txt'

if __name__ == "__main__":
    # The workers write to the module-level handle ``f``; they only see it
    # when they are forked from this process after the file is opened.
    # NOTE(review): this relies on the "fork" start method - confirm the
    # target platform does not default to "spawn".
    #
    # buffering=1 (line buffering) pushes each finished line to the OS as
    # soon as its newline is written, so output from different workers is
    # not split mid-line and is not left in a buffer when the pool is torn
    # down. UTF-8 is explicit because the labels contain non-ASCII text.
    with open(txt_file, 'w', encoding='utf-8', buffering=1) as f:
        with Pool(processes=os.cpu_count()) as t:
            results = t.map(im_process, my_list)