class Colorize(object):
    """Colour text alpha masks and composite them onto background images.

    Optional border and drop-shadow layers are added at random, with
    probabilities taken from the ``p_*`` attributes set in ``__init__``.
    """

    def __init__(self, model_dir='data'):
        """Load the font-colour model found under ``model_dir`` and set the
        probabilities of the optional text effects."""
        self.font_color = FontColor(col_file=osp.join(model_dir,'models/colors_new.cp'))

        # Effect probabilities. Only p_border and p_drop_shadow are read by
        # the methods of this class.
        self.p_bevel = 0.05
        self.p_outline = 0.05
        self.p_drop_shadow = 0.15
        self.p_border = 0.15
        self.p_displacement = 0.30
        self.p_texture = 0.0

    @staticmethod
    def _effect_size(min_h):
        """Map the minimum character height (px) to an effect size (1, 3 or 5)."""
        if min_h <= 15:
            return 1
        if min_h < 30:
            return 3
        return 5

    def drop_shadow(self, alpha, theta, shift, size, op=0.80):
        """
        Cast a blurred, shifted shadow of an alpha layer.

        alpha : alpha layer whose shadow need to be cast
        theta : [0,2pi] -- the shadow direction
        shift : shift in pixels of the shadow
        size  : size of the GaussianBlur filter
        op    : opacity of the shadow (multiplying factor)

        @return : alpha of the shadow layer, uint8
                  (it is assumed that the color is black/white)
        """
        # GaussianBlur needs an odd, positive kernel size.
        if size % 2 == 0:
            size -= 1
        size = max(1, size)
        shadow = cv.GaussianBlur(alpha, (size, size), 0)
        dx, dy = shift * np.array([-np.sin(theta), np.cos(theta)])
        shadow = op * sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0)
        # Clip before the cast: callers draw `op` from a normal distribution,
        # so it can fall outside [0, 1] and would otherwise wrap around.
        return np.clip(shadow, 0, 255).astype('uint8')

    def border(self, alpha, size, kernel_type='RECT'):
        """
        Dilate the text alpha layer to obtain a border alpha layer.

        alpha : alpha layer of the text
        size  : size of the kernel
        kernel_type : one of [rect,ellipse,cross] (case-insensitive)

        @return : alpha layer of the border (color to be added externally).
        Raises KeyError for an unknown kernel_type.
        """
        kdict = {'RECT':cv.MORPH_RECT, 'ELLIPSE':cv.MORPH_ELLIPSE,
                 'CROSS':cv.MORPH_CROSS}
        kernel = cv.getStructuringElement(kdict[kernel_type.upper()], (size, size))
        return cv.dilate(alpha, kernel, iterations=1)

    def blend(self, cf, cb, mode='normal'):
        """
        Blend foreground colour `cf` with background colour `cb`.

        Placeholder: no blend modes are implemented, so the foreground colour
        is returned unchanged whatever `mode` is.
        """
        return cf

    def merge_two(self, fore, back, blend_type=None):
        """
        merge two FOREground and BACKground layers.
        ref: https://en.wikipedia.org/wiki/Alpha_compositing
        ref: Chapter 7 (pg. 440 and pg. 444):
             http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf

        blend_type : passed to `blend`; None selects plain alpha compositing.

        @return : a new Layer with uint8 alpha and color.
        """
        a_f = fore.alpha/255.0
        a_b = back.alpha/255.0
        c_f = fore.color
        c_b = back.color

        a_r = a_f + a_b - a_f*a_b
        if blend_type is not None:
            c_blend = self.blend(c_f, c_b, blend_type)
            c_r = (   ((1-a_f)*a_b)[:,:,None] * c_b
                    + ((1-a_b)*a_f)[:,:,None] * c_f
                    + (a_f*a_b)[:,:,None] * c_blend   )
        else:
            c_r = (   ((1-a_f)*a_b)[:,:,None] * c_b
                    + a_f[:,:,None]*c_f    )

        return Layer((255*a_r).astype('uint8'), c_r.astype('uint8'))

    def merge_down(self, layers, blends=None):
        """
        Merge a stack of layers, bottom-up, into a single layer.

        layers : [l1,l2,...ln] : a list of LAYER objects.
                 l1 is on the top, ln is the bottom-most layer.
        blends : the type of blend to use for each merge. Should have n-1
                 entries. use None for plain alpha blending.
        Note   : (1) it assumes that all the layers are of the SAME SIZE.

        @return : a single LAYER type object representing the merged-down
                  image (layers[0] itself when there is only one layer).
        """
        nlayers = len(layers)
        if nlayers <= 1:
            return layers[0]

        out_layer = layers[-1]
        # Walk upwards from the layer just above the bottom one; blends[i+1]
        # (negative index) is the blend between layers[i] and what lies below.
        for i in range(-2, -nlayers-1, -1):
            blend = None if blends is None else blends[i+1]
            out_layer = self.merge_two(fore=layers[i], back=out_layer,
                                       blend_type=blend)
        return out_layer

    def resize_im(self, im, osize):
        """Resize `im` to `osize` with bicubic interpolation.

        `osize` is reversed before being handed to PIL's `resize`.
        """
        return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC))

    def occlude(self):
        """
        somehow add occlusion to text.

        Placeholder: not implemented, does nothing.
        """
        pass

    def color_border(self, col_text, col_bg):
        """
        Decide on a color for the border. One of three strategies is picked
        uniformly at random:
            0. the mean text colour with channel 0 of its HSV form re-sampled,
            1. `font_color.complement` of the mean text colour,
            2. `font_color.triangle_color` of the mean text and mean
               background colours.
        In every case channel 2 (VALUE) of the HSV form is then re-sampled.

        col_text, col_bg : colour images of the text and the background.

        @return : a single RGB colour.
        """
        choice = np.random.choice(3)

        # Mean text colour, computed in HSV space.
        col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV)
        col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]),3))
        col_text = np.mean(col_text,axis=0).astype('uint8')

        vs = np.linspace(0,1)
        def get_sample(x):
            """Sample a channel value in [0, 255]; candidates farther from
            `x` are more likely, and gaussian jitter is added."""
            ps = np.abs(vs - x/255.0)
            ps /= np.sum(ps)
            v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1)
            return 255*v_rand

        def convert(col, code):
            """Convert a single colour between colour spaces."""
            return np.squeeze(cv.cvtColor(col[None,None,:], code))

        if choice==0:
            col_text[0] = get_sample(col_text[0])
            col_text = convert(col_text, cv.COLOR_HSV2RGB)
        elif choice==1:
            col_text = convert(col_text, cv.COLOR_HSV2RGB)
            col_text = self.font_color.complement(col_text)
        else:
            # Mean background colour (averaged in HSV), back in RGB.
            col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV)
            col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]),3))
            col_bg = np.mean(col_bg,axis=0).astype('uint8')
            col_bg = convert(col_bg, cv.COLOR_HSV2RGB)
            col_text = convert(col_text, cv.COLOR_HSV2RGB)
            col_text = self.font_color.triangle_color(col_text,col_bg)

        # Randomise the VALUE channel of the chosen colour.
        col_text = convert(col_text, cv.COLOR_RGB2HSV)
        col_text[2] = get_sample(col_text[2])
        return convert(col_text, cv.COLOR_HSV2RGB)

    def color_text(self, text_arr, h, bg_arr):
        """
        Decide on a color for the text.

        The foreground/background colour pair comes from
        `font_color.sample_from_data(bg_arr)`.

        text_arr : alpha mask of the text
        h        : minimum height of a character (currently unused)
        bg_arr   : background image the colours are sampled for

        @return : (text Layer, fg_col, bg_col)
        """
        fg_col,bg_col = self.font_color.sample_from_data(bg_arr)
        return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col

    def process(self, text_arr, bg_arr, min_h):
        """
        Blend the text layer onto the background patch.

        text_arr : one alpha mask : nxm, uint8
        bg_arr   : background image: nxmx3, uint8
        min_h    : height of the smallest character (px)

        return text_arr blit onto bg_arr: the result of `blit_images`, or a
        plain alpha merge onto bg_arr when `blit_images` returns None.
        """
        # The background colour sampled here is replaced by the patch mean.
        l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr)
        bg_col = np.mean(np.mean(bg_arr,axis=0),axis=0)
        l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'),color=bg_col)

        # Randomly attenuate the text opacity.
        l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1*np.random.randn(), 0.72, 1.0)
        layers = [l_text]
        blends = []

        # add border:
        if np.random.rand() < self.p_border:
            bsz = self._effect_size(min_h)
            border_a = self.border(l_text.alpha, size=bsz)
            l_border = Layer(border_a, self.color_border(l_text.color,l_bg.color))
            layers.append(l_border)
            blends.append('normal')

        # add shadow:
        if np.random.rand() < self.p_drop_shadow:
            bsz = self._effect_size(min_h)

            # one of the four diagonals, with some jitter:
            theta = np.pi/4 * np.random.choice([1,3,5,7]) + 0.5*np.random.randn()

            # shadow offset grows with the character height:
            if min_h <= 15:
                shift = 2
            elif min_h < 30:
                shift = 7+np.random.randn()
            else:
                shift = 15 + 3*np.random.randn()

            op = 0.50 + 0.1*np.random.randn()
            shadow = self.drop_shadow(l_text.alpha, theta, shift, 3*bsz, op)
            l_shadow = Layer(shadow, 0)
            layers.append(l_shadow)
            blends.append('normal')

        # Merge everything onto a flat background of the mean colour first.
        layers.append(l_bg)
        blends.append('normal')
        l_normal = self.merge_down(layers,blends)

        # Then blit the merged result onto the real background.
        l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_arr)
        l_out = blit_images(l_normal.color,l_bg.color.copy())
        if l_out is None:
            # Fall back to a plain alpha merge onto the real background.
            layers[-1] = l_bg
            return self.merge_down(layers,blends).color

        return l_out

    def check_perceptible(self, txt_mask, bg, txt_bg):
        """
        --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD ---

        checks if the text after merging with background is still visible.
        txt_mask (hxw) : binary image of text -- 255 where text is present
                                                   0 elsewhere
        bg (hxwx3) : original background image WITHOUT any text.
        txt_bg (hxwx3) : image with text.

        @return : (10/30/50/70/90th percentiles of the per-pixel Lab colour
                  difference over the text pixels, (copy of bg, copy of txt_bg))
        """
        bgo,txto = bg.copy(), txt_bg.copy()
        txt_mask = txt_mask.astype('bool')
        bg = cv.cvtColor(bg, cv.COLOR_RGB2Lab)
        txt_bg = cv.cvtColor(txt_bg, cv.COLOR_RGB2Lab)
        # Work in float: an in-place float scaling of a uint8 array is rejected
        # by NumPy, and a uint8 subtraction would wrap around.
        bg_px = bg[txt_mask,:].astype('float64')
        txt_px = txt_bg[txt_mask,:].astype('float64')
        # Rescale channel 0 (L) from the 0..255 range to 0..100
        # (assumes 8-bit Lab input -- TODO confirm for float images).
        bg_px[:,0] *= 100.0/255.0
        txt_px[:,0] *= 100.0/255.0

        diff = np.linalg.norm(bg_px-txt_px,ord=None,axis=1)
        diff = np.percentile(diff,[10,30,50,70,90])
        print ("color diff percentile :", diff)
        return diff, (bgo,txto)

    def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
        """
        Return colorized text image.

        bg_arr   : background image (nxm, nxmx1 or nxmx3); it is copied, not
                   modified.
        text_arr : list of (n x m) numpy text alpha mask (unit8).
        hs       : list of minimum heights (scalar) of characters in each
                   text-array.
        place_order : indices into text_arr; they are rendered in REVERSE of
                   this order. Defaults to 0..len(text_arr)-1.
        pad      : padding (px) kept around each text's bounding box, limited
                   by the canvas borders.

        return : nxmx3 rgb colorized text-image.
        """
        bg_arr = bg_arr.copy()
        # Expand grayscale backgrounds (nxm or nxmx1) to three channels.
        if bg_arr.ndim == 2:
            bg_arr = bg_arr[:,:,None]
        if bg_arr.shape[2] == 1:
            bg_arr = np.repeat(bg_arr, 3, axis=2)

        # get the canvas size:
        canvas_sz = np.array(bg_arr.shape[:2])

        # initialize the placement order:
        if place_order is None:
            place_order = np.arange(len(text_arr))

        for i in place_order[::-1]:
            # bounding box of the text: l is its top-left corner, m its extent.
            loc = np.where(text_arr[i])
            if loc[0].size == 0:
                # empty mask: nothing to render.
                continue
            lx, ly = np.min(loc[0]), np.min(loc[1])
            mx, my = np.max(loc[0]), np.max(loc[1])
            l = np.array([lx,ly])
            m = np.array([mx,my])-l+1
            text_patch = text_arr[i][l[0]:l[0]+m[0],l[1]:l[1]+m[1]]

            # figure out padding, limited by the distance to the canvas edges:
            ext = canvas_sz - (l+m)
            num_pad = pad*np.ones(4,dtype='int32')
            num_pad[:2] = np.minimum(num_pad[:2], l)
            num_pad[2:] = np.minimum(num_pad[2:], ext)
            text_patch = np.pad(text_patch,
                                pad_width=((num_pad[0],num_pad[2]),
                                           (num_pad[1],num_pad[3])),
                                mode='constant')
            l -= num_pad[:2]

            w,h = text_patch.shape
            bg = bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:]

            # render the text and paste it back onto the canvas:
            bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:] = self.process(text_patch, bg, hs[i])

        # Return only after ALL texts are rendered (a return inside the loop
        # would stop after the first one).
        return bg_arr
|