📑 Data Building. Practice Project 1_0: Letter Detection
✒️ Code Modules & Settings
xxxxxxxxxx
#!python3 -m pip install tensorflow==2.6.0 \
#--user --quiet --no-warn-script-location
path='/home/sc_work/.sage/local/lib/python3.9/site-packages'
import sys,warnings; sys.path.append(path)
warnings.filterwarnings('ignore')
import os,zipfile,h5py,cv2,urllib,requests
import numpy as np,tensorflow as tf
# URL prefix of the remote folder the letter photos are downloaded from.
file_path='https://raw.githubusercontent.com/'+\
          'OlgaBelitskaya/data/main/letters/'
# Default target size (in pixels) used by tf_get_img when resizing.
img_size=int(2048)
# Module-level placeholders; _get_params declares them global and fills them.
file_pre=''; file_list=[]; file_name=''
lc=0; uc=0
tf_img=[]; img=[]; gray_img=[]; edges=[]
# Label vocabularies, indexed by the three numeric parts of a file prefix:
# letter case, letter, paper background.
names=[
    ['lowercase','uppercase'],
    list(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'),
    ['single-colored paper','striped paper',
     'squared paper','graph paper'],
]
✒️ Image Loading
xxxxxxxxxx
def tf_get_img(file_name,file_path=file_path,img_size=img_size):
    """Download an image and return it as a resized float32 tensor batch.

    The resource at ``file_path+file_name`` is saved as ``file_name`` in the
    current working directory, decoded by TensorFlow into 3 channels,
    converted to float32 and resized so that its longer side becomes
    ``img_size`` (the other side is scaled by the same factor, truncated to
    an integer).

    Args:
        file_name: remote file name; also used as the local file name.
        file_path: URL prefix prepended to ``file_name``.
        img_size: target length of the longer image side.

    Returns:
        The resized image tensor with a new leading axis.
    """
    # a bare `import urllib` does not guarantee the `request` submodule is loaded
    import urllib.request
    # context managers close both handles even if the transfer fails midway
    with urllib.request.urlopen(file_path+file_name) as input_file, \
         open(file_name,'wb') as output_file:
        output_file.write(input_file.read())
    img=tf.io.read_file(file_name)
    img=tf.image.decode_image(img,channels=3)
    img=tf.image.convert_image_dtype(img,tf.float32)
    # spatial dimensions only (channel axis dropped), as floats for the scaling
    shape=tf.cast(tf.shape(img)[:-int(1)],tf.float32)
    new_shape=tf.cast(shape*img_size/max(shape),tf.int32)
    img=tf.image.resize(img,new_shape)
    return img[tf.newaxis,:]
def cv_get_img(file_name,lower_canny,upper_canny,
               file_path=file_path,img_size=img_size):
    """Download an image and return RGB, grayscale and Canny-edge versions.

    The resource at ``file_path+file_name`` is saved as ``file_name`` in the
    current working directory and then read with OpenCV.

    Args:
        file_name: remote file name; also used as the local file name.
        lower_canny: first threshold passed to ``cv2.Canny`` (cast to int).
        upper_canny: second threshold passed to ``cv2.Canny`` (cast to int).
        file_path: URL prefix prepended to ``file_name``.
        img_size: accepted for signature compatibility; not used here.

    Returns:
        Tuple ``(rgb/255., gray, edges)``.

    Raises:
        IOError: if OpenCV cannot read the downloaded file.
    """
    # a bare `import urllib` does not guarantee the `request` submodule is loaded
    import urllib.request
    # context managers close both handles even if the transfer fails midway
    with urllib.request.urlopen(file_path+file_name) as input_file, \
         open(file_name,'wb') as output_file:
        output_file.write(input_file.read())
    img=cv2.imread(file_name)
    if img is None:
        # cv2.imread reports an unreadable file by returning None
        raise IOError('cannot read image file: '+file_name)
    gray_img=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    edges=cv2.Canny(gray_img,int(lower_canny),int(upper_canny))
    # kept from the original; no window is opened in this function
    cv2.waitKey(int(0))
    return img/255.,gray_img,edges
xxxxxxxxxx
layout=dict(top=[['lower_upper','letter','background'], (
['lower_canny','upper_canny']]))
def _get_params(lower_upper=selector(names[0],default='uppercase'),
letter=selector(names[1],default='б'),
background=selector(names[2],
default='single-colored paper'),
lower_canny=selector([10,20,..,210],default=20),
upper_canny=selector([10,20,..,210],default=120)):
global file_pre,file_list,file_name,\
tf_img,img,gray_img,edges,lc,uc
file_pre='%02d_'%names[0].index(lower_upper)
file_pre+='%02d_'%names[1].index(letter)
file_pre+='%02d_'%names[2].index(background)
print('file prefix: '+file_pre)
file_list=np.array([file_pre+'%02d'%(i+1)+'.jpg'
for i in range(4)])
file_name=file_list[0]
lc,uc=lower_canny,upper_canny
print('[lower_canny,upper_canny] = %s'%str([lc,uc]))
tf_img=tf_get_img(file_name)
img,gray_img,edges=cv_get_img(file_name,lc,uc)
for el in [tf_img.numpy()[int(0)],img,gray_img,edges]:
matrix_plot(el,figsize=(5,7)).show()
✒️ Contour Detection
xxxxxxxxxx
def get_closed_img(edges,kernel):
    """Run a morphological closing over ``edges``.

    ``kernel`` is the side length of the square (``cv2.MORPH_RECT``)
    structuring element; it is cast to int before use. Returns the result
    of ``cv2.morphologyEx`` with ``cv2.MORPH_CLOSE``.
    """
    side=int(kernel)
    rect=cv2.getStructuringElement(cv2.MORPH_RECT,(side,side))
    result=cv2.morphologyEx(edges,cv2.MORPH_CLOSE,rect)
    cv2.waitKey(int(0))
    return result
def get_contours(gray_img,closed_edges):
    """Find external contours and draw their polygon approximations.

    Args:
        gray_img: image the approximated contours are drawn onto; it is
            passed directly to ``cv2.drawContours``, so it is drawn on
            in place.
        closed_edges: edge map searched for contours (a copy is passed to
            ``cv2.findContours``).

    Returns:
        Tuple ``(contours, contour_img)``. When no contour is found,
        ``contour_img`` is ``gray_img`` itself (the original raised
        UnboundLocalError in that case).
    """
    # [-2] selects the contour list whether findContours returns
    # (contours, hierarchy) or (image, contours, hierarchy)
    contours=cv2.findContours(
        closed_edges.copy(),cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE)[-2]
    # bind the result up front so an empty contour list still returns an image
    contour_img=gray_img
    for c in contours:
        arc=cv2.arcLength(c,True)
        # polygon tolerance is 2% of the contour perimeter
        approx_pdp=cv2.approxPolyDP(c,float(.02)*arc,True)
        contour_img=cv2.drawContours(
            gray_img,[approx_pdp],-int(1),
            (int(0),int(255),int(0)),int(2))
    cv2.waitKey(int(0))
    return contours,contour_img
kernel,edge_kernel,closed_edges,contours,contour_img=0,0,[],[],[]
layout=dict(top=[['select_kernel']])) (
def _get_kernel(select_kernel=selector([3,5,..,17],default=7)):
global kernel,edge_kernel,closed_edges,contours,contour_img
edge_kernel=select_kernel
closed_edges=get_closed_img(edges,edge_kernel)
contours,contour_img=get_contours(gray_img,closed_edges)
for el in [closed_edges,contour_img]:
matrix_plot(el,figsize=(5,7)).show()
✒️ Data Building
xxxxxxxxxx
def get_imgs(contours,img_size_out=int(32),source=None):
    """Crop the bounding boxes of mid-sized contours and resize them.

    A contour is used only when both sides of its bounding rectangle are
    strictly between 30 and 250 pixels. The box is enlarged by a small
    random margin before cropping, and the crop is kept only when both of
    its sides exceed 30 pixels.

    Args:
        contours: iterable of contours accepted by ``cv2.boundingRect``.
        img_size_out: side length of the square output images.
        source: image to crop from; when omitted, the module-level ``img``
            is used, which is what the original implementation always did.

    Returns:
        ``np.array`` of the resized crops (empty when nothing qualifies).
    """
    if source is None:
        source=img
    imgs=[]
    for c in contours:
        x,y,w,h=cv2.boundingRect(c)
        # drawn for every contour, including rejected ones, as in the original
        m=np.random.randint(int(7),int(21))
        if not (int(30)<w<int(250) and int(30)<h<int(250)):
            continue
        # random margins scale with the coordinate values themselves
        y1=int(y*(int(1)-float(.0015)*m))
        y2=int((y+h)*(int(1)+float(.0005)*m))
        x1=int(x*(int(1)-float(.001)*m))
        x2=int((x+w)*(int(1)+float(.001)*m))
        new_img=source[y1:y2,x1:x2]
        if min(new_img.shape[int(0)],
               new_img.shape[int(1)])>int(30):
            imgs.append(cv2.resize(new_img,
                                   (img_size_out,img_size_out)))
    return np.array(imgs)
imgs=get_imgs(contours); n_img=int(7)
layout=dict(top=[['image_number']])) (
def _get_images(image_number=selector([2,3,..,10],default=7)):
global imgs
if len(imgs)<49: image_number=int(2)
ga=[[matrix_plot(imgs[i+image_number*j])
for i in range(image_number)] for j in range(image_number)]
graphics_array(ga).show(frame=False)
✒️ Data ZIP Storing
xxxxxxxxxx
file_list_out=[]; file_name_zip=file_pre[:-1]+'.zip'
def get_zip(imgs,start=0):
    """Append ``imgs`` as PNG files to the archive ``file_name_zip``.

    Each image is multiplied by 255, written to a temporary PNG named
    ``file_pre`` + 4-digit index + ``.png``, added to the archive and then
    deleted from the working directory.

    Args:
        imgs: sequence of images to store.
        start: index used for the first image; later ones count up from it.

    Returns:
        The module-level ``file_list_out`` list, extended in place with the
        names written by this call.
    """
    global file_name_zip,file_pre,file_list_out
    current_list_out=[]
    if len(imgs):
        # mode 'a' appends to an existing archive and creates a missing one,
        # so the archive is opened once instead of once per image
        with zipfile.ZipFile(file_name_zip,'a') as archive:
            for i in range(len(imgs)):
                file_name_img=file_pre+'%04d'%(start+i)+'.png'
                cv2.imwrite(file_name_img,imgs[i]*255.)
                archive.write(file_name_img)
                os.remove(file_name_img)
                current_list_out.append(file_name_img)
    file_list_out+=current_list_out
    return file_list_out
file_list_out=get_zip(imgs)
xxxxxxxxxx
# Round-trip check: copy the last listed file out of the archive into the
# working directory, read it back with OpenCV and display it.
with zipfile.ZipFile(file_name_zip) as z, \
     open(file_list_out[-1],'wb') as f:
    f.write(z.read(file_list_out[-1]))
example_img=cv2.imread(file_list_out[-1])
matrix_plot(example_img/255,figsize=(3,3)).show()
✒️ Data H5 Storing
xxxxxxxxxx
# Store the current crops, their labels and the label vocabularies in HDF5.
file_name_h5=file_pre[:-1]+'.h5'
# every crop gets the same label triple, parsed from the CC_LL_BB_ prefix
lbls=[[int(file_pre[:2]),int(file_pre[3:5]),int(file_pre[6:8])]
      for i in range(len(imgs))]
imgs=np.array(imgs,dtype='float32')
lbls=np.array(lbls,dtype='int32')
print(imgs.shape,lbls.shape)
# fixed-width byte strings are sized in UTF-8 bytes, not characters, so
# non-ASCII names are never truncated
maxlen=max([max([len(n.encode('utf-8')) for n in names[i]])
            for i in range(len(names))])
# np.bytes_ is the same type the removed np.string_ alias referred to
nms=[np.array([np.bytes_(name.encode('utf-8'))
               for name in names[i]],dtype='S%d'%maxlen)
     for i in range(len(names))]
with h5py.File(file_name_h5,'w') as f:
    f.create_dataset('images',data=imgs,compression='gzip')
    f.create_dataset('labels',data=lbls,compression='gzip')
    for i in range(len(names)):
        f.create_dataset('names%d'%(i+1),data=nms[i],
                         dtype='S%d'%maxlen,compression='gzip')
# the with-statement closes the file; no explicit close() is needed
xxxxxxxxxx
# Read the HDF5 file back and show the first image with its decoded labels.
with h5py.File(file_name_h5,'r') as f:
    keys=list(f.keys())
    print('file keys: '+', '.join(keys))
    # datasets are addressed by the names they were written under rather
    # than by their position in the key list
    images=np.array(f['images']); labels=np.array(f['labels'])
    names=[[el.decode('utf-8') for el in f[k]]
           for k in keys if k.startswith('names')]
# the with-statement closes the file; no explicit close() is needed
ti=[names[i][labels[0,i]] for i in range(3)]
matrix_plot(images[0],title=str(ti),figsize=(3,3)).show()
✒️ ZIP Storing of All Images with the Same Labels
xxxxxxxxxx
# Start a fresh archive that first receives a text file with the label names.
if os.path.exists(file_name_zip): os.remove(file_name_zip)
start=0; file_list_out=[]
# np.bytes_ is the same type the removed np.string_ alias referred to
letter_names=(
    [['name %d'%i,[np.bytes_(name.encode('utf-8'))
                   for name in names[i]]] for i in range(len(names))])
np.savetxt('letter_names.txt',[str(letter_names)],fmt='%s')
# first and last letter of the alphabet list (last taken by position, not
# by a hard-coded index)
print('letter labels: '+letter_names[1][1][0].decode('utf-8')+\
      '-'+letter_names[1][1][-1].decode('utf-8'))
with zipfile.ZipFile(file_name_zip,'w') as f:
    f.write('letter_names.txt')
def file_exist(file_name,file_path=file_path,timeout=float(10)):
    """Report whether ``file_path+file_name`` answers a HEAD request with 200.

    Prints the URL followed by 'exists' or 'does not exist'.

    Args:
        file_name: remote file name.
        file_path: URL prefix prepended to ``file_name``.
        timeout: seconds passed to ``requests.head`` so an unresponsive
            server cannot block the notebook indefinitely.

    Returns:
        True when the status code equals 200, otherwise False.
    """
    url=file_path+file_name
    found=requests.head(url,timeout=timeout).status_code==int(200)
    print(url,'exists' if found else 'does not exist')
    return found
# Process the candidate photos in order, stopping at the first missing one,
# and append every crop to the archive with a running index.
for f in file_list:
    if not file_exist(f): break
    img,gray_img,edges=cv_get_img(f,lc,uc)
    closed_edges=get_closed_img(edges,edge_kernel)
    contours,contour_img=get_contours(gray_img,closed_edges)
    imgs=get_imgs(contours)
    file_list_out=get_zip(imgs=imgs,start=start)
    # advance by the number of crops just stored; parsing the index from
    # the last file name read only 3 of its 4 digits and failed on an
    # empty list
    start+=len(imgs)
file_list_out[-1]
xxxxxxxxxx
layout=dict(top=[['image_step']])) (
def _get_examples(image_step=selector([10,20,..,100],default=10)):
global file_name_zip,file_list_out
with zipfile.ZipFile(file_name_zip) as z:
for i in range(3):
current_file_name=file_list_out[-1-image_step*i]
with open(current_file_name,'wb') as f:
f.write(z.read(current_file_name))
example_img=cv2.imread(current_file_name)
matrix_plot(example_img/255,title=current_file_name,
figsize=(3,3)).show()
z.close()
✒️ H5 Storing of All Images with the Same Labels
xxxxxxxxxx
# Collect the crops and labels of every available photo that shares the
# current file prefix.
file_name_h5=file_pre[:-1]+'.h5'; imgs,lbls=[],[]
def file_exist(file_name,file_path=file_path,timeout=float(10)):
    """Report whether ``file_path+file_name`` answers a HEAD request with 200.

    Prints the URL followed by 'exists' or 'does not exist'.

    Args:
        file_name: remote file name.
        file_path: URL prefix prepended to ``file_name``.
        timeout: seconds passed to ``requests.head`` so an unresponsive
            server cannot block the notebook indefinitely.

    Returns:
        True when the status code equals 200, otherwise False.
    """
    url=file_path+file_name
    found=requests.head(url,timeout=timeout).status_code==int(200)
    print(url,'exists' if found else 'does not exist')
    return found
for f in file_list:
    if file_exist(f):
        img,gray_img,edges=cv_get_img(f,lc,uc)
        closed_edges=get_closed_img(edges,edge_kernel)
        contours,contour_img=get_contours(gray_img,closed_edges)
        imgs0=get_imgs(contours)
        # a photo without usable crops yields an empty array whose shape
        # cannot be stacked with the others, so it is skipped
        if len(imgs0)==0: continue
        imgs.append(imgs0)
        lbls.append([[int(file_pre[:2]),int(file_pre[3:5]),
                      int(file_pre[6:8])]
                     for i in range(len(imgs0))])
# NOTE(review): np.vstack still raises if no photo produced any crop
imgs=np.array(np.vstack(imgs),dtype='float32')
lbls=np.array(np.vstack(lbls),dtype='int32')
print(imgs.shape,lbls.shape)
# Write the collected crops, labels and label vocabularies to HDF5.
# Fixed-width byte strings are sized in UTF-8 bytes, not characters, so
# non-ASCII names are never truncated.
maxlen=max([max([len(n.encode('utf-8')) for n in names[i]])
            for i in range(len(names))])
# np.bytes_ is the same type the removed np.string_ alias referred to
nms=[np.array([np.bytes_(name.encode('utf-8'))
               for name in names[i]],
              dtype='S%d'%maxlen)
     for i in range(len(names))]
with h5py.File(file_name_h5,'w') as f:
    f.create_dataset('images',data=imgs,compression='gzip')
    f.create_dataset('labels',data=lbls,compression='gzip')
    for i in range(len(names)):
        f.create_dataset('names%d'%(i+1),data=nms[i],
                         dtype='S%d'%maxlen,compression='gzip')
# the with-statement closes the file; no explicit close() is needed
xxxxxxxxxx
# Read the HDF5 file back and show one randomly chosen image with its labels.
with h5py.File(file_name_h5,'r') as f:
    keys=list(f.keys())
    print('file keys: '+', '.join(keys))
    # datasets are addressed by the names they were written under rather
    # than by their position in the key list
    images=np.array(f['images']); labels=np.array(f['labels'])
    names=[[el.decode('utf-8') for el in f[k]]
           for k in keys if k.startswith('names')]
# the with-statement closes the file; no explicit close() is needed
# NOTE(review): randint is not imported in the visible code — presumably
# Sage's built-in with an inclusive upper bound; confirm.
randi=randint(0,images.shape[0]-1)
ti=[names[i][labels[randi,i]] for i in range(3)]
matrix_plot(images[randi],title='n: %d =>'%randi+str(ti),
            figsize=(3,3)).show()
No comments:
Post a Comment