'make cuda-convnet batches from images in the input dir; start numbering batches from 7' import os import sys import numpy as np import cPickle as pickle import random from natsort import natsorted from PIL import Image from PIL import ImageOps from PIL import ImageFilter def process( image, filtre ): if filtre == 1: #Application du premier filtre de taille 4 im2 = image.filter(ImageFilter.Kernel((5,5),(-1, 2, -2, 2, -1, 2, -6, 8, -6, 2, -2, 8, -12, 8, -2, 2, -6, 8, -6, 2, -1, 2, -2, 2, -1),scale = 0, offset = 0)) #Transformation de l'image en tableau im2 = np.array( im2, dtype=np.float64 ) #normalisation, on divise par 12 tous les pixels obtenus for i in range(np.alen( im2 )): for j in range(np.alen( im2 )): im2[i][j] = im2[i][j]/12.0 im2 = im2.reshape( -1 ) return im2 else: #Transformation de l'image en tableau image = np.array( image ) image = image.reshape( -1 ) return image #On recupere les batchs avec leur numero et leur chemin def get_batch_path( output_dir, number ): filename = "data_batch_{}".format( number ) return os.path.join( output_dir, filename ) #Creation des batch vide de la taille voulu def get_empty_batch( size ): return np.zeros(( size * size, 0 ), dtype = np.float64 ) #Ecriture des batch avec attribution des labels def write_batch( path, batch ): print "writing {}...\n".format( path ) test = False labels = [ 1 for x in range( batch.shape[1] ) ] for x in range( batch.shape[1] ): if test: labels[x] = 1 test = False else: labels[x] = 0 test = True #print labels d = { 'labels': labels, 'data': batch } pickle.dump( d, open( path, "wb" ), 1) def main(): input_dir = sys.argv[1] #Chemin des images cover input_dir_s = sys.argv[2] #Chemin des images stego output_dir = sys.argv[3] #Chemin de la sortie image_size = int ( sys.argv[4] )#Taille des images filtre = int ( sys.argv[5] ) #Application du filtre 0/1 name = open(output_dir+'name.txt', 'a') batch_counter = 1 #Compteur des batchs batch_size = 2000 #Taille des batchs print "reading file names..." names = [ d for d in os.listdir( input_dir_s ) if d.endswith( '.pgm') ] #On recupere les noms des images dans le dossier #names = natsorted( names, reverse = True) #reverse = True pour inverser le contenu du tableau #random.shuffle(names) current_batch = get_empty_batch( image_size ) counter = 1 for n in names: #Parcours des images name.write(n + "\n") image = Image.open( os.path.join( input_dir, n )) image_s = Image.open( os.path.join( input_dir_s, n)) try: image = process( image, filtre ) image_s = process( image_s, filtre ) except ValueError: print "problem with image {}".format( n ) sys.exit( 1 ) image = image.reshape( -1, 1 ) image_s = image_s.reshape( -1, 1 ) current_batch = np.hstack(( current_batch, image )) #On stocke les images qui vont aller dans le batch current_batch = np.hstack(( current_batch, image_s )) if current_batch.shape[1] == batch_size: #Si on a atteint la taille du batch, on recupere le batch et on l'ecrit batch_path = get_batch_path( output_dir, batch_counter ) write_batch( batch_path, current_batch ) batch_counter += 1 current_batch = get_empty_batch( image_size ) counter += 1 if counter % 1000 == 0: print n if __name__ == '__main__': main()