'make cuda-convnet batches from paired cover/stego images in the input dirs; batches are numbered starting from 1'

import os
import sys
import numpy as np
import cPickle as pickle
import random
from natsort import natsorted
from PIL import Image
from PIL import ImageOps
from PIL import ImageFilter

def process( image, filtre ):
    """Turn an image into a flat 1-D numpy array, optionally filtered first.

    image  -- when filtre == 1, an object exposing a PIL-style ``filter``
              method (main passes the result of Image.open); otherwise
              anything np.array accepts.
    filtre -- 1 applies the 5x5 kernel below and divides every resulting
              value by 12; any other value returns the pixels unfiltered.

    Returns the values flattened with reshape( -1 ): float64 when the filter
    is applied, otherwise whatever dtype np.array infers from the image.
    """
    if filtre != 1:
        # No filtering requested: convert to an array and flatten.
        return np.array( image ).reshape( -1 )

    # 5x5 convolution kernel applied by PIL.
    # NOTE(review): scale = 0 is kept exactly as in the original call;
    # confirm how the installed PIL version handles a zero scale.
    kernel = ImageFilter.Kernel(
        (5,5),
        (-1,  2,  -2,  2, -1,
          2, -6,   8, -6,  2,
         -2,  8, -12,  8, -2,
          2, -6,   8, -6,  2,
         -1,  2,  -2,  2, -1),
        scale = 0, offset = 0 )
    filtered = np.array( image.filter( kernel ), dtype=np.float64 )

    # Normalisation: divide every value by 12. Done as one vectorised
    # operation so that every element is normalised whatever the image
    # shape (the previous nested loops used the row count for both axes and
    # relied on np.alen, which recent NumPy releases no longer provide).
    return ( filtered / 12.0 ).reshape( -1 )

# Build the path of a batch file from the output directory and the batch number
def get_batch_path( output_dir, number ):
    """Return the path of the batch file `number` inside `output_dir`.

    The file itself is named "data_batch_<number>".
    """
    return os.path.join( output_dir, "data_batch_{}".format( number ) )

# Create an empty batch (zero columns) for images of the given size
def get_empty_batch( size ):
    """Return an empty float64 batch with size * size rows and no columns.

    Image columns are meant to be stacked horizontally onto this array.
    """
    row_count = size * size
    return np.zeros( shape = ( row_count, 0 ), dtype = np.float64 )

# Write a batch to disk, assigning alternating 0/1 labels to its columns
def write_batch( path, batch ):
    """Pickle a batch to `path` together with alternating 0/1 labels.

    path  -- destination file; opened in binary write mode, so an existing
             file is overwritten.
    batch -- 2-D array with one sample per column; one label is generated
             for each of its batch.shape[1] columns.

    Labels alternate 0, 1, 0, 1, ... starting with 0 for column 0. main()
    appends each cover image immediately followed by its stego image, so
    with that layout 0 marks a cover column and 1 a stego column.

    The file holds the dict { 'labels': labels, 'data': batch } pickled with
    protocol 1.
    """
    print("writing {}...\n".format( path ))
    # Even columns get 0, odd columns get 1.
    labels = [ column % 2 for column in range( batch.shape[1] ) ]
    d = { 'labels': labels, 'data': batch }
    # The with-block guarantees the file is flushed and closed, even if
    # pickling fails part-way through.
    with open( path, "wb" ) as handle:
        pickle.dump( d, handle, 1 )
	
def main():
    """Build batches from paired cover/stego images given on the command line.

    Command line: <cover_dir> <stego_dir> <output_dir> <image_size> <filtre>

    Every '.pgm' file name found in the stego directory is opened from both
    the cover and the stego directory and passed through process(). The two
    resulting columns (cover first, stego second) are added to the current
    batch. Each time batch_size columns are available they are written with
    write_batch() to the path given by get_batch_path(), numbering batches
    from 1. Every processed file name is appended to name.txt inside the
    output directory.

    Columns left over after the last full batch are not written; a message
    reports how many were dropped. The script exits with status 1 when
    arguments are missing or when process() raises ValueError.
    """
    if len( sys.argv ) < 6:
        sys.stderr.write(
            "usage: {} cover_dir stego_dir output_dir image_size filtre\n".format( sys.argv[0] ) )
        sys.exit( 1 )

    input_dir = sys.argv[1]             # directory of the cover images
    input_dir_s = sys.argv[2]           # directory of the stego images
    output_dir = sys.argv[3]            # directory receiving the batches
    image_size = int( sys.argv[4] )     # side length of the images
    filtre = int( sys.argv[5] )         # 1 to apply the filter, 0 otherwise

    batch_counter = 1                   # number of the next batch to write
    batch_size = 2000                   # columns (images) per batch
    expected_pixels = image_size * image_size

    print("reading file names...")
    # File names come from the stego directory, in os.listdir order; the
    # cover directory is expected to hold a file with the same name.
    names = [ d for d in os.listdir( input_dir_s ) if d.endswith( '.pgm' ) ]

    # Columns are collected in a list and concatenated once per batch
    # instead of re-copying the whole batch for every appended image.
    columns = []

    # os.path.join (as in get_batch_path) keeps name.txt inside output_dir
    # even when the directory is given without a trailing separator.
    with open( os.path.join( output_dir, 'name.txt' ), 'a' ) as name_log:
        for index, n in enumerate( names, 1 ):
            name_log.write( n + "\n" )
            image = Image.open( os.path.join( input_dir, n ) )
            image_s = Image.open( os.path.join( input_dir_s, n ) )
            try:
                image = process( image, filtre )
                image_s = process( image_s, filtre )
            except ValueError:
                print("problem with image {}".format( n ))
                sys.exit( 1 )

            # Cover column first, stego column second: write_batch labels
            # the columns 0, 1, 0, 1, ... in that order.
            for column in ( image.reshape( -1, 1 ), image_s.reshape( -1, 1 ) ):
                # Fail immediately on a size mismatch, as stacking onto the
                # batch would, but with a message naming the image.
                if column.shape[0] != expected_pixels:
                    raise ValueError(
                        "image {} has {} values, expected {}".format(
                            n, column.shape[0], expected_pixels ) )
                columns.append( column )

            if len( columns ) >= batch_size:
                # Starting from the empty float64 batch keeps the batch
                # dtype float64 whatever the dtype of the columns.
                current_batch = np.hstack(
                    [ get_empty_batch( image_size ) ] + columns[:batch_size] )
                batch_path = get_batch_path( output_dir, batch_counter )
                write_batch( batch_path, current_batch )

                batch_counter += 1
                columns = columns[batch_size:]

            # Progress report: print the name of every 1000th image.
            if index % 1000 == 0:
                print(n)

    if columns:
        # Only full batches are written; make the dropped remainder visible.
        print("{} leftover columns not written (incomplete batch)".format( len( columns ) ))
	

# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()