bindata2dctbindata.cc

  1 const char *help = "\
  2 progname: bindata2dctbindata.cc\n\
  3 code2html: This program reads a bindata and decomposes it in DCT/DCTmod2 blocks.\n\
  4 version: Torch3 vision2.0, 2004-2005\n\
  5 (c) Sebastien Marcel (marcel@idiap.ch)\n";
  6 
  7 #include "ImageGray.h"
  8 #include "ipDCT2D.h"
  9 #include "ipBlock.h"
 10 #include "ipDCTmod2.h"
 11 #include "DiskXFile.h"
 12 #include "CmdLine.h"
 13 
 14 using namespace Torch;
 15 
 16 // Allowed dimension for DCT mod2
 17 const int allowed_dim[] = {6, 10, 15, 21, 28, 36, 43, 49, 54, 58, 61, 63, 64};
 18 
 19 int main(int argc, char *argv[])
 20 {
 21 	char *filename_in;
 22 	int width;
 23 	int height;
 24 	char *filename_out;
 25 	int dct_dim;
 26 	int delta_neighbour;
 27 	int delta_keep;
 28 	int block_size;
 29 	int block_overlap;
 30 	bool block_to_frame;
 31 	bool dctmod2;
 32 	bool force;
 33 	bool verbose;
 34 	bool display;
 35 	bool xy;
 36 	bool xyIsEmbedded;
 37 	bool blocknorm;
 38 	bool unnorm;
 39 	
 40 	CmdLine cmd;
 41 	cmd.setBOption("write log", false);
 42 	cmd.info(help);
 43 	cmd.addText("\nArguments:");
 44 	cmd.addSCmdArg("imagefile in", &filename_in, "image file in");
 45 	cmd.addICmdArg("width", &width, "width");
 46 	cmd.addICmdArg("height", &height, "height");
 47 	cmd.addSCmdArg("output filename", &filename_out, "output bindata filename");
 48 	cmd.addText("\nBlock decomposition Options:");
 49 	cmd.addBCmdOption("-blocktoframe", &block_to_frame, false, "a block becomes a frame");
 50 	cmd.addICmdOption("-block", &block_size, 8, "block size");
 51 	cmd.addICmdOption("-overlap", &block_overlap, 4, "block overlap");
 52 	cmd.addText("\nDCT Options:");
 53 	cmd.addICmdOption("-dctdim", &dct_dim, 15, "DCT 2D dimension");
 54 	cmd.addICmdOption("-delta", &delta_neighbour, 1, "delta neighbour");
 55 	cmd.addICmdOption("-deltakeep", &delta_keep, 3, "delta keep");
 56 	cmd.addBCmdOption("-dctmod2", &dctmod2, false, "apply dctmod2");
 57 	cmd.addBCmdOption("-force", &force, false, "force dctmod2 for blocks larger that 8x8");
 58 	cmd.addText("\nExtra Options:");
 59 	cmd.addBCmdOption("-xy", &xy, false, "add xy coordinates of blocks");
 60 	cmd.addBCmdOption("-xyprovided", &xyIsEmbedded, false, "xy coordinates are provided");
 61 	cmd.addBCmdOption("-blocknorm", &blocknorm, false, "mean/stdv normalize blocks");
 62 	cmd.addBCmdOption("-unnorm", &unnorm, false, "unnormalize input image");
 63 	cmd.addText("\nMisc Options:");
 64 	cmd.addBCmdOption("-display", &display, false, "display dct features");
 65 	cmd.addBCmdOption("-verbose", &verbose, false, "verbose");
 66 	cmd.read(argc, argv);
 67 	
 68 	//
 69 	if(verbose) print("DCT 2D feature extraction.\n");
 70 
 71 	if((dctmod2 == true) && (block_size != 8))
 72 	{
 73 		warning("DCTmod2 is usually designed only for 8x8 blocks.");
 74 		
 75 		if(force == false) 
 76 		{
 77 			print("Disabling dctmod2.\n");
 78 
 79 		   	dctmod2 = false;
 80 		}
 81 	}
 82 
 83 	if(dctmod2 == true)
 84 	{
 85 		int dim_ok = 0;
 86 		int max_ = sizeof(allowed_dim)/sizeof(int);
 87 		
 88 		for(int i = 0 ; i < max_ ; i++)
 89 			if(dct_dim == allowed_dim[i]) dim_ok = 1;
 90 	
 91 		if(!dim_ok)
 92 		{
 93 		   	error("DCT dim (%d) is incorrect.", dct_dim);
 94 		   	print("   Correct values are: ");
 95 			for(int i = 0 ; i < max_ ; i++)
 96 			   	print("%d ", allowed_dim[i]);
 97 	  		print("\n");
 98 	
 99 			exit(0);
100 		}
101 	}
102   	
103 	//
104 	int n_images;
105 	int image_size;
106 	DiskXFile *pf_in;
107 	int n_inputs;
108 	int n_patterns;
109 	DiskXFile *pf_out;
110 
111 	// Load the image
112 	pf_in = new DiskXFile(filename_in, "r");
113 	pf_in->read(&n_images, sizeof(int), 1);
114 	pf_in->read(&image_size, sizeof(int), 1);
115 
116 	if(verbose)
117 	{
118 		print("Input file ...\n");
119 		print("   n_inputs = %d\n", image_size);
120 		print("   n_patterns = %d\n", n_images);  
121 	}
122 
123 	int offset = 0;
124 	
125 	if(xyIsEmbedded) offset = 2;
126 
127 	if(image_size-offset != width * height)
128 		error("Incorrect image size %d != %dx%d.", image_size, width, height);
129 	
130 	Sequence *seqimage = new Sequence(1, image_size-offset);
131 	
132 	// feature image machine to apply for each block
133 	ipCore *ip = NULL;
134 		
135 	int n_block, n_output;
136 	int rowblocks;
137 	int colblocks;
138 	
139 	// Choose the block by block convolution
140 	if(dctmod2 == true)
141 	{
142 		ip = new ipDCTmod2(width, height, "gray", dct_dim, block_size, block_overlap, delta_neighbour, delta_keep);
143 
144 		ipDCTmod2 *ip_ = (ipDCTmod2 *)ip;
145 		n_block = ip_->n_block;
146 		n_output = ip_->output_size;
147 		rowblocks = ip_->getrowblocks();
148 		colblocks = ip_->getcolblocks();
149 	}
150 	else
151 	{
152 		ipCore *ip_dct = NULL;
153 		ip_dct = new ipDCT2D(block_size, "gray", dct_dim);
154 		ip_dct->setBOption("verbose", verbose);
155 
156 		ip = new ipBlock(width, height, "gray", ip_dct, dct_dim, block_size, block_overlap);
157 
158 		ipBlock *ip_ = (ipBlock *)ip;
159 		n_block = ip_->n_block;
160                 n_output = dct_dim;
161 		rowblocks = colblocks = block_size;
162 	}
163 	ip->setBOption("verbose", verbose);
164 
165 	if(verbose)
166 	{
167 		print("-> n_block = %d\n", n_block);
168 		print("-> n_output = %d\n", n_output);
169 	}
170 
171 	//
172 	pf_out = new DiskXFile(filename_out, "w");
173 	if(pf_out == NULL)
174 	{
175   		error("Opening bindata file %s", filename_out);      
176     		return 0;
177 	}
178 	
179 	//
180 	int frame_size;
181 	
182 	if(block_to_frame)
183 	{
184 	  	if(verbose) print("Block to frame: each block becomes a frame.\n");
185 		
186 		n_inputs = n_output + offset;
187 		if(xy) frame_size = n_inputs + 2;
188 		else frame_size = n_inputs;
189 
190 		n_patterns = n_images * n_block;
191 	}
192 	else
193 	{
194 		if(xy) warning("Please use blocktoframe to add xy coordinates.");
195 		
196 		n_inputs = n_block * (n_output + offset);
197 		frame_size = n_inputs;
198 		n_patterns = n_images;
199 	}
200 
201 	if(verbose)
202 	{
203 		print("Output file ...\n");
204 		print("   n_inputs = %d\n", frame_size);
205 		print("   n_patterns = %d\n", n_patterns);  
206 	}
207 
208 	//
209 	pf_out->write(&n_patterns, sizeof(int), 1);
210 	pf_out->write(&frame_size, sizeof(int), 1);
211 	
212 	//
213 	real *blocks_mean = NULL;
214 	real *blocks_stdv = NULL;
215 
216 	real *data_in = new real [image_size];
217 	real *data_out = new real [n_inputs];
218 
219 	for(int p = 0 ; p < n_images ; p++)
220 	{
221 	   	pf_in->read(data_in, sizeof(real), image_size);
222 		
223 		if(unnorm)
224 		{
225 			for(int i = offset ; i < image_size ; i++)
226 			{
227 				int z_ = (int) (data_in[i] * 255.0);
228 
229 				seqimage->frames[0][i-offset] = z_;
230 			}
231 		}
232 		else
233 		{
234 			for(int i = offset ; i < image_size ; i++)
235 				seqimage->frames[0][i-offset] = data_in[i];
236 		}
237 
238 		// Apply the pattern machine (DCT) to each block of the image
239 		ip->process(seqimage);
240 
241 		if(blocknorm)
242 		{
243 			blocks_mean = new real [n_output];
244 			blocks_stdv = new real [n_output];
245 			
246 			for(int i = 0 ; i < n_output ; i++)
247 			{
248 				blocks_mean[i] = 0;
249 				blocks_stdv[i] = 0;
250 			}
251 			
252 			for(int b = 0 ; b < n_block ; b++)
253 			{
254 				real *src_ = ip->seq_out->frames[b];
255 				
256 				for(int i = 0 ; i < n_output ; i++)
257 				{
258 					real z = src_[i];
259 					blocks_mean[i] += z;
260 					blocks_stdv[i] += z*z;
261 				}
262 			}
263 
264 			for(int i = 0 ; i < n_output ; i++)
265 			{
266 				blocks_mean[i] /= (real) n_block;
267 				blocks_stdv[i] /= (real) n_block;
268 				blocks_stdv[i] -= blocks_mean[i]*blocks_mean[i];
269 				if(blocks_stdv[i] <= 0)
270 				{
271 					warning("MeanVarNorm: input column %d has a null stdv. Replaced by 1.", i);
272 					blocks_stdv[i] = 1.;
273 				}
274 				else blocks_stdv[i] = sqrt(blocks_stdv[i]);
275 			}
276 			
277 			for(int b = 0 ; b < n_block ; b++)
278 			{
279 				real *src_ = ip->seq_out->frames[b];
280 				
281 				for(int i = 0 ; i < n_output ; i++)
282 					src_[i] = (src_[i] - blocks_mean[i]) / blocks_stdv[i];
283 			}
284 		}
285 
286 		if(block_to_frame)
287 		{
288 			for(int b = 0 ; b < n_block ; b++)
289 			{
290 			   	// (July, 22 2005) There might be a BUG there: TO CHECK
291 			   	int x = (b % rowblocks);
292 				int y = (b / rowblocks);
293 				// the correct thing could be
294 			   	//int x = (b % colblocks);
295 				//int y = (b / colblocks);
296 				
297 		   		if(display == true) 
298 				{
299 					if(xy) print("Block %d (%dx%d): ", b, x, y);
300 					else print("Block %d: ", b);
301 				}
302 			
303 				real *output_ = ip->seq_out->frames[b];
304 	
305 				// if xy is embedded
306 				for(int i = 0 ; i < offset ; i++)
307 				{
308 		   			if(display == true) print("%g ", data_in[i]);
309 					
310 					data_out[i] = data_in[i];
311 				}
312 				
313 				for(int i = 0 ; i < n_output ; i++)
314 				{
315 		   			if(display == true) print("%g ", output_[i]);
316 
317 		   			data_out[i+offset] = output_[i];
318 				}
319 
320 		   		if(display == true) print("\n");
321 	
322 				if(xy)
323 				{
324 					real x_ = (real) x;
325 					real y_ = (real) y;
326 
327 					pf_out->write(&x_, sizeof(real), 1);
328 					pf_out->write(&y_, sizeof(real), 1);
329 				}
330 				
331 				for(int i = 0 ; i <  n_inputs ; i++)
332 					pf_out->write(&data_out[i], sizeof(real), 1);   
333 			}
334 		}
335 		else
336 		{
337 			int j = 0;
338 		
339 			for(int b = 0 ; b < n_block ; b++)
340 			{
341 		   		if(display == true) print("Block %d: ", b);
342 			
343 				real *output_ = ip->seq_out->frames[b];
344 			
345 				// if xy is embedded
346 				for(int i = 0 ; i < offset ; i++)
347 				{
348 		   			if(display == true) print("%g ", data_in[i]);
349 					
350 					data_out[i] = data_in[i];
351 				}
352 				
353 				for(int i = offset ; i < n_output ; i++)
354 				{
355 		   			if(display == true) print("%g ", output_[i]);
356 
357 		   			data_out[j] = output_[i];
358 				
359 					j++;
360 				}
361 		   		if(display == true) print("\n");
362 			}
363 		
364 			for(int i = 0 ; i <  n_inputs ; i++)
365 				pf_out->write(&data_out[i], sizeof(real), 1);   
366 		}
367 	}
368 		
369 	if(blocknorm)
370 	{
371 		delete [] blocks_mean;
372 		delete [] blocks_stdv;
373 	}
374 	
375 	delete [] data_out;
376 	delete [] data_in;
377 	delete pf_out;
378 	
379 
380 	// Free all
381 	delete ip;
382 	delete seqimage;
383 
384 	return 0;
385 }