bindata2dctbindata.cc
1 const char *help = "\
2 progname: bindata2dctbindata.cc\n\
3 code2html: This program reads a bindata and decomposes it in DCT/DCTmod2 blocks.\n\
4 version: Torch3 vision2.0, 2004-2005\n\
5 (c) Sebastien Marcel (marcel@idiap.ch)\n";
6
7 #include "ImageGray.h"
8 #include "ipDCT2D.h"
9 #include "ipBlock.h"
10 #include "ipDCTmod2.h"
11 #include "DiskXFile.h"
12 #include "CmdLine.h"
13
14 using namespace Torch;
15
16 // Allowed dimension for DCT mod2
17 const int allowed_dim[] = {6, 10, 15, 21, 28, 36, 43, 49, 54, 58, 61, 63, 64};
18
19 int main(int argc, char *argv[])
20 {
21 char *filename_in;
22 int width;
23 int height;
24 char *filename_out;
25 int dct_dim;
26 int delta_neighbour;
27 int delta_keep;
28 int block_size;
29 int block_overlap;
30 bool block_to_frame;
31 bool dctmod2;
32 bool force;
33 bool verbose;
34 bool display;
35 bool xy;
36 bool xyIsEmbedded;
37 bool blocknorm;
38 bool unnorm;
39
40 CmdLine cmd;
41 cmd.setBOption("write log", false);
42 cmd.info(help);
43 cmd.addText("\nArguments:");
44 cmd.addSCmdArg("imagefile in", &filename_in, "image file in");
45 cmd.addICmdArg("width", &width, "width");
46 cmd.addICmdArg("height", &height, "height");
47 cmd.addSCmdArg("output filename", &filename_out, "output bindata filename");
48 cmd.addText("\nBlock decomposition Options:");
49 cmd.addBCmdOption("-blocktoframe", &block_to_frame, false, "a block becomes a frame");
50 cmd.addICmdOption("-block", &block_size, 8, "block size");
51 cmd.addICmdOption("-overlap", &block_overlap, 4, "block overlap");
52 cmd.addText("\nDCT Options:");
53 cmd.addICmdOption("-dctdim", &dct_dim, 15, "DCT 2D dimension");
54 cmd.addICmdOption("-delta", &delta_neighbour, 1, "delta neighbour");
55 cmd.addICmdOption("-deltakeep", &delta_keep, 3, "delta keep");
56 cmd.addBCmdOption("-dctmod2", &dctmod2, false, "apply dctmod2");
57 cmd.addBCmdOption("-force", &force, false, "force dctmod2 for blocks larger that 8x8");
58 cmd.addText("\nExtra Options:");
59 cmd.addBCmdOption("-xy", &xy, false, "add xy coordinates of blocks");
60 cmd.addBCmdOption("-xyprovided", &xyIsEmbedded, false, "xy coordinates are provided");
61 cmd.addBCmdOption("-blocknorm", &blocknorm, false, "mean/stdv normalize blocks");
62 cmd.addBCmdOption("-unnorm", &unnorm, false, "unnormalize input image");
63 cmd.addText("\nMisc Options:");
64 cmd.addBCmdOption("-display", &display, false, "display dct features");
65 cmd.addBCmdOption("-verbose", &verbose, false, "verbose");
66 cmd.read(argc, argv);
67
68 //
69 if(verbose) print("DCT 2D feature extraction.\n");
70
71 if((dctmod2 == true) && (block_size != 8))
72 {
73 warning("DCTmod2 is usually designed only for 8x8 blocks.");
74
75 if(force == false)
76 {
77 print("Disabling dctmod2.\n");
78
79 dctmod2 = false;
80 }
81 }
82
83 if(dctmod2 == true)
84 {
85 int dim_ok = 0;
86 int max_ = sizeof(allowed_dim)/sizeof(int);
87
88 for(int i = 0 ; i < max_ ; i++)
89 if(dct_dim == allowed_dim[i]) dim_ok = 1;
90
91 if(!dim_ok)
92 {
93 error("DCT dim (%d) is incorrect.", dct_dim);
94 print(" Correct values are: ");
95 for(int i = 0 ; i < max_ ; i++)
96 print("%d ", allowed_dim[i]);
97 print("\n");
98
99 exit(0);
100 }
101 }
102
103 //
104 int n_images;
105 int image_size;
106 DiskXFile *pf_in;
107 int n_inputs;
108 int n_patterns;
109 DiskXFile *pf_out;
110
111 // Load the image
112 pf_in = new DiskXFile(filename_in, "r");
113 pf_in->read(&n_images, sizeof(int), 1);
114 pf_in->read(&image_size, sizeof(int), 1);
115
116 if(verbose)
117 {
118 print("Input file ...\n");
119 print(" n_inputs = %d\n", image_size);
120 print(" n_patterns = %d\n", n_images);
121 }
122
123 int offset = 0;
124
125 if(xyIsEmbedded) offset = 2;
126
127 if(image_size-offset != width * height)
128 error("Incorrect image size %d != %dx%d.", image_size, width, height);
129
130 Sequence *seqimage = new Sequence(1, image_size-offset);
131
132 // feature image machine to apply for each block
133 ipCore *ip = NULL;
134
135 int n_block, n_output;
136 int rowblocks;
137 int colblocks;
138
139 // Choose the block by block convolution
140 if(dctmod2 == true)
141 {
142 ip = new ipDCTmod2(width, height, "gray", dct_dim, block_size, block_overlap, delta_neighbour, delta_keep);
143
144 ipDCTmod2 *ip_ = (ipDCTmod2 *)ip;
145 n_block = ip_->n_block;
146 n_output = ip_->output_size;
147 rowblocks = ip_->getrowblocks();
148 colblocks = ip_->getcolblocks();
149 }
150 else
151 {
152 ipCore *ip_dct = NULL;
153 ip_dct = new ipDCT2D(block_size, "gray", dct_dim);
154 ip_dct->setBOption("verbose", verbose);
155
156 ip = new ipBlock(width, height, "gray", ip_dct, dct_dim, block_size, block_overlap);
157
158 ipBlock *ip_ = (ipBlock *)ip;
159 n_block = ip_->n_block;
160 n_output = dct_dim;
161 rowblocks = colblocks = block_size;
162 }
163 ip->setBOption("verbose", verbose);
164
165 if(verbose)
166 {
167 print("-> n_block = %d\n", n_block);
168 print("-> n_output = %d\n", n_output);
169 }
170
171 //
172 pf_out = new DiskXFile(filename_out, "w");
173 if(pf_out == NULL)
174 {
175 error("Opening bindata file %s", filename_out);
176 return 0;
177 }
178
179 //
180 int frame_size;
181
182 if(block_to_frame)
183 {
184 if(verbose) print("Block to frame: each block becomes a frame.\n");
185
186 n_inputs = n_output + offset;
187 if(xy) frame_size = n_inputs + 2;
188 else frame_size = n_inputs;
189
190 n_patterns = n_images * n_block;
191 }
192 else
193 {
194 if(xy) warning("Please use blocktoframe to add xy coordinates.");
195
196 n_inputs = n_block * (n_output + offset);
197 frame_size = n_inputs;
198 n_patterns = n_images;
199 }
200
201 if(verbose)
202 {
203 print("Output file ...\n");
204 print(" n_inputs = %d\n", frame_size);
205 print(" n_patterns = %d\n", n_patterns);
206 }
207
208 //
209 pf_out->write(&n_patterns, sizeof(int), 1);
210 pf_out->write(&frame_size, sizeof(int), 1);
211
212 //
213 real *blocks_mean = NULL;
214 real *blocks_stdv = NULL;
215
216 real *data_in = new real [image_size];
217 real *data_out = new real [n_inputs];
218
219 for(int p = 0 ; p < n_images ; p++)
220 {
221 pf_in->read(data_in, sizeof(real), image_size);
222
223 if(unnorm)
224 {
225 for(int i = offset ; i < image_size ; i++)
226 {
227 int z_ = (int) (data_in[i] * 255.0);
228
229 seqimage->frames[0][i-offset] = z_;
230 }
231 }
232 else
233 {
234 for(int i = offset ; i < image_size ; i++)
235 seqimage->frames[0][i-offset] = data_in[i];
236 }
237
238 // Apply the pattern machine (DCT) to each block of the image
239 ip->process(seqimage);
240
241 if(blocknorm)
242 {
243 blocks_mean = new real [n_output];
244 blocks_stdv = new real [n_output];
245
246 for(int i = 0 ; i < n_output ; i++)
247 {
248 blocks_mean[i] = 0;
249 blocks_stdv[i] = 0;
250 }
251
252 for(int b = 0 ; b < n_block ; b++)
253 {
254 real *src_ = ip->seq_out->frames[b];
255
256 for(int i = 0 ; i < n_output ; i++)
257 {
258 real z = src_[i];
259 blocks_mean[i] += z;
260 blocks_stdv[i] += z*z;
261 }
262 }
263
264 for(int i = 0 ; i < n_output ; i++)
265 {
266 blocks_mean[i] /= (real) n_block;
267 blocks_stdv[i] /= (real) n_block;
268 blocks_stdv[i] -= blocks_mean[i]*blocks_mean[i];
269 if(blocks_stdv[i] <= 0)
270 {
271 warning("MeanVarNorm: input column %d has a null stdv. Replaced by 1.", i);
272 blocks_stdv[i] = 1.;
273 }
274 else blocks_stdv[i] = sqrt(blocks_stdv[i]);
275 }
276
277 for(int b = 0 ; b < n_block ; b++)
278 {
279 real *src_ = ip->seq_out->frames[b];
280
281 for(int i = 0 ; i < n_output ; i++)
282 src_[i] = (src_[i] - blocks_mean[i]) / blocks_stdv[i];
283 }
284 }
285
286 if(block_to_frame)
287 {
288 for(int b = 0 ; b < n_block ; b++)
289 {
290 // (July, 22 2005) There might be a BUG there: TO CHECK
291 int x = (b % rowblocks);
292 int y = (b / rowblocks);
293 // the correct thing could be
294 //int x = (b % colblocks);
295 //int y = (b / colblocks);
296
297 if(display == true)
298 {
299 if(xy) print("Block %d (%dx%d): ", b, x, y);
300 else print("Block %d: ", b);
301 }
302
303 real *output_ = ip->seq_out->frames[b];
304
305 // if xy is embedded
306 for(int i = 0 ; i < offset ; i++)
307 {
308 if(display == true) print("%g ", data_in[i]);
309
310 data_out[i] = data_in[i];
311 }
312
313 for(int i = 0 ; i < n_output ; i++)
314 {
315 if(display == true) print("%g ", output_[i]);
316
317 data_out[i+offset] = output_[i];
318 }
319
320 if(display == true) print("\n");
321
322 if(xy)
323 {
324 real x_ = (real) x;
325 real y_ = (real) y;
326
327 pf_out->write(&x_, sizeof(real), 1);
328 pf_out->write(&y_, sizeof(real), 1);
329 }
330
331 for(int i = 0 ; i < n_inputs ; i++)
332 pf_out->write(&data_out[i], sizeof(real), 1);
333 }
334 }
335 else
336 {
337 int j = 0;
338
339 for(int b = 0 ; b < n_block ; b++)
340 {
341 if(display == true) print("Block %d: ", b);
342
343 real *output_ = ip->seq_out->frames[b];
344
345 // if xy is embedded
346 for(int i = 0 ; i < offset ; i++)
347 {
348 if(display == true) print("%g ", data_in[i]);
349
350 data_out[i] = data_in[i];
351 }
352
353 for(int i = offset ; i < n_output ; i++)
354 {
355 if(display == true) print("%g ", output_[i]);
356
357 data_out[j] = output_[i];
358
359 j++;
360 }
361 if(display == true) print("\n");
362 }
363
364 for(int i = 0 ; i < n_inputs ; i++)
365 pf_out->write(&data_out[i], sizeof(real), 1);
366 }
367 }
368
369 if(blocknorm)
370 {
371 delete [] blocks_mean;
372 delete [] blocks_stdv;
373 }
374
375 delete [] data_out;
376 delete [] data_in;
377 delete pf_out;
378
379
380 // Free all
381 delete ip;
382 delete seqimage;
383
384 return 0;
385 }