mbindata2pca.cc
1 const char *help = "\
2 progname: mbindata2pca.cc\n\
3 code2html: This program reads multiple bindata files and a PCA model, projects patterns into it.\n\
4 version: Torch3 vision2.0, 2003-2005\n\
5 (c) Sebastien Marcel (marcel@idiap.ch)\n";
6
7 #include "string_utils.h"
8 #include "ImageGray.h"
9 #include "PCAMachine.h"
10 #include "MyMeanVarNorm.h"
11 #include "HistoEqualSmoothImagePreProcessing.h"
12 #include "FileListCmdOption.h"
13 #include "str_utils.h"
14 #include "Random.h"
15 #include "CmdLine.h"
16
17 using namespace Torch;
18
19 real DFS(int n_input, real *realinput, real *mean, int n_output, real *projection, real *eigenvalues, real *dffs_, real *difs_)
20 {
21 real alpha = 0.5;
22 real alpha2 = alpha * alpha;
23
24 real dffs = 0.0;
25 for(int i = 0 ; i < n_input ; i++)
26 {
27 real z = realinput[i] - mean[i];
28
29 dffs += z * z;
30 }
31
32 real difs = 0.0;
33
34 for(int i = 0 ; i < n_output ; i++)
35 {
36 real z = projection[i] * projection[i];
37
38 dffs -= z;
39 difs += z / (eigenvalues[i] * alpha2);
40 }
41
42 dffs /= (alpha * eigenvalues[n_output]);
43
44 *dffs_ = dffs;
45 *difs_ = difs;
46
47 real dfs = difs + dffs;
48
49 return dfs;
50 }
51
52 int main(int argc, char *argv[])
53 {
54 char *dir_name;
55 char *model_filename;
56 int n_input;
57 real variance;
58 int n_output;
59 bool verbose;
60 int verbose_level;
61 bool normalize;
62 bool image_normalize;
63 int width, height;
64 bool noProjection;
65 bool doDFFS;
66 bool doDIFS;
67 int the_seed;
68 int n_perturbations;
69 int dim;
70 real nmin, nmax;
71
72 //
73 FileListCmdOption filelist("file name", "the list files or one data file");
74 filelist.isArgument(true);
75
76 //
77 CmdLine cmd;
78 cmd.setBOption("write log", false);
79
80 cmd.info(help);
81 cmd.addText("\nArguments:");
82 cmd.addCmdOption(&filelist);
83 cmd.addSCmdArg("model filename", &model_filename, "PCA model filename");
84 cmd.addICmdArg("n_input", &n_input, "number of inputs");
85 cmd.addText("\nOptions:");
86 cmd.addSCmdOption("-dir", &dir_name, ".", "dir name");
87 cmd.addRCmdOption("-variance", &variance, 0.95, "variance (-1 => 100\%)");
88 cmd.addICmdOption("-noutput", &n_output, -1, "number of outputs");
89 cmd.addBCmdOption("-verbose", &verbose, false, "verbose");
90 cmd.addICmdOption("-verbose_level", &verbose_level, 1, "level of verbose");
91 cmd.addBCmdOption("-normalize", &normalize, false, "normalize");
92 cmd.addBCmdOption("-imagenorm", &image_normalize, false, "considers the input pattern as an image and performs a photometric normalization");
93 cmd.addICmdOption("-width", &width, -1, "width");
94 cmd.addICmdOption("-height", &height, -1, "height");
95 cmd.addBCmdOption("-noProjection", &noProjection, false, "do not output projection");
96 cmd.addBCmdOption("-dffs", &doDFFS, false, "output DFFS");
97 cmd.addBCmdOption("-difs", &doDIFS, false, "output DIFS");
98 cmd.addICmdOption("-p", &n_perturbations, 0, "number of perturbations to generate");
99 cmd.addICmdOption("-dim", &dim, -1, "dimension to perturbate (all if -1)");
100 cmd.addICmdOption("-seed", &the_seed, 9503, "seed");
101 cmd.addRCmdOption("-nmin", &nmin, -0.1, "min random noise");
102 cmd.addRCmdOption("-nmax", &nmax, +0.1, "max random noise");
103 cmd.read(argc, argv);
104
105 if(verbose == false) verbose_level = 0;
106
107 Random::manualSeed(the_seed);
108
109 if(image_normalize)
110 {
111 print("Perform photometric normalization ...\n");
112
113 print("The input pattern is an %dx%d image.\n", width, height);
114 }
115
116 if(n_perturbations < 0) n_perturbations = 0;
117
118 //
119 PCAMachine *pca_machine = NULL;
120 pca_machine = new PCAMachine(n_input);
121
122 //
123 PreProcessing *preprocess = NULL;
124 if(image_normalize)
125 {
126 print("Pre-processing image %dx%d ...\n", width, height);
127 preprocess = new HistoEqualSmoothImagePreProcessing(width, height);
128 }
129
130 //
131 MyMeanVarNorm *mv_norm = NULL;
132
133 if(normalize)
134 {
135 if(verbose_level >= 1)
136 print("Loading PCA model and its normalisation: %s ...\n", model_filename);
137
138 mv_norm = new MyMeanVarNorm(n_input, 1);
139
140 pca_machine->load(model_filename, mv_norm);
141 }
142 else
143 {
144 if(verbose_level >= 1)
145 print("Loading PCA model: %s ...\n", model_filename);
146
147 DiskXFile *file = NULL;
148 file = new DiskXFile(model_filename, "r");
149 pca_machine->loadXFile(file);
150 delete file;
151 }
152
153 //
154 pca_machine->setIOption("verbose_level", verbose_level);
155 pca_machine->setROption("variance", variance);
156 pca_machine->init();
157
158 if(variance <= 0.0 || n_output <= 0) n_output = pca_machine->n_outputs;
159 if(n_output > 0) pca_machine->n_outputs = n_output;
160
161 //
162 if(verbose_level >= 1)
163 print("Projection bindata file into PCA space (%d -> %d) ...\n", n_input, n_output);
164
165 //
166 float *realinput = NULL;
167 Sequence *seq;
168
169 realinput = new float [n_input];
170 seq = new Sequence(&realinput, 1, n_input);
171
172 //
173 int dimIn;
174 int n_patterns;
175
176 DiskXFile *pf = NULL;
177
178 //
179 for(int i = 0 ; i < filelist.n_files ; i++)
180 {
181 char *temp = strBaseName(filelist.file_names[i]);
182 char *file_name = strRemoveSuffix(temp);
183
184 if(verbose)
185 print("Processing file %s\n", file_name);
186
187 pf = new DiskXFile(filelist.file_names[i], "r");
188
189 pf->read(&n_patterns, sizeof(int), 1);
190 pf->read(&dimIn, sizeof(int), 1);
191
192 if(verbose_level >= 2)
193 {
194 print("n_inputs : %d\n", dimIn);
195 print("n_patterns : %d\n", n_patterns);
196 }
197
198 if(n_input > dimIn)
199 {
200 error("Number of inputs specified (%d) bigger than into the file (%d)", n_input, dimIn);
201
202 delete pf;
203
204 return 0;
205 }
206
207
208
209 //
210 DiskXFile *pfOutput = NULL;
211 char filename_out[250];
212
213 sprintf(filename_out, "%s/%s.bindata", dir_name, file_name);
214
215 pfOutput = new DiskXFile(filename_out, "w");
216
217 int P = n_patterns + n_patterns*n_perturbations;
218
219 pfOutput->write(&P, sizeof(int), 1);
220 int m = 0;
221 if(!noProjection) m = n_output;
222 if(doDFFS) m++;
223 if(doDIFS) m++;
224
225 if(m == 0)
226 error("Nothing to do then.");
227
228 pfOutput->write(&m, sizeof(int), 1);
229
230 real *machine_output = new real [n_output];
231
232 for(int p = 0 ; p < n_patterns ; p++)
233 {
234 //
235 for(int i = 0 ; i < n_input ; i++)
236 pf->read(&realinput[i], sizeof(float), 1);
237
238 if(verbose_level >= 2)
239 print(" Seq = [%2.3f %2.3f %2.3f ...]\n", realinput[0], realinput[1], realinput[2]);
240
241 if(image_normalize)
242 preprocess->preProcessInputs(seq);
243
244 if(normalize)
245 mv_norm->preProcessInputs(seq);
246
247 //
248 pca_machine->forward(seq);
249
250 if(verbose_level >= 2)
251 print(" Output = [%2.3f %2.3f %2.3f ...]\n", pca_machine->outputs->frames[0][0], pca_machine->outputs->frames[0][1], pca_machine->outputs->frames[0][2]);
252
253 for(int i = 0 ; i < n_output ; i++)
254 machine_output[i] = pca_machine->outputs->frames[0][i];
255
256
257 real dffs = 0.0;
258 real difs = 0.0;
259 real dfs = DFS(n_input, realinput, pca_machine->Xm, n_output, machine_output, pca_machine->eigenvalues, &dffs, &difs);
260
261 if(!noProjection)
262 pfOutput->write(machine_output, sizeof(float), n_output);
263
264 if(doDFFS) pfOutput->write(&dffs, sizeof(float), 1);
265 if(doDIFS) pfOutput->write(&difs, sizeof(float), 1);
266
267 for(int j = 0 ; j < n_perturbations ; j++)
268 {
269 for(int i = 0 ; i < n_output ; i++)
270 if((dim == -1) || (i == dim)) machine_output[i] = pca_machine->outputs->frames[0][i] + Random::boundedUniform(nmin, nmax);
271 else machine_output[i] = pca_machine->outputs->frames[0][i];
272
273 dffs = 0.0;
274 difs = 0.0;
275 dfs = DFS(n_input, realinput, pca_machine->Xm, n_output, machine_output, pca_machine->eigenvalues, &dffs, &difs);
276
277 if(!noProjection)
278 pfOutput->write(machine_output, sizeof(float), n_output);
279
280 if(doDFFS) pfOutput->write(&dffs, sizeof(float), 1);
281 if(doDIFS) pfOutput->write(&difs, sizeof(float), 1);
282 }
283 }
284
285 //
286 delete pf;
287 delete pfOutput;
288 delete [] machine_output;
289 }
290
291
292 //
293 delete seq;
294 delete [] realinput;
295
296 if(normalize) delete mv_norm;
297 if(image_normalize) delete preprocess;
298 delete pca_machine;
299
300 return 0;
301 }
302