mbindata2pca.cc

  1 const char *help = "\
  2 progname: mbindata2pca.cc\n\
  3 code2html: This program reads multiple bindata files and a PCA model, projects patterns into it.\n\
  4 version: Torch3 vision2.0, 2003-2005\n\
  5 (c) Sebastien Marcel (marcel@idiap.ch)\n";
  6 
  7 #include "string_utils.h"
  8 #include "ImageGray.h"
  9 #include "PCAMachine.h"
 10 #include "MyMeanVarNorm.h"
 11 #include "HistoEqualSmoothImagePreProcessing.h"
 12 #include "FileListCmdOption.h"
 13 #include "str_utils.h"
 14 #include "Random.h"
 15 #include "CmdLine.h"
 16 
 17 using namespace Torch;
 18 
 19 real DFS(int n_input, real *realinput, real *mean, int n_output, real *projection, real *eigenvalues, real *dffs_, real *difs_)
 20 {
 21 	real alpha = 0.5;
 22 	real alpha2 = alpha * alpha;
 23 	
 24 	real dffs = 0.0;
 25 	for(int i = 0 ; i < n_input ; i++)
 26 	{
 27 		real z = realinput[i] - mean[i];
 28 			
 29 		dffs += z * z;
 30 	}
 31 
 32 	real difs = 0.0;
 33 
 34 	for(int i = 0 ; i < n_output ; i++)
 35 	{
 36 		real z = projection[i] * projection[i];
 37 
 38 		dffs -= z;
 39 		difs += z / (eigenvalues[i] * alpha2);
 40 	}
 41 
 42 	dffs /= (alpha * eigenvalues[n_output]);
 43 
 44 	*dffs_ = dffs;
 45 	*difs_ = difs;
 46 
 47 	real dfs = difs + dffs;
 48 	
 49 	return dfs;
 50 }
 51 
 52 int main(int argc, char *argv[])
 53 {
 54 	char *dir_name;
 55 	char *model_filename;
 56 	int n_input;
 57 	real variance;
 58 	int n_output;
 59 	bool verbose;
 60 	int verbose_level;
 61 	bool normalize;
 62 	bool image_normalize;
 63 	int width, height;
 64 	bool noProjection;
 65 	bool doDFFS;
 66 	bool doDIFS;
 67 	int the_seed;
 68 	int n_perturbations;
 69 	int dim;
 70 	real nmin, nmax;
 71 
 72 	//
 73 	FileListCmdOption filelist("file name", "the list files or one data file");
 74         filelist.isArgument(true);
 75 
 76 	//
 77 	CmdLine cmd;
 78 	cmd.setBOption("write log", false);
 79 	
 80 	cmd.info(help);
 81 	cmd.addText("\nArguments:");
 82   	cmd.addCmdOption(&filelist);
 83 	cmd.addSCmdArg("model filename", &model_filename, "PCA model filename");
 84 	cmd.addICmdArg("n_input", &n_input, "number of inputs");
 85 	cmd.addText("\nOptions:");
 86 	cmd.addSCmdOption("-dir", &dir_name, ".", "dir name");
 87 	cmd.addRCmdOption("-variance", &variance, 0.95, "variance (-1 => 100\%)");
 88 	cmd.addICmdOption("-noutput", &n_output, -1, "number of outputs");
 89 	cmd.addBCmdOption("-verbose", &verbose, false, "verbose");
 90 	cmd.addICmdOption("-verbose_level", &verbose_level, 1, "level of verbose");
 91 	cmd.addBCmdOption("-normalize", &normalize, false, "normalize");
 92 	cmd.addBCmdOption("-imagenorm", &image_normalize, false, "considers the input pattern as an image and performs a photometric normalization");
 93 	cmd.addICmdOption("-width", &width, -1, "width");
 94 	cmd.addICmdOption("-height", &height, -1, "height");
 95 	cmd.addBCmdOption("-noProjection", &noProjection, false, "do not output projection");
 96 	cmd.addBCmdOption("-dffs", &doDFFS, false, "output DFFS");
 97 	cmd.addBCmdOption("-difs", &doDIFS, false, "output DIFS");
 98 	cmd.addICmdOption("-p", &n_perturbations, 0, "number of perturbations to generate");
 99 	cmd.addICmdOption("-dim", &dim, -1, "dimension to perturbate (all if -1)");
100 	cmd.addICmdOption("-seed", &the_seed, 9503, "seed");
101 	cmd.addRCmdOption("-nmin", &nmin, -0.1, "min random noise");
102 	cmd.addRCmdOption("-nmax", &nmax, +0.1, "max random noise");
103 	cmd.read(argc, argv);
104 
105 	if(verbose == false) verbose_level = 0;
106 	
107 	Random::manualSeed(the_seed);
108 
109 	if(image_normalize)
110 	{
111 		print("Perform photometric normalization ...\n");
112 
113 		print("The input pattern is an %dx%d image.\n", width, height);
114 	}
115 
116 	if(n_perturbations < 0) n_perturbations = 0;
117 
118 	//
119 	PCAMachine *pca_machine = NULL;
120 	pca_machine = new PCAMachine(n_input);
121 
122 	//
123 	PreProcessing *preprocess = NULL;
124 	if(image_normalize)
125 	{
126 		print("Pre-processing image %dx%d ...\n", width, height);
127 		preprocess = new HistoEqualSmoothImagePreProcessing(width, height);
128 	}
129 
130 	//
131 	MyMeanVarNorm *mv_norm = NULL;
132 
133 	if(normalize)
134 	{
135 	   	if(verbose_level >= 1)
136 			print("Loading PCA model and its normalisation: %s ...\n", model_filename);
137 
138 		mv_norm = new MyMeanVarNorm(n_input, 1);
139 
140 	   	pca_machine->load(model_filename, mv_norm);
141 	}
142 	else
143 	{
144 	   	if(verbose_level >= 1)
145 			print("Loading PCA model: %s ...\n", model_filename);
146 
147 		DiskXFile *file = NULL;
148 		file = new DiskXFile(model_filename, "r");
149 		pca_machine->loadXFile(file);
150 		delete file;
151 	}
152 	   
153 	//
154 	pca_machine->setIOption("verbose_level", verbose_level);
155 	pca_machine->setROption("variance", variance);
156 	pca_machine->init();
157 
158 	if(variance <= 0.0 || n_output <= 0) n_output = pca_machine->n_outputs;
159 	if(n_output > 0) pca_machine->n_outputs = n_output; 
160 
161 	//
162 	if(verbose_level >= 1)
163 		print("Projection bindata file into PCA space (%d -> %d) ...\n", n_input, n_output);
164 
165 	//
166 	float *realinput = NULL;
167         Sequence *seq;
168 
169         realinput = new float [n_input];
170         seq = new Sequence(&realinput, 1, n_input);	
171 
172 	//
173 	int dimIn;
174 	int n_patterns;
175 	
176 	DiskXFile *pf = NULL;
177 
178 	//
179 	for(int i = 0 ; i < filelist.n_files ; i++)
180 	{
181 		char *temp = strBaseName(filelist.file_names[i]);
182 		char *file_name = strRemoveSuffix(temp);
183 
184 		if(verbose)
185 			print("Processing file %s\n", file_name);
186 
187 		pf = new DiskXFile(filelist.file_names[i], "r");
188 
189 		pf->read(&n_patterns, sizeof(int), 1);
190 		pf->read(&dimIn, sizeof(int), 1);
191 
192 		if(verbose_level >= 2)
193 		{
194 			print("n_inputs : %d\n", dimIn);
195 			print("n_patterns : %d\n", n_patterns);  
196 		}
197 
198 		if(n_input > dimIn)
199 		{
200 			error("Number of inputs specified (%d) bigger than into the file (%d)", n_input, dimIn);
201 	   
202 			delete pf;
203 
204 			return 0;
205 		}
206 
207 
208 
209 		//
210 		DiskXFile *pfOutput = NULL;
211 		char filename_out[250];
212 
213 		sprintf(filename_out, "%s/%s.bindata", dir_name, file_name);
214 		
215 		pfOutput = new DiskXFile(filename_out, "w");
216 	
217 		int P = n_patterns + n_patterns*n_perturbations;
218 	
219 		pfOutput->write(&P, sizeof(int), 1);
220 		int m = 0;
221 		if(!noProjection) m = n_output;
222 		if(doDFFS) m++;
223 		if(doDIFS) m++;
224 
225 		if(m == 0)
226 			error("Nothing to do then.");
227 	
228 		pfOutput->write(&m, sizeof(int), 1);
229 		
230 		real *machine_output = new real [n_output];
231 
232 		for(int p = 0 ; p < n_patterns ; p++)
233 		{
234 	   		//
235 	  		for(int i = 0 ; i < n_input ; i++)
236 				pf->read(&realinput[i], sizeof(float), 1);   
237 
238 	   		if(verbose_level >= 2)
239 				print(" Seq =     [%2.3f %2.3f %2.3f ...]\n", realinput[0], realinput[1], realinput[2]);
240 
241 			if(image_normalize)
242 				preprocess->preProcessInputs(seq);
243 
244 			if(normalize)
245 				mv_norm->preProcessInputs(seq);
246 
247 			//
248 			pca_machine->forward(seq);
249 			
250 	   		if(verbose_level >= 2)
251 				print(" Output =   [%2.3f %2.3f %2.3f ...]\n", pca_machine->outputs->frames[0][0], pca_machine->outputs->frames[0][1], pca_machine->outputs->frames[0][2]);
252 
253 	  		for(int i = 0 ; i < n_output ; i++)
254 				machine_output[i] = pca_machine->outputs->frames[0][i];
255 
256 
257 			real dffs = 0.0;
258 			real difs = 0.0;
259 			real dfs = DFS(n_input, realinput, pca_machine->Xm, n_output, machine_output, pca_machine->eigenvalues, &dffs, &difs);
260 
261 			if(!noProjection)
262 				pfOutput->write(machine_output, sizeof(float), n_output);   
263 
264 			if(doDFFS) pfOutput->write(&dffs, sizeof(float), 1);   
265 			if(doDIFS) pfOutput->write(&difs, sizeof(float), 1);   
266 
267 			for(int j = 0 ; j < n_perturbations ; j++)
268 			{
269 				for(int i = 0 ; i < n_output ; i++)
270 					if((dim == -1) || (i == dim)) machine_output[i] = pca_machine->outputs->frames[0][i] + Random::boundedUniform(nmin, nmax);
271 					else machine_output[i] = pca_machine->outputs->frames[0][i];
272 		
273 				dffs = 0.0;
274 				difs = 0.0;
275 				dfs = DFS(n_input, realinput, pca_machine->Xm, n_output, machine_output, pca_machine->eigenvalues, &dffs, &difs);
276 
277 				if(!noProjection)
278 					pfOutput->write(machine_output, sizeof(float), n_output);   
279 
280 				if(doDFFS) pfOutput->write(&dffs, sizeof(float), 1);   
281 				if(doDIFS) pfOutput->write(&difs, sizeof(float), 1);   
282 			}
283 		}
284 
285 		//
286 		delete pf;
287 		delete pfOutput;
288 		delete [] machine_output;
289 	}
290 
291 	
292 	//
293 	delete seq;
294 	delete [] realinput;
295 
296 	if(normalize) delete mv_norm;
297 	if(image_normalize) delete preprocess;
298 	delete pca_machine;
299 	
300   	return 0;
301 }
302