statbindata.cc

  1 const char *help = "\
  2 progname: statbindata.cc\n\
  3 code2html: This program reads bindata files (Torch binary dataset format) and computes mean/stdv.\n\
  4 version: Torch3 vision2.0, 2004-2005\n\
  5 (c) Sebastien Marcel (marcel@idiap.ch)\n";
  6 
  7 #include "DiskXFile.h"
  8 #include "ImageGray.h"
  9 #include "FileListCmdOption.h"
 10 #include "CmdLine.h"
 11 
 12 using namespace Torch;
 13 
 14 int main(int argc, char *argv[])
 15 {
 16 	bool verbose;
 17 	bool savemeanaspgm;
 18 	int n_inputs;
 19 	int width;
 20 	int height;
 21 
 22 	FileListCmdOption filelist("filelist", "the list files or one data file");
 23 	filelist.isArgument(true);
 24 	
 25 	CmdLine cmd;
 26 	cmd.setBOption("write log", false);
 27 
 28 	//
 29 	cmd.info(help);
 30 	cmd.addText("\nArguments:");
 31 	cmd.addCmdOption(&filelist);
 32 	cmd.addICmdArg("n_inputs", &n_inputs, "number of inputs");
 33 	cmd.addText("\nOptions:");
 34 	cmd.addBCmdOption("-verbose", &verbose, false, "verbose");
 35 	cmd.addBCmdOption("-savepgm", &savemeanaspgm, false, "savemeanaspgm");
 36 	cmd.addICmdOption("-width", &width, 19, "width");
 37 	cmd.addICmdOption("-height", &height, 19, "height");
 38 	cmd.read(argc, argv);
 39 	
 40 	//
 41 	int n_inputs_;
 42 	int n_patterns;
 43 	DiskXFile *pf_in;
 44 		
 45 	real *data = new real [n_inputs];
 46 	real *mean = new real [n_inputs];
 47 	real *stdv = new real [n_inputs];
 48 		
 49 	
 50 	for(int j = 0 ; j < n_inputs ; j++) mean[j] = stdv[j] = 0.0;
 51 
 52 	int n_total_patterns = 0;
 53 
 54 	real min_mean = 100.0;
 55 	real max_mean = -100.0;
 56 	real min_stdv = 100.0;
 57 	real max_stdv = -100.0;
 58 	
 59 	for(int i = 0 ; i < filelist.n_files ; i++)
 60 	{
 61 		pf_in = new DiskXFile(filelist.file_names[i], "r");
 62 		if((pf_in == NULL) || (pf_in->is_opened == false))
 63 		{
 64       			error("Opening bindata file %s", filelist.file_names[i]);
 65       
 66       			return 0;
 67 		}
 68 	
 69 		pf_in->read(&n_patterns, sizeof(int), 1);
 70 		pf_in->read(&n_inputs_, sizeof(int), 1);
 71 
 72 		if(n_inputs_ != n_inputs_)
 73 		{
 74 			delete pf_in;
 75 			error("number of inputs (%d != %d) incorrect in file %s", n_inputs_, n_inputs, filelist.file_names[i]);
 76 		}
 77 	
 78 		if(verbose)
 79 		{
 80 			print("Reading bindata file (%s)\n", filelist.file_names[i]);
 81 			print("   n_inputs = %d\n", n_inputs);
 82 			print("   n_patterns = %d\n", n_patterns);  
 83 		}
 84 
 85 		for(int p = 0 ; p < n_patterns ; p++)
 86 		{
 87 			pf_in->read(data, sizeof(real), n_inputs);   
 88 
 89 			real mean_pixel = 0.0;
 90 			real stdv_pixel = 0.0;
 91 			
 92 			for(int j = 0 ; j < n_inputs ; j++)
 93 			{
 94 				real z = data[j];
 95 				mean[j] += z;
 96 				stdv[j] += z*z;
 97 
 98 				mean_pixel += z;
 99 				stdv_pixel += z*z;
100 			}
101 			mean_pixel /= (real) n_inputs;
102 			stdv_pixel /= (real) n_inputs;
103 			stdv_pixel -= mean_pixel*mean_pixel;
104 			if(stdv_pixel <= 0) stdv_pixel = 1.0;
105 			else stdv_pixel = sqrt(stdv_pixel);
106 
107 			if(mean_pixel < min_mean) min_mean = mean_pixel;
108 			if(mean_pixel > max_mean) max_mean = mean_pixel;
109 			if(stdv_pixel < min_stdv) min_stdv = stdv_pixel;
110 			if(stdv_pixel > max_stdv) max_stdv = stdv_pixel;
111 		}
112 
113 		n_total_patterns += n_patterns;
114 
115 		delete pf_in;
116 	}
117 
118 	print("Total number of patterns = %d\n", n_total_patterns);
119 	print("min mean = %g\n", min_mean);
120 	print("max mean = %g\n", max_mean);
121 	print("min stdv = %g\n", min_stdv);
122 	print("max stdv = %g\n", max_stdv);
123 	
124 	for(int j = 0 ; j < n_inputs ; j++)
125 	{
126 		mean[j] /= (real) n_total_patterns;
127 		stdv[j] /= (real) n_total_patterns;
128 		stdv[j] -= mean[j]*mean[j];
129 		if(stdv[j] <= 0)
130 		{
131 			warning("input column %d has a null stdv. Replaced by 1.", j);
132 			stdv[j] = 1.0;
133 		}
134 		else stdv[j] = sqrt(stdv[j]);
135 	}
136 
137 	if(savemeanaspgm)
138 	{
139 		if(width * height == n_inputs)
140 		{
141 			ImageGray *grayimage = new ImageGray(width, height);
142 
143 			grayimage->copyFrom(width, height, mean, "float", 1.0);
144 			grayimage->save("mean.pgm");
145 			
146 			delete grayimage;
147 		}
148 		else warning("width (%d) x height (%d) != %d, impossible to save the mean as pgm", width, height, n_inputs);
149 	}
150 	
151 	delete [] data;
152 	delete [] mean;
153 	delete [] stdv;
154 
155 	return 0;
156 }