Simple Statistics and Statistical Functions

The functions listed here are defined in simplestat.c and statfuncs.c. They are declared in cfsemshare.h, which includes mrcslice.h and is included by b3dutil.h.

Functions for Simple Statistics


void avgSD(float *x, int n, float *avg, float *sd, float *sem)
void avgsd(float *x, int *n, float *avg, float *sd, float *sem)
void sumsToAvgSD(float sx, float sxsq, int n, float *avg, float *sd)
void sums_to_avgsd(float *sx, float *sxsq, int *n, float *avg, float *sd)
void sumsToAvgSDdbl(double sx8, double sxsq8, int n1, int n2, float *avg, float *sd)
void sums_to_avgsd8(double *sx8, double *sxsq8, int *n1, int *n2, float *avg, float *sd)
void sumsToAvgSDallDbl(double sx8, double sxsq8, int n1, int n2, double *avg, double *sd)
void sumstoavgsdalldbl(double *sx8, double *sxsq8, int *n1, int *n2, double *avg, double *sd)
void arrayMinMaxMean(float *array, int nx, int ny, int ix0, int ix1, int iy0, int iy1, float *dmin, float *dmax, float *dmean)
float imageSubareaMean(void *array, int type, int nxDim, int ix0, int ix1, int iy0, int iy1)
void iclden(float *array, int *nx, int *ny, int *ix0, int *ix1, int *iy0, int *iy1, float *dmin, float *dmax, float *dmean)
void arrayMinMaxMeanSd(float *array, int nx, int ny, int ix0, int ix1, int iy0, int iy1, float *dmin, float *dmax, double *sumDbl, double *sumSqDbl, float *avg, float *SD)
void iclavgsd(float *array, int *nx, int *ny, int *ix0, int *ix1, int *iy0, int *iy1, float *dmin, float *dmax, double *sumDbl, double *sumSqDbl, float *avg, float *SD)
void scaleArrayForMode(float *array, int nxDim, int mode, int nx1, int nx2, int ny1, int ny2, float *dmin, float *dmax, float *dmean)
void isetdn(float *array, int *nxDim, int *nyDim, int *mode, int *nx1, int *nx2, int *ny1, int *ny2, float *dmin, float *dmax, float *dmean)
void lsFit(float *x, float *y, int num, float *slope, float *intcp, float *ro)
void lsfit(float *x, float *y, int *num, float *slope, float *intcp, float *ro)
void lsFitPred(float *x, float *y, int n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se, float xpred, float *ypred, float *prederr)
void lsfitpred(float *x, float *y, int *n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se, float *xpred, float *ypred, float *prederr)
void lsfits(float *x, float *y, int *n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se)
void lsFit2(float *x1, float *x2, float *y, int n, float *a, float *b, float *c)
void lsfit2(float *x1, float *x2, float *y, int *n, float *a, float *b, float *c)
void lsfit2noc(float *x1, float *x2, float *y, int *n, float *a, float *b)
void lsFit2Pred(float *x1, float *x2, float *y, int n, float *a, float *b, float *c, float x1pred, float x2pred, float *ypred, float *prederr)
void lsfit2pred(float *x1, float *x2, float *y, int *n, float *a, float *b, float *c, float *x1pred, float *x2pred, float *ypred, float *prederr)
void lsFit3(float *x1, float *x2, float *x3, float *y, int n, float *a1, float *a2, float *a3, float *c)
void lsfit3(float *x1, float *x2, float *x3, float *y, int *n, float *a1, float *a2, float *a3, float *c)
void eigenSort(double *val, double *vec, int n, int rowStride, int colStride, int useAbs)

Statistical Functions


double tValue(double signif, int ndf)
double dtvalue(double *signif, int *ndf)
double fValue(double signif, int ndf1, int ndf2)
double dfvalue(double *signif, int *ndf1, int *ndf2)
double errFunc(double x)
double errfunc(double *x)
double incompBeta(double a, double b, double x)
double incompbeta(double *a, double *b, double *x)
double betaFunc(double p, double q)
double lnGamma(double x)
float gaussianDeviate(int seed)
void gaussiandeviate(float *value, int *seed)
void twoGaussianDeviates(float *value1, float *value2, int *pseudo)

Functions for Simple Statistics

void avgSD(float *x, int n, float *avg, float *sd, float *sem)

Calculates the mean avg, standard deviation sd, and standard error of mean sem, from the n values in array x.  Callable from Fortran by the same name.

void avgsd(float *x, int *n, float *avg, float *sd, float *sem)

Fortran wrapper for avgSD

void sumsToAvgSD(float sx, float sxsq, int n, float *avg, float *sd)

Computes a mean avg and standard deviation sd from the sum of values sx, sum of squares sxsq, and number of values n. It will not generate any division by 0 errors.  Callable from Fortran by the same name.

void sums_to_avgsd(float *sx, float *sxsq, int *n, float *avg, float *sd)

Fortran wrapper for sumsToAvgSD

void sumsToAvgSDdbl(double sx8, double sxsq8, int n1, int n2, float *avg, float *sd)

Computes a mean avg and standard deviation sd from the sum of values sx8, sum of squares sxsq8, and number of values n1 * n2, where the number of values can be greater than 2**31. It will not generate any division by 0 errors.

void sums_to_avgsd8(double *sx8, double *sxsq8, int *n1, int *n2, float *avg, float *sd)

Fortran wrapper for sumsToAvgSDdbl; use real*8 for sx8, sxsq8.

void sumsToAvgSDallDbl(double sx8, double sxsq8, int n1, int n2, double *avg, double *sd)

Like sumsToAvgSDdbl, except that it returns the avg and sd as doubles.

void sumstoavgsdalldbl(double *sx8, double *sxsq8, int *n1, int *n2, double *avg, double *sd)

Fortran wrapper for sumsToAvgSDallDbl; use real*8 for double arguments.

void arrayMinMaxMean(float *array, int nx, int ny, int ix0, int ix1, int iy0, int iy1, float *dmin, float *dmax, float *dmean)

Computes the minimum DMIN, maximum dmax, and mean dmean from data in array, dimensioned nx by ny, for X indices from ix0 to ix1 and Y indices from iy0 to iy1, inclusive (numbered from 0 when calling from C).  For a simple function that works with all data types, see fullArrayMinMaxMean.

float imageSubareaMean(void *array, int type, int nxDim, int ix0, int ix1, int iy0, int iy1)

Returns mean from data of MRC_MODE... type in array, with X dimension nxDim, for X indices from ix0 to ix1 and Y indices from iy0 to iy1, inclusive.  Returns 0 for an unsupported data type (not float, short, unsigned short, or byte).

void iclden(float *array, int *nx, int *ny, int *ix0, int *ix1, int *iy0, int *iy1, float *dmin, float *dmax, float *dmean)

Fortran wrapper for arrayMinMaxMean with ix0, ix1, iy0, and iy1 numbered from 1 instead of 0.

void arrayMinMaxMeanSd(float *array, int nx, int ny, int ix0, int ix1, int iy0, int iy1, float *dmin, float *dmax, double *sumDbl, double *sumSqDbl, float *avg, float *SD)

Computes the minimum dmin, maximum dmax, mean avg, and standard deviation SD from data in array, dimensioned nx by ny, for X indices from ix0 to ix1 and Y indices from iy0 to iy1, inclusive (numbered from 0 when calling from C).  It also returns the sum in sumDbl and sum of squares in sumSqDbl.  It makes a rough estimate of the image mean and sums the deviations from that estimate, then computes the SD from that sum and the actual mean.  This gives an accurate SD value when the SD is much smaller than the mean.  The returned sum of squares is then recomputed from the SD and mean.

void iclavgsd(float *array, int *nx, int *ny, int *ix0, int *ix1, int *iy0, int *iy1, float *dmin, float *dmax, double *sumDbl, double *sumSqDbl, float *avg, float *SD)

Fortran wrapper for arrayMinMaxMeanSd with ix0, ix1, iy0, and iy1 numbered from 1 instead of 0.

void scaleArrayForMode(float *array, int nxDim, int mode, int nx1, int nx2, int ny1, int ny2, float *dmin, float *dmax, float *dmean)

Scales densities in a subarea of array appropriately for the given mode, which must be 0, 1, 6, or 2, and returns min, max, and mean densities in dmin, dmax, and dmean.  The values will range from 0 to 255 or to maximum value for modes 1 and 6.  The X dimension of the array is in nxDim and the starting and ending coordinates of the subarea are in nx1, nx2, ny1, and ny2, numbered from zero.

void isetdn(float *array, int *nxDim, int *nyDim, int *mode, int *nx1, int *nx2, int *ny1, int *ny2, float *dmin, float *dmax, float *dmean)

Fortran wrapper to scaleArrayForMode with coordinates numbered from 1 for consistency with iclden.

void lsFit(float *x, float *y, int num, float *slope, float *intcp, float *ro)

Fits a straight line to the n points in arrays x and y by the method of least squares, returning slope, intercept bint, correlation coeficient ro.

void lsfit(float *x, float *y, int *num, float *slope, float *intcp, float *ro)

Fortran wrapper for lsFit

void lsFitPred(float *x, float *y, int n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se, float xpred, float *ypred, float *prederr)

Fits a straight line to the n points in arrays x and y by the method of least squares, returning slope, intercept bint, correlation coeficient ro, standard errors of the estimate se, the slope sb, and the intercept sa, and for one X value xpred, it returns the predicted value ypred and the standard error of the prediction prederr.

void lsfitpred(float *x, float *y, int *n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se, float *xpred, float *ypred, float *prederr)

Fortran wrapper for lsFitPred

void lsfits(float *x, float *y, int *n, float *slope, float *bint, float *ro, float *sa, float *sb, float *se)

Fortran wrapper for lsFitPred that returns the standard errors and omits the prediction

void lsFit2(float *x1, float *x2, float *y, int n, float *a, float *b, float *c)

Does a linear regression fit of the n values in the array y to the values in the arrays x1 and x2, namely to the equation
   y = a * x1 + b * x2 + c   
It returns the coefficients a and c, and the intercept c. If c is NULL it fits instead to
   y = a * x1 + b * x2

void lsfit2(float *x1, float *x2, float *y, int *n, float *a, float *b, float *c)

Fortran wrapper for lsFit2

void lsfit2noc(float *x1, float *x2, float *y, int *n, float *a, float *b)

Fortran wrapper for calling lsFit2 with c NULL.

void lsFit2Pred(float *x1, float *x2, float *y, int n, float *a, float *b, float *c, float x1pred, float x2pred, float *ypred, float *prederr)

Does a linear regression fit of the n values in the array y to the values in the arrays x1 and x2, namely to the equation
   y = a * x1 + b * x2 + c   
It returns the coefficients a and b, and the intercept c, but if c is NULL it fits instead to
   y = a * x1 + b * x2   
For one value of x1 and x2 given by x1pred and x2pred, it returns the value predicted by the equation in ypred and the standard error of the prediction in prederr.

void lsfit2pred(float *x1, float *x2, float *y, int *n, float *a, float *b, float *c, float *x1pred, float *x2pred, float *ypred, float *prederr)

Fortran wrapper for lsFit2Pred

void lsFit3(float *x1, float *x2, float *x3, float *y, int n, float *a1, float *a2, float *a3, float *c)

Does a linear regression fit of the n values in the array y to the values in the arrays x1, x2, and x3, namely, to the equation
   y = a1 * x1 + a2 * x2 + a3 * x3 + c   
It returns the coefficients a1, a2, a3 and the intercept c.

void lsfit3(float *x1, float *x2, float *x3, float *y, int *n, float *a1, float *a2, float *a3, float *c)

Fortran wrapper for lsFit3

void eigenSort(double *val, double *vec, int n, int rowStride, int colStride, int useAbs)

Sorts eigenvalues in val into descending order and rearranges their eigenvectors in vec so that they still correspond.  n is the number of dimensions, rowStride is the index step between succcessive elements of an eigenvector, and colStride is the index step between successive eigenvectors.  Set useAbs nonzero to sort on the absolute value of the eigenvalues.  For eigenvectors from LAPACK, set rowStride to 1 and colStride to the leading dimension of the vector array; for eigenvectors from dsyevh3 and associated routines, set rowStride to n and colStride to 1.

Statistical Functions

double tValue(double signif, int ndf)

Returns the t-value that gives the significance level signif with the number of degrees of freedom ndf, where signif should be between 0.5 and 1.0.

double dtvalue(double *signif, int *ndf)

Fortran wrapper for tValue

double fValue(double signif, int ndf1, int ndf2)

Returns the F-value that gives the cumulative probability value signif with the number of degrees of freedom ndf1 and ndf2.

double dfvalue(double *signif, int *ndf1, int *ndf2)

Fortran wrapper for fValue

double errFunc(double x)

Returns the value of the error function erf() at x.

double errfunc(double *x)

Fortran wrapper for errFunc

double incompBeta(double a, double b, double x)

Computes and returns the incomplete beta function of x for parameters a and b, for 0 <= x <= 1, and a > 0 and b > 0.

double incompbeta(double *a, double *b, double *x)

Fortran wrapper for incompBeta

double betaFunc(double p, double q)

Computes and returns the beta function of p and q, which must be > 0.

double lnGamma(double x)

Computes and returns the natural log of the gamma function of x, which should not equal 0, -1, -2, etc.

float gaussianDeviate(int seed)

Generates and returns a random Gaussian deviate with mean of 0 and variance of 1 using the Box-Mueller transform.  On the first call, or whenever seed does not match the last passed value, seed is used to seed the random number generator.  A value less than or equal to zero will make it get a random seed from the time in seconds.

void gaussiandeviate(float *value, int *seed)

Fortran wrapper for gaussianDeviate

void twoGaussianDeviates(float *value1, float *value2, int *pseudo)

Generates and returns two Gaussian deviates in value1 and value2 with mean of 0 and a variance of 1 using the Box-Mueller transform.  Random values are generated from pseudo with a linear congruential generator with a period of 2
20; thus pseudo should initially be between 0 and 1048575.  If it is 0, it will be initialized from the current time.  Unlike gaussianDeviate, this routine is thread-safe.