/* * framealign.h -- header file for framealign * */ #ifndef FRAMEALIGN_H #define FRAMEALIGN_H // Define macro for import of gpuframe functions under Windows #ifndef DLL_EX_IM #if defined(_WIN32) && defined(DELAY_LOAD_FGPU) #define DLL_EX_IM _declspec(dllimport) #else #define DLL_EX_IM #endif #endif #include #include #include "CorrectDefects.h" #include "gpuframe.h" #define MAX_ALL_VS_ALL 100 #define MAX_FILTERS 6 #define GPU_FOR_SUMMING (1) #define GPU_DO_EVEN_ODD (1 << 1) #define GPU_FOR_ALIGNING (1 << 2) #define GPU_DO_NOISE_TAPER (1 << 3) #define GPU_DO_BIN_PAD (1 << 4) #define STACK_FULL_ON_GPU (1 << 5) #define GPU_DO_GAIN_NORM (1 << 6) #define GPU_CORRECT_DEFECTS (1 << 7) #define GPU_DO_PREPROCESS (1 << 8) #define GPU_STACK_LIMITED (1 << 9) #define GPU_DO_UNWGT_SUM (1 << 10) #define GPU_AVG_SUPER_2X (1 << 11) #define GPU_AVG_SUPER_4X (1 << 12) #define GPU_RUN_SHRMEMFRAME (1 << 15) #define GPU_STACK_LIM_SHIFT 20 #define GPU_STACK_LIM_MASK 0xFFF #define GPU_NUMBER_SHIFT 16 // This is used by SerialEM -> SerialEMCCD #define GPU_NUMBER_MASK 0x7 // in calling SetupFrameAligning class FrameAlign { public: FrameAlign(void); ~FrameAlign(void); int initialize(int binSum, int binAlign, float trimFrac, int numAllVsAll, int cumAlignAtEnd, int useHybrid, int deferSum, int groupSize, int nx, int ny, float padFrac, float taperFrac, int antiFiltType, float radius1, float *radius2, float sigma1, float *sigma2, int numFilters, int maxShift, float kFactor, float maxMaxWeight, int summingMode, int expectedZ, int makeUnwgtSum, int gpuFlags, int debug); int setupDoseWeighting(float priorDose, float *frameDoses, float pixelSize, float critScale, float aFac, float bFac, float cFac, float *reweightFilt, int &filtSize); int gpuAvailable(int nGPU, float *memory, int debug); int nextFrame(void *frame, int type, float *gainRef, int nxGain, int nyGain, void *darkRef, float truncLimit, CameraDefects *defects, int camSizeX, int camSizeY, int defBin, float shiftX, float shiftY); int finishAlignAndSum(float refineRadius2, float refineSigma2, float iterCrit, int groupRefine, int doSpline, float *alisum, float *xShifts, float *yShifts, float *rawXshifts, float *rawYshifts, float *ringCorrs, float deltaR, int &bestFilt, float *smoothDist, float *rawDist, float *resMean, float *resSD, float *meanResMax, float *maxResMax, float *meanRawMax, float *maxRawMax, float *evenSum = NULL, float *oddSum = NULL); int getUnweightedSum(float *nonDWsum); void cleanup(void); int splineSmooth(float *xShifts, float *yShifts, int numShifts, float *smoothedX, float *smoothedY, float &splineDist); void setPrintFunc(CharArgType func); void analyzeFRCcrossings(float *ringCorrs, float frcDelta, float &halfCross, float &quartCross, float &eighthCross, float &halfNyq); float *getFullWorkArray() {return mWorkFullSize;}; static void getPadSizesBytes(int nx, int ny, float fullTaperFrac, int sumBin, int alignBin, float &fullPadSize, float &sumPadSize, float &alignPadSize); static void gpuMemoryNeeds(float fullPadSize, float sumPadSize, float alignPadSize, int numAllVsAll, int nzAlign, int refineAtEnd, int groupSize, float &needForGpuSum, float &needForGpuAli); static float totalMemoryNeeds(float fullPadSize, int fullDataSize, float sumPadSize, float alignPadSize, int numAllVsAll, int nzAlign, int refineAtEnd, int numBinTests, int numFiltTests, int hybridShifts, int groupSize, int doSpline, int gpuFlags, int deferSum, int testMode, int startAssess, bool &sumInOnePass, int &numHoldFull); static float findPreprocPadGpuFlags(int unpaddedX, int unpaddedY, int dataType, int binning, bool hasGain, bool hasDefect, bool hasTrunc, int numExpected, float freeMem, float stackMargin, int inFlags, int &outFlags); int setTruncationLimit(void *array, int nx, int ny, int useMode, float truncLimit, float &truncUse); int getPaddedSumSize() {return (mSumXpad + 2) * mSumYpad;}; private: void doRegression(bool doRobust, int filt, int fitInd, std::set &dropSet); int alignTwoFrames(int refInd, int binInd, float nearXshift, float nearYshift, int filtInd, float &xShift, float &yShift, bool filterSubarea, bool dump); int leastCommonMultiple(int num1, int num2); int addToSums(float *fullArr, int sumInd, int binInd, int frameNum, int filtInd = -1); void findAllVsAllAlignment(bool justForLimits); void adjustAndPushShifts(int topInd, int filt, int useFilt); void wrapImage(float *bufFrom, int nxDimFrom, int nxFrom, int nyFrom, float *bufTo, int nxDimTo, int nxTo, int nyTo, int xOffset, int yOffset); int testAndCleanup(bool failed); float smoothedTotalDistance(float *xShifts, float *yShifts, int numShifts, float &rawDist, float *xSmoothed = NULL, float *ySmoothed = NULL, double *variance = NULL); float surroundingMean(void *frame, int type, float truncLimit, int ix, int iy); void frameShiftFromGroups(int frame, int filt, float &shiftX, float &shiftY); void getAllFrameShifts(FloatVec &frameXshift, FloatVec &frameYshift, int useFilt); int prepareToFetchAlignFFTs(int aliFrameInd); void filterAndAddToSum(float *fft, float *array, int nx, int ny, float *ctf, float delta); void preProcessFrame(void *frame, void *darkRef, int defBin, float *fOut); static bool preprocPadGpuMemoryFits(int unpaddedX, int unpaddedY, int dataType, int binning, bool hasGain, bool hasDefect, bool hasTrunc, bool doPreProc, bool doNoise, bool doBinPad, float freeMem, float &needsMem); int recoverGpuFullStack(bool stacking, float **binArr); int recoverGpuAlignFFTs(bool saving, int aliFrameInd, float *alignSum, float *workArr, float **binArr, bool stacking); int recoverFromSummingFailure(float **fullArr, int frameInd, int sumInd); void cancelInitialStepsOnGPU(); CharArgType mPrintFunc; float *mFullEvenSum; float *mFullOddSum; float *mUnweightSum; float *mAlignSum; float *mWorkFullSize; std::vectormSavedFullSize; // Full noise taper padded arrays saved for sum IntVec mSavedFullFrameNum; // List of frames saved here std::vectormSavedBinPad; // Stack of binned, padded arrays for aligning std::vectormSavedGroups; // Stack of groups sums for aligning float *mWorkBinPad; float *mCorrBinPad; float *mCorrFiltTemp; float *mReduceTemp; float *mShiftTemp; unsigned char **mLinePtrs; float *mFitMat; float *mFitWork; float *mSubFiltMask[MAX_FILTERS]; float *mFullFiltMask; float *mTempSubFilt; float *mWrapTemp; FloatVec mXshifts[MAX_FILTERS + 1]; FloatVec mYshifts[MAX_FILTERS + 1]; FloatVec mXallShifts[MAX_FILTERS]; FloatVec mYallShifts[MAX_FILTERS]; float mXfitShifts[MAX_FILTERS + 1][MAX_ALL_VS_ALL]; float mYfitShifts[MAX_FILTERS + 1][MAX_ALL_VS_ALL]; float mXnearShifts[MAX_ALL_VS_ALL]; float mYnearShifts[MAX_ALL_VS_ALL]; float mLastXfit[MAX_FILTERS + 1][MAX_ALL_VS_ALL]; float mLastYfit[MAX_FILTERS + 1][MAX_ALL_VS_ALL]; float mCumulXdiff[MAX_FILTERS + 1], mCumulYdiff[MAX_FILTERS + 1]; int mNumAsBestFilt[MAX_FILTERS]; float mRadius2[MAX_FILTERS]; float mSigma2[MAX_FILTERS]; int mGpuFlags; int mFlagsForUnpadCall; // Save the flags for consistency in the two calls int mEvenOddForSumSetup; // Similarly here for setupSumming calls int mNxGain, mNyGain; // Saved size of last gain reference float *mGainRef; // Pointer to last gain ref int mCamSizeX, mCamSizeY; // Camera size non-zero for defect correction CameraDefects *mCamDefects; // Pointer to last defects structure float mTruncLimit; // Truncation limit int mDefectBin; // Binning of defects int mNoiseLength; // Length for noise-taper-pad int mFrameType; // Original type of frames int mStackType; // Type of images saved in full image stack int mNx, mNy; // Original size of image int mAntiFiltType; // Type of filter for antialias reduction int mXstart, mXend; // Starting and ending coordinates of region to trim int mYstart, mYend; int mFullXpad, mFullYpad; // Padded size of full images int mSumXpad, mSumYpad; // Padded size of sum(s) int mAlignXpad, mAlignYpad; // Padded size of align images int mAlignPix, mAlignBytes; // Number of pixels/bytes in align images including + 2 int mXreduceTemp, mYreduceTemp; int mAliFiltSize; float mTrimFrac, mTaperFrac; // Trim fraction and taper fraction for align int mSummingMode; // 1 for align only, -1 for sum only, 0 for both int mUseHybrid; int mNumFrames; int mNumFullSaved; // Number of full frames saved in CPU stack int mBinSum, mBinAlign; int mNumAllVsAll; int mNumFilters; int mBestFilt; float mMaxMaxWeight; int mMaxShift; int mDumpInd; double mWallFullFFT, mWallBinPad, mWallBinFFT, mWallReduce, mWallShift, mWallStart; double mWallFilter, mWallConjProd, mWallPreProc, mWallNoise; int mDebug; int mMakeUnwgtSum; // Flag to make a non dose-weighted sum bool mUnwgtOnGpu; // Flag to make it on the GPU bool mDumpCorrs; bool mDumpRefCorrs; bool mDumpEvenOdd; bool mPickedBestFilt; bool mDeferSumming; // Flag that summing is done entirely at end bool mGpuAligning; // Flag that aligning is on the GPU bool mGpuSumming; // Flag that summing is on the GPU bool mNoisePadOnGpu; // Flag to do noise pad on GPU bool mBinPadOnGpu; // Flag to do align bin pad on GPU bool mStackUnpadOnGpu; // Flag to stack unpadded normed array on GPU int mGpuStackLimit; // Limit to # to stack on GPU int mNumStackedOnGpu; // Number currently stacked on GPU int mGroupSize, mGroupSizeInitial; float mKfactor; int mCumAlignAtEnd; float mPickRatioCrit; float mPickDiffCrit; int mFailedOftenCrit; int mNumFits; bool mReportTimes; float mResMeanSum[MAX_FILTERS+ 1], mResSDsum[MAX_FILTERS+ 1]; float mResMaxSum[MAX_FILTERS+ 1], mMaxResMax[MAX_FILTERS+ 1]; float mMaxRawMax[MAX_FILTERS+ 1], mRawMaxSum[MAX_FILTERS+ 1]; float mPredMeanSum[MAX_FILTERS + 1]; float mFiltFunc[8193], mFiltDelta; int mGpuLibLoaded; // If FrameGPU loaded: -1 untested, 0 not available, 1 yes, int mNumExpectedFrames; bool mDoingDoseWeighting; // Overall flag for dose weighting FloatVec mFrameDoses; // List of doses per frame FloatVec mDoseWgtFilter; // Current filter float mPriorDoseCum; // Accumulated prior dose float mPixelSize; // Pixel size for these calculations float mCritDoseScale; // Scaling factor (0.8 for 200 KV, etc) float mCritDoseAfac, mCritDoseBfac, mCritDoseCfac; // Parameters float mDWFdelta; // Scaling from pixel to frequency in /pixel FloatVec mReweightFilt; // Filter to multiple by for reweighting }; #endif