#include <string.h>
#include <math.h>
#include "mex.h"

/* Width of the image border (in pixels) in which no patch is placed. */
#define SUPPORT_BORDER 10

/*
 * Return the zero-based histogram bin of the pixel stored at linear index
 * idx of the label image.  Labels are one-based in the image; a label
 * outside 1..nbins would index past the count/weight arrays, so it is
 * reported as an error instead.
 */
static int bin_at(const int *labels, int idx, int nbins)
{
    int b = labels[idx] - 1;

    if (b < 0 || b >= nbins)
        mexErrMsgTxt("Bin indices in the test image must lie in 1..nbins.");
    return b;
}

/*
 * MEX gateway: sliding-window histogram support map.
 *
 * Inputs (exactly three):
 *   prhs[0]  real, full double vector p with one weight per histogram bin
 *            (its length is taken as nbins).
 *   prhs[1]  template; only its size is used (w rows by h columns).
 *   prhs[2]  32-bit integer matrix (m rows by n columns) holding a
 *            one-based bin index for every pixel of the test image.
 *
 * Output:
 *   plhs[0]  m-by-n double matrix.  For every w-by-h patch placed at least
 *            SUPPORT_BORDER pixels away from the image border, the entry at
 *            the patch centre is
 *                sum_b p[b] * sqrt(count_b) / sqrt(w*h)
 *            where count_b is the number of patch pixels falling in bin b.
 *            All other entries are zero.
 *
 * For each row position the first patch is scored directly; the following
 * patches along that row are scored incrementally by removing the column
 * that leaves the window and adding the column that enters it.
 */
void mexFunction(int nlhs, mxArray *plhs[ ], 
        int nrhs, const mxArray *prhs[ ]) 
{
    int i, j, k, ii, jj, kk;
    int m, n, h, w, nbins, area;
    double *dsqrt, *sqrt_table;
    double s;
    int *rcount;
    const double *p;
    const int *hI2;
    double *support;

    if (nrhs != 3) 
        mexErrMsgTxt("The number of input arguments must be three.");

    /* The raw data pointers below are only meaningful for these classes. */
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || mxIsSparse(prhs[0]))
        mexErrMsgTxt("The first argument must be a real, full double vector.");
    if (!mxIsInt32(prhs[2]) && !mxIsUint32(prhs[2]))
        mexErrMsgTxt("The third argument must be an int32 (or uint32) matrix.");

    /* bin weights: accept either a column or a row vector */
    nbins = (int) mxGetM(prhs[0]);
    if (nbins == 1)
        nbins = (int) mxGetN(prhs[0]);
    p = mxGetPr(prhs[0]);

    /* template size (hI): w is the row count, h the column count */
    w = (int) mxGetM(prhs[1]);
    h = (int) mxGetN(prhs[1]);

    /* test image size (hI2) */
    m = (int) mxGetM(prhs[2]);
    n = (int) mxGetN(prhs[2]);
    hI2 = (const int *) mxGetData(prhs[2]);

    /* Create the support map */
    plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL); /* initialized 0 */
    support = mxGetPr(plhs[0]);

    /*
     * Nothing to score if the template is empty (this would also divide by
     * sqrt(0) below) or if even the first patch of a row does not fit
     * inside the image; the map stays all zero.
     */
    area = w*h;
    if (area == 0 || n < h + SUPPORT_BORDER)
        return;

    /*
     * Lookup tables:
     *   sqrt_table[c] = sqrt(c)                 for c = 0..area
     *   dsqrt[c]      = sqrt(c+1) - sqrt(c)     for c = 0..area-1
     */
    sqrt_table = (double *) mxCalloc(area + 1, sizeof(double));
    dsqrt = (double *) mxCalloc(area, sizeof(double));
    for (ii = 0; ii <= area; ii++) {
        sqrt_table[ii] = sqrt((double)ii);
    }
    for (ii = 0; ii < area; ii++) {
        dsqrt[ii] = sqrt_table[ii+1] - sqrt_table[ii];
    }

    rcount = (int *) mxCalloc(nbins, sizeof(int));

    for (j = SUPPORT_BORDER; j < m-w-SUPPORT_BORDER; j++) {
        /* first patch of this row: build the histogram from scratch */
        k = SUPPORT_BORDER;
        memset(rcount, 0, nbins*sizeof(int));

        for (jj = j; jj < j+w; jj++) {
            for (kk = k; kk < k+h; kk++) {
                i = bin_at(hI2, jj + kk*m, nbins);
                rcount[i]++;
            }
        }
        s = 0;
        for (ii = 0; ii < nbins; ii++) {
            s += p[ii]*sqrt_table[rcount[ii]];
        }
        support[j+(w/2) + (k + (h/2))*m] = s;

        /* remaining patches: slide the window one column at a time */
        for (k = SUPPORT_BORDER + 1; k < n-h-SUPPORT_BORDER; k++) {
            /*
             * Drop column k-1.  A bin going from count c to c-1 changes
             * the score by -p*(sqrt(c) - sqrt(c-1)) = -p*dsqrt[c-1], so
             * the count is decremented before the table lookup.
             */
            for (jj = j; jj < j+w; jj++) {
                i = bin_at(hI2, jj + (k-1)*m, nbins);
                rcount[i]--;
                s -= p[i]*dsqrt[rcount[i]];
            }
            /*
             * Add column k+h-1.  A bin going from count c to c+1 changes
             * the score by +p*(sqrt(c+1) - sqrt(c)) = +p*dsqrt[c], so the
             * table lookup happens before the increment.
             */
            for (jj = j; jj < j+w; jj++) {
                i = bin_at(hI2, jj + (k+h-1)*m, nbins);
                s += p[i]*dsqrt[rcount[i]];
                rcount[i]++;
            }
            support[j+(w/2) + (k + (h/2))*m] = s;
        }
    }

    /* normalize by sqrt(number of pixels in the template) */
    s = sqrt_table[area];
    for (j = 0; j < m*n; j++) {
        support[j] = support[j]/s;
    }

    mxFree(rcount);
    mxFree(sqrt_table);
    mxFree(dsqrt);
}
