#include <string.h>
#include <math.h>
#include "mex.h"

/*
 * For every w-by-h window of the label image whose top-left corner lies in
 * rows [0, m-w) and columns [0, n-h), store the sum of squared histogram bin
 * counts of that window in ssq[row + col*m] (column-major, m rows).
 *
 * labels  : m-by-n image of 1-based bin indices, all within 1..nbins
 * rcount  : scratch histogram with nbins entries (overwritten)
 * ssq     : m-by-n output; entries outside the range above are left untouched
 *
 * The histogram is rebuilt once per row and then slid one column at a time,
 * updating the sum of squares incrementally.  The sum is kept in double so it
 * cannot overflow for large windows (counts up to w*h, squares up to (w*h)^2).
 */
static void window_hist_sumsq(const int *labels, int m, int n, int w, int h,
                              int nbins, int *rcount, double *ssq)
{
    int j, k, jj, kk, b;
    double s;

    for (j = 0; j < m - w; j++) {
        /* Histogram of the first window in this row (columns 0..h-1). */
        memset(rcount, 0, (size_t) nbins * sizeof *rcount);
        for (kk = 0; kk < h; kk++) {
            for (jj = j; jj < j + w; jj++) {
                rcount[labels[jj + (size_t) kk * m] - 1]++;
            }
        }
        s = 0.0;
        for (b = 0; b < nbins; b++) {
            s += (double) rcount[b] * (double) rcount[b];
        }
        ssq[j] = s;

        /* Slide right: drop column k-1, add column k+h-1. */
        for (k = 1; k < n - h; k++) {
            for (jj = j; jj < j + w; jj++) {
                b = labels[jj + (size_t) (k - 1) * m] - 1;
                /* (c-1)^2 - c^2 = -(2c - 1) */
                s -= 2.0 * rcount[b] - 1.0;
                rcount[b]--;
            }
            for (jj = j; jj < j + w; jj++) {
                b = labels[jj + (size_t) (k + h - 1) * m] - 1;
                rcount[b]++;
                /* c^2 - (c-1)^2 = 2c - 1, with c the count after increment */
                s += 2.0 * rcount[b] - 1.0;
            }
            ssq[j + (size_t) k * m] = s;
        }
    }
}

/*
 * Build the integral image (summed-area table) of ratio[label - 1] over the
 * m-by-n label image: on return integral[j + k*m] holds the sum of the ratio
 * values of all pixels in rows 0..j and columns 0..k.
 */
static void build_ratio_integral(const double *ratio, const int *labels,
                                 int m, int n, double *integral)
{
    int j, k;
    size_t idx;

    for (k = 0; k < n; k++) {
        for (j = 0; j < m; j++) {
            idx = j + (size_t) k * m;
            integral[idx] = ratio[labels[idx] - 1];
        }
    }
    /* Running sum along the column index ... */
    for (k = 1; k < n; k++) {
        for (j = 0; j < m; j++) {
            idx = j + (size_t) k * m;
            integral[idx] += integral[idx - (size_t) m];
        }
    }
    /* ... then along the row index. */
    for (k = 0; k < n; k++) {
        for (j = 1; j < m; j++) {
            idx = j + (size_t) k * m;
            integral[idx] += integral[idx - 1];
        }
    }
}

/*
 * MEX gateway:  support = f(ratio, hI, hI2)
 *
 *   prhs[0] ratio : real double vector with one value per histogram bin
 *                   (nbins = number of rows, or number of columns when the
 *                   input has a single row).
 *   prhs[1] hI    : template; only its size (w rows, h columns) is used.
 *   prhs[2] hI2   : m-by-n int32 image of 1-based bin indices in 1..nbins.
 *
 *   plhs[0] support : m-by-n double map, zero everywhere except at the
 *                   centres (j + w/2, k + h/2) of the evaluated windows,
 *                   where it holds
 *                       1 - 0.5 * sum_b (c_b/(w*h) - ratio_b)^2
 *                   with c_b the bin counts of the w-by-h window whose
 *                   top-left corner is (j, k).  Windows closer than 10
 *                   pixels to the image border are not evaluated.
 *
 * Errors are raised through mexErrMsgTxt for a wrong argument count, wrong
 * argument classes, or bin indices outside 1..nbins.
 */
void mexFunction(int nlhs, mxArray *plhs[ ], 
        int nrhs, const mxArray *prhs[ ]) 
{
    const int border = 10;      /* margin (in pixels) skipped on every side */
    int j, k, b;
    int m, n, h, w, nbins;
    size_t npix, p;
    double area, s1, s2, ratio_ssum;
    const double *ratio;
    const int *hI2;
    double *support;
    double *hist_ssq;
    double *integral;
    int *rcount;

    (void) nlhs;

    if (nrhs != 3) 
        mexErrMsgTxt("The number of input arguments must be three.");

    /* The raw data pointers below are only meaningful for these classes. */
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || mxIsSparse(prhs[0]))
        mexErrMsgTxt("The first argument must be a real, full double array.");
    if (!mxIsInt32(prhs[2]) && !mxIsUint32(prhs[2]))
        mexErrMsgTxt("The third argument must be an int32 array of bin indices.");

    nbins = (int) mxGetM(prhs[0]);
    if (nbins == 1)
        nbins = (int) mxGetN(prhs[0]);
    ratio = mxGetPr(prhs[0]);

    /* template size (hI) */
    w = (int) mxGetM(prhs[1]);
    h = (int) mxGetN(prhs[1]);

    /* test image size (hI2) */
    m = (int) mxGetM(prhs[2]);
    n = (int) mxGetN(prhs[2]);
    hI2 = (const int *) mxGetData(prhs[2]);
    npix = (size_t) m * (size_t) n;

    /* Create the support map */
    plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL); /* initialized 0 */
    support = mxGetPr(plhs[0]);

    /* Every label indexes rcount[] and ratio[]; reject anything out of range
     * (uint32 values above INT_MAX show up as negative here and are caught). */
    for (p = 0; p < npix; p++) {
        if (hI2[p] < 1 || hI2[p] > nbins)
            mexErrMsgTxt("Bin indices in the third argument must lie in 1..nbins.");
    }

    /* Empty template, or template that does not fit: no window to evaluate.
     * This also keeps the first-window scan from reading past column n-1. */
    if (w < 1 || h < 1 || w > m || h > n)
        return;

    /* mxCalloc zero-initialises and aborts the MEX call itself on failure. */
    hist_ssq = (double *) mxCalloc(npix, sizeof *hist_ssq);
    rcount = (int *) mxCalloc((size_t) nbins, sizeof *rcount);
    integral = (double *) mxCalloc(npix, sizeof *integral);

    window_hist_sumsq(hI2, m, n, w, h, nbins, rcount, hist_ssq);
    build_ratio_integral(ratio, hI2, m, n, integral);

    ratio_ssum = 0;
    for (b = 0; b < nbins; b++) {
        ratio_ssum += ratio[b] * ratio[b];
    }

    /* Pixel count of one window, in double so area*area cannot overflow. */
    area = (double) w * (double) h;

    for (k = border; k < n - h - border; k++) {
        for (j = border; j < m - w - border; j++) {
            /* Sum of ratio over rows j..j+w-1, columns k..k+h-1, i.e. the
             * same window the histogram covers.  j-1 and k-1 are valid
             * because j, k >= border > 0. */
            s1 = integral[(j + w - 1) + (size_t) (k + h - 1) * m]
                    - integral[(j - 1) + (size_t) (k + h - 1) * m]
                    - integral[(j + w - 1) + (size_t) (k - 1) * m]
                    + integral[(j - 1) + (size_t) (k - 1) * m];
            s2 = hist_ssq[j + (size_t) k * m];
            support[(j + w / 2) + (size_t) (k + h / 2) * m] =
                    1 - 0.5 * (s2 / (area * area) - 2 * s1 / area + ratio_ssum);
        }
    }

    mxFree(rcount);
    mxFree(hist_ssq);
    mxFree(integral);
}         
