/*=================================================================
 *
 *  RankSum.h
 *  Author: Andrew Magis
 *  Calculate Wilcoxon rank sum test on matrix of gene expression data
 *
 * Inputs: expression data, list of class labels, abs_value toggle.  The algorithm
 * assumes that class label 0 is normal.  If abs_value toggle == 1, the absolute
 * value of each test statistic is taken before the statistics are sorted.
 * Outputs: unsorted list of scores, sorted list of directional Wilcoxon scores,
 * and indices into the original gene list
 *=================================================================*/

#ifndef _RANKSUM_H
#define	_RANKSUM_H
 
#include <math.h>
#include <vector>
#include <algorithm>

/**
 * Compute a Wilcoxon rank-sum z-score for every gene of an expression matrix.
 *
 * For each gene the samples are ranked with tiedrank() (defined elsewhere;
 * presumably it assigns averaged ranks to ties -- confirm against its
 * definition), the ranks of all samples whose class label is non-zero are
 * summed, and that sum is standardised with the normal approximation
 * (mean nb*(N+1)/2, variance na*nb*(N+1)/12) using a 0.5 continuity
 * correction towards the mean.
 *
 * @param data      Expression values, read as data[m1*j + i] for gene i and
 *                  sample j (gene index varies fastest).
 * @param m1        Number of genes.
 * @param n1        Number of samples; must equal classes.size().
 * @param classes   One label per sample. Label 0 is the reference group;
 *                  every non-zero label is pooled into the scored group.
 * @param abs_value When true, ABSMACRO is applied to each z-score before
 *                  the scores are stored and sorted.
 *
 * @return A vector of three arrays, each holding m1 elements and allocated
 *         with new[] (the caller owns them and must delete[] each one):
 *           [0] z-scores in original gene order,
 *           [1] z-scores sorted in descending order (ties: larger gene
 *               index first),
 *           [2] an int array, returned through a float* cast, holding the
 *               gene index of each entry in [1]; cast it back to int*
 *               before reading.
 *
 * @throws GeneralException if n1 != classes.size().
 *
 * NOTE(review): this function is defined in a header without `inline`;
 * including the header from more than one translation unit would produce
 * duplicate definitions. Left unchanged because the rest of the project is
 * not visible from here.
 */
std::vector<float*> RankSum(float *data, unsigned int &m1, unsigned int &n1, std::vector<float> classes, bool abs_value) {

	if (n1 != classes.size()) {
		throw GeneralException("Number of samples != number of class labels\n", "RankSum");
	}

	const unsigned int num_genes = m1;
	const unsigned int num_samples = n1;

	// Group sizes. The scored group is every sample with a non-zero label,
	// which is exactly the set whose ranks are summed below, so it is counted
	// here rather than derived by adding up the label values.
	float nb = 0.f;
	for (unsigned int j = 0; j < num_samples; j++) {
		if (classes[j] != 0) {
			nb += 1.f;
		}
	}
	const float na = num_samples - nb;

	// Mean and standard deviation of the rank sum of the scored group under
	// the null hypothesis. They do not depend on the gene, so compute once.
	const float mub = (nb*(num_samples+1.f)) / 2.f;
	const float sigma = sqrt((na*nb*(num_samples+1.f)) / 12.f);

	std::vector< std::pair<float, int> > wilcox_scores;
	wilcox_scores.reserve(num_genes);

	for (unsigned int i = 0; i < num_genes; i++) {

		// Fresh, zero-filled buffers per gene: tiedrank is defined elsewhere
		// and may rely on the initial contents of its output argument.
		std::vector<float> col(num_samples, 0);
		std::vector<float> ranks(num_samples, 0);

		// Gather the expression values of gene i across all samples
		for (unsigned int j = 0; j < num_samples; j++) {
			col[j] = data[num_genes*j+i];
		}

		tiedrank(col, ranks);

		// Rank sum of the scored (non-zero label) group. No statistic is
		// computed for the reference group.
		float Tb = 0.f;
		for (unsigned int j = 0; j < num_samples; j++) {
			if (classes[j] != 0) {
				Tb += ranks[j];
			}
		}

		// Continuity-corrected z-score. When Tb equals the mean the score
		// stays 0, which also avoids dividing by sigma in that case.
		float zb = 0.f;
		if (Tb > mub) {
			zb = (Tb - mub - 0.5f) / sigma;
		} else if (Tb < mub) {
			zb = (Tb - mub + 0.5f) / sigma;
		}

		if (abs_value) {
			zb = ABSMACRO(zb);
		}

		// Keep the gene index next to its score so it survives the sort
		wilcox_scores.push_back(std::pair<float, int>(zb, static_cast<int>(i)));
	}

	if (wilcox_scores.size() != num_genes) {
		throw GeneralException("Error! Incorrect number of wilcox scores calculated\n", "RankSum");
	}

	// Reserve the result vector first so the push_backs at the end cannot
	// throw once the raw arrays exist.
	std::vector<float*> pointers;
	pointers.reserve(3);

	// Allocate the outputs only now that every score has been computed, so an
	// exception raised earlier cannot leak them. If one of the allocations
	// fails, release whatever was already obtained before propagating.
	float *unsorted = 0;
	float *wilcox = 0;
	int *index = 0;
	try {
		unsorted = new float[num_genes];
		wilcox = new float[num_genes];
		index = new int[num_genes];
	} catch (...) {
		delete[] unsorted;
		delete[] wilcox;
		delete[] index;
		throw;
	}

	// Scores in original gene order (must be copied before sorting)
	for (unsigned int i = 0; i < num_genes; i++) {
		unsorted[i] = wilcox_scores[i].first;
	}

	// Sorting over reverse iterators yields descending order in place
	std::sort(wilcox_scores.rbegin(), wilcox_scores.rend());

	for (unsigned int i = 0; i < num_genes; i++) {
		wilcox[i] = wilcox_scores[i].first;
		index[i] = wilcox_scores[i].second;
	}

	pointers.push_back(unsorted);
	pointers.push_back(wilcox);
	// The index array travels through the float* return type; callers are
	// expected to cast it back to int*.
	pointers.push_back(reinterpret_cast<float*>(index));
	return pointers;

}

#endif
