/*=================================================================
 *
 *  FileParser.h
 *  Author: Andrew Magis
 *  Loads comma- and tab-delimited lists into a data structure and
 *  allows for manipulation of the data
 *
 *=================================================================*/

#ifndef _FILEPARSER_H
#define	_FILEPARSER_H

#include "Exception.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include "TiedRank.h"

/**
 * Converts a value of type V into type U by writing it to a
 * std::stringstream with operator<< and reading it back with operator>>.
 *
 * @param t  value to convert; must be insertable into an output stream
 * @return   the extracted value. When the stream has nothing to extract
 *           (for example the text is empty or only whitespace) the result
 *           is a value-initialized U: 0 for arithmetic types, empty for
 *           std::string.
 *
 * Note: operator>> stops at the first whitespace, so converting to
 * std::string yields only the first whitespace-delimited word.
 */
template <class U, class V>
U convert(const V &t) {
	std::stringstream stream;
	stream << t;

	// Value-initialize: when extraction cannot even start, operator>> leaves
	// its target untouched, and an indeterminate value must not be returned.
	U converted = U();
	stream >> converted;
	return converted;
}

/**
 * Reads a delimited text file into an in-memory table whose cells are of
 * type T, and offers row/column access plus a few array-export helpers.
 *
 * The file is read completely by the filename constructor. Lines are split
 * on tabs and commas; empty lines and lines starting with '#' or "//" are
 * ignored. Several helpers (GetData, GetClassNum, GetClassColOrder,
 * ConvertToRanks) treat stored row 0 specially: it is read as per-column
 * labels and skipped when values are exported.
 */
template <typename T = string>
class FileParser {

public:
    // Always throws GeneralException: a filename is required.
    FileParser();
    // Opens and parses _filename. row_header: the first token of each data
    // line is a row label. col_header: the first parsed line holds column
    // names. Throws GeneralException if the file cannot be opened or a line
    // has a different number of tokens than the first one.
    FileParser(string _filename, bool row_header = false, bool col_header = false);
    // Copies the filename, parsed data, headers and flags of orig.
    FileParser(const FileParser& orig);
    virtual ~FileParser();
    // Class name passed to GeneralException when reporting errors.
    string Class() { return "FileParser"; };
    // Writes the column names and every row (label first) tab-separated to stdout.
    void Print();

    // Copy of stored row `index`; throws GeneralException when out of range.
    std::vector<T> GetRow(int index);
    // Copy of column `index` across all stored rows; throws GeneralException
    // when out of range.
    std::vector<T> GetColumn(int index);
    // Copies of the row labels / column names collected while parsing.
    std::vector<string> GetRowHeader();
    std::vector<string> GetColHeader();
    // Number of stored data rows.
    int Rows();
    // Number of data columns (value of num_cols).
    int Cols();
	
	// Overwrites stored rows 1..Rows()-1 with the values returned by TiedRank.
	void ConvertToRanks();
	// Returns a new[]-allocated, column-major copy of the data with row 0
	// skipped. ConvertToRanks releases such a buffer with delete[].
	T* GetData();
	// Counts the entries of row 0 that equal `label` after conversion to int.
	int GetClassNum(int label);
	// Returns a new[]-allocated, column-major copy of the columns whose row-0
	// entry equals `label`, with row 0 skipped.
	T* GetClassColOrder(int label);
	// As above, but for each selected column copies only the `num` values at
	// stored rows indices[j]+1, in the order given by `indices`.
	T* GetClassColOrder(int label, int num, int *indices);

private:
    // Path given to the constructor; also used in error reports.
    string filename;
    // Stream used by the filename constructor to read the file.
    std::ifstream infile;
    // Parses one line of the file into cheader / rheader / data.
    void ParseLine(string line);
	// Splits str at any character of `delimiters`, appending pieces to tokens.
	void Tokenize(const string& str, std::vector<string>& tokens, const string& delimiters);
	// Return a copy of source with the characters of t removed from the
	// right end, the left end, or both ends respectively.
	std::string trim_right(const std::string &source , const std::string& t = " ");
	std::string trim_left( const std::string &source, const std::string& t = " ");
	std::string trim(const std::string& source, const std::string& t = " ");

    //vector of vectors to store the data in this file
    std::vector< std::vector<T> > data;
    // Column names and row labels (read from the file or synthesized).
    std::vector<string> cheader, rheader;
    // Expected token count per line; -1 until the first line has been parsed.
    int num_cols;
    // Copies of the constructor flags of the same name.
    bool row_header, col_header;

};

/**
 * Default constructor. A parser cannot do anything without a file to read,
 * so constructing one this way always fails.
 *
 * @throws GeneralException always
 */
template <typename T>
FileParser<T>::FileParser() {
    throw GeneralException("No filename provided in constructor!", Class());
}

/**
 * Copy constructor. Duplicates the filename, the parsed table, both header
 * lists and the header flags of `orig`.
 *
 * The input stream is not copied: `infile` does not appear in the
 * initializer list, so the new object gets a default-constructed stream.
 *
 * Initializers are listed in member declaration order, which is the order
 * in which they actually run.
 */
template <typename T>
FileParser<T>::FileParser(const FileParser& orig)
: filename(orig.filename),
  data(orig.data),
  cheader(orig.cheader),
  rheader(orig.rheader),
  num_cols(orig.num_cols),
  row_header(orig.row_header),
  col_header(orig.col_header)
{}

/**
 * Opens `_filename` and parses it line by line with ParseLine.
 *
 * @param _filename    path of the file to read
 * @param _row_header  true when the first token of each data line is a row label
 * @param _col_header  true when the first parsed line holds the column names
 * @throws GeneralException if the file cannot be opened, or (from ParseLine)
 *         if a line has a different number of tokens than the first one
 */
template <typename T>
FileParser<T>::FileParser(string _filename, bool _row_header, bool _col_header)
: filename(_filename), num_cols(-1), row_header(_row_header), col_header(_col_header) {

    infile.open(filename.c_str());
    //The following line breaks the Mac OSX version for some reason.
    //infile.exceptions(std::ifstream::badbit);
    if (!infile.is_open()) {
        throw GeneralException("File not found", filename, this->Class());
    }

    string line;
    while (std::getline(infile, line)) {
        ParseLine(line);
    }
    infile.close();

    // While parsing, num_cols counts every token on a line, including the
    // row label. Drop that one column now so num_cols is the data width.
    // Skip the adjustment when there is nothing to drop (no line was parsed
    // and num_cols is still -1, or lines had no tokens) so the value never
    // goes below its "nothing parsed" sentinel.
    if (row_header && num_cols > 0) {
        --num_cols;
    }
}

/**
 * Destructor. Performs no work of its own; the members release whatever
 * they hold.
 */
template <typename T>
FileParser<T>::~FileParser() {}

/**
 * Parses one line of the input file.
 *
 * Blank lines and lines starting with '#' or "//" are ignored. Any other
 * line is split on tabs and commas. The first such line fixes the expected
 * token count (num_cols); when col_header is set it is stored as the column
 * names and contributes no data row. Every other line is converted to a row
 * of T and appended to `data`, with its row label appended to `rheader`
 * (taken from the first token when row_header is set, otherwise synthesized
 * as "probe<N>").
 *
 * @param line  raw line as read from the file
 * @throws GeneralException if the token count differs from the first line
 *
 * NOTE(review): when col_header is false, one "sample<i>" name is generated
 * per token of the first line, which includes the row-label token when
 * row_header is set - confirm whether that extra name is intended.
 */
template <typename T>
void FileParser<T>::ParseLine(string line) {

    //Take away whitespace from either end. trim() returns the trimmed copy
    //and leaves its argument alone, so the result has to be assigned back.
    line = trim(line);

    //Now we have a line. If it is empty, ignore it
    if (line.empty()) return;

    //if it begins with #, ignore it
    if (line[0] == '#') return;

    //If it begins with //, ignore it
    if (line.compare(0, 2, "//") == 0) return;

    //Otherwise tokenize the line on tabs and commas
    std::vector<string> temp;
    Tokenize(line, temp, "\t,");

    if (num_cols == -1) {

        //Get the size of the first line. This should be consistent across all lines
        num_cols = temp.size();

        if (col_header) {
            //The first line holds the column names and no data
            for (std::vector<string>::iterator it = temp.begin(); it != temp.end(); ++it) {
                cheader.push_back(trim(*it));
            }
            return;
        }

        //No header line in the file: create temporary column names and
        //fall through to treat this line as the first data row
        for (int i = 0; i < num_cols; i++) {
            cheader.push_back("sample" + convert<std::string, int>(i));
        }

    } else if (num_cols != (int)temp.size()) {
        printf("Length of line %u is %u\n", (unsigned int)data.size(), (unsigned int)temp.size());
        throw GeneralException("Irregularly sized data file", filename, this->Class());
    }

    //Convert the tokens of this line into a row of T
    std::vector<T> cast;
    std::vector<string>::iterator it = temp.begin();
    if (it != temp.end()) {
        if (row_header) {
            //The first token is the row label, not a data value
            rheader.push_back(trim(*it));
            ++it;
        } else {
            //No label in the file: synthesize one from the 1-based row number
            rheader.push_back("probe" + convert<std::string, int>(data.size()+1));
        }
    }
    for (; it != temp.end(); ++it) {
        cast.push_back(convert<T, std::string>(trim(*it)));
    }

    //We can therefore add this to the vector of vectors
    data.push_back(cast);
}

/**
 * Returns a copy of one stored row.
 *
 * @param index  zero-based row number
 * @return       the values of that row
 * @throws GeneralException if index is negative or not less than the number
 *         of stored rows
 */
template <typename T>
std::vector<T> FileParser<T>::GetRow(int index) {
    typedef typename std::vector< std::vector<T> >::size_type row_index_t;

    const bool in_range = (index >= 0) && (static_cast<row_index_t>(index) < data.size());
    if (!in_range) {
        throw GeneralException("Row index out of bounds", this->Class());
    }
    return data[index];
}

/**
 * Builds and returns a copy of one column, top to bottom across all stored
 * rows. A new vector is assembled on every call.
 *
 * @param index  zero-based column number
 * @return       the value at position `index` of every stored row
 * @throws GeneralException if index is negative or not less than num_cols
 */
template <typename T>
std::vector<T> FileParser<T>::GetColumn(int index) {
    if (index < 0 || index >= num_cols) {
        throw GeneralException("Column index out of bounds", this->Class());
    }

    typedef typename std::vector< std::vector<T> >::iterator row_iterator;

    std::vector<T> column;
    for (row_iterator row = data.begin(); row != data.end(); ++row) {
        column.push_back((*row)[index]);
    }
    return column;
}

/**
 * @return the number of rows currently stored in `data`
 */
template <typename T>
int FileParser<T>::Rows() {
    return static_cast<int>(data.size());
}

/**
 * @return the current value of num_cols, the number of data columns
 *         established while the file was parsed
 */
template <typename T>
int FileParser<T>::Cols() {
    return this->num_cols;
}

/**
 * @return a copy of the row labels, one per stored data row, in file order
 */
template <typename T>
std::vector<string> FileParser<T>::GetRowHeader() {
    return this->rheader;
}

/**
 * @return a copy of the column names collected while parsing
 */
template <typename T>
std::vector<string> FileParser<T>::GetColHeader() {
    return this->cheader;
}

/**
 * Replaces the stored values of rows 1..Rows()-1 with the values returned
 * by TiedRank. Row 0 is left untouched.
 *
 * The data is exported with GetData() (column-major, row 0 skipped), handed
 * to TiedRank together with the row and column counts, and the returned
 * array is read back using the same column-major layout.
 *
 * NOTE(review): the array returned by TiedRank is never released here. If
 * TiedRank allocates its result this leaks it - confirm ownership against
 * TiedRank.h.
 */
template <typename T>
void FileParser<T>::ConvertToRanks() {

	T* alldata = GetData();

	const int ranked_rows = Rows() - 1;
	const int cols = Cols();
	T* ranks = TiedRank(alldata, ranked_rows, cols);

	//Copy ranks back into the data structure, one stored row at a time
	for (int r = 0; r < ranked_rows; ++r) {
		std::vector<T>& target = data[r + 1];
		for (int c = 0; c < cols; ++c) {
			target[c] = ranks[c * ranked_rows + r];
		}
	}

	delete[] alldata;
}

/**
 * Exports the table, without row 0, as one contiguous array in column-major
 * order: all values of column 0 (rows 1..Rows()-1), then column 1, and so on.
 *
 * @return a new[]-allocated array of Cols()*(Rows()-1) elements; the caller
 *         releases it with delete[]
 * @throws GeneralException if there is no stored row or no data column
 */
template <typename T>
T* FileParser<T>::GetData() {

	const int rows = Rows();
	const int cols = Cols();

	// Fail with a clear error on a degenerate table instead of relying on a
	// negative array size being rejected by operator new[].
	if (rows < 1 || cols < 1) {
		throw GeneralException("No data to export", this->Class());
	}

	// Row 0 is skipped, so each column contributes rows-1 values.
	T* alldata = new T[cols * (rows - 1)];
	unsigned int index = 0;

	//Copy over the data into the new array in column-major order
	for (int i = 0; i < cols; i++) {
		for (int j = 1; j < rows; j++) {
			alldata[index++] = data[j][i];
		}
	}
	return alldata;
}

/**
 * Counts how many entries of stored row 0 equal `label` once converted to int.
 *
 * @param label  value to look for in row 0
 * @return       number of matching columns
 * @throws GeneralException (from GetRow) if no row is stored
 */
template <typename T>
int FileParser<T>::GetClassNum(int label) {

	std::vector<T> labels = GetRow(0);

	unsigned int matches = 0;
	for (typename std::vector<T>::size_type col = 0; col < labels.size(); ++col) {
		if ((int)labels[col] == label) {
			++matches;
		}
	}
	return matches;
}

/**
 * Exports selected rows of every column whose row-0 entry equals `label`.
 *
 * For each matching column, in column order, the values at stored rows
 * indices[0]+1, indices[1]+1, ... indices[num-1]+1 are appended to the
 * result, giving a column-major array with `num` values per column.
 *
 * @param label    row-0 value (compared after conversion to int) that selects columns
 * @param num      number of entries in `indices`; values <= 0 select nothing
 * @param indices  row numbers to copy, offset by one so that 0 refers to
 *                 stored row 1
 * @return a new[]-allocated array of GetClassNum(label)*num elements; the
 *         caller releases it with delete[]
 * @throws GeneralException if no row is stored, if `indices` is null while
 *         num > 0, or if any indices[j]+1 is not a valid stored row
 */
template <typename T>
T* FileParser<T>::GetClassColOrder(int label, int num, int *indices) {

	// Throws when there is no row 0 to read labels from
	const int num_class = GetClassNum(label);

	const int rows = Rows();
	const int cols = Cols();
	const int picked = (num > 0) ? num : 0;

	// Validate the selection before allocating so nothing leaks on error
	if (picked > 0 && !indices) {
		throw GeneralException("Row index list is missing", this->Class());
	}
	for (int j = 0; j < picked; j++) {
		const int source_row = indices[j] + 1;
		if (source_row < 0 || source_row >= rows) {
			throw GeneralException("Row index out of bounds", this->Class());
		}
	}

	// Exactly `picked` values are written per matching column
	T* class1 = new T[num_class * picked];
	unsigned int index = 0;

	//Copy over the data into the new array in column-major order
	for (int i = 0; i < cols; i++) {

		//If this column is one that we want
		if (label != (int)data[0][i]) continue;

		for (int j = 0; j < picked; j++) {
			class1[index++] = data[indices[j] + 1][i];
		}
	}
	return class1;
}

/**
 * Exports every column whose row-0 entry equals `label` (after conversion
 * to int), skipping row 0, as one column-major array.
 *
 * @param label  row-0 value that selects columns
 * @return a new[]-allocated array. It is allocated with
 *         GetClassNum(label)*Rows() elements, of which the first
 *         GetClassNum(label)*(Rows()-1) are written.
 * @throws GeneralException (from GetRow) if no row is stored
 */
template <typename T>
T* FileParser<T>::GetClassColOrder(int label) {

	const int matching = GetClassNum(label);
	std::vector<T> labels = GetRow(0);

	//Allocate the output and keep a write cursor into it
	T* selected = new T[matching*Rows()];
	T* cursor = selected;

	for (int col = 0; col < Cols(); ++col) {

		//Only columns carrying the requested label are copied
		if ((int)labels[col] == label) {

			std::vector<T> values = GetColumn(col);
			//Start at 1: the first entry of the column is its label
			for (typename std::vector<T>::size_type r = 1; r < values.size(); ++r) {
				*cursor++ = values[r];
			}
		}
	}
	return selected;
}

/**
 * Splits `str` into tokens at any character contained in `delimiters` and
 * appends the tokens to `tokens`.
 *
 * Leading delimiters are skipped and a run of consecutive delimiters counts
 * as a single separator, so empty tokens are never produced.
 *
 * No default is given for `delimiters` here: default arguments of a member
 * of a class template may only appear on the declaration inside the class.
 *
 * @param str         text to split
 * @param tokens      output vector; existing content is kept
 * @param delimiters  set of separator characters
 */
template <typename T>
void FileParser<T>::Tokenize(const string& str, std::vector<string>& tokens, const string& delimiters) {

    // Start of the first token (skips any leading delimiters)
    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
    // First delimiter after that token
    string::size_type pos = str.find_first_of(delimiters, lastPos);

    while (string::npos != pos || string::npos != lastPos)
    {
        // pos == npos on the last token; substr then takes the rest of str
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        // Skip the delimiter run, then find the end of the next token
        lastPos = str.find_first_not_of(delimiters, pos);
        pos = str.find_first_of(delimiters, lastPos);
    }
}

/**
 * Returns a copy of `source` with every trailing character that occurs in
 * `t` removed.
 *
 * @param source  text to trim
 * @param t       set of characters to strip from the right end
 * @return        the trimmed copy; empty if source consists only of characters in t
 */
template <typename T>
inline std::string FileParser<T>::trim_right(const std::string &source , const std::string &t) {
	const std::string::size_type last_kept = source.find_last_not_of(t);
	if (last_kept == std::string::npos) {
		// Nothing but trim characters (or empty input)
		return std::string();
	}
	return source.substr(0, last_kept + 1);
}

/**
 * Returns a copy of `source` with every leading character that occurs in
 * `t` removed.
 *
 * @param source  text to trim
 * @param t       set of characters to strip from the left end
 * @return        the trimmed copy; empty if source consists only of characters in t
 */
template <typename T>
inline std::string FileParser<T>::trim_left( const std::string &source, const std::string &t) {
	const std::string::size_type first_kept = source.find_first_not_of(t);
	if (first_kept == std::string::npos) {
		// Nothing but trim characters (or empty input)
		return std::string();
	}
	return source.substr(first_kept);
}

/**
 * Returns a copy of `source` with the characters of `t` stripped from both
 * ends. The argument itself is not modified.
 *
 * @param source  text to trim
 * @param t       set of characters to strip
 * @return        the trimmed copy
 */
template <typename T>
inline std::string FileParser<T>::trim(const std::string &source, const std::string &t) {
	const std::string without_tail = trim_right(source, t);
	return trim_left(without_tail, t);
}

/**
 * Writes the table to standard output, tab-separated: first a line with the
 * column names (preceded by an empty cell), then one line per stored row
 * starting with its row label.
 */
template <typename T>
void FileParser<T>::Print() {

    //Header line: an empty corner cell followed by the column names
    printf("\t");
    for (std::vector<string>::iterator name = cheader.begin(); name != cheader.end(); ++name) {
        printf("%s\t", name->c_str());
    }
    printf("\n");

    typedef typename std::vector< std::vector<T> >::size_type row_index_t;
    typedef typename std::vector<T>::size_type col_index_t;

    for (row_index_t r = 0; r < data.size(); ++r) {

        //Row label first, then the values of this row
        printf("%s\t", rheader[r].c_str());

        std::vector<T>& row = data[r];
        for (col_index_t c = 0; c < row.size(); ++c) {
            std::cout << row[c] << '\t';
        }
        printf("\n");
    }
}
#endif	/* _FILEPARSER_H */

