#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <iostream>
#include <fstream>
#include "detect.h"
using namespace std;

/* --------------------------------------------------------------------------------------------------------- */

// Tunable parameters. The first five can be overridden on the command line
// (NAME VALUE pairs handled by fuzhi); OV_DEVIATION is recomputed in main.
// Number of characters skipped at each end of the read and of the reference
// before window matching (Step1); also the length of the fixed head and tail
// nodes created by Add_0.
int SKIP_LEN=                       18;
// Length of each read window that is searched for in the reference (MatchAll).
int SUB_STR_LEN =                   14;
// Minimum value of (mainEnd - mainStart) an extended match needs to become a node.
int MATCH_MIN_LEN=                  5;
// Maximum number of mismatches tolerated per window (StrFind) and maximum
// number of mismatching characters trimmed from each end of a node.
int ERROR_NUM=                      2;
// Slack added to a node's subEnd when Merge decides whether the next node is adjacent.
int MERGE_DEVIATION=                3;
// Minimum offset between two nodes' boundaries in the overlap tests of Merge
// and Step2; main overwrites it with
// SUB_STR_LEN + MATCH_MIN_LEN - 2 * ERROR_NUM - 1.
int OV_DEVIATION =                  14;

// Sets one of the integer tuning parameters by name.
//   name: "SKIP_LEN", "SUB_STR_LEN", "MATCH_MIN_LEN", "ERROR_NUM" or
//         "MERGE_DEVIATION"; any other name is silently ignored.
//   val:  decimal text, converted with atoi (non-numeric text yields 0).
// ("fuzhi" is presumably pinyin for "assign" - confirm with the author.)
void fuzhi(string name,string val){
        int* target = 0;

        if (name == "SKIP_LEN")             target = &SKIP_LEN;
        else if (name == "SUB_STR_LEN")     target = &SUB_STR_LEN;
        else if (name == "MATCH_MIN_LEN")   target = &MATCH_MIN_LEN;
        else if (name == "ERROR_NUM")       target = &ERROR_NUM;
        else if (name == "MERGE_DEVIATION") target = &MERGE_DEVIATION;

        // Unknown names leave every parameter untouched.
        if (target != 0)
        {
                *target = atoi(val.c_str());
        }
}

int main(int argc, char* argv[])
{
        if ( argc > 1 ){
                    for ( int i = 1; i < argc; i=i+2 )
                                {
                                                fuzhi(argv[i],argv[i+1]);
                                                        }
                        }
        OV_DEVIATION = SUB_STR_LEN + MATCH_MIN_LEN - ERROR_NUM - ERROR_NUM - 1;
        string strWorkDir = "./";
                Start(strWorkDir);

                    cout << "OK!" << endl;
                        
                        return 0;
}
// Runs the whole pipeline for one working directory.
// Opens INPUT_FILE for reading and INV_FILE / OUTPUT_FILE_I for writing (all
// relative to strWorkDir), then processes the input record by record:
// GetRecord -> Step1 -> Step2 -> Step3 -> Output.
// Returns true when the input was consumed to the end without a failing step.
// C_RET_FALSE and C_GOTO_END are macros defined outside this file; presumably
// they "return false" and "goto L_End" respectively - confirm in detect.h.
bool Start(IN string const& strWorkDir)
{
	bool r = false;
	ifstream ifs;
	ofstream ofii, ofsi;
	_record_info ri;
	_ana_info ai;
	int rst;
	// NOTE(review): vctNode is never used in this function.
	vector<_ana_info_node > vctNode;
	
    ifs.open((strWorkDir + INPUT_FILE).c_str());
	if (!ifs.is_open()) C_RET_FALSE;
	ofii.open((strWorkDir + INV_FILE).c_str());
	if (!ofii.is_open()) C_RET_FALSE;
	ofsi.open((strWorkDir + OUTPUT_FILE_I).c_str());
	if (!ofsi.is_open()) C_RET_FALSE;
	
    while(true)
	{
		// GetRecord: -1 = error, 0 = end of input, 2 = blank line, 1 = record read.
		rst = GetRecord(ifs, ri);
		if (rst == -1) C_GOTO_END;
		if (rst == 0) break;
		else if (rst == 2) continue;

		// Any failing step aborts the run; r stays false.
		if (!Step1(ri, ai)) C_GOTO_END;
		if (!Step2(ai)) C_GOTO_END;
		// Step3 writes the selected chain into ai.vctResult.
		if (!Step3(ai, ai.vctResult)) C_GOTO_END;
	
		Output(ofii, ofsi, ai);
	}

	r = true;

L_End:
	// Common exit path: close whatever was opened.
	if (ifs.is_open()) ifs.close();
	if (ofsi.is_open()) ofsi.close();
	if (ofii.is_open()) ofii.close();
	return r;
}

// Reads the next line of ifs and parses it into ri.
// A record is 7 fields separated by spaces/tabs, in this order:
//   head, chr, start, end, read, ref, flag
// The read and ref fields are converted to upper case.
// Return value:
//    1  a record was stored in ri
//    0  end of input reached (caller should stop)
//    2  blank line (caller should skip it)
//   -1  stream error, or the line does not have exactly 7 fields
int GetRecord(ifstream& ifs, OUT _record_info& ri)
{
	string line;
	if (!getline(ifs, line))
	{
		return ifs.eof() ? 0 : -1;
	}

	line = Trim(line, " \t");
	if (line.empty()) return 2;

	vector<string > fields;
	Split(line, " \t", fields);
	if (fields.size() != 7) return -1;

	// Normalise the two sequence columns (read = 4, ref = 5) to upper case.
	for (unsigned int col = 4; col <= 5; ++col)
	{
		transform(fields[col].begin(), fields[col].end(), fields[col].begin(), ::toupper);
	}

	ri.head  = fields[0];
	ri.chr   = fields[1];
	ri.start = fields[2];
	ri.end   = fields[3];
	ri.read  = fields[4];
	ri.ref   = fields[5];
	ri.flag  = fields[6];

	return 1;
}

// Step 1: seed matching.
// Copies the record into ai, then searches the inner part of the read (the
// first and last SKIP_LEN characters removed) against the inner part of the
// reference, once as-is and once reverse-complemented.
//   ai.vctNode_0 receives the forward nodes (plus fixed head/tail nodes, see Add_0);
//   ai.vctNode_1 receives the reverse-complement nodes (see Add_1).
// Returns false (through C_RET_FALSE) when the read or the reference is
// shorter than 2 * SKIP_LEN; true otherwise.
bool Step1(_record_info const& ri, OUT _ana_info& ai)
{
	string mainStr, subStr;
	vector<_match_all_info > vctMai;
	ai.head = ri.head;
	ai.chr = ri.chr;
	ai.start = ri.start;
	ai.end = ri.end;
	ai.read = ri.read;
	ai.ref = ri.ref;
	ai.flag = ri.flag;
	ai.vctNode_0.clear();
	ai.vctNode_1.clear();

	// Both sequences lose SKIP_LEN characters at each end. Without the check
	// on the reference, substr() below either throws std::out_of_range or is
	// handed a length that wrapped around.
	if (ai.read.size() < SKIP_LEN * 2) C_RET_FALSE;
	if (ai.ref.size() < SKIP_LEN * 2) C_RET_FALSE;
	subStr = ai.read.substr(SKIP_LEN, ai.read.size() - SKIP_LEN * 2);
	mainStr = ai.ref.substr(SKIP_LEN, ai.ref.size() - SKIP_LEN * 2);

	unsigned int errorNum = ERROR_NUM;

	// Forward strand.
	MatchAll(mainStr, subStr, errorNum, vctMai);
	Add_0(vctMai, ai.read.size(), ai.ref.size(), ai, errorNum, ai.vctNode_0);

	// Reverse-complement strand: Change() complements and reverses subStr in place.
	Change(subStr);
	MatchAll_1(mainStr, subStr, errorNum, vctMai);
	Add_1(vctMai, ai.read.size(), ai.ref.size(), ai, errorNum, ai.vctNode_1);

	return true;
}

// Finds all approximate occurrences of subStr's windows in mainStr and chains
// consecutive hits.
// Every window subStr[k, k + SUB_STR_LEN) is searched with Match (at most
// errorNum mismatches). A hit at main position p for window k extends an
// existing entry whose last window was k - 1 and whose last main position was
// p - 1; otherwise it starts a new entry.
//   vctMai (out): cleared, then filled. For each entry: subStart / mainStart
//   are the first window index and its main position, pos / mainEnd the last.
void MatchAll(string const& mainStr, string const& subStr, unsigned int errorNum, OUT vector<_match_all_info >& vctMai)
{
	_match_one_info moi;
	vctMai.clear();

	// A string shorter than one window has no windows at all. Without this
	// guard the unsigned window count below wraps around and substr() ends up
	// throwing std::out_of_range.
	if (SUB_STR_LEN > 0 && subStr.size() < static_cast<string::size_type>(SUB_STR_LEN)) return;

	const unsigned int upperBound = subStr.size() - SUB_STR_LEN + 1;

	for (unsigned int k = 0; k < upperBound; ++k)
	{
		Match(mainStr, subStr.substr(k, SUB_STR_LEN), errorNum, moi);

		for (unsigned int i = 0; i < moi.vctPos.size(); ++i)
		{
			// Try to extend a run that ended one step earlier in both strings.
			bool extended = false;
			for (unsigned int j = 0; j < vctMai.size(); ++j)
			{
				if (((vctMai[j].pos + 1) == k) && ((vctMai[j].mainEnd + 1) == moi.vctPos[i]))
				{
					++vctMai[j].mainEnd;
					++vctMai[j].pos;
					extended = true;
					break;
				}
			}

			if (!extended)
			{
				vctMai.push_back(_match_all_info());
				vctMai.back().pos = k;
				vctMai.back().mainStart = vctMai.back().mainEnd = moi.vctPos[i];
				vctMai.back().subStart = k;
			}
		}
	}
}

// Same chaining as MatchAll, but each window is searched with Match_1, which
// rejects windows that WordFind reports as having too many "AT" or "GC"
// occurrences. Step1 calls this with the reverse-complemented read.
//   vctMai (out): cleared, then filled. For each entry: subStart / mainStart
//   are the first window index and its main position, pos / mainEnd the last.
void MatchAll_1(string const& mainStr, string const& subStr, unsigned int errorNum, OUT vector<_match_all_info >& vctMai)
{
        _match_one_info moi;
        vctMai.clear();

        // A string shorter than one window has no windows at all. Without this
        // guard the unsigned window count below wraps around and substr() ends
        // up throwing std::out_of_range.
        if (SUB_STR_LEN > 0 && subStr.size() < static_cast<string::size_type>(SUB_STR_LEN)) return;

        const unsigned int upperBound = subStr.size() - SUB_STR_LEN + 1;

        for (unsigned int k = 0; k < upperBound; ++k)
        {
                Match_1(mainStr, subStr.substr(k, SUB_STR_LEN), errorNum, moi);

                for (unsigned int i = 0; i < moi.vctPos.size(); ++i)
                {
                        // Try to extend a run that ended one step earlier in both strings.
                        bool extended = false;
                        for (unsigned int j = 0; j < vctMai.size(); ++j)
                        {
                                if (((vctMai[j].pos + 1) == k) && ((vctMai[j].mainEnd + 1) == moi.vctPos[i]))
                                {
                                        ++vctMai[j].mainEnd;
                                        ++vctMai[j].pos;
                                        extended = true;
                                        break;
                                }
                        }

                        if (!extended)
                        {
                                vctMai.push_back(_match_all_info());
                                vctMai.back().pos = k;
                                vctMai.back().mainStart = vctMai.back().mainEnd = moi.vctPos[i];
                                vctMai.back().subStart = k;
                        }
                }
        }
}

// Converts forward-strand match runs into nodes and appends them to vctNode.
//   vctMai   runs produced by MatchAll (coordinates relative to the trimmed strings)
//   subLen   full read length, mainLen full reference length
//   ai       supplies ai.read / ai.ref for end trimming
//   errorNum maximum number of mismatching characters trimmed from each end
//   vctNode  (out) receives: a head node covering [0, SKIP_LEN - 1] in both
//            strings, one node per run whose (mainEnd - mainStart) is at least
//            MATCH_MIN_LEN, and a tail node covering the last SKIP_LEN
//            characters. The vector is finally sorted with SortSub.
// Node coordinates are in full read / reference coordinates (SKIP_LEN added).
void Add_0(IN vector<_match_all_info > const& vctMai, IN unsigned int subLen, IN unsigned int mainLen, IN _ana_info& ai, IN unsigned int errorNum, OUT vector<_ana_info_node >& vctNode)
{
	// Fixed head node.
	vctNode.push_back(_ana_info_node());
	{
		_ana_info_node& head = vctNode.back();
		head.flag = FLAG_FORWARD;
		head.mainStart = head.subStart = 0;
		head.mainEnd = head.subEnd = SKIP_LEN - 1;
	}

	for (unsigned int idx = 0; idx < vctMai.size(); ++idx)
	{
		if ((vctMai[idx].mainEnd - vctMai[idx].mainStart) < MATCH_MIN_LEN) continue;

		vctNode.push_back(_ana_info_node());
		_ana_info_node& node = vctNode.back();
		node.flag = FLAG_FORWARD;
		node.mainStart = vctMai[idx].mainStart + SKIP_LEN;
		// mainEnd of a run is the start of its last window; widen to the window's end.
		node.mainEnd = vctMai[idx].mainEnd + SUB_STR_LEN - 1 + SKIP_LEN;
		node.subStart = vctMai[idx].subStart + SKIP_LEN;
		node.subEnd = node.subStart + node.mainEnd - node.mainStart;

		// Drop up to errorNum leading mismatches.
		unsigned int trimmed = 0;
		while (trimmed < errorNum && ai.read[node.subStart] != ai.ref[node.mainStart])
		{
			++node.subStart;
			++node.mainStart;
			++trimmed;
		}

		// Drop up to errorNum trailing mismatches.
		trimmed = 0;
		while (trimmed < errorNum && ai.read[node.subEnd] != ai.ref[node.mainEnd])
		{
			--node.subEnd;
			--node.mainEnd;
			++trimmed;
		}
	}

	// Fixed tail node.
	vctNode.push_back(_ana_info_node());
	{
		_ana_info_node& tail = vctNode.back();
		tail.flag = FLAG_FORWARD;
		tail.mainStart = mainLen - SKIP_LEN;
		tail.mainEnd = mainLen - 1;
		tail.subStart = subLen - SKIP_LEN;
		tail.subEnd = subLen - 1;
	}

	sort(vctNode.begin(), vctNode.end(), SortSub);
}

// Converts reverse-complement match runs into nodes and appends them to vctNode.
//   vctMai   runs produced by MatchAll_1
//   subLen, mainLen  not used by this function
//   ai       supplies ai.read / ai.ref for end trimming
//   errorNum maximum number of mismatching characters trimmed from each end
//   vctNode  (out) receives one FLAG_BACKWARD node per run whose
//            (mainEnd - mainStart) is at least MATCH_MIN_LEN; finally sorted
//            with SortSub.
// mainStart / mainEnd are in full reference coordinates (SKIP_LEN added);
// subStart / subEnd stay relative to the trimmed, reverse-complemented read.
void Add_1(IN vector<_match_all_info > const& vctMai, IN unsigned int subLen, IN unsigned int mainLen, IN  _ana_info& ai, IN unsigned int errorNum, OUT vector<_ana_info_node >& vctNode)
{
	// Reverse complement of the trimmed read. It is the same for every run, so
	// it is built once, and only when a run actually qualifies.
	string tempStr;
	bool tempReady = false;

	for (unsigned int i = 0; i < vctMai.size(); ++i)
	{
		if ((vctMai[i].mainEnd - vctMai[i].mainStart) < MATCH_MIN_LEN) continue;

		if (!tempReady)
		{
			tempStr = ai.read.substr(SKIP_LEN, ai.read.size() - SKIP_LEN * 2);
			Change(tempStr);
			tempReady = true;
		}

		vctNode.push_back(_ana_info_node());
		_ana_info_node& node = vctNode.back();
		node.flag = FLAG_BACKWARD;
		node.mainStart = vctMai[i].mainStart + SKIP_LEN;
		// mainEnd of a run is the start of its last window; widen to the window's end.
		node.mainEnd = vctMai[i].mainEnd + SUB_STR_LEN - 1 + SKIP_LEN;
		node.subStart = vctMai[i].subStart;
		node.subEnd = node.subStart + node.mainEnd - node.mainStart;

		// Drop up to errorNum leading mismatches.
		for (unsigned int j = 0; j < errorNum; ++j)
		{
			if (tempStr[node.subStart] == ai.ref[node.mainStart]) break;
			++node.subStart;
			++node.mainStart;
		}

		// Drop up to errorNum trailing mismatches.
		for (unsigned int k = 0; k < errorNum; ++k)
		{
			if (tempStr[node.subEnd] == ai.ref[node.mainEnd]) break;
			--node.subEnd;
			--node.mainEnd;
		}
	}

	sort(vctNode.begin(), vctNode.end(), SortSub);
}

// Replaces str with its reverse complement, in place.
// 'A' <-> 'T' and 'C' <-> 'G' are swapped; every other character (including
// lower-case letters) is left as it is. The string is then reversed.
void Change(INOUT string& str)
{
	for (string::iterator it = str.begin(); it != str.end(); ++it)
	{
		if (*it == 'A')      *it = 'T';
		else if (*it == 'C') *it = 'G';
		else if (*it == 'T') *it = 'A';
		else if (*it == 'G') *it = 'C';
	}
	reverse(str.begin(), str.end());
}

// Collects every position of mainStr at which subStr matches with at most
// errorNum mismatches (as decided by StrFind).
//   moi (out): moi.vctPos is cleared and filled with the positions in
//   increasing order; overlapping occurrences are all reported.
void Match(IN string const& mainStr, IN string const& subStr, IN unsigned int errorNum, OUT _match_one_info& moi)
{
        moi.vctPos.clear();

        // Restart the search one character after each hit.
        for (unsigned int hit = StrFind(mainStr, subStr, errorNum, 0);
             hit != FIND_NPOS;
             hit = StrFind(mainStr, subStr, errorNum, hit + 1))
        {
                moi.vctPos.push_back(hit);
        }
}

// Like Match, but first filters out repetitive windows: if WordFind reports
// FIND_NPOS for "AT" or for "GC" (the word occurs at least size/4 times in
// subStr), nothing is searched and moi.vctPos stays empty.
//   moi (out): moi.vctPos is cleared and filled with every position of
//   mainStr where subStr matches with at most errorNum mismatches.
void Match_1(IN string const& mainStr, IN string const& subStr, IN unsigned int errorNum, OUT _match_one_info& moi)
{
	moi.vctPos.clear();

	// The filter depends only on subStr, so evaluate it once instead of on
	// every iteration of the search loop.
	if (WordFind(subStr, "AT") == FIND_NPOS) return;
	if (WordFind(subStr, "GC") == FIND_NPOS) return;

	unsigned int pos = 0;
	while (true)
	{
		pos = StrFind(mainStr, subStr, errorNum, pos);
		if (pos == FIND_NPOS) break;
		moi.vctPos.push_back(pos);
		++pos;	// continue right after this hit so overlapping hits are found
	}
}

// Approximate substring search (substitutions only).
// Returns the first index i >= posf such that mainStr[i, i + subStr.size())
// differs from subStr in at most errorNum positions, or FIND_NPOS when there
// is no such index.
unsigned int StrFind(string const& mainStr, string const& subStr, IN unsigned int errorNum, unsigned int posf)
{
	const unsigned int mainLen = mainStr.size();
	const unsigned int subLen = subStr.size();

	// posf > mainLen must be rejected explicitly: the unsigned subtraction
	// below would wrap, and together with mainLen < subLen the scan would
	// read past the end of mainStr.
	if (posf > mainLen || (mainLen - posf) < subLen) return FIND_NPOS;
	const unsigned int upperBound = mainLen - subLen + 1;

	for (unsigned int i = posf; i < upperBound; ++i)
	{
		unsigned int num = 0;
		for (unsigned int j = 0; j < subLen; ++j)
		{
			if (mainStr[i + j] != subStr[j])
			{
				++num;
				// Already too many mismatches: this alignment cannot match.
				if (num > errorNum) break;
			}
		}
		if (num <= errorNum) return i;
	}

	return FIND_NPOS;
}

// Counts the (possibly overlapping) occurrences of word in subStr.
// Returns the count, or FIND_NPOS as soon as the count reaches
// subStr.size() / 4 (Match_1 treats that as "too repetitive").
unsigned int WordFind(string const& subStr, string word)
{
	unsigned int wordnum = 0;
	const unsigned int num = subStr.size() / 4;

	// Keep the search position in string::size_type and compare against
	// string::npos, so the loop does not depend on FIND_NPOS having the same
	// value as a truncated npos.
	string::size_type posw = subStr.find(word, 0);
	while (posw != string::npos)
	{
		++wordnum;
		if (wordnum >= num) return FIND_NPOS;
		posw = subStr.find(word, posw + 1);
	}
	return wordnum;
}

// Step 2: merge nodes per strand, bring backward nodes into read coordinates,
// and add trimmed copies of forward nodes where they collide with backward ones.
//   1. Merge() is applied to the forward list (vctNode_0) and the backward
//      list (vctNode_1) separately.
//   2. The sub coordinates of every backward node are converted from the
//      trimmed, reverse-complemented read to the full read.
//   3. For every pair (forward node i, backward node j) several overlap
//      patterns are tested; on a hit a new forward node - node i cut so that
//      it ends before, or starts after, node j - is appended to vctNode_0.
//      The original nodes are kept. Only the forward nodes present before the
//      loop (m of them) are examined; appended nodes are not revisited.
//   4. errorNum / matchNum are computed for all nodes of both lists.
// Always returns true.
// NOTE(review): coordinates are reduced by 1 in several places; if the node
// fields are unsigned (declared in detect.h, not visible here) a value of 0
// would wrap around - confirm.
bool Step2(INOUT _ana_info& ai)
{
	unsigned int len;
	Merge(ai.vctNode_0);
	Merge(ai.vctNode_1);
	// Length of the read without the SKIP_LEN characters at each end.
	len = ai.read.size() - 2 * SKIP_LEN;

    // Mirror the backward nodes: position p of the reversed trimmed read is
    // position (len - 1 - p + SKIP_LEN) of the full read; the node length is
    // taken from its main coordinates.
    for (int i = 0; i < ai.vctNode_1.size(); ++i)
	{
		ai.vctNode_1[i].subEnd = len - 1 - ai.vctNode_1[i].subStart + SKIP_LEN;
		ai.vctNode_1[i].subStart = ai.vctNode_1[i].subEnd - (ai.vctNode_1[i].mainEnd - ai.vctNode_1[i].mainStart);
	}

	// Sizes are captured here so that nodes appended below are not iterated.
	int m = ai.vctNode_0.size();
	int n = ai.vctNode_1.size();
	for (int i = 0; i < m; ++i)
	{
		for (int j = 0; j < n; ++j)
		{
		// Case A: on the read, forward node i starts first and runs into backward node j.
		if ((ai.vctNode_0[i].subEnd >= ai.vctNode_1[j].subStart) && (ai.vctNode_1[j].subStart > ai.vctNode_0[i].subStart + OV_DEVIATION) && (ai.vctNode_1[j].subEnd > ai.vctNode_0[i].subEnd + OV_DEVIATION))//substr, 0 ov 1
		{
			// A1: the same arrangement also holds on the reference.
			if ((ai.vctNode_0[i].mainEnd >= ai.vctNode_1[j].mainStart) && (ai.vctNode_1[j].mainStart > ai.vctNode_0[i].mainStart + OV_DEVIATION) && (ai.vctNode_1[j].mainEnd > ai.vctNode_0[i].mainEnd + OV_DEVIATION))//mainstr, 0 ov 1
			{
				// Cut node i at the boundary taken from the string with the shorter combined span.
				if ((ai.vctNode_1[j].subEnd - ai.vctNode_0[i].subStart) <= (ai.vctNode_1[j].mainEnd - ai.vctNode_0[i].mainStart))//keep substr
				{
						// New node: node i ending just before node j on the read; mainEnd follows from the length.
						ai.vctNode_0.push_back(_ana_info_node());
						ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
						ai.vctNode_0.back().subStart = ai.vctNode_0[i].subStart;
						ai.vctNode_0.back().subEnd = ai.vctNode_1[j].subStart - 1;
						ai.vctNode_0.back().mainStart = ai.vctNode_0[i].mainStart;
						ai.vctNode_0.back().mainEnd = ai.vctNode_0.back().mainStart + ai.vctNode_0.back().subEnd - ai.vctNode_0.back().subStart;
						continue;
				}
				else //keep mainstr
				{
						// New node: node i ending just before node j on the reference; subEnd follows from the length.
						ai.vctNode_0.push_back(_ana_info_node());
						ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
						ai.vctNode_0.back().subStart = ai.vctNode_0[i].subStart;
						ai.vctNode_0.back().mainStart = ai.vctNode_0[i].mainStart;
						ai.vctNode_0.back().mainEnd = ai.vctNode_1[j].mainStart - 1;					
						ai.vctNode_0.back().subEnd = ai.vctNode_0.back().subStart + ai.vctNode_0.back().mainEnd - ai.vctNode_0.back().mainStart;
						continue;
				}
			}

					
			// A2: no overlap on the reference (j lies after i): cut on the read boundary.
			if (ai.vctNode_1[j].mainStart > ai.vctNode_0[i].mainEnd) //mainstr, no overlap, keep substr
			{
					ai.vctNode_0.push_back(_ana_info_node());
					ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
					ai.vctNode_0.back().subStart = ai.vctNode_0[i].subStart;
					ai.vctNode_0.back().subEnd = ai.vctNode_1[j].subStart - 1;
					ai.vctNode_0.back().mainStart = ai.vctNode_0[i].mainStart;
					ai.vctNode_0.back().mainEnd = ai.vctNode_0.back().mainStart + ai.vctNode_0.back().subEnd - ai.vctNode_0.back().subStart;
					continue;
			}
		}//substr 0 ov 1 ends
		
		// Case B: j lies after i on the read, but i runs into j on the reference: cut i on the reference boundary.
		if ((ai.vctNode_1[j].subStart > ai.vctNode_0[i].subEnd) && (ai.vctNode_0[i].mainEnd >= ai.vctNode_1[j].mainStart) && (ai.vctNode_1[j].mainStart > ai.vctNode_0[i].mainStart + OV_DEVIATION) && (ai.vctNode_1[j].mainEnd > ai.vctNode_0[i].mainEnd + OV_DEVIATION))
		        {
                                ai.vctNode_0.push_back(_ana_info_node());
                                ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;
                                ai.vctNode_0.back().subStart = ai.vctNode_0[i].subStart;
                                ai.vctNode_0.back().mainStart = ai.vctNode_0[i].mainStart;
                                ai.vctNode_0.back().mainEnd = ai.vctNode_1[j].mainStart - 1;
                                ai.vctNode_0.back().subEnd = ai.vctNode_0.back().subStart + ai.vctNode_0.back().mainEnd - ai.vctNode_0.back().mainStart;
                                continue;
                        }

	      	// Case C: j lies before i on the read, but j runs into i on the reference: new node is i starting after j on the reference.
	      	if ((ai.vctNode_1[j].subEnd < ai.vctNode_0[i].subStart) && (ai.vctNode_1[j].mainEnd >= ai.vctNode_0[i].mainStart) && (ai.vctNode_0[i].mainStart > ai.vctNode_1[j].mainStart + OV_DEVIATION) && (ai.vctNode_0[i].mainEnd > ai.vctNode_1[j].mainEnd + OV_DEVIATION))     //mainstr, 1 ov 0, keep 1			
			{
				ai.vctNode_0.push_back(_ana_info_node());
				ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
				ai.vctNode_0.back().subEnd = ai.vctNode_0[i].subEnd;
				ai.vctNode_0.back().mainStart = ai.vctNode_1[j].mainEnd + 1;
				ai.vctNode_0.back().mainEnd = ai.vctNode_0[i].mainEnd;
				ai.vctNode_0.back().subStart = ai.vctNode_0.back().subEnd - (ai.vctNode_0.back().mainEnd - ai.vctNode_0.back().mainStart);
				continue;
			}
	

		// Case D: on the read, backward node j starts first and runs into forward node i.
		if ((ai.vctNode_1[j].subEnd >= ai.vctNode_0[i].subStart) && (ai.vctNode_0[i].subStart > ai.vctNode_1[j].subStart + OV_DEVIATION) && (ai.vctNode_0[i].subEnd > ai.vctNode_1[j].subEnd + OV_DEVIATION))//substr, 1 ov 0
		{
			// NOTE(review): the last condition below compares
			// vctNode_1[j].mainEnd > vctNode_0[i].mainEnd + OV_DEVIATION, whereas
			// the "mainstr, 1 ov 0" test of Case C above uses
			// vctNode_0[i].mainEnd > vctNode_1[j].mainEnd + OV_DEVIATION.
			// Possibly a copy/paste slip - confirm the intended direction.
			if ((ai.vctNode_1[j].mainEnd >= ai.vctNode_0[i].mainStart) && (ai.vctNode_0[i].mainStart > ai.vctNode_1[j].mainStart + OV_DEVIATION) && (ai.vctNode_1[j].mainEnd > ai.vctNode_0[i].mainEnd + OV_DEVIATION))	//mainstr, 1 ov 0
			{
				if ((ai.vctNode_0[i].subEnd - ai.vctNode_1[j].subStart) <= (ai.vctNode_0[i].mainEnd - ai.vctNode_1[j].mainStart))//keep substr
				{
						// New node: node i starting just after node j on the read; mainStart follows from the length.
						ai.vctNode_0.push_back(_ana_info_node());
						ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
						ai.vctNode_0.back().subStart = ai.vctNode_1[j].subEnd + 1;
						ai.vctNode_0.back().subEnd = ai.vctNode_0[i].subEnd;
						ai.vctNode_0.back().mainEnd = ai.vctNode_0[i].mainEnd;
						ai.vctNode_0.back().mainStart = ai.vctNode_0.back().mainEnd - (ai.vctNode_0.back().subEnd - ai.vctNode_0.back().subStart);
						continue;
				}
				else //keep mainstr
				{
						// New node: node i starting just after node j on the reference; subStart follows from the length.
						ai.vctNode_0.push_back(_ana_info_node());
						ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
						ai.vctNode_0.back().subEnd = ai.vctNode_0[i].subEnd;
						ai.vctNode_0.back().mainStart = ai.vctNode_1[j].mainEnd + 1;
						ai.vctNode_0.back().mainEnd = ai.vctNode_0[i].mainEnd;
						ai.vctNode_0.back().subStart = ai.vctNode_0.back().subEnd - (ai.vctNode_0.back().mainEnd - ai.vctNode_0.back().mainStart);
						continue;
				}
			}//mainstr, 1 ov 0 ends

			// D2: no overlap on the reference (j lies before i): cut on the read boundary.
			if (ai.vctNode_1[j].mainEnd < ai.vctNode_0[i].mainStart) //mainstr, no overlap, keep substr
			{
					ai.vctNode_0.push_back(_ana_info_node());
					ai.vctNode_0.back().flag = ai.vctNode_0[i].flag;					
					ai.vctNode_0.back().subStart = ai.vctNode_1[j].subEnd + 1;
					ai.vctNode_0.back().subEnd = ai.vctNode_0[i].subEnd;
					ai.vctNode_0.back().mainEnd = ai.vctNode_0[i].mainEnd;
					ai.vctNode_0.back().mainStart = ai.vctNode_0.back().mainEnd - (ai.vctNode_0.back().subEnd - ai.vctNode_0.back().subStart);
					continue;
			}
		}
		
		}
		
	}//0 1 overlap ends

/////////////////////////////////////overlap ends		
	// Count mismatches/matches for every node (backward nodes are compared
	// after reverse-complementing their read segment).
	GetErrorNum_0(ai.read, ai.ref, ai.vctNode_0);
	
	GetErrorNum_1(ai.read, ai.ref, ai.vctNode_1);

	return true;
}

// Fills errorNum and matchNum of every forward node.
// errorNum is the number of positions where strRead[subStart..subEnd] differs
// from the equally long piece of strRef starting at mainStart; matchNum is the
// node's main length minus errorNum.
void GetErrorNum_0(string const& strRead, string const& strRef, INOUT vector<_ana_info_node >& vctNode)
{
	for (vector<_ana_info_node >::iterator node = vctNode.begin(); node != vctNode.end(); ++node)
	{
		int len = node->subEnd - node->subStart + 1;
		node->errorNum = MisFind(strRead, strRef, node->subStart, node->mainStart, len);
		node->matchNum = node->mainEnd - node->mainStart + 1 - node->errorNum;
	}
}

// Fills errorNum and matchNum of every backward node.
// The node's read segment strRead[subStart..subEnd] is reverse-complemented
// (Change) and compared with strRef starting at mainStart; matchNum is the
// node's main length minus errorNum.
void GetErrorNum_1(string const& strRead, string const& strRef, INOUT vector<_ana_info_node >& vctNode)
{
	for (vector<_ana_info_node >::iterator node = vctNode.begin(); node != vctNode.end(); ++node)
	{
		string segment = strRead.substr(node->subStart, node->subEnd - node->subStart + 1);
		Change(segment);

		node->errorNum = MisFind(segment, strRef, 0 , node->mainStart, node->subEnd - node->subStart + 1);
		node->matchNum = node->mainEnd - node->mainStart + 1 - node->errorNum;
	}
}

// Counts the positions at which two equally long segments differ.
// Compares subStr[subStart + s] with mainStr[mainStart + s] for s in [0, len)
// and returns the number of unequal pairs. No bounds checking is done; the
// caller must make sure both segments lie inside their strings.
unsigned int MisFind(string const& subStr, string const& mainStr, unsigned int subStart, unsigned int mainStart, unsigned int len)
{
	unsigned int mismatches = 0;
	for (unsigned int offset = 0; offset < len; ++offset)
	{
		if (subStr[subStart + offset] != mainStr[mainStart + offset])
		{
			++mismatches;
		}
	}
	return mismatches;
}

// Counts the positions at which two whole strings differ.
// Returns FIND_NPOS when the strings have different lengths, otherwise the
// number of unequal positions.
// num is taken by value: it is only used as the local counter, the caller's
// variable is not modified despite the OUT marker.
unsigned int MisFind(IN string const& mainStr, IN string const& subStr, OUT unsigned int num)
{
	const unsigned int mainLen = mainStr.size();
	const unsigned int subLen = subStr.size();
	if (mainLen != subLen) return FIND_NPOS;

	num = 0;
	unsigned int idx = 0;
	while (idx < subLen)
	{
		if (mainStr[idx] != subStr[idx]) ++num;
		++idx;
	}

	return num;
}

// Merges and de-overlaps a node list in place.
// The callers in this file pass lists that Add_0 / Add_1 sorted with SortSub.
// Pass 1 (compaction): a node is folded into the previously kept node when it
//   starts no later than that node's subEnd + MERGE_DEVIATION and the combined
//   span has the same length on the read and on the reference; the kept node's
//   end coordinates are extended. Other nodes are kept as they are.
// Pass 2 (overlap): for every pair (j before i) of the compacted list, if the
//   two overlap on the read and/or on the reference by the patterns below, two
//   extra nodes are appended: a copy of i that starts after j, and a copy of j
//   that ends before i. The original nodes stay in the list; appended nodes
//   are not examined (k is fixed before the loops) and the list is not
//   re-sorted here.
void Merge(INOUT vector<_ana_info_node >& vctNode)
{
	unsigned int i, j, postIdx;

	if (vctNode.size() <= 1) return;

	// NOTE(review): postIdx is assigned but never read.
	postIdx = 0xffffffff;

	// Pass 1: j is the index of the last kept node, i scans the rest.
	j = 0;
	for (i = 1; i < vctNode.size(); ++i)
	{
		if (((vctNode[j].subEnd + MERGE_DEVIATION) >= vctNode[i].subStart) &&
			(vctNode[i].subEnd - vctNode[j].subStart) == (vctNode[i].mainEnd - vctNode[j].mainStart))
		{
			// Same offset between read and reference: extend node j to cover node i.
			vctNode[j].subEnd = vctNode[i].subEnd;
			vctNode[j].mainEnd = vctNode[i].mainEnd;
		}
	
		else
		{
			// Keep node i as the next compacted entry.
			++j;
			if (j != i) vctNode[j] = vctNode[i];
		}
	}

	// Drop the entries behind the last kept node.
	vctNode.erase(vctNode.begin() + j + 1, vctNode.end());

/////////////////////////overlap

	if (vctNode.size() <= 1) return;

	// Pass 2: only the k compacted nodes are paired; nodes appended below are skipped.
	unsigned int k = vctNode.size();	
	for (j = 0; j< k - 1; ++j)
	{
	for (i = j + 1; i < k; ++i)
	{
		// Case A: on the read, j runs into i (both boundaries of i lie more than OV_DEVIATION behind j's).
		if ((vctNode[j].subEnd >= vctNode[i].subStart) && (vctNode[i].subEnd > vctNode[j].subEnd + OV_DEVIATION) && (vctNode[i].subStart > vctNode[j].subStart + OV_DEVIATION))//substr, j ov i
		{
			// A1: the same arrangement holds on the reference.
			if ((vctNode[j].mainEnd >= vctNode[i].mainStart) && (vctNode[i].mainStart > vctNode[j].mainStart + OV_DEVIATION) && (vctNode[i].mainEnd > vctNode[j].mainEnd + OV_DEVIATION))	//mainstr, j ov i
			{
				// Use the boundaries of the string with the shorter combined span.
				if ((vctNode[i].subEnd - vctNode[j].subStart) <= (vctNode[i].mainEnd - vctNode[j].mainStart))//keep substr
				{
						// i': node i starting right after j on the read; mainStart follows from the length.
						vctNode.push_back(_ana_info_node());//change i'
						vctNode.back().flag = vctNode[i].flag;
						vctNode.back().subStart = vctNode[j].subEnd + 1;
						vctNode.back().subEnd = vctNode[i].subEnd;
						vctNode.back().mainEnd = vctNode[i].mainEnd;
						vctNode.back().mainStart = vctNode.back().mainEnd - (vctNode.back().subEnd - vctNode.back().subStart);

						// j': node j ending right before i on the read; mainEnd follows from the length.
						vctNode.push_back(_ana_info_node());//change j'
                        vctNode.back().flag = vctNode[j].flag;
                        vctNode.back().subStart = vctNode[j].subStart;
                        vctNode.back().subEnd = vctNode[i].subStart - 1;
                        vctNode.back().mainStart = vctNode[j].mainStart;
                        vctNode.back().mainEnd = vctNode.back().mainStart + (vctNode.back().subEnd - vctNode.back().subStart);
						
                        continue;
				}
				
				else //keep mainstr
				{
						// i': node i starting right after j on the reference; subStart follows from the length.
						vctNode.push_back(_ana_info_node());
                        vctNode.back().flag = vctNode[i].flag;
                        vctNode.back().mainStart = vctNode[j].mainEnd + 1;
                        vctNode.back().mainEnd = vctNode[i].mainEnd;
                        vctNode.back().subEnd = vctNode[i].subEnd;
                        vctNode.back().subStart = vctNode.back().subEnd - (vctNode.back().mainEnd - vctNode.back().mainStart);
		
						// j': node j ending right before i on the reference; subEnd follows from the length.
						vctNode.push_back(_ana_info_node());
                        vctNode.back().flag = vctNode[j].flag;
                        vctNode.back().mainStart = vctNode[j].mainStart;
                        vctNode.back().mainEnd = vctNode[i].mainStart - 1;
                        vctNode.back().subStart = vctNode[j].subStart;
                        vctNode.back().subEnd = vctNode.back().subStart + (vctNode.back().mainEnd - vctNode.back().mainStart);

                        continue;
				}
			}

			// A2: no overlap on the reference (i lies after j): cut both on the read boundaries.
			if (vctNode[i].mainStart > vctNode[j].mainEnd) //mainstr, no overlap, keep substr
			{
					vctNode.push_back(_ana_info_node());
                    vctNode.back().flag = vctNode[i].flag;
                    vctNode.back().subStart = vctNode[j].subEnd + 1;
                    vctNode.back().subEnd = vctNode[i].subEnd;
                    vctNode.back().mainEnd = vctNode[i].mainEnd;
                    vctNode.back().mainStart = vctNode.back().mainEnd - (vctNode.back().subEnd - vctNode.back().subStart);

					vctNode.push_back(_ana_info_node());
                    vctNode.back().flag = vctNode[j].flag;
                    vctNode.back().subStart = vctNode[j].subStart;
                    vctNode.back().subEnd = vctNode[i].subStart - 1;
                    vctNode.back().mainStart = vctNode[j].mainStart;
                    vctNode.back().mainEnd = vctNode.back().mainStart + (vctNode.back().subEnd - vctNode.back().subStart);

					continue;
			}

		}//ending substr j ov i

                // Case B: i lies after j on the read without overlap.
                if (vctNode[i].subStart > vctNode[j].subEnd)//substr, no overlap, j i
		{
                        // ...but j runs into i on the reference: cut both on the reference boundaries.
                        if ((vctNode[j].mainEnd >= vctNode[i].mainStart) && (vctNode[i].mainStart > vctNode[j].mainStart + OV_DEVIATION) && (vctNode[i].mainEnd > vctNode[j].mainEnd + OV_DEVIATION))       //mainstr, j ov i, keep mainstr
			{
					vctNode.push_back(_ana_info_node());
                    vctNode.back().flag = vctNode[i].flag;
                    vctNode.back().mainStart = vctNode[j].mainEnd + 1;
                    vctNode.back().mainEnd = vctNode[i].mainEnd;
                    vctNode.back().subEnd = vctNode[i].subEnd;
                    vctNode.back().subStart = vctNode.back().subEnd - (vctNode.back().mainEnd - vctNode.back().mainStart);
	
					vctNode.push_back(_ana_info_node());
                    vctNode.back().flag = vctNode[j].flag;
                    vctNode.back().mainEnd = vctNode[i].mainStart - 1;
                    vctNode.back().mainStart = vctNode[j].mainStart;
                    vctNode.back().subStart = vctNode[j].subStart;
                    vctNode.back().subEnd = vctNode.back().subStart + (vctNode.back().mainEnd - vctNode.back().mainStart);

					continue;
			}
		}//ending no overlap j i
	}//ending for i
	}//ending for j

///////////////////////////
}

// Step 3: choose the final chain of nodes.
// Pools the forward and backward nodes of ai, sorts them with SortSub, lets
// GetBestCombination reduce the pool to the best-scoring chain, and stores
// that chain in vctNode (previous contents are replaced).
// Always returns true.
bool Step3(_ana_info& ai, OUT vector<_ana_info_node >& vctNode)
{
	vector<_ana_info_node > candidates(ai.vctNode_0.begin(), ai.vctNode_0.end());
	candidates.insert(candidates.end(), ai.vctNode_1.begin(), ai.vctNode_1.end());
	sort(candidates.begin(), candidates.end(), SortSub);

	GetBestCombination(candidates, ai.ref.size());

	vctNode = candidates;
	return true;
}

// Ordering predicate for nodes: ascending subStart, ties broken by ascending subEnd.
bool SortSub(_ana_info_node const& elem1, _ana_info_node const& elem2)
{
	if (elem1.subStart != elem2.subStart)
	{
		return elem1.subStart < elem2.subStart;
	}
	return elem1.subEnd < elem2.subEnd;
}

// Score of a single node (lower is better).
// Forward nodes: 100 per mismatch minus 100 per match.
// Other nodes:   the same, plus 3 * (matchNum + errorNum)^2.
int getScore(_ana_info_node const& node)
{
    if ( node.flag != FLAG_FORWARD )
    {
        return 100 * node.errorNum - 100 * node.matchNum + 3 * (node.matchNum+node.errorNum) * (node.matchNum+node.errorNum);
    }
    return 100 * node.errorNum - 100 * node.matchNum;
}

// Penalty for placing node2 directly after node1 in a chain.
// gap is the number of reference positions between node1's end and node2's
// start; a gap of 0 costs nothing, any other gap costs 400 + 30 * gap
// (the formula is applied to negative gaps as well).
int computePay(_ana_info_node const& node1, _ana_info_node const& node2)
{
    const int gap = node2.mainStart - node1.mainEnd - 1;
    return ( gap == 0 ) ? 0 : 400 + 30 * gap;
}
// Penalty for ending a chain with node1.
// gap is the number of reference positions after node1's end, given a
// reference of length mainLen; a gap of 0 costs nothing, any other gap costs
// 400 + 30 * gap.
int computePay(_ana_info_node const& node1, unsigned int mainLen)
{
    const int gap = mainLen - node1.mainEnd - 1;
    return ( gap == 0 ) ? 0 : 400 + 30 * gap;
}
// Penalty for starting a chain with node1.
// gap is node1.mainStart - 1; a gap of 0 costs nothing, any other gap costs
// 400 + 30 * gap.
// NOTE(review): the head node built by Add_0 has mainStart == 0, which gives
// gap == -1 and a penalty of 370 here, while mainStart == 1 is free. With the
// 0-based coordinates used elsewhere this looks like an off-by-one (gap should
// perhaps be mainStart) - confirm before changing, since it shifts the scores.
int computePay(_ana_info_node const& node1)
{
    const int gap = node1.mainStart-1;
    return ( gap == 0 ) ? 0 : 400 + 30 * gap;
}
// Reduces vctNode to the chain of non-overlapping nodes with the lowest total
// score (dynamic programming over the nodes in their given order; Step3 sorts
// them with SortSub before calling).
//   Total score of a chain = computePay(first node)
//                          + sum of getScore(node) over the chain
//                          + computePay(previous, next) for each adjacent pair
//                          + computePay(last node, mainLen).
//   vctNode (in/out): candidates on entry, the selected chain in chain order
//   on return. An empty input is returned unchanged.
//   mainLen: length of the reference, used for the trailing-gap penalty.
void GetBestCombination(vector<_ana_info_node >& vctNode, unsigned int mainLen)
{
        const int n = vctNode.size();
        // Nothing to choose from; also avoids reading vctNode[0] below.
        if (n == 0) return;

        // Heap-backed tables instead of variable-length arrays, which are a
        // compiler extension rather than standard C++.
        vector<int> minScore(n);        // best score of a chain ending at node i
        vector<int> lastNode(n, -1);    // predecessor of node i in that chain, -1 = none

        minScore[0] = computePay(vctNode[0]) + getScore(vctNode[0]);
        int min_score = minScore[0] + computePay(vctNode[0], mainLen);
        int mark = 0;                   // last node of the best complete chain so far

        for ( int i = 1; i < n; i++ )
        {
            // Option 1: node i starts a chain on its own.
            minScore[i] = computePay(vctNode[i]) + getScore(vctNode[i]);

            // Option 2: node i follows an earlier, non-overlapping node j.
            for ( int j = 0; j < i; j++ )
            {
                if ( IsOverlap(vctNode[j], vctNode[i]) ) continue;

                const int temp = minScore[j] + computePay(vctNode[j], vctNode[i]) + getScore(vctNode[i]);
                if ( minScore[i] > temp )
                {
                    minScore[i] = temp;
                    lastNode[i] = j;
                }
            }

            // Close the chain at node i and compare with the best so far.
            const int total = minScore[i] + computePay(vctNode[i], mainLen);
            if ( min_score > total )
            {
                min_score = total;
                mark = i;
            }
        }

        // Walk the predecessor links back from the best end node, then put
        // the chain into forward order.
        vector<_ana_info_node > result;
        for ( int location = mark; location > -1; location = lastNode[location] )
        {
            result.push_back(vctNode[location]);
        }
        reverse(result.begin(), result.end());

        vctNode.swap(result);
}
// Tells whether node2 may NOT follow node1 in a chain.
// Returns false only when node1 ends before node2 starts on the read AND the
// two nodes do not share any reference position; returns true otherwise.
bool IsOverlap(_ana_info_node const& node1, _ana_info_node const& node2)
{
    const bool subDisjoint = (node1.subEnd < node2.subStart);
    const bool mainDisjoint = (node1.mainStart > node2.mainEnd) || (node1.mainEnd < node2.mainStart);

    return !(subDisjoint && mainDisjoint);
}

///////////////

// Writes the result of one record, but only if IsOutput_i(ai) accepts it:
// the analysis information goes to ofsi, the inversion information for
// ai.vctResult goes to ofii.
void Output(ofstream& ofii, ofstream& ofsi, _ana_info& ai)
{
	if (!IsOutput_i(ai)) return;

	OutputAnaInfo(ofsi, ai);
	OutputInvInfo(ofii, ai, ai.vctResult);
}

// Writes one line per FLAG_BACKWARD node of vctNode to ofs:
//   head, chr, start + mainStart, start + mainEnd, node length on the
//   reference, and the read segment [subStart, subEnd].
// "start" is ai.start converted with atoi. Fields are separated by a tab,
// except that the fourth field is followed by seven spaces.
void OutputInvInfo(ofstream& ofs, _ana_info& ai, vector<_ana_info_node >& vctNode)
{
        for (vector<_ana_info_node >::iterator node = vctNode.begin(); node != vctNode.end(); ++node)
        {
                if (node->flag != FLAG_BACKWARD) continue;

                const int base = atoi(ai.start.c_str());

                ofs << ai.head << "	"
                    << ai.chr << "	"
                    << base + node->mainStart << "	"
                    << base + node->mainEnd << "       "
                    << node->mainEnd - node->mainStart + 1 << "	"
                    << ai.read.substr(node->subStart, node->subEnd - node->subStart + 1) << endl;
        }
}


void OutputAnaInfo(ofstream& ofs, _ana_info& ai)
{
          
    if (ai.chr == "chr1") ai.chrSize = 249250621 ; 
    if (ai.chr == "chr2") ai.chrSize = 243199373 ; 
	if (ai.chr == "chr3") ai.chrSize = 198022430 ; 	
	if (ai.chr == "chr4") ai.chrSize = 191154276 ; 
	if (ai.chr == "chr5") ai.chrSize = 180915260; 
	if (ai.chr == "chr6") ai.chrSize = 171115067; 
	if (ai.chr == "chr7") ai.chrSize = 159138663; 
	if (ai.chr == "chr8") ai.chrSize = 146364022; 
	if (ai.chr == "chr9") ai.chrSize = 141213431; 
	if (ai.chr == "chr10") ai.chrSize = 135534747;
	if (ai.chr == "chr11") ai.chrSize = 135006516;
	if (ai.chr == "chr12") ai.chrSize = 133851895;
	if (ai.chr == "chr13") ai.chrSize = 115169878;
	if (ai.chr == "chr14") ai.chrSize = 107349540;
	if (ai.chr == "chr15") ai.chrSize = 102531392;
	if (ai.chr == "chr16") ai.chrSize = 90354753; 
	if (ai.chr == "chr17") ai.chrSize = 81195210; 
	if (ai.chr == "chr18") ai.chrSize = 78077248; 
	if (ai.chr == "chr19") ai.chrSize = 59128983; 
	if (ai.chr == "chr20") ai.chrSize = 63025520; 		
	if (ai.chr == "chr21") ai.chrSize = 48129895; 	
	if (ai.chr == "chr22") ai.chrSize = 51304566; 	
	if (ai.chr == "chrX") ai.chrSize = 155270560; 
	if (ai.chr == "chrY") ai.chrSize = 59373566; 
	if (ai.chr == "chr11_gl000202_random") ai.chrSize = 40103;
	if (ai.chr == "chr17_ctg5_hap1") ai.chrSize = 1680828;
    if (ai.chr == "chr17_gl000203_random") ai.chrSize = 37498;
    if (ai.chr == "chr17_gl000204_random") ai.chrSize = 81310;
    if (ai.chr == "chr17_gl000205_random") ai.chrSize = 174588;
    if (ai.chr == "chr17_gl000206_random") ai.chrSize = 41001;
    if (ai.chr == "chr18_gl000207_random") ai.chrSize = 4262;
    if (ai.chr == "chr19_gl000208_random") ai.chrSize = 92689;
    if (ai.chr == "chr19_gl000209_random") ai.chrSize = 159169;
    if (ai.chr == "chr1_gl000191_random") ai.chrSize = 106433;
    if (ai.chr == "chr1_gl000192_random") ai.chrSize = 547496;
    if (ai.chr == "chr21_gl000210_random") ai.chrSize = 27682;
    if (ai.chr == "chr4_ctg9_hap1") ai.chrSize = 590426;
    if (ai.chr == "chr4_gl000193_random") ai.chrSize = 189789;
    if (ai.chr == "chr4_gl000194_random") ai.chrSize = 191469;
    if (ai.chr == "chr6_apd_hap1") ai.chrSize = 4622290;
    if (ai.chr == "chr6_cox_hap2") ai.chrSize = 4795371;
    if (ai.chr == "chr6_dbb_hap3") ai.chrSize = 4610396;
    if (ai.chr == "chr6_mann_hap4") ai.chrSize = 4683263;
    if (ai.chr == "chr6_mcf_hap5") ai.chrSize = 4833398;
    if (ai.chr == "chr6_qbl_hap6") ai.chrSize = 4611984;
    if (ai.chr == "chr6_ssto_hap7") ai.chrSize = 4928567;
    if (ai.chr == "chr7_gl000195_random") ai.chrSize = 182896;
    if (ai.chr == "chr8_gl000196_random") ai.chrSize = 38914;
    if (ai.chr == "chr8_gl000197_random") ai.chrSize = 37175;
    if (ai.chr == "chr9_gl000198_random") ai.chrSize = 90085;
    if (ai.chr == "chr9_gl000199_random") ai.chrSize = 169874;
    if (ai.chr == "chr9_gl000200_random") ai.chrSize = 187035;
    if (ai.chr == "chr9_gl000201_random") ai.chrSize = 36148;
    if (ai.chr == "chrM") ai.chrSize = 16571;
    if (ai.chr == "chrUn_gl000211") ai.chrSize = 166566;
    if (ai.chr == "chrUn_gl000212") ai.chrSize = 186858;
    if (ai.chr == "chrUn_gl000213") ai.chrSize = 164239;
    if (ai.chr == "chrUn_gl000214") ai.chrSize = 137718;
    if (ai.chr == "chrUn_gl000215") ai.chrSize = 172545;
    if (ai.chr == "chrUn_gl000216") ai.chrSize = 172294;
    if (ai.chr == "chrUn_gl000217") ai.chrSize = 172149;
    if (ai.chr == "chrUn_gl000218") ai.chrSize = 161147;
    if (ai.chr == "chrUn_gl000219") ai.chrSize = 179198;
    if (ai.chr == "chrUn_gl000220") ai.chrSize = 161802;
    if (ai.chr == "chrUn_gl000221") ai.chrSize = 155397;
	if (ai.chr == "chrUn_gl000222") ai.chrSize = 186861;
    if (ai.chr == "chrUn_gl000223") ai.chrSize = 180455;
    if (ai.chr == "chrUn_gl000224") ai.chrSize = 179693;
    if (ai.chr == "chrUn_gl000225") ai.chrSize = 211173;
        if (ai.chr == "chrUn_gl000226") ai.chrSize = 15008;
        if (ai.chr == "chrUn_gl000227") ai.chrSize = 128374;
        if (ai.chr == "chrUn_gl000228") ai.chrSize = 129120;
        if (ai.chr == "chrUn_gl000229") ai.chrSize = 19913;
        if (ai.chr == "chrUn_gl000230") ai.chrSize = 43691;
        if (ai.chr == "chrUn_gl000231") ai.chrSize = 27386;
        if (ai.chr == "chrUn_gl000232") ai.chrSize = 40652;
        if (ai.chr == "chrUn_gl000233") ai.chrSize = 45941;
        if (ai.chr == "chrUn_gl000234") ai.chrSize = 40531;
        if (ai.chr == "chrUn_gl000235") ai.chrSize = 34474;
        if (ai.chr == "chrUn_gl000236") ai.chrSize = 41934;
        if (ai.chr == "chrUn_gl000237") ai.chrSize = 45867;
        if (ai.chr == "chrUn_gl000238") ai.chrSize = 39939;
        if (ai.chr == "chrUn_gl000239") ai.chrSize = 33824;
        if (ai.chr == "chrUn_gl000240") ai.chrSize = 41933;
        if (ai.chr == "chrUn_gl000241") ai.chrSize = 42152;
        if (ai.chr == "chrUn_gl000242") ai.chrSize = 43523;
        if (ai.chr == "chrUn_gl000243") ai.chrSize = 43341;
        if (ai.chr == "chrUn_gl000244") ai.chrSize = 39929;
        if (ai.chr == "chrUn_gl000245") ai.chrSize = 36651;
        if (ai.chr == "chrUn_gl000246") ai.chrSize = 38154;
        if (ai.chr == "chrUn_gl000247") ai.chrSize = 36422;
        if (ai.chr == "chrUn_gl000248") ai.chrSize = 39786;
        if (ai.chr == "chrUn_gl000249") ai.chrSize = 38502;
        ofs << ai.head << endl;

	OutputVctNode(ofs, ai, ai.vctResult);
	ofs << endl;
}

void OutputVctNode(ofstream& ofs,  _ana_info const& ai, vector<_ana_info_node > const& vctNode)
{

    unsigned int sum, invsum, t, q, tempEnd;
	string alignedStr, invStr;
    sum = 0;
	invsum = 0;
	t = 0;
	q = 0;
	int alignedStart = vctNode[0].subStart;
	int invStart = ai.read.size();
	for (int i = 0; i < vctNode.size(); ++i)
	{
		if (vctNode[i].flag == FLAG_FORWARD)
		{
			if (vctNode[i].mainStart == t)
			{
				alignedStr += ai.read.substr(vctNode[i].subStart, vctNode[i].subEnd - vctNode[i].subStart + 1);
				t = vctNode[i].mainEnd + 1; 		
				sum += vctNode[i].subEnd - vctNode[i].subStart + 1;
				continue;
			}
			else
			{
				int tempT = vctNode[i].mainStart - t;				
				for (int j = 0; j < tempT; ++j) 
				{
					alignedStr += "-";
					t++;
				}				
                        	
                                alignedStr += ai.read.substr(vctNode[i].subStart, vctNode[i].subEnd - vctNode[i].subStart + 1);
                                t = vctNode[i].mainEnd + 1;
                                sum += vctNode[i].subEnd - vctNode[i].subStart + 1;
                                continue;
			}

		}
		else
		{
			if (vctNode[i].subStart < invStart) invStart = vctNode[i].subStart;
			if (vctNode[i].mainStart == q)
			{
				string tempStr = ai.read.substr(vctNode[i].subStart, vctNode[i].subEnd - vctNode[i].subStart + 1);	
				Change(tempStr);
				invStr += tempStr;
				q = vctNode[i].mainEnd + 1;
				invsum += vctNode[i].subEnd - vctNode[i].subStart + 1;
				continue;
			}		
			else
			{
				int tempQ = vctNode[i].mainStart - q;
				for (int k = 0; k < tempQ; ++k)
                                {
                                        invStr += "-";
                                        q++;
                                }
				string tempStr = ai.read.substr(vctNode[i].subStart, vctNode[i].subEnd - vctNode[i].subStart + 1);
				Change(tempStr);
				invStr += tempStr;
                                q = vctNode[i].mainEnd + 1;
                                invsum += vctNode[i].subEnd - vctNode[i].subStart + 1;
				tempEnd = vctNode[i].mainEnd;
				continue;
			}

		}
		
	}

    for (int i = 0; i < ai.ref.size() - tempEnd - 1; ++i) invStr += "-";
	
                string refChr = "hg19." + ai.chr;
                ofs << "s" << setw(30) << refChr << setw(12) << ai.start << setw(6) << ai.ref.size() << setw(4)
                        << ai.flag << setw(12) << ai.chrSize << setw(123) << ai.ref << endl;
                ofs << "s" << setw(30) << ai.head << setw(12) << alignedStart << setw(6) << sum << setw(4)
                        << "+" << setw(12) << ai.read.size() << setw(123) << alignedStr << endl;
                ofs << "s" << setw(30) << ai.head << setw(12) << invStart << setw(6) << invsum <<  setw(4)
                        << "-" << setw(12) << ai.read.size() << setw(123) << invStr << endl;
}

/*
 * Decides whether the analysed read held in ai should be written out.
 *
 * Pass 1: scans ai.vctResult by index. Each time a FLAG_BACKWARD node is
 * found, the node lists and the result are cleared and the analysis is
 * re-run with an error budget of ERROR_NUM + 1 on the read/reference with
 * SKIP_LEN characters removed from both ends (MatchAll/Add_0 on the read,
 * then Change + MatchAll_1/Add_1, followed by Step2 and Step3). The scan
 * then continues at the next index of the rebuilt ai.vctResult, so the
 * re-analysis can run more than once; this is kept as is because the
 * helpers are defined elsewhere.
 *
 * Pass 2: if the (possibly rebuilt) result still contains a FLAG_BACKWARD
 * node, sums the covered length of all result nodes - measured on the read
 * when the read is not longer than the reference, otherwise on the
 * reference - and returns true when that sum plus 15 reaches the length of
 * the shorter sequence.
 *
 * Returns false when no backward node is present or the coverage is too low.
 * ai is modified by pass 1.
 */
bool IsOutput_i(_ana_info& ai)
{
    for (unsigned int i = 0; i < ai.vctResult.size(); i++)
    {
        if (!(ai.vctResult[i].flag == FLAG_BACKWARD)) continue;

        ai.vctNode_0.clear();
        ai.vctNode_1.clear();
        ai.vctResult.clear();

        string subStr, mainStr;
        vector<_match_all_info > vctMai;

        subStr = ai.read.substr(SKIP_LEN, ai.read.size() - SKIP_LEN * 2);
        mainStr = ai.ref.substr(SKIP_LEN, ai.ref.size() - SKIP_LEN * 2);

        // One more mismatch is tolerated than in the regular pass.
        unsigned int errorNum = ERROR_NUM + 1;
        MatchAll(mainStr, subStr, errorNum, vctMai);
        Add_0(vctMai, ai.read.size(), ai.ref.size(), ai, errorNum, ai.vctNode_0);

        Change(subStr);
        MatchAll_1(mainStr, subStr, errorNum, vctMai);
        Add_1(vctMai, ai.read.size(), ai.ref.size(), ai, errorNum, ai.vctNode_1);

        Step2(ai);

        Step3(ai, ai.vctResult);
    }

    // The coverage below does not depend on which backward node is found,
    // so it is enough to know that at least one exists.
    bool hasBackward = false;
    for (unsigned int i = 0; i < ai.vctResult.size(); i++)
    {
        if (ai.vctResult[i].flag == FLAG_BACKWARD)
        {
            hasBackward = true;
            break;
        }
    }
    if (!hasBackward) return false;

    unsigned int sum = 0;
    unsigned int size;
    if (ai.read.size() <= ai.ref.size())
    {
        size = ai.read.size();
        for (unsigned int k = 0; k < ai.vctResult.size(); ++k)
            sum += ai.vctResult[k].subEnd - ai.vctResult[k].subStart + 1;
    }
    else
    {
        size = ai.ref.size();
        for (unsigned int k = 0; k < ai.vctResult.size(); ++k)
            sum += ai.vctResult[k].mainEnd - ai.vctResult[k].mainStart + 1;
    }

    // Accept when at most 15 positions of the shorter sequence are uncovered.
    return sum + 15 >= size;
}

///////////////

/*
 * Splits strSrc into tokens and appends them to vctDst.
 *
 * strSrc        - text to split.
 * strDelimiters - set of separator characters; any run of them separates
 *                 two tokens, so no empty tokens are produced.
 * vctDst        - receives the tokens in order. Existing elements are kept;
 *                 new tokens are pushed after them.
 */
void Split(IN string const& strSrc, IN string const& strDelimiters, OUT vector<string >& vctDst)
{
    string::size_type cursor = 0;
    while (cursor != string::npos)
    {
        // Skip any leading separators; stop when only separators remain.
        const string::size_type tokenBegin = strSrc.find_first_not_of(strDelimiters, cursor);
        if (tokenBegin == string::npos) break;

        const string::size_type tokenEnd = strSrc.find_first_of(strDelimiters, tokenBegin + 1);

        // When tokenEnd is npos the count exceeds the remaining length and
        // substr clamps it, yielding the rest of the string.
        vctDst.push_back(strSrc.substr(tokenBegin, tokenEnd - tokenBegin));

        cursor = (tokenEnd == string::npos) ? string::npos : tokenEnd + 1;
    }
}

/*
 * Returns str with every leading and trailing character that belongs to
 * strCharacters removed.
 *
 * str           - text to trim.
 * strCharacters - set of characters to strip from both ends.
 *
 * Returns an empty string when str is empty or consists only of characters
 * from strCharacters. Characters inside the remaining text are untouched.
 */
string Trim(string const& str, string const& strCharacters)
{
    // Positions are kept as size_type so that npos and large offsets are not
    // narrowed, and the leading search is done only once.
    const string::size_type first = str.find_first_not_of(strCharacters);
    if (first == string::npos) return "";

    // A non-stripped character exists, so this search cannot return npos.
    const string::size_type last = str.find_last_not_of(strCharacters);
    return str.substr(first, last - first + 1);
}

