#include <math.h>
#include <sstream>
#include <fstream>
#include "TypeDef.h"
#include "SequenceTransform.h"
#include "OftenUsedOperatLib.h"
#include "Metagenome.h"
#include "Bin.h"

// Load a matrix of doubles from the text file MatrixFilename.
//
// File layout (the same layout displayMatrix() writes): a header line with two
// integers, `lin` then `col`, followed by `col` text lines; text line j holds
// the `lin` values ma(0,j) ... ma(lin-1,j).
//
// Returns the matrix, constructed as Matrix_T<double>(lin, col, 0.0); cells
// that cannot be read from the file keep the initial 0.0.
// Prints a message and terminates the process with status 1 when the file
// cannot be opened or when the header is unreadable or negative.
M_D Metagenome_T::getMatrix(Str &MatrixFilename)
{
	std::ifstream in( MatrixFilename.data() );
	if( !in )
	{ cout << "can't open file " << MatrixFilename << endl; exit(1); }

	int lin = 0;
	int col = 0;
	std::string line;

	// Refuse to build a matrix from a missing/garbled header instead of
	// handing unchecked dimensions to the Matrix_T constructor.
	if( !(in >> lin >> col) || lin < 0 || col < 0 )
	{ cout << "invalid matrix header in file " << MatrixFilename << endl; exit(1); }
	getline( in, line );	// discard the remainder of the header line

	Matrix_T< double > ma( lin, col, 0.0 );
	for( int j=0;  j<col; ++j )
	{
		for( int i=0; i<lin; ++i )
			in >> ma( i, j );
		getline( in, line );	// discard the remainder of this text line
	}
	in.close();
	return ma;
}

// Write `ma` to `out` as text.
// First a header line: ma.getLine() and ma.getColum(), each in a field of
// width 10.  Then one output line per column index c, listing
// ma(0,c) ... ma(getLine()-1,c), every value followed by a tab.
void Metagenome_T::displayMatrix( const Matrix_T< double >& ma, std::ostream& out )
{
	out << std::setw(10) << ma.getLine() << std::setw(10) << ma.getColum() << endl;

	int column = 0;
	while( column < ma.getColum() )
	{
		int row = 0;
		while( row < ma.getLine() )
		{
			out << ma(row,column) << "\t";
			++row;
		}
		out << endl;
		++column;
	}
}

// Walk upstream from location.second in steps of three bases and record in
// location.first where the longest in-frame ORF could start.
//
// seq      : digit-encoded sequence; three consecutive characters are packed
//            into one code as (d0<<4) + (d1<<2) + d2 with d = char - '0'
//            (e.g. "032" -> 14, which the STARTS comments label ATG).
// location : location.second is the scan origin and is left untouched;
//            location.first is overwritten with
//              - the scan position (< 3) if the beginning of the sequence is
//                reached before any code contained in STOPS, or
//              - the most upstream position whose code is in STARTS, seen
//                before the first code contained in STOPS (-1 if none was seen).
//
// The codon at location.second itself is not inspected: the original computed
// its code into an unused local, which only added a pointless read of
// seq[location.second .. location.second+2]; that dead read is removed.
void TIS_T::x2LongestORF( const char* seq, Pa_I_I& location )
{
	int leftmostStart = -1;

	for( int pos = location.second - 3; ; pos -= 3 )
	{
		// Ran out of sequence before meeting an in-frame stop.
		if( pos < 3 )
		{
			location.first = pos;
			return;
		}

		const int codon = ((seq[pos] - 48)<<4) + ((seq[pos+1] - 48)<<2) + seq[pos + 2] - 48;

		// Upstream in-frame stop: the ORF cannot extend any further.
		if( STOPS.find(codon) != STOPS.end() )
		{
			location.first = leftmostStart;
			return;
		}

		// Remember every start code; the last one remembered is the leftmost.
		if( STARTS.find(codon) != STARTS.end() )
			leftmostStart = pos;
	}
}

// Read a FASTA-like file into IdSeqMap.
//
// The first line of the file, and every later line containing '>', is taken as
// a record header; the record id is that line without its first character.
// All lines up to the next header are converted with
// SequenceTransform_T::char2DigitalSeq() and concatenated.  For each record the
// map receives id -> (sequence, reversed sequence with
// SequenceTransform_T::ToOppRule_T applied to every character).
// As before, a repeated id keeps its first record (map::insert does not
// overwrite).
//
// Prints a message and exits with status 1 if the file cannot be opened.
//
// Fixes over the previous eof()-driven loop: the last sequence line (or a
// trailing header) is no longer lost when the file lacks a final newline, and
// an empty header line no longer makes substr(1) throw.
void Metagenome_T::getSeqs(Str& seqsFilename)
{
	std::ifstream seqsFile( seqsFilename.data() );
	if( !seqsFile.good() ){
		std::cout<<"file "<<seqsFilename<<" not found!"<<std::endl;
		exit(1);
	}

	Str line;
	bool haveLine = !std::getline( seqsFile, line ).fail();
	while( haveLine )
	{
		// `line` holds the header of the record being read.
		Str id = line.empty() ? Str() : line.substr(1);

		// Gather sequence lines until the next header or the end of the file.
		Str seq;
		for( ;; )
		{
			haveLine = !std::getline( seqsFile, line ).fail();
			if( !haveLine || line.find(">") != std::string::npos )
				break;
			seq += SequenceTransform_T::char2DigitalSeq(line);
		}

		// Opposite strand: reverse the sequence, then map every character.
		Str negativeseq = seq;
		std::reverse( negativeseq.begin(), negativeseq.end() );
		std::for_each( negativeseq.begin(), negativeseq.end(),
			SequenceTransform_T::ToOppRule_T() );

		IdSeqMap.insert( make_pair(id, make_pair(seq,negativeseq)) );
	}
	seqsFile.close();
}

// Write every record of IdSeqMap to seqsFilename in FASTA layout:
// a ">id" line followed by the first (forward) sequence of the record,
// converted with SequenceTransform_T::digital2CharSeq() and wrapped at
// 70 characters per line.  An empty sequence produces no sequence lines.
void Metagenome_T::outSeqs( Str& seqsFilename )
{
	std::ofstream seqsFile( seqsFilename.data() );

	for( std::map< Str, std::pair<Str,Str> >::iterator rec = IdSeqMap.begin();
	     rec != IdSeqMap.end(); ++rec )
	{
		seqsFile << ">" << rec->first << endl;

		Str bases = SequenceTransform_T::digital2CharSeq(rec->second.first);
		// Full 70-character chunks first; the final substr() is simply shorter
		// when fewer than 70 characters remain.
		for( std::size_t offset = 0; offset < bases.size(); offset += 70 )
			seqsFile << bases.substr(offset,70) << endl;
	}
	seqsFile.close();
}

// Read ORF locations into IdLocationMap.
//
// The first line of the file, and every later line containing '>', is a record
// header; the record id is that line without its first character.  Every other
// line is expected to hold "<pos1> <pos2> <strand>" separated by whitespace;
// the positions are converted with atoi() and the location is positive exactly
// when the strand field equals "+".  Records without any location are not
// inserted; a repeated id keeps its first record (map::insert).
//
// Prints a message and exits with status 1 if the file cannot be opened.
//
// Fixes over the previous eof()-driven loop:
//  - the last location line is no longer dropped when the file lacks a final
//    newline;
//  - blank lines, or a header that is the last line of the file, no longer
//    push a location built from the previous line's (stale) fields;
//  - a missing strand field no longer inherits the previous line's strand;
//  - an empty header line no longer makes substr(1) throw.
void Metagenome_T::getLocations(Str &LocationsFilename)
{
	std::ifstream LocationsFile( LocationsFilename.data() );
	if( !LocationsFile.good() ){
		std::cout<<"file "<<LocationsFilename<<" not found!"<<std::endl;
		exit(1);
	}

	Str line;
	Str id;
	std::vector<Location_T> subLocations;
	bool firstLine = true;

	while( !std::getline( LocationsFile, line ).fail() )
	{
		if( firstLine || line.find(">") != std::string::npos )
		{
			firstLine = false;
			// A new record starts: store the one collected so far.
			if( !subLocations.empty() )
			{
				IdLocationMap.insert( make_pair(id,subLocations) );
				subLocations.clear();
			}
			id = line.empty() ? Str() : line.substr(1);
			continue;
		}

		// Fresh fields for every line so nothing leaks from the previous one.
		std::istringstream in ( line );
		Str pos1, pos2, strand;
		if( !(in >> pos1 >> pos2) )
			continue;	// blank or incomplete line: nothing to record
		in >> strand;

		const bool isPositive = (strand=="+");
		Pa_I_I location = make_pair(atoi(pos1.data()),atoi(pos2.data()));
		Location_T Location(location,isPositive);
		subLocations.push_back(Location);
	}

	// Store the last record of the file.
	if( !subLocations.empty() )
		IdLocationMap.insert( make_pair(id,subLocations) );

	LocationsFile.close();
}

// Write IdLocationMap to LocationsFilename in the layout getLocations() reads:
// a ">id" line per record, then one "<first>\t<second>\t<+|->" line per
// location of that record.
//
// The location vector of each record is now accessed through a const
// reference; the previous code copied the whole vector for every record.
void Metagenome_T::outLocations( Str& LocationsFilename )
{
	std::ofstream LocationsFile( LocationsFilename.data() );
	std::map< Str, std::vector<Location_T> >::iterator iter = IdLocationMap.begin();
	for( ; iter!=IdLocationMap.end(); ++iter )
	{
		LocationsFile << ">" << iter->first << endl ;

		const std::vector<Location_T>& subLocations = iter->second;
		for( std::vector<Location_T>::size_type j = 0; j < subLocations.size(); ++j )
		{
			const Location_T& loc = subLocations[j];
			LocationsFile << loc.location.first << "\t" << loc.location.second << "\t"
			              << ( loc.isPositive ? "+" : "-" ) << endl;
		}
	}
	LocationsFile.close();
}

// Fill GenusIdMap by binning the sequences currently held in IdSeqMap.
//
// bin_model_file is handed to get_Freqs_table() and taxon_map_file to
// get_group_map(); their results, together with IdSeqMap, go to bin_fasta().
// The time spent in get_Freqs_table() is printed in whole seconds.
// seqsFilename is not used by this function.
void Metagenome_T::getGenusId_from_beginning(Str &seqsFilename, Str &bin_model_file, Str &taxon_map_file)
{
	// Step 1: frequency table of the binning model (timed).
	std::vector<std::vector<float> > freqTable;
	std::vector<string> modelGenomes;
	const int loadStarted = (int)time(NULL);
	get_Freqs_table(bin_model_file, modelGenomes, freqTable);
	cout << time(NULL)-loadStarted << " seconds costs" << endl;

	// Step 2: genome -> group mapping.
	std::vector<string> mappedGenomes;
	std::vector<string> mappedGroups;
	get_group_map(taxon_map_file, mappedGenomes, mappedGroups);

	// Step 3: bin the loaded sequences.
	Metagenome_T::GenusIdMap = bin_fasta(freqTable, IdSeqMap, mappedGenomes, mappedGroups);
}

// Dump GenusIdMap to GenusIdFilename, one line per entry:
// ">" + mapped value + tab + key.
void Metagenome_T::outGenusId(Str &GenusIdFilename)
{
	std::ofstream GenusIdFile( GenusIdFilename.data() );

	std::multimap< Str, Str >::iterator entry = GenusIdMap.begin();
	while( entry != GenusIdMap.end() )
	{
		GenusIdFile << ">" << entry->second << "\t" << entry->first << endl ;
		++entry;
	}

	GenusIdFile.close();
}

void Metagenome_T::getGenusORFSetMap()
{
	std::multimap< Str, Str >::iterator iter1 = GenusIdMap.begin();
	std::map< Str, std::pair<Str,Str> >::iterator iter2 = IdSeqMap.begin();
	std::map< Str, std::vector<Location_T> >::iterator iter3 = IdLocationMap.begin();

	for ( ; iter1!=GenusIdMap.end(); iter1++ )
	{
		Str id = iter1->second;
		if ( IdSeqMap.find( id )!= IdSeqMap.end() ) 
			iter2 = IdSeqMap.find( id ) ;
		else 
			continue;
		if ( IdLocationMap.find(id) != IdLocationMap.end() )
			iter3 = IdLocationMap.find(id);
		else
			continue;

		for ( int j=0; j<(int)iter3->second.size(); j++ )
		{
			ORF_T ORF;
			ORF.id = iter1->second;
			ORF.isPositive = (iter3->second)[j].isPositive;
			ORF.location = (iter3->second)[j].location;
			ORF.fileForm2Location((int)iter2->second.first.size()); //ＴlocationתΪҪõʽ

			ORF.start = (iter3->second)[j].start;
			ORF.trueTISPos = (iter3->second)[j].trueTISPos;

			if (ORF.isPositive)
				ORF.codingSeq = &iter2->second.first;
			else
				ORF.codingSeq = &iter2->second.second;

			ORF.ORFLength =  ORF.location.second - ORF.location.first ; //ʱlocation.secondָֹӵĵһλ

			ORF.seqLen = (int)iter2->second.first.size();

			GenusORFSetMap[iter1->first].push_back(ORF);
		}
		
	}
}

void TIS_T::initiateSettings()
{
	std::ifstream in("Settings.txt");
	if( !in.good() ){
		cout<<"Settings.txt is missing in this folder."<<endl;
		exit(0);
	}
	while( !in.eof() ){
		Str line;
		std::getline(in,line);
		if( !line.empty() && (line.find("#") == std::string::npos) ){
			int pos = (int)line.find("=");
			Str key = line.substr(0,pos), value = line.substr(pos+1);
			if( key == "MAXORDER" ){
				MAXORDER = atoi( value.data() );
				//cout<<"  MAX ORDER OF MARKOV MODEL: "<<MAXORDER<<"\n";
			}
			if( key == "UPSTREAM" ){
				UPBP = atoi( value.data() );
				//cout<<"  UPSTREAM WINDOW LENGTH: "<<UPBP<<endl;
			}
			if( key == "DOWNSTREAM" ){
				DOWNBP = atoi( value.data() );
				//cout<<"  DOWNSTREAM WINDOW LENGTH: "<<DOWNBP<<endl;
			}
			if( key == "HaveHeadNumCutoff" ){
				//cout<<"  INPUT FORMAT: "<<value<<endl;
				HaveHeadNumCutoff = atoi(value.data());
			}			
		}
	}
}

// Load the pre-computed parameters of genus `genusName` from the directory
// <prioriparameters>/<genusName>/ :
//   <genusName>.prioriProb -> pT, pFU, pFD (the first three values of the file)
//   <genusName>.trueMa     -> trueTIS
//   <genusName>.ufMa       -> upFalse
//   <genusName>.dfMa       -> downFalse
// The matrices are read with getMatrix(), which terminates the process when a
// file cannot be opened.
//
// Changes from the previous version:
//  - the path is joined with '/' instead of a hard-coded backslash, so the
//    files are also found on POSIX systems (Windows accepts '/' as well);
//  - a missing or incomplete .prioriProb file is reported and terminates the
//    process with status 1 instead of silently leaving pT/pFU/pFD at whatever
//    values they held before.
void TIS_T::getprioriParameters( Str& genusName )
{
	const Str path = prioriparameters + "/" + genusName + "/";

	Str prioriProbFilename = path+genusName+".prioriProb";
	Str trueMaFilename = path+genusName+".trueMa";
	Str ufMaFilename = path+genusName+".ufMa";
	Str dfMaFilename = path+genusName+".dfMa";

	std::ifstream prioriProbFile ( prioriProbFilename.data() );
	if( !(prioriProbFile >> pT >> pFU >> pFD) )
	{
		cout << "can't read priori probabilities from file " << prioriProbFilename << endl;
		exit(1);
	}
	prioriProbFile.close();

	trueTIS = getMatrix(trueMaFilename);
	upFalse = getMatrix(ufMaFilename);
	downFalse = getMatrix(dfMaFilename);
}

// Learn the three weight matrices (trueTIS, upFalse, downFalse) and the prior
// weights (pT, pFU, pFD) from the current TIS annotation of ORFSet.
//
// Each matrix has 4^(ORDER+1) lines (one per (ORDER+1)-mer, encoded base 4
// from the digit characters of the sequence) and UPBP+3+DOWNBP columns (one
// per position of the window that starts UPBP bases before a candidate codon).
//
// For every ORF with leftmost >= 3:
//   - the first in-frame codon in STARTS at or after location.first becomes
//     the "initial TIS" and location.first is moved onto it
//     (ORFs without such a codon get haveTIS = -1 and are skipped);
//   - start codons in any frame within the 200 bases upstream of the initial
//     TIS feed upFalse (and the background base counts);
//   - in-frame start codons downstream of the initial TIS feed downFalse;
//   - the initial TIS itself feeds trueTIS.
// With filter_on set, an identical window is counted only once per matrix.
//
// Side effects besides the matrices: modifies location.first / haveTIS of the
// ORFs, and sets qi, maxCandidateN, pT, pFU and pFD.
void TIS_T::initiatePWMs( std::vector<ORF_T>& ORFSet )
{
	//ORDER = 0; // (disabled) force a 0-th order model
	int sz = (int)pow(4.0,ORDER+1);
	upFalse = M_D(sz, UPBP+3+DOWNBP,1); // every cell starts at 1 - presumably a pseudocount so toBeLoged() never sees 0
	downFalse = M_D(sz, UPBP+3+DOWNBP,1);
	trueTIS = M_D( sz, UPBP+3+DOWNBP,1);

	Set_Str upSet,downSet,trueSet; // windows already counted; only consulted when filter_on is set

	Ve_D bkg(4,0);	// background counts of the four base digits (filled from up-false windows only)
	int C = 0;		// total number of bases added to bkg
	// NOTE(review): upCount/downCount/trueCount are incremented but never read in this function.
	int upCount=0, downCount=0, trueCount=0;

	std::vector< ORF_T >::iterator iter = ORFSet.begin();
	for( ; iter != ORFSet.end(); ++iter )
	{
		const char* seq = iter->codingSeq->data();
		int seqLen = iter->seqLen;
		
		// ORFs whose leftmost position is below 3 are skipped here, so they
		// take no part in learning the matrices.
		if ( iter->leftmost < 3 )
			continue;
		
		int initTIS=-1; // position of the initial TIS; stays -1 if none is found

		// Starting at location.first, take the first in-frame codon that is
		// in STARTS as the initial TIS.
		int hint = iter->location.first ;

		for( ; hint < iter->location.second; hint += 3 )
		{
			int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
			if(STARTS.find(subStr) != STARTS.end())
			{
				initTIS = hint;
				break;
			}
		}

		// initTIS still -1: no in-frame start codon in the ORF; flag it and move on.
		if ( initTIS == -1 )
		{
			iter->haveTIS = -1;
			continue;
		}

		iter->location.first = initTIS; // the ORF now starts at the initial TIS
		
		// Without filtering: every window is counted.
		if ( filter_on == false)
		{
			//up false TIS, regardless of frame
			hint = 0 > (initTIS - 200) ? 0 : (initTIS - 200);
			for( ; hint < initTIS - 3; ++hint )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if( STARTS.find(subStr) != STARTS.end())
				{
					int beg = hint - UPBP;
					// the whole window (plus ORDER look-ahead bases) must lie inside the sequence
					if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1 + ORDER < seqLen  )
					{
						upCount++;
						int i = 0;
						for( ; i < upFalse.getColum(); ++i ){
							// base-4 code of the (ORDER+1)-mer starting at window position i
							int index = 0;
							for( int f = 0; f < ORDER + 1; ++f ){
								index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
							}
							++upFalse(index,i);							
							++bkg[seq[beg+i]-'0'];
							++C;
						}
					}
				}
			}
			//down false TIS
			hint = initTIS + 3;
			for( ; hint < iter->location.second; hint += 3 ){
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if(STARTS.find(subStr) != STARTS.end()){
					int beg = hint - UPBP;
					if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1 + ORDER < seqLen  )
					{
						downCount++;
						int i = 0;
						// NOTE(review): bounded by upFalse.getColum() rather than downFalse.getColum();
						// both matrices are built with the same column count above.
						for( ; i < upFalse.getColum(); ++i ){
							int index = 0;
							for( int f = 0; f < ORDER + 1; ++f ){
								index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
							}
							++downFalse(index,i);							
						}
					}
				}
			}
			//true TIS
			int beg = initTIS - UPBP;
			if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1 + ORDER < seqLen  )
			{
				trueCount++;
				int i = 0;
				for( ; i < trueTIS.getColum(); ++i ){
					int index = 0;
					for( int f = 0; f < ORDER + 1; ++f ){
						index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
					}
					++trueTIS(index,i);
				}		
			}
		}
		// With filtering: a window identical to one already counted is skipped.
		else
		{
			Str nter;	// text of the current window, used as the de-duplication key
			//up false TIS, regardless of frame
			hint = (0 > (initTIS - 200) ? 0 : (initTIS - 200));
			for( ; hint < initTIS - 3; ++hint )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				// NOTE(review): hard-coded codes instead of STARTS; they equal the
				// four values reviseTIS() inserts into STARTS (14, 30, 62, 46).
				if( subStr == 14 || subStr == 46 || subStr == 62 || subStr == 30)
				{
					int beg = hint - UPBP;
					if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1  + ORDER < seqLen  )
					{
						upCount++;
						nter = Str(seq+beg,UPBP + 3 + DOWNBP) ;
						if( upSet.find(nter) == upSet.end() )
						{
							int i = 0;
							for( ; i < upFalse.getColum(); ++i ){
								int index = 0;
								for( int f = 0; f < ORDER + 1; ++f ){
									index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
								}
								++upFalse(index,i);							
								++bkg[seq[beg+i]-'0'];
								++C;
							}
							upSet.insert(nter);
						}
						
					}
				}
			}
			//down false TIS
			hint = initTIS + 3;
			for( ; hint < iter->location.second; hint += 3 ){
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if(STARTS.find(subStr) != STARTS.end()){
					int beg = hint - UPBP;
					if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1  + ORDER < seqLen  )
					{
						downCount++;
						nter = Str(seq+beg,UPBP + 3 + DOWNBP) ;
						if ( downSet.find(nter)==downSet.end() )
						{
							int i = 0;
							for( ; i < downFalse.getColum(); ++i ){
								int index = 0;
								for( int f = 0; f < ORDER + 1; ++f ){
									index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
								}
								++downFalse(index,i);							
							}
							downSet.insert(nter);
						}
					}
				}
			}
			//true TIS
			int beg = initTIS - UPBP;
			if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1 + ORDER < seqLen  )
			{
				trueCount++;
				nter = Str(seq+beg,UPBP + 3 + DOWNBP) ;
				if( trueSet.find(nter)==trueSet.end() )
				{
					int i = 0;
					for( ; i < trueTIS.getColum(); ++i ){
						int index = 0;
						for( int f = 0; f < ORDER + 1; ++f ){
							index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
						}
						++trueTIS(index,i);
					}
					trueSet.insert(nter);
				}
			}
		
		}
	}

	//Normalize PWM
	// (toBeAveraged()/toBeLoged() are defined elsewhere; the scoring code later
	//  applies exp() to the cells, i.e. treats them as logarithms.)
	trueTIS.toBeAveraged();
	upFalse.toBeAveraged();
	downFalse.toBeAveraged();
	trueTIS.toBeLoged();
	upFalse.toBeLoged();
	downFalse.toBeLoged();

	/*displayMatrix(upFalse,cout);
	displayMatrix(downFalse,cout);
	displayMatrix(trueTIS,cout);*/

	// Turn the background counts into frequencies.
	// NOTE(review): if no up-false window was counted, C is 0 and the
	// frequencies become NaN; the loop below would then never reach
	// P > 0.9999 and would not terminate - confirm callers rule this out.
	int i = 0;
	for( ; i < (int)bkg.size(); ++i )
		bkg[i] /= C;
	
	// Geometric model of candidate codons, built from the background frequencies:
	//   p = (ATG+GTG+TTG) / (ATG+GTG+TTG + TGA+TAA+TAG)
	//   q = (TGA+TAA+TAG) / (same denominator)
	// (base digits: a=0, c=1, g=2, t=3).  qi[i] = q*p^i; maxCandidateN is the
	// number of terms needed for their sum to exceed 0.9999, and ufN
	// accumulates i*q*p^i over all terms before the last one.
	qi = Ve_D();
	double P = 0;
	double ufN = 0;
	double a = bkg[0], c = bkg[1], g = bkg[2], t = bkg[3];
	for( i = 0; ; ++i ){
		double p = (a*t*g+g*t*g+t*t*g)/(a*t*g+g*t*g+t*t*g+t*g*a+t*a*a+t*a*g);
		double q = (t*g*a+t*a*a+t*a*g)/(a*t*g+g*t*g+t*t*g+t*g*a+t*a*a+t*a*g);
		P += q*pow(p,i);
		qi.push_back(q*pow(p,i));
		if( P > 0.9999 ){
			maxCandidateN = i+1;
			break;
		}
		else
			ufN += i * q*pow(p,i);
	}

	// Prior weights of the three classes as used (through log()) by the scoring code.
	pT = 1;
	pFU = ufN;
	pFD = maxCandidateN - 1 - pFU;
}

// Iteratively re-predict the TIS of every ORF that has a head (leftmost >= 3).
//
// Each round (at most maximalInterationNum of them):
//   1. initiatePWMs(ORFSet) re-learns the matrices and priors from the current
//      location.first values;
//   2. every eligible ORF is rescanned from `leftmost`, candidate by candidate
//      (in-frame codons in STARTS), and location.first is moved to the
//      candidate with the highest true-TIS score;
//   3. the share of ORFs whose location did not change in step 2 is printed
//      and the iteration stops once it exceeds 99.9 percent.
//
// HaveHeadNum is the denominator of that percentage (the caller passes the
// number of ORFs with leftmost >= 3).
void TIS_T::reviseTIS_HaveHead_Enough( std::vector<ORF_T>& ORFSet, int HaveHeadNum )
{

	for( int num=0; num < maximalInterationNum ; num++)
	{
		initiatePWMs(ORFSet);
		std::vector< ORF_T >::iterator iter = ORFSet.begin();
		for ( ; iter != ORFSet.end(); iter++)
		{
			const char* seq = iter->codingSeq->data();
			int seqLen = iter->seqLen;
			// ORFs whose leftmost position is below 3 are left untouched here:
			// their location is not changed by this function.
			if ( iter->leftmost < 3 )
				continue;

			// Snapshot taken after initiatePWMs() (which may have moved
			// location.first); compared against in the convergence check below.
			iter->origLocation = iter->location;

			if( iter->haveTIS == -1 )
				continue;
			
			double maxScore = -1;

			iter->location.first = iter->leftmost;// every round rescans the ORF from its leftmost position
			int hint = iter->location.first ;
			int indexc = 0;	// number of candidates scored so far
		
			// Candidates: in-frame, at least 30 bases before location.second,
			// and no more than maxCandidateN of them.
			for( ; hint < iter->location.second -30 && indexc < maxCandidateN; hint += 3 )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if( STARTS.find(subStr) != STARTS.end() && hint-UPBP >= 0 && hint+3+DOWNBP+ORDER <= seqLen )
				{
					++indexc;
					int beg = hint - UPBP;	// first base of the scoring window
					// t/d/u: log-scores of the window under trueTIS/downFalse/upFalse
					double t = 0, d = 0, u = 0;
					// Context code of the first ORDER bases of the window
					// (the next base occupies the lowest base-4 digit).
					int index = 0;
					int f = 0;
					for( ; f < ORDER ; ++f ){
						index += (int)pow(4.0,ORDER-f)*(seq[beg+f]-'0');
					}
					// Sum over the four possible next bases: weight of that context in column 0.
					for( f = 0; f < 4; ++f ){
						t += exp(trueTIS(index+f,0)); 
						d += exp(downFalse(index+f,0));
						u += exp(upFalse(index+f,0));
					}

					// log(context weight) + log(prior weight of the class)
					t = log(t) + log(pT),  d = log(d) + log(pFD), u = log(u) + log(pFU);
					int i = 0;
					for( ; i < trueTIS.getColum(); ++i ){
						int index = 0;
						int f = 0;
						for( ; f < ORDER ; ++f ){
							index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
						}
						// pt/pd/pu: normalisers over the four possible next bases for this context.
						// NOTE(review): pt2 is never used.
						double pt = 0, pt2 = 0, pd = 0, pu = 0;
						for( f = 0; f < 4; ++f ){
							pt += exp(trueTIS(index+f,i)); 
							pd += exp(downFalse(index+f,i));
							pu += exp(upFalse(index+f,i));
						}
						// add the log of the conditional weight of the observed base
						t += trueTIS(index+(seq[beg+i+ORDER]-'0'),i) - log(pt);
						d += downFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pd);
						u += upFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pu);
					}
					// back from logs, then normalise across the three classes
					t = exp(t), d = exp(d); u = exp(u);
					double score = (t)/(t+u+d);
					if( score > maxScore )
					{
						maxScore = score;
						iter->trueScore = score;
						iter->upFalseScore = (u)/(t+u+d);
						iter->downFalseScore = (d)/(t+u+d);
						iter->location.first = hint;
						iter->ORFLength =  iter->location.second - iter->location.first ;
					}
				}
			}
		}
		
		// Convergence check: count ORFs with a head whose location equals the
		// snapshot taken at the start of this round.
		double unchanged=0;
		for ( iter = ORFSet.begin(); iter != ORFSet.end(); iter++)
		{
			if( iter->leftmost >=3 && iter->origLocation == iter->location )
				++unchanged;
		
		}

		// NOTE(review): `size` is never used.
		int size=(int)ORFSet.size();
		double percent=unchanged/HaveHeadNum*100;
		cout << "\t\tunchanged percent:\t" << percent << endl;
		if( percent > 99.9 ){
			break;
		}
	}
}

// Single-pass TIS prediction for ORFs with a head (leftmost >= 3), using the
// matrices and priors that are already stored in the object (reviseTIS() loads
// them with getprioriParameters() before calling this function).
//
// For each eligible ORF the in-frame codons in STARTS from location.first up
// to 30 bases before location.second are scored and location.first is moved
// to the candidate with the highest true-TIS score.  No matrix is re-learned.
void TIS_T::reviseTIS_HaveHead_NotEnough( std::vector<ORF_T>& ORFSet )
{
	std::vector< ORF_T >::iterator iter = ORFSet.begin();
	for ( ; iter != ORFSet.end(); iter++)
	{
		const char* seq = iter->codingSeq->data();
		int seqLen = iter->seqLen;

		// Only ORFs with leftmost >= 3 are handled; the best-scoring candidate is selected.
		if ( iter->leftmost >= 3 )
		{
			iter->origLocation = iter->location;

			if( iter->haveTIS == -1 )
				continue;

			double maxScore = -1;
			int hint = iter->location.first ;
			// NOTE(review): indexc is never incremented in this function, so the
			// `indexc < maxCandidateN` test only checks that maxCandidateN > 0;
			// maxCandidateN itself is not set here - verify it is initialised
			// on this code path.
			int indexc = 0;

				
			for( ; hint < iter->location.second -30 && indexc < maxCandidateN; hint += 3 )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if( STARTS.find(subStr) != STARTS.end() && hint-UPBP >= 0 && hint+3+DOWNBP+ORDER <= seqLen )
				{
					int beg = hint - UPBP;	// first base of the scoring window
					// t/d/u: log-scores of the window under trueTIS/downFalse/upFalse
					double t = 0, d = 0, u = 0;
					// Context code of the first ORDER bases of the window.
					int index = 0;
					int f = 0;
					for( ; f < ORDER ; ++f ){
						index += (int)pow(4.0,ORDER-f)*(seq[beg+f]-'0');
					}
					// Weight of that context in column 0 (sum over the four next bases).
					for( f = 0; f < 4; ++f ){
						t += exp(trueTIS(index+f,0)); 
						d += exp(downFalse(index+f,0));
						u += exp(upFalse(index+f,0));
					}

					// log(context weight) + log(prior weight of the class)
					t = log(t) + log(pT),  d = log(d) + log(pFD), u = log(u) + log(pFU);
					int i = 0;
					for( ; i < trueTIS.getColum(); ++i ){
						int index = 0;
						int f = 0;
						for( ; f < ORDER ; ++f ){
							index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
						}
						// Normalisers over the four possible next bases.
						// NOTE(review): pt2 is never used.
						double pt = 0, pt2 = 0, pd = 0, pu = 0;
						for( f = 0; f < 4; ++f ){
							pt += exp(trueTIS(index+f,i)); 
							pd += exp(downFalse(index+f,i));
							pu += exp(upFalse(index+f,i));
						}
						// add the log of the conditional weight of the observed base
						t += trueTIS(index+(seq[beg+i+ORDER]-'0'),i) - log(pt);
						d += downFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pd);
						u += upFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pu);
					}
					// back from logs, then normalise across the three classes
					t = exp(t), d = exp(d); u = exp(u);
					double score = (t)/(t+u+d);
					if( score > maxScore )
					{
						maxScore = iter->trueScore = score;
						iter->upFalseScore = (u)/(t+u+d);
						iter->downFalseScore = (d)/(t+u+d);
						iter->location.first = hint;
					}
				}
			}
		}
	}
}

// TIS prediction for ORFs without a head (leftmost < 3), using the matrices
// and priors currently stored in the object.
//
// For each such ORF:
//   1. find the first in-frame codon in STARTS at a position >= UPBP
//      (none found: haveTIS = -1, ORF skipped);
//   2. score that codon and take its down-false share d/(t+d+u);
//   3. if the share is >= cutoff, set location.first = leftmost;
//      otherwise scan the in-frame candidates from that codon up to 30 bases
//      before location.second and move location.first to the one with the
//      highest true-TIS share.
void TIS_T::reviseTIS_NoHead( std::vector<ORF_T>& ORFSet )
{
	std::vector< ORF_T >::iterator iter = ORFSet.begin();
	for( ; iter != ORFSet.end(); ++iter )
	{
		const char* seq = iter->codingSeq->data();
		int seqLen = iter->seqLen;

		// Only ORFs without a head are handled here.
		if ( iter->leftmost >= 3 ) // ORFs with a head are skipped
			continue;

		// From location.first on, find the first in-frame codon in STARTS
		// that has at least UPBP bases in front of it.
		int hint = iter->location.first ;
		int initTIS = -1;
		for( ; hint < iter->location.second; hint += 3 )
		{
			if ( hint >= UPBP )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if(STARTS.find(subStr) != STARTS.end())
				{
					initTIS = hint;
					break;
				}
			}
		}

		// initTIS still -1: no usable in-frame start codon; flag the ORF and move on.
		if ( initTIS == -1 )
		{
			iter->haveTIS = -1;
			continue;
		}
		
		// Down-false share of the first candidate; stays -1 when its window
		// does not fit into the sequence.
		double score = -1;
		int beg = initTIS - UPBP;
		if( beg >= 0 && beg + UPBP + 3 + DOWNBP + 1 + ORDER < seqLen  )
		{
			// NOTE(review): shadows the outer `beg`; same value, because hint == initTIS
			// after the break above.
			int beg = hint - UPBP;
			// t/d/u: log-scores of the window under trueTIS/downFalse/upFalse
			double t = 0, d = 0, u = 0;
			// Context code of the first ORDER bases of the window.
			int index = 0;
			int f = 0;
			for( ; f < ORDER ; ++f ){
				index += (int)pow(4.0,ORDER-f)*(seq[beg+f]-'0');
			}
			// Weight of that context in column 0 (sum over the four next bases).
			for( f = 0; f < 4; ++f ){
				t += exp(trueTIS(index+f,0)); 
				d += exp(downFalse(index+f,0));
				u += exp(upFalse(index+f,0));
			}

			// log(context weight) + log(prior weight of the class)
			t = log(t) + log(pT),  d = log(d) + log(pFD), u = log(u) + log(pFU);
			int i = 0;
			for( ; i < trueTIS.getColum(); ++i ){
				int index = 0;
				int f = 0;
				for( ; f < ORDER ; ++f ){
					index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
				}
				// Normalisers over the four possible next bases.
				// NOTE(review): pt2 is never used.
				double pt = 0, pt2 = 0, pd = 0, pu = 0;
				for( f = 0; f < 4; ++f ){
					pt += exp(trueTIS(index+f,i)); 
					pd += exp(downFalse(index+f,i));
					pu += exp(upFalse(index+f,i));
				}
				t += trueTIS(index+(seq[beg+i+ORDER]-'0'),i) - log(pt);
				d += downFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pd);
				u += upFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pu);
			}
			t = exp(t), d = exp(d); u = exp(u);
			score = d/(t+d+u);	// share of the down-false class
			//cout << score << endl;
		}

		// Down-false share at or above the cutoff: the first candidate is
		// taken to lie downstream of the real TIS, so the ORF keeps starting
		// at its leftmost position (i.e. it is still treated as having no head).
		if ( score  >= cutoff ) 
		{
			iter->location.first = iter->leftmost;
		}
		// Below the cutoff (including the -1 "window does not fit" case):
		// pick the candidate with the highest true-TIS share, starting from
		// the first candidate itself.
		else
		{
			double maxScore = -1;
		
			for( ; hint < iter->location.second -30; hint += 3 )
			{
				int subStr = ((seq[hint] - 48)<<4) + ((seq[hint+1] - 48)<<2) + seq[hint + 2] - 48;
				if( STARTS.find(subStr) != STARTS.end() && hint-UPBP >= 0 && hint+3+DOWNBP+1+ORDER <= seqLen )
				{
					int beg = hint - UPBP;	// first base of the scoring window
					double t = 0, d = 0, u = 0;
					int index = 0;
					int f = 0;
					for( ; f < ORDER ; ++f ){
						index += (int)pow(4.0,ORDER-f)*(seq[beg+f]-'0');
					}
					for( f = 0; f < 4; ++f ){
						t += exp(trueTIS(index+f,0)); 
						d += exp(downFalse(index+f,0));
						u += exp(upFalse(index+f,0));
					}

					t = log(t) + log(pT),  d = log(d) + log(pFD), u = log(u) + log(pFU);
					int i = 0;
					for( ; i < trueTIS.getColum(); ++i ){
						int index = 0;
						int f = 0;
						for( ; f < ORDER ; ++f ){
							index += (int)pow(4.0,ORDER-f)*(seq[beg+i+f]-'0');
						}
						// NOTE(review): pt2 is never used.
						double pt = 0, pt2 = 0, pd = 0, pu = 0;
						for( f = 0; f < 4; ++f ){
							pt += exp(trueTIS(index+f,i)); 
							pd += exp(downFalse(index+f,i));
							pu += exp(upFalse(index+f,i));
						}
						t += trueTIS(index+(seq[beg+i+ORDER]-'0'),i) - log(pt);
						d += downFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pd);
						u += upFalse(index+(seq[beg+i+ORDER]-'0'),i) - log(pu);
					}
					t = exp(t), d = exp(d); u = exp(u);
					double score = t/(t+d+u);	// share of the true-TIS class
					if( score > maxScore )
					{
						maxScore = iter->trueScore = score;
						iter->upFalseScore = (u)/(t+u+d);
						iter->downFalseScore = (d)/(t+u+d);
						iter->location.first = hint;
					}
				}
			}
		}
	}
}

void TIS_T::reviseTIS( )
{
	this->cutoff = 0.5;
	std::map< Str, std::vector<ORF_T> >::iterator iter;
	iter = GenusORFSetMap.begin();
	Str file = "parameterMatrixs.txt";
	std::ofstream out(file.data());

	STARTS.clear();
	STARTS.insert(14); //ATG
	STARTS.insert(30); //CTG
	STARTS.insert(62); //TTG
	STARTS.insert(46); //GTG
	
	for( ; iter!=GenusORFSetMap.end(); iter++)
	{
		Str genus = iter->first;
		if ( genus == "Mycoplasma" || genus == "Acholeplasma" || genus == "Aster" || genus == "Onion" || genus == "Ureaplasma" )
		{
			STOPS.clear();
			STOPS.insert(48); //TAA
			//STOPS.insert(56); //TGA
			STOPS.insert(50); //TAG
		}
		else
		{
			STOPS.clear();
			STOPS.insert(48); //TAA
			STOPS.insert(56); //TGA
			STOPS.insert(50); //TAG
		}
		
		std::vector<ORF_T>& ORFSet = (iter->second);

		//ORFΪORF
		for ( int i=0 ; i < (int)ORFSet.size(); i++ )
		{
			Pa_I_I tmp  = ORFSet[i].location;
			if (tmp.first > 2)
			{
				x2LongestORF( ORFSet[i].codingSeq->data(), tmp);
			}
			ORFSet[i].origLocation = ORFSet[i].location;
			ORFSet[i].leftmost = tmp.first; //¼ʼlocation.first ʱҪ
		}
		
		int HaveHeadNum = 0 ;
		for( int i=0; i<(int)ORFSet.size(); i++ )
		{
			if(ORFSet[i].leftmost >= 3)
				HaveHeadNum++;
		}

		cout<<endl<<"----------------------------------------------------------------\n";
		cout<<endl<<"Processing ORFs binned in genus "<<iter->first<<"..."<<endl;
		cout<<"HaveHeadNum:\t" << HaveHeadNum << endl;
		
		if ( HaveHeadNum > HaveHeadNumCutoff )
		{
			cout<<"CASCADE COMBINATION OF DIFFERENT MARKOV MODELS...\n" << endl;
			for ( ORDER=0; ORDER < MAXORDER+0.1; ORDER++)
			{
				if( ORDER < 1 ) 
					cout<<"  POST-PROCESS ORIGNAL INPUT TISs BY A 0-TH ORDER MARKOV MODEL:\n";
				else 
					cout<<"  POST-PROCESS OUTPUT TISs FROM A "<<(ORDER-1)<<"-TH ORDER MARKOV MODEL BY \n"
						  "                                A "<<ORDER<<"-TH ORDER MARKOV MODEL:\n";
				reviseTIS_HaveHead_Enough(ORFSet,HaveHeadNum);
				if( ORDER == 0 )
				{
					out << "--------------------------------------------------------------------" << endl;
					out << "Genus:\t" << iter->first << "\tHaveHeadNum:\t" << HaveHeadNum << endl;
					out << "----------------------------trueTIS---------------------------------" << endl;
					displayMatrix(trueTIS.toExp(),out);
					//out << "----------------------------downFalse-------------------------------" << endl;
					//displayMatrix(downFalse.toExp(),out);
					//out << "----------------------------upFalse---------------------------------" << endl;
					//displayMatrix(upFalse.toExp(),out);
					//out << endl;
				}
			}
			ORDER = ORDER-1; //reviseʱ ORDER1 ˴ָMAXORDER
			reviseTIS_NoHead(ORFSet);
		}
		else
		{
			cout << "Too Few ORFs have head, Using priori genus parameter...." << endl;
			//ȡ
			Str genusName = iter->first;
			getprioriParameters(genusName);
			ORDER=0; //ֻ0
			reviseTIS_HaveHead_NotEnough(ORFSet);
			reviseTIS_NoHead(ORFSet);
		}

	}
	out.close();
}

// Write the ORFs of GenusORFSetMap to resultFile.
//
// resultFormat "MED"/"med": a ">id" line whenever the id differs from the
//   previous ORF's id, then per ORF the two location values (field width 8
//   each) and the strand sign (field width 4).
// resultFormat "GFF"/"gff": three "##" header lines whenever the id differs
//   from the previous ORF's id, then per ORF a tab-separated line
//   id, MetaTISA, CDS, first, second, ., strand, .
// Any other format: the file is created and left empty, and no ORF is touched.
//
// Every written ORF first has location2FileForm() applied, i.e. the stored
// locations are modified by this call.
void TIS_T::resultToFile( Str& resultFile, Str& resultFormat )
{
	std::ofstream result(resultFile.data());

	const bool asMED = ( resultFormat == "MED" || resultFormat == "med" );
	const bool asGFF = !asMED && ( resultFormat == "GFF" || resultFormat == "gff" );
	if( !asMED && !asGFF )
		return;

	Str previousId="";	// id of the ORF written last, across all genera

	std::map< Str, std::vector<ORF_T> >::iterator genusIt = GenusORFSetMap.begin();
	for( ; genusIt!=GenusORFSetMap.end(); genusIt++)
	{
		std::vector<ORF_T>& ORFSet = genusIt->second;
		for( std::vector<ORF_T>::size_type k = 0; k < ORFSet.size(); ++k )
		{
			ORF_T& orf = ORFSet[k];
			orf.location2FileForm((int)orf.codingSeq->size());

			const bool startsNewId = ( orf.id != previousId );
			const char* strand = orf.isPositive ? "+" : "-";

			if( asMED )
			{
				if( startsNewId )
					result << ">" << orf.id << endl;
				result << setw(8) << orf.location.first
				       << setw(8) << orf.location.second
				       << setw(4) << strand << endl;
			}
			else
			{
				if( startsNewId )
				{
					result << "##gff-version  3" << endl;
					result << "##MetaTISA" << endl;
					result << "##metagenome sequence name: " << orf.id << endl;
				}
				result << orf.id << "\t"
				       << "MetaTISA" << "\t" << "CDS"
				       << "\t" << orf.location.first << "\t"
				       << orf.location.second << "\t" << "." << "\t"
				       << strand << "\t" << "." << endl;
			}

			previousId = orf.id;
		}
	}
}
