#include "getseqs.h"


// Read a FASTA-style file and append every record to IdSeqVe.
//
// The first line of the file is always taken as a header; afterwards any
// line that contains '>' starts a new record.  The record id is the header
// with its first character removed, and the sequence lines are passed
// through SequenceTransform_T::char2DigitalSeq() before being concatenated.
// Each record is also registered in NumSidMap / SidNumMap under its 1-based
// position in IdSeqVe.
//
// Prints a message and exits with status 1 when the file cannot be opened.
void GetRead::getRead(Str& seqsFilename)
{
	std::ifstream fasta( seqsFilename.data() );
	if ( !fasta.good() ) {
		std::cout << "file " << seqsFilename << " not found!" << std::endl;
		exit(1);
	}

	Str current;     // most recently read line
	Str residues;    // encoded sequence of the record being assembled

	std::getline( fasta, current );

	// `residues` is emptied after every stored record.
	for ( ; !fasta.eof(); residues.clear() ) {
		// `current` holds a header line here: drop its leading marker.
		const Str header( current.substr(1) );

		getline( fasta, current );

		// Collect sequence lines until the next header or the end of input.
		for ( ;; ) {
			if ( current.find(">") != std::string::npos )
				break;

			residues += SequenceTransform_T::char2DigitalSeq(current);

			if ( fasta.eof() )
				break;
			getline( fasta, current );
		}

		IdSeqVe.push_back( make_pair(header, residues) );

		// Read numbers are 1-based: the size after the push is this read's number.
		const int ordinal = (int)IdSeqVe.size();
		NumSidMap.insert( make_pair(ordinal, header) );
		SidNumMap.insert( make_pair(header, ordinal) );
	}

	fasta.close();
}

void GetRead::outRead( const char* seqsFilename )
{
	std::ofstream outfile( seqsFilename );
	int size = (int)IdSeqVe.size();


	for(int i = 0; i < size; ++i){

	

			outfile<<">"<< IdSeqVe[i].first << "\n";
			Str seq = SequenceTransform_T::digital2CharSeq(IdSeqVe[i].second);
			int k;
			for( k = 0; k < (int)seq.size()/65; k++ )
				outfile << seq.substr(65*k,65) << endl;
			if ( seq.substr(65*k).size() != 0 )    
				outfile << seq.substr(65*k) << endl;
		
	}

	map<int, pair<int, pair<int, int> > >::iterator IteP;
	for(IteP=PairMa.begin(); IteP!=PairMa.end(); ++IteP){
		outfile<<"<"<<IteP->first<<","<<IteP->second.first<<">"<<endl;
	}
	outfile.close();
}

// Read a quality file (">" header lines followed by whitespace-separated
// integer scores) and append one score vector per record to Qualve.
//
// The text of the header lines is not used; records are matched to reads
// purely by position.  After reading, the number of entries in Qualve must
// equal the number of reads in IdSeqVe, otherwise the process exits with
// status 2.  Exits with status 1 when the file cannot be opened.
void GetRead::getqualfromfile(Str &qualfile)
{
	ifstream in(qualfile.data());

	if( !in.good() ){
		std::cout<<"file "<<qualfile<<" not found!"<<std::endl;
		exit(1);
	}

	string line;
	getline(in, line);          // header of the first record

	vector<int> scve;           // scores of the record being read

	while(!in.eof()){

		getline( in,line );

		// Consume score lines until the next header (a line containing '>').
		while ( line.find(">") == std::string::npos ){
			if(!line.empty()){
				stringstream ss;
				ss << line;

				// Use the extraction itself as the loop condition.  Testing
				// eof() before extracting stored one bogus value whenever a
				// line ended in whitespace (including '\r'), and never
				// terminated on a non-numeric token.
				int a;
				while ( ss >> a )
					scve.push_back(a);
			}

			if ( in.eof() )
				break;

			getline( in, line );
		}

		Qualve.push_back(scve);
		scve.clear();
	}

	if((int)Qualve.size() != (int)IdSeqVe.size()){
		cout<<"error size "<<Qualve.size()<<","<<IdSeqVe.size()<<endl;  exit(2);
	}

}

// Give every read that has no quality vector yet (positions Qualve.size()
// up to IdSeqVe.size()) a vector filled with the constant score `sc`, one
// entry per sequence position.
void GetRead::getqualfromcons(int sc)
{
	const int readCount = (int)IdSeqVe.size();

	for ( int idx = (int)Qualve.size(); idx < readCount; ++idx ) {
		const int length = (int)IdSeqVe[idx].second.size();
		Qualve.push_back( vector<int>( (size_t)length, sc ) );
	}
}

// Write every quality vector in Qualve to `outfilename`: a ">" header with
// the read id, then the scores separated by single spaces with a line break
// after every 17th value and after the last one.
//
// Exits with status 1 if a score vector and its sequence differ in length.
void GetRead::outqual(const char *outfilename)
{
	std::ofstream out( outfilename );

	const int records = (int)Qualve.size();

	for ( int rec = 0; rec < records; ++rec ) {
		const vector<int>& scores = Qualve[rec];
		out << ">" << IdSeqVe[rec].first << "\n";

		const int count = (int)scores.size();
		if ( count != (int)IdSeqVe[rec].second.size() ) {
			cout<<"seq size != sc size "<<rec+1<<"\t"<<count<<","<<IdSeqVe[rec].second.size()<<endl;  exit(1);
		}

		for ( int pos = 0; pos < count; ++pos ) {
			// A value closes its line when it is the 17th on that line or
			// the very last one; every other value is followed by a space.
			const bool closesLine = ( (pos + 1) % 17 == 0 ) || ( pos == count - 1 );
			out << scores[pos] << ( closesLine ? "\n" : " " );
		}
	}
}

// Extract the reads whose 1-based numbers are listed in "Edit1.txt" and
// write them, in the listed order, to "tt.txt" (sequences, wrapped at 65
// characters) and "tt.qul" (scores, 17 per line).
//
// Every number read is echoed to stdout.  Exits with status 1 when a number
// does not refer to an existing read with a quality vector.
void GetRead::substract()
{
	ifstream inf("Edit1.txt");
	vector< int > segve;
	string a;

	// Extract inside the loop condition.  Testing eof() first made the final
	// token be processed twice whenever the file ended with a newline.
	while ( inf >> a ) {
		int ai = atoi(a.c_str());

		segve.push_back(ai);
		cout<<ai<<endl;
	}
	// NOTE(review): this prints the size of the member `inset`, which this
	// function never fills; segve.size() may have been intended - confirm.
	cout<<inset.size()<<endl;

	ofstream fseq ("tt.txt");
	ofstream fqul ("tt.qul");

	for ( size_t i = 0; i < segve.size(); ++i ) {
		const int num = segve[i];

		// atoi() yields 0 for non-numeric text and the list may name reads
		// that do not exist; reject those instead of indexing out of range.
		if ( num < 1 || (size_t)num > IdSeqVe.size() || (size_t)num > Qualve.size() ) {
			cout<<"error in substract() "<<num<<endl;   exit(1);
		}
		const size_t m = (size_t)num - 1;

		fseq<< ">" << IdSeqVe[m].first<<endl;

		Str seq = SequenceTransform_T::digital2CharSeq(IdSeqVe[m].second);
		// substr() clamps the last chunk, so the short tail needs no special case.
		for ( size_t from = 0; from < seq.size(); from += 65 )
			fseq << seq.substr(from, 65) << endl;

		// NOTE(review): the header here starts with a space (" >") while
		// substract2() writes ">"; kept as is - confirm which is intended.
		fqul <<" >" << IdSeqVe[m].first<<endl;

		const vector<int>& scve = Qualve[m];
		const size_t vesize = scve.size();

		for ( size_t j = 0; j < vesize; ++j ) {
			fqul << scve[j];
			// Break the line after every 17th value and after the last one.
			if ( (j+1) % 17 == 0 || j + 1 == vesize )
				fqul << "\n";
			else
				fqul << " ";
		}
	}
}

// Add the 1-based read numbers listed in "Edit1.txt" to the member set
// `inset`, then write every read named in `inset` (ascending, duplicates
// collapsed) to "temp.txt" (sequences, wrapped at 65 characters) and
// "temp.qul" (scores, 17 per line).
//
// Every number read is echoed to stdout, followed by the size of `inset`.
// Exits with status 1 when a number does not refer to an existing read with
// a quality vector.
void GetRead::substract2()
{
	ifstream inf("Edit1.txt");

	string a;
	// Extract inside the loop condition.  Testing eof() first made the final
	// token be processed (and echoed) twice whenever the file ended with a
	// newline.
	while ( inf >> a ) {
		int ai = atoi(a.c_str());
		inset.insert(ai);

		cout<<ai<<endl;
	}
	cout<<inset.size()<<endl;

	ofstream fseq ("temp.txt");
	ofstream fqul ("temp.qul");

	set<int>::iterator ite;
	for ( ite = inset.begin(); ite != inset.end(); ++ite ) {
		const int num = *ite;

		// atoi() yields 0 for non-numeric text and the list may name reads
		// that do not exist; reject those instead of indexing out of range.
		if ( num < 1 || (size_t)num > IdSeqVe.size() || (size_t)num > Qualve.size() ) {
			cout<<"error in substract2() "<<num<<endl;   exit(1);
		}
		const size_t m = (size_t)num - 1;

		fseq<< ">" << IdSeqVe[m].first<<endl;

		Str seq = SequenceTransform_T::digital2CharSeq(IdSeqVe[m].second);
		// substr() clamps the last chunk, so the short tail needs no special case.
		for ( size_t from = 0; from < seq.size(); from += 65 )
			fseq << seq.substr(from, 65) << endl;

		fqul <<">" << IdSeqVe[m].first<<endl;

		const vector<int>& scve = Qualve[m];
		const size_t vesize = scve.size();

		for ( size_t j = 0; j < vesize; ++j ) {
			fqul << scve[j];
			// Break the line after every 17th value and after the last one.
			if ( (j+1) % 17 == 0 || j + 1 == vesize )
				fqul << "\n";
			else
				fqul << " ";
		}
	}
}

// Extract the reads whose ids are listed in "Edit1.txt" and write them, in
// the listed order, to "temp.txt" (sequences, wrapped at 65 characters) and
// "temp.qul" (scores, 17 per line).
//
// Every id read is echoed to stdout.  Exits with status 1 when a listed id
// is not present in IdSeqVe or has no quality vector.
void GetRead::substract3()
{
	ifstream inf("Edit1.txt");
	vector< string > segve;     // ids in file order
	set<string > segset;        // the same ids, for membership tests
	string a;

	// Extract inside the loop condition.  Testing eof() first made the final
	// id be stored - and therefore written - twice whenever the file ended
	// with a newline.
	while ( inf >> a ) {
		segve.push_back(a);
		segset.insert(a);
		cout<<a<<endl;
	}

	// Map each requested id to its index in IdSeqVe.  insert() keeps the
	// first index when an id occurs more than once.
	map< string, size_t > key_map;
	for ( size_t i = 0; i < IdSeqVe.size(); ++i ) {
		if ( segset.find( IdSeqVe[i].first ) != segset.end() ) {
			key_map.insert( make_pair( IdSeqVe[i].first, i ) );
		}
	}

	ofstream fseq ("temp.txt");
	ofstream fqul ("temp.qul");

	for ( size_t i = 0; i < segve.size(); ++i ) {
		map< string, size_t >::const_iterator hit = key_map.find( segve[i] );
		// Unknown id, or a read for which no quality vector was loaded.
		if ( hit == key_map.end() || hit->second >= Qualve.size() ) {
			cout<<"error in substract3() "<<segve[i]<<endl;   exit(1);
		}
		const size_t m = hit->second;

		fseq<< ">" << IdSeqVe[m].first<<endl;

		Str seq = SequenceTransform_T::digital2CharSeq(IdSeqVe[m].second);
		// substr() clamps the last chunk, so the short tail needs no special case.
		for ( size_t from = 0; from < seq.size(); from += 65 )
			fseq << seq.substr(from, 65) << endl;

		// NOTE(review): the header here starts with a space (" >") while
		// substract2() writes ">"; kept as is - confirm which is intended.
		fqul <<" >" << IdSeqVe[m].first<<endl;

		const vector<int>& scve = Qualve[m];
		const size_t vesize = scve.size();

		for ( size_t j = 0; j < vesize; ++j ) {
			fqul << scve[j];
			// Break the line after every 17th value and after the last one.
			if ( (j+1) % 17 == 0 || j + 1 == vesize )
				fqul << "\n";
			else
				fqul << " ";
		}
	}
}

// Read mate-pair records from `matefilename`.  Each record consists of four
// whitespace-separated fields: the two read ids, the insert length and its
// deviation.  The deviation is capped at one tenth of the insert length.
//
// Records with a non-zero insert length are stored in PairMa in both
// directions and contribute to the running averages that end up, truncated
// to int, in mate_len and mate_len_var.  mate_type is set to 2.
//
// Prints a message and exits with status 1 when the file cannot be opened.
void GetRead::getmatepair(Str& matefilename)
{

	ifstream in(matefilename.data());

	if( !in.good() ){
		std::cout<<"file "<<matefilename<<" not found!"<<std::endl;
		exit(1);
	}

	string a, b , c ,d;
	int i = 0;          // number of records averaged so far
	double e = 0;       // running mean of the insert length
	double v = 0;       // running mean of the (capped) deviation

	// Extract inside the loop condition.  Testing eof() first re-processed
	// the last record whenever the file ended with a newline, counting it
	// twice in both averages; a truncated final record is now ignored too.
	while ( in >> a >> b >> c >> d ) {

		int m = atoi(c.c_str());
		int t = atoi(d.c_str());
		t = min ( m/10, t );

		// NOTE(review): operator[] presumably default-inserts 0 for an id
		// that was never loaded, while getRead() numbers reads from 1 -
		// confirm whether unknown ids should be skipped instead.
		int id_a = SidNumMap[a];
		int id_b = SidNumMap[b];

		if(m != 0){
			PairMa.insert(make_pair(id_a, make_pair(id_b, make_pair(m, t))));
			PairMa.insert(make_pair(id_b, make_pair(id_a, make_pair(m, t))));
			v = ( v * i + t ) / ( i + 1 );
			e = ( e * i + m ) / ( i + 1 );
			++i;
		}

	}

	mate_len = (int)e;
	mate_len_var = (int)v;
	mate_type = 2;

}

// Locate the longest stretch of `seq` that lies between runs of two or more
// consecutive 'N' characters (single 'N's do not split the sequence).
//
// seq     sequence to inspect; it is not modified.
// cutlen  minimum length a stretch must have to be accepted.
// cle     receives (start, length) of the longest accepted stretch; when
//         several share the maximal length the leftmost one wins.  It is
//         left untouched unless (true, true) is returned.
//
// Returns (false, true)  when the sequence contains no such run of 'N',
//         (true,  true)  when runs exist and `cle` was filled in,
//         (true,  false) when runs exist but no stretch reaches `cutlen`.
pair< bool, bool > clear_pN( string &seq, int cutlen, pair< int, int > &cle )
{
	const int total = (int)seq.size();

	bool sawRun = false;        // a run of >= 2 'N' has been seen
	bool haveBest = false;      // some stretch has reached cutlen
	int bestStart = 0;
	int bestLen = 0;
	int stretchStart = 0;       // first position after the previous run

	int pos = 0;
	while ( pos < total ) {
		if ( seq[pos] != 'N' ) {
			++pos;
			continue;
		}

		// Measure the run of 'N' that begins at pos.
		int runEnd = pos + 1;
		while ( runEnd < total && seq[runEnd] == 'N' )
			++runEnd;

		if ( runEnd - pos > 1 ) {
			sawRun = true;
			const int stretchLen = pos - stretchStart;
			// Strict '>' keeps the leftmost stretch among equally long ones.
			if ( stretchLen >= cutlen && ( !haveBest || stretchLen > bestLen ) ) {
				haveBest = true;
				bestStart = stretchStart;
				bestLen = stretchLen;
			}
			stretchStart = runEnd;
		}
		pos = runEnd;
	}

	if ( !sawRun )
		return make_pair( false, true );

	// The stretch after the last run is a candidate as well.
	const int tailLen = total - stretchStart;
	if ( tailLen >= cutlen && ( !haveBest || tailLen > bestLen ) ) {
		haveBest = true;
		bestStart = stretchStart;
		bestLen = tailLen;
	}

	if ( !haveBest )
		return make_pair( true, false );

	cle = make_pair( bestStart, bestLen );
	return make_pair( true, true );
}

// Load reads, quality strings and mate links from the fragment file
// `frgfile` and append them to this object's containers.
//
// The function works in four stages:
//   1. Scan forward to the first "{LIB" record and read its "ori:", "mea:"
//      and "std:" fields.  mate_len and mate_len_var are set from the latter
//      two and mate_type is set to 2.
//   2. Parse the following "{FRG" records (read id, sequence, quality,
//      clear range) and "{LKG" records (two linked read ids).
//   3. Run clear_pN() with a minimum length of 100 on every collected read;
//      a read is trimmed to the reported range or marked as discarded, and
//      every trim is appended to the file "clearrange".
//   4. Store the surviving reads in IdSeqVe / Qualve / NumSidMap / SidNumMap
//      and, when mate information is usable, the links in PairMa.
//
// Exits with status 1 if the file cannot be opened or a record does not
// have the expected layout.
//
// NOTE(review): none of the getline() calls inside a record checks for end
// of file; the `while ( line != "." )` loops in particular have no exit if
// the file is truncated in the middle of a record.
void GetRead::getReadfromFRG(std::string &frgfile)
{

	ifstream in ( frgfile.data() );
	if ( !in.good() ) {
		cout<<"error could not open file "<<frgfile<<endl;  exit(1);
	}

	// 0 = outside a record, 1 = inside a record.
	int state = 0;

	string line;

	getline( in, line );

	// Insert mean / deviation taken from the "{LIB" record.
	int insert_m = 0;
	int insert_v = 0;

	// Set from the "ori:" field: "I" -> true, anything else -> false.
	bool mate = true;

	// ---- Stage 1: find and read the first "{LIB" record --------------------
	while ( !in.eof() ) {
		if ( state == 1 ) {
			// The field tags are searched anywhere in the line, but the value
			// is always taken from column 4 onwards.
			if ( line.find("ori:") != line.npos ) {
				string mate_s = line.substr( 4 );
				if ( mate_s == "I" )
					mate = true;
				else if ( mate_s == "U" )
					mate = false;
				else {
					cout<<"Unexpected ori of Lib in frg file reading "<<mate_s<<endl;
					mate = false;
				}
			} else if ( line.find("mea:") != line.npos ) {
				string mean = line.substr( 4 );
				insert_m = (int)atof( mean.c_str() );
			} else if ( line.find( "std:") != line.npos ) {
				string v = line.substr( 4 );
				insert_v = (int)atof( v.c_str() );
			} else if ( line[0] == '}' ) {
				// End of the LIB record: leave `line` on the next line for stage 2.
				state = 0;
				getline( in, line );
				break;
			}
		} else {
			if ( line == "{LIB" ) {
				state = 1;
				getline( in, line );
				continue;
			}
		}

		getline( in, line );
	}

//	cout<<insert_m<<","<<insert_v<<endl;

	mate_len = insert_m;
	mate_len_var = insert_v;
	mate_type = 2;

	// Parallel vectors: entry i of each describes the same read.
	vector< string > ridve;
	vector< string > seqve;
	vector< string > qualve;
	// Link table filled from "{LKG" records, stored in both directions.
	map< string, string > mp;

	// ---- Stage 2: parse the "{FRG" and "{LKG" records ----------------------
	while ( !in.eof() ) {
		if ( state == 0 ) {
			if ( line[0] == '{' )
				state = 1;
		} else if ( state == 1 ) {
			if ( line[0] == '}' ) {
				state = 0;
				getline( in, line );
				continue;
			}
		}

		if ( state == 1 ) {
			if ( line == "{FRG" ) {
				// The record is read positionally: the trailing comments name
				// the field expected on each line.
				getline( in, line );   // "act:A"
				getline( in, line );   // "acc:id".
				if ( line.substr( 0, 4 ) != "acc:" ) {
					cout<<"Unexpected ! at \"acc\" "<<line.substr( 0, 4 )<<endl;   exit(1);
				}
				string rid = line.substr(4);
				
				getline( in, line );   // "rnd:1"
				getline( in, line );   // "sta:G"
				// `sta` decides whether this read is kept; any status other
				// than "G" drops it.
				bool sta = true;
				if ( line.substr(4) != "G" )
					sta = false;
				getline( in, line );   // "lib:"
				getline( in, line );   // "pla:"
				getline( in, line );   // "loc:"
				getline( in, line );   // "src:"
				getline( in, line );   // "."
				getline( in, line );   // "seq:"
				if ( line != "seq:" ) {
					cout<<"Unexpected ! at \"seq:\" "<<line <<endl;  exit(1); 
				}
				// Sequence lines run up to a line holding a single "."; each
				// is converted with char2DigitalSeq() before being appended.
				string seq = "";
				getline( in, line );
				while ( line != "." ) {
					seq+=SequenceTransform_T::char2DigitalSeq(line);
					getline( in, line );
				}
				getline( in, line );   // "qlt:"
				if ( line != "qlt:" ) {
					cout<<"Unexpected ! at \"qlt:\" "<<line <<endl;  exit(1); 
				}
				
				// Quality lines are concatenated unchanged, again up to ".".
				string qual = "";
				getline( in, line );   
				while ( line != "." ) {
					qual += line;
				
					getline( in, line );
				}
				if ( (int)seq.size() != (int)qual.size() ) {
					cout<<"error seq size != qual size "<<seq.size()<<","<<qual.size()<<", rid == " <<rid<<endl; exit(1);
				}
				getline( in, line );   // "hps:"
				getline( in, line );   // "."
				getline( in, line );   
				// Skip ahead to the "clr:" line; reaching the closing "}"
				// first is an error.
				while ( line.substr( 0, 4 ) != "clr:" ) {
					if ( line == "}" ) {
						cout<<"Unexpected ! no clr "<<endl;
						exit(1);
					}
					getline( in, line );
				}
				// Split "clr:<left>,<right>" at the first comma.
				string clr_l = "";
				string clr_r = "";
				line = line.substr( 4 );
				size_t i = 0;
				for ( ; i < line.size(); ++i ) {
					if ( line[i] == ',' ) 
						break;
					else
						clr_l += line[i];
				}
				++i;
				for ( ; i < line.size(); ++i )
					clr_r += line[i];
				int clr_li = atoi(clr_l.c_str());
				int clr_ri = atoi(clr_r.c_str());
				
				// A negative or empty clear range also drops the read.
				if ( clr_li < 0 || clr_li >= clr_ri )
					sta = false;

				if ( sta ) {
					// Keep only [clr_li, clr_ri) of the sequence and quality.
					seq = seq.substr( clr_li, clr_ri-clr_li );
					qual = qual.substr( clr_li, clr_ri-clr_li );
					ridve.push_back( rid );
					seqve.push_back( seq );
					qualve.push_back( qual );
				}

				getline( in, line );  //"}"
				if ( line == "}" ) {
					state = 0;
					getline( in, line );
					continue;
				}
			} else if ( line == "{LKG" ) {
				// A link is recorded only when its action is "A".
				bool sta = true;
				getline( in, line );   //"act:"
				if ( line.substr(0,4) != "act:" ) {
					cout<<"Unexpected ! at \"act\" "<<line.substr( 0,4 ) <<endl;  exit(1);
				}
				if ( line.substr( 4 ) != "A" ) {
					sta = false;
				}
				getline( in, line );   //"frg:"
				if ( line.substr(0,4) != "frg:" ) {
					cout<<"Unexpected ! at \"LKG frg 1\" "<<line.substr( 0,4 ) <<endl;  exit(1);
				}
				string frg1 = line.substr(4);
				getline( in, line );   //"frg:"
				if ( line.substr(0,4) != "frg:" ) {
					cout<<"Unexpected ! at \"LKG frg 2\" "<<line.substr( 0,4 ) <<endl;  exit(1);
				}
				string frg2 = line.substr(4);
				if ( sta ) {
					mp.insert( make_pair( frg1, frg2 ) );
					mp.insert( make_pair( frg2, frg1 ) );
				}
				getline( in, line );   //"}"
				if ( line == "}" ) {
					state = 0;
					getline( in, line );
					continue;
				}

			} else {
				// Inside a record of a type this parser does not handle.
				cout<<"unexpected line: "<<line<<endl;
				getline( in, line );
			}
		} else {
			// Outside any record and the line does not open one.
			cout<<"unexpected line: "<<line<<endl;
			getline( in, line );
		}
	}


	size_t vesize = ridve.size();

	// ---- Stage 3: trim or discard reads containing runs of 'N' -------------
	// The log file is opened in append mode, so earlier content is kept.
	fstream fout;
	fout.open( "clearrange", ios::app|ios::out );


	// Indices (into ridve/seqve/qualve) of reads that must not be stored.
	set<size_t > discardid;
	for ( size_t i = 0; i < vesize; ++i ) {
	
		// NOTE(review): seqve[i] holds the output of char2DigitalSeq(), while
		// clear_pN() looks for the character 'N' - confirm that the encoding
		// leaves 'N' unchanged.
		pair<int, int > cle;
		pair< bool, bool > cut_cle = clear_pN(seqve[i], 100, cle);
		if ( cut_cle.first ) {
			if ( cut_cle.second ) {
				// cle is (start, length) of the stretch to keep.
				fout<<"cle:"<<ridve[i]<<","<<cle.first<<","<<cle.second<<endl;
				string cleseq = seqve[i].substr( cle.first, cle.second );
				string clequl = qualve[i].substr( cle.first, cle.second );
				seqve[i] = cleseq;
				qualve[i] = clequl;
				if ( cleseq.size() != clequl.size() ) {
					cout<<"error cleseq.size() != clequl.size() "<<cleseq.size()<<","<<clequl.size()<<","<<cle.second<<endl; exit(1);
				}
			} else {
				// Runs of 'N' were found but no stretch reached the minimum length.
				discardid.insert( i );
			}
		}
	}
	fout.close();



	// ---- Stage 4: store the surviving reads --------------------------------
	if ( mate && insert_m > 0 ) {
		// Paired mode: only reads that appear in a link are stored.
		// imap maps a stored read id to its 1-based number in IdSeqVe.
		map< string, int > imap;
		for ( size_t i = 0; i < vesize; ++i ) {
			
			if ( mp.find( ridve[i] ) != mp.end() ) {
				if ( discardid.find( i ) == discardid.end() ) {
					IdSeqVe.push_back( make_pair( ridve[i], seqve[i] ) );
					// presumably converts quality characters to integers
					// using 32 as the offset - TODO confirm against qlt_char_int()
					vector<int> qua = qlt_char_int( qualve[i], 32 );
					Qualve.push_back( qua );
					imap.insert( make_pair( ridve[i], (int)IdSeqVe.size() ) );
					NumSidMap.insert(make_pair((int)IdSeqVe.size(), ridve[i]));
					SidNumMap.insert(make_pair(ridve[i], (int)IdSeqVe.size()));
				
				}
			}
		}

		// Record a link for every stored read whose partner was stored too.
		map< string, int >::iterator Ite;
		for ( Ite = imap.begin(); Ite != imap.end(); ++Ite ) {
			if ( imap.find( mp[Ite->first] ) != imap.end() )
				PairMa.insert( make_pair( Ite->second, make_pair( imap[mp[Ite->first]], make_pair( insert_m, insert_v ) ) ) );
		}
	} else {
		// Unpaired mode: every read that was not discarded is stored and
		// PairMa is left untouched.
		for ( size_t i = 0; i < vesize; ++i ) {
			if ( discardid.find( i ) == discardid.end() ) {
				IdSeqVe.push_back( make_pair( ridve[i], seqve[i] ) );
				vector<int> qua = qlt_char_int( qualve[i], 32 );
				Qualve.push_back( qua );
				NumSidMap.insert(make_pair((int)IdSeqVe.size(), ridve[i]));
				SidNumMap.insert(make_pair(ridve[i], (int)IdSeqVe.size()));
			
			}
			
			
		}
	}


}

// Write every entry of PairMa to `filename`, one per line, as the four
// comma-separated integers: key, partner, and the two values of the inner pair.
void GetRead::outmatepair(const char* filename)
{
	ofstream ouf(filename);

	map<int, pair<int, pair<int, int> > >::iterator cur = PairMa.begin();
	while ( cur != PairMa.end() ) {
		const pair<int, pair<int, int> >& link = cur->second;
		ouf << cur->first << "," << link.first << ","
		    << link.second.first << "," << link.second.second << endl;
		++cur;
	}
}


