#include "kmer.h"

#include <cstdlib>
#include <sstream>

void Matchseed::reversep()
{
	set<pair<int, int> > n_fwmatch;
	set<pair<int, int> >::iterator Ites;
	for(Ites = fwmatch.begin(); Ites != fwmatch.end(); ++Ites){
	
		n_fwmatch.insert(make_pair(Ites->second, Ites->first));
	}
	fwmatch = n_fwmatch;
	set<pair<int, int> > n_revmatch;
	for(Ites = revmatch.begin(); Ites != revmatch.end(); ++Ites){
	
		n_revmatch.insert(make_pair(Ites->second, Ites->first));
	}
	revmatch = n_revmatch;
}

// Returns a copy of `match` in which the second coordinate of every pair is
// replaced by size - (second + wl - 1) + 1; the first coordinate is kept.
// NOTE(review): presumably this re-expresses a window of length `wl` starting
// at `second` in the coordinates of the opposite strand of a sequence of
// length `size` -- confirm against the callers.
set<pair<int, int> > antimatch(set<pair<int, int> > &match, int size, int wl)
{
	typedef set<pair<int, int> > SeedSet;

	SeedSet mirrored;
	for(SeedSet::const_iterator seed = match.begin(); seed != match.end(); ++seed){
		const int window_end = seed->second + wl - 1;
		mirrored.insert(pair<int, int>(seed->first, size - window_end + 1));
	}
	return mirrored;
}



// Base-composition entropy of `str`, using logarithms to base 4.
// Upper- and lower-case A/C/G/T are counted; any other character makes the
// function return 0 immediately. An empty string also yields 0.
double Kmer::kmerinfo(string &str)
{
	// Tallies are kept in A, C, G, T order so the terms are summed in a fixed order.
	int tally[4] = {0, 0, 0, 0};
	const int total = (int)str.size();

	for(string::const_iterator ch = str.begin(); ch != str.end(); ++ch){
		switch(*ch){
			case 'A': case 'a': tally[0] += 1; break;
			case 'C': case 'c': tally[1] += 1; break;
			case 'G': case 'g': tally[2] += 1; break;
			case 'T': case 't': tally[3] += 1; break;
			default:
				return 0;
		}
	}

	double entropy = 0;
	for(int base = 0; base < 4; ++base){
		if(tally[base] == 0)
			continue;	// an absent base contributes nothing (and log(0) must be avoided)
		double pro = double(tally[base])/total;
		entropy += (-1)*pro*(log(pro)/log(double(4)));
	}
	return entropy;
}

int Kmer::getkeykmer_t( int ii, string &read, int len, map<long long, map<int, set<pair<int, int> > > > &keykmermap, set<long long> &abund, int si )
{

	

	int Read_size = (int)read.size();

	for(int i = 0; i < Read_size-len+1; ++i){
		
		string lstr = read.substr(i, len);
		if ( lstr.find("-") != lstr.npos )
			continue;
		if ( lstr.find("N") != lstr.npos )
			continue;
		double sh1 = kmerinfo(lstr);
		long long t1 = motif2Digital(lstr);
		string rlstr = revs(lstr);
		double sh2 = kmerinfo(rlstr);
		long long t2 = motif2Digital(rlstr);
		int forb;
		if(sh1 < sh2){
			forb = 1;
			lstr = rlstr;
			t1 = t2;
			sh1 = sh2;
		}else if(sh1 == sh2){
			if(t2 < t1){
				forb = 1; 
				lstr = rlstr;
				t1 = t2;
				sh1 = sh2;
			}else
				forb = 0;
		}else
			forb = 0;

		if(abund.find(t1) == abund.end()){
		
				si += 1;
				if((int)keykmermap[t1].size() < 100){
					keykmermap[t1][ii+1].insert(make_pair(i+1, forb));
				}else{
					abund.insert(t1);
					keykmermap.erase(t1);
				}
	
		}
	}
	return si;
}

// First spill of the in-memory k-mer table to disk.
//
// The table is written in key order into files named
// "numb_<numb>_tmp_<file_n+1>_<tag>.txt", one record per line:
//     <code>:<read>(<pos>,<strand>)...|<read>(...)...|
// A file is closed as soon as the number of hits written to it exceeds
// si / n; the code of the record that crossed the limit is appended to
// `cutline`. Every file name is appended to tempfiles[tag].
// Returns file_n + 1.
int Kmer::readin_kmerfile1( int n, int si, int file_n, int numb,
						   map<long long, map<int, set<pair<int, int> > > > &keykmermap, 
						   map<int, vector<string> > &tempfiles,
						   vector<long long> &cutline )
{
	typedef set<pair<int, int> > HitSet;
	typedef map<int, HitSet> ReadHits;
	typedef map<long long, ReadHits> KmerTable;

	file_n += 1;
	const int quota = int(si / n);

	int tag = 0;
	KmerTable::iterator rec = keykmermap.begin();
	while(rec != keykmermap.end()){
		tag += 1;
		ostringstream FileName;
		FileName << "numb_"<<numb<<"_tmp_" << file_n << "_"<<tag<<".txt";
		ofstream File;
		File.open ( FileName.str().c_str(), ios::out );
		tempfiles[tag].push_back(FileName.str());

		int written = 0;
		bool chunk_done = false;
		while(!chunk_done){
			File << rec->first << ":";
			for(ReadHits::iterator per_read = rec->second.begin(); per_read != rec->second.end(); ++per_read){
				written += (int)per_read->second.size();
				File << per_read->first;
				for(HitSet::iterator hit = per_read->second.begin(); hit != per_read->second.end(); ++hit)
					File << "(" << hit->first << "," << hit->second << ")";
				File << "|";
			}
			File << "\n";

			if(written > quota){
				// This record closes the chunk; remember its code as the boundary.
				cutline.push_back(rec->first);
				chunk_done = true;
			}
			++rec;
			if(rec == keykmermap.end())
				chunk_done = true;
		}
		File.close();
	}

	return file_n;
}

// Write one table record as "<code>:<read>(<pos>,<strand>)...|...|\n".
static void write_kmer_record(ostream &out, long long code,
							  const map<int, set<pair<int, int> > > &hits_by_read)
{
	out << code << ":";
	map<int, set<pair<int, int> > >::const_iterator per_read;
	for(per_read = hits_by_read.begin(); per_read != hits_by_read.end(); ++per_read){
		out << per_read->first;
		set<pair<int, int> >::const_iterator hit;
		for(hit = per_read->second.begin(); hit != per_read->second.end(); ++hit)
			out << "(" << hit->first << "," << hit->second << ")";
		out << "|";
	}
	out << "\n";
}

// Subsequent spill of the in-memory k-mer table to disk.
//
// Uses the boundaries recorded in `cutline`: chunk `tag` (1-based) receives
// the records whose code is <= cutline[tag-1]; the last chunk receives
// everything that remains. One file "numb_<numb>_tmp_<file_n+1>_<tag>.txt"
// is created per chunk (possibly with no records) and its name is appended
// to tempfiles[tag].
//
// Exits the program if the sizes of `cutline` and `tempfiles` are inconsistent.
// Returns file_n + 1.
int Kmer::readin_kmerfile2( int file_n, int numb, 
						   map<long long, map<int, set<pair<int, int> > > > &keykmermap, 
						   map<int, vector<string> > &tempfiles,
						   vector<long long> &cutline)
{
	if(!((int)cutline.size() == (int)tempfiles.size() || (int)cutline.size()+1 == (int)tempfiles.size())){
		cout<<"error sdf"<<cutline.size()<<","<<tempfiles.size()<<endl;  exit(1);
	}

	int tagsize = (int)tempfiles.size();
	map<long long, map<int, set<pair<int, int> > > >::iterator ite = keykmermap.begin();

	file_n += 1;

	int tag = 1;
	for(; tag < tagsize; ++tag){
		ostringstream FileName;
		FileName << "numb_"<<numb<<"_tmp_" << file_n << "_"<<tag<<".txt";
		ofstream File;
		File.open ( FileName.str().c_str(), ios::out );
		tempfiles[tag].push_back(FileName.str());
		long long c = cutline[tag-1];

		// The end check must come first: the table may be empty, or may have
		// been exhausted by an earlier chunk, and end() must not be dereferenced.
		while(ite != keykmermap.end() && ite->first <= c){
			write_kmer_record(File, ite->first, ite->second);
			++ite;
		}
		File.close();
	}

	// Last chunk: everything above the final boundary.
	tag = tagsize;
	ostringstream FileName;
	FileName << "numb_"<<numb<<"_tmp_" << file_n << "_"<<tag<<".txt";
	ofstream File;
	File.open ( FileName.str().c_str(), ios::out );
	tempfiles[tag].push_back(FileName.str());
	for(; ite != keykmermap.end(); ++ite)
		write_kmer_record(File, ite->first, ite->second);
	File.close();

	return file_n;
}



int Kmer::getkeykmer2(vector<pair<string, string > > &wholeread, int numb )
{
	set<long long> abund;

	int readsize = (int)wholeread.size();
	int si = 0;
	int file_n = 0;
	int n = 0;
	vector<long long> cutline;
	for(int ii = 0; ii < readsize; ++ii){
		int Read_size = (int)wholeread[ii].second.size();
		ReadLenVe.push_back(Read_size);

		si = getkeykmer_t( ii, wholeread[ii].second, len, keykmermap, abund, si );

		if(si >= kmernum){
			if(n == 0){
				int layn = int(readsize/ii) + 1;
				file_n = readin_kmerfile1( layn, si, file_n, numb, keykmermap, tempfiles, cutline );
			
				n += 1;
				si = 0;
				keykmermap.clear();
				continue;
			}

		
			file_n = readin_kmerfile2( file_n, numb, keykmermap, tempfiles, cutline );
		
			
			si = 0;
			keykmermap.clear();
			continue;
		}

	}

	if(!tempfiles.empty()){

		readin_kmerfile2( file_n, numb, keykmermap, tempfiles, cutline );
		
	
		keykmermap.clear();
	}

	return 0;
}

int Kmer::syplifymatrix()
{
	map<int, map<int, Matchseed> >::iterator Itemm;
	for(Itemm = MatchMatrix.begin(); Itemm != MatchMatrix.end(); ++Itemm){
		map<int, Matchseed>::iterator Item;
		for(Item = Itemm->second.begin(); Item != Itemm->second.end(); ++Item){
			set<pair<int, int> > ns;
			set<pair<int, int> >::iterator Ites;
			int length = 0;
			int size = (int)Item->second.fwmatch.size();
			if(size < 10)
				length = len;
			else if(size < 20)
				length = 50;
			else
				length = 100;
			for(Ites = Item->second.fwmatch.begin(); Ites != Item->second.fwmatch.end(); ++Ites){
				if(ns.empty()){
					ns.insert(*Ites);
					continue;
				}
				pair<int, int> p1 = *ns.rbegin();
				pair<int, int> p2 = *Ites;

				int diff1, diff2;
				if(p2.first >= p1.first)
					diff1 = p2.first-p1.first;
				else
					diff1 = p1.first - p2.first;
				if(p2.second >= p1.second)
					diff2 = p2.second - p1.second;
				else
					diff2 = p1.second - p2.second;
				
				if(diff1 == diff2 && diff1 < length)
					continue;
				else
					ns.insert(p2);

			}
			if(!Item->second.fwmatch.empty())
				ns.insert(*Item->second.fwmatch.rbegin());
			Item->second.fwmatch = ns;
			ns.clear();
			size = (int)Item->second.revmatch.size();
			if(size < 10)
				length = len;
			else if(size < 20)
				length = 50;
			else
				length = 100;
			for(Ites = Item->second.revmatch.begin(); Ites != Item->second.revmatch.end(); ++Ites){
				if(ns.empty()){
					ns.insert(*Ites);
					continue;
				}
				pair<int, int> p1 = *ns.rbegin();
				pair<int, int> p2 = *Ites;
				int diff1, diff2;
				if(p2.first >= p1.first)
					diff1 = p2.first-p1.first;
				else
					diff1 = p1.first - p2.first;
				if(p2.second >= p1.second)
					diff2 = p2.second - p1.second;
				else
					diff2 = p1.second - p2.second;
				if(diff1 == diff2 && diff1 < length)
					continue;
				else
					ns.insert(p2);

			}
			if(!Item->second.revmatch.empty())
				ns.insert(*Item->second.revmatch.rbegin());
			Item->second.revmatch = ns;
			ns.clear();
		}
	}

	return 0;
}


int Kmer::getmatrix()
{
	if(!tempfiles.empty()){
		map<int, vector<string> >::iterator Itek;
		for(Itek = tempfiles.begin(); Itek != tempfiles.end(); ++Itek){
			vector<string> ve = Itek->second;
			int size = (int)ve.size();
			for(int i = 0; i < size; ++i){
				
				string filename = ve[i];
				ifstream File(filename.data());
				if(!File.good()){
					cout<< "the file "<<filename<< " not found! " <<endl;  exit(1);
				}
				
				int ind = 0;
				int state = 0;
				long long it;
				int id;
				string line;
				while(!File.eof()){
					getline(File, line);
					ind += 1;
				
					if(line.empty())
						break;
					if(line.find(":") == line.npos){
						cout<<"error1001 "<<line<<endl;  exit(1);

					}
					string it_s = line.substr(0, line.find(":"));
					double a = atof(it_s.c_str());
					
					
					it = (long long)a;
				
					line = line.substr(line.find(":")+1);
			
					while(line.find("|") != line.npos){
						string aline = line.substr(0,line.find("|"));
					
						line = line.substr(line.find("|")+1);
						
						string id_s = aline.substr(0, aline.find("("));
						
						id = atoi(id_s.c_str());
				
						aline = aline.substr(aline.find("("));
						while(!aline.empty()){
							string p1_s = aline.substr(1,aline.find(","));
							string p2_s = aline.substr(aline.find(",")+1, aline.find(")")-aline.find(",")-1);
							int p1 = atoi(p1_s.c_str());
							int p2 = atoi(p2_s.c_str());
						
						
							keykmermap[it][id].insert(make_pair(p1, p2));
							aline = aline.substr(aline.find(")")+1);
						
							
						}
						
					}
				

				}
				File.close();
				if( unlink( filename.c_str() ) == -1 )
					perror( "Could not delete" );

				
			

			}

			while(!keykmermap.empty()){
				map<long long, map<int, set<pair<int, int> > > >::iterator Ite = keykmermap.begin();
				map<int, set<pair<int, int> > > rel = Ite->second;
				keykmermap.erase(Ite);
				if((int)rel.size() == 1)
					continue;
				map<int, set<pair<int, int> > >::iterator item;
				for(item = rel.begin(); item != rel.end(); ++item){
					int read_x = item->first;
					set<pair<int, int> > idx_x = item->second;
					map<int, set<pair<int, int> > >::iterator item_2 = item;
					++item_2;
					if(item_2 == rel.end())
						break;
					for(; item_2 != rel.end(); ++item_2){
						int read_y = item_2->first;
						set<pair<int, int> > idx_y = item_2->second;
						
					
						set<pair<int, int> >::iterator itese_x;
						for(itese_x = idx_x.begin(); itese_x != idx_x.end(); ++itese_x){
							set<pair<int, int> >::iterator itese_y;
							for(itese_y = idx_y.begin(); itese_y != idx_y.end(); ++itese_y){
								if(itese_x->second == itese_y->second){
								
										MatchMatrix[read_x][read_y].fwmatch.insert(make_pair(itese_x->first, itese_y->first));
								
								
								}
								else{
								
									MatchMatrix[read_x][read_y].revmatch.insert(make_pair(itese_x->first, itese_y->first));
								
								}
							}
						}
					}
				}
			}
			keykmermap.clear();
			syplifymatrix();

		}
	}


	
	if(!keykmermap.empty()){
		while(!keykmermap.empty()){
			map<long long, map<int, set<pair<int, int> > > >::iterator Ite = keykmermap.begin();
			map<int, set<pair<int, int> > > rel = Ite->second;
			keykmermap.erase(Ite);
			if((int)rel.size() == 1)
				continue;
			map<int, set<pair<int, int> > >::iterator item;
			for(item = rel.begin(); item != rel.end(); ++item){
				int read_x = item->first;
				set<pair<int, int> > idx_x = item->second;
				map<int, set<pair<int, int> > >::iterator item_2 = item;
				++item_2;
				if(item_2 == rel.end())
					break;
				for(; item_2 != rel.end(); ++item_2){
					int read_y = item_2->first;
					set<pair<int, int> > idx_y = item_2->second;
					
				
					set<pair<int, int> >::iterator itese_x;
					for(itese_x = idx_x.begin(); itese_x != idx_x.end(); ++itese_x){
						set<pair<int, int> >::iterator itese_y;
						for(itese_y = idx_y.begin(); itese_y != idx_y.end(); ++itese_y){
							if(itese_x->second == itese_y->second){
							
								MatchMatrix[read_x][read_y].fwmatch.insert(make_pair(itese_x->first, itese_y->first));
							
							
							}
							else{
							
								MatchMatrix[read_x][read_y].revmatch.insert(make_pair(itese_x->first, itese_y->first));
							
							}
						}
					}
				}
			}
		}
		keykmermap.clear();
		syplifymatrix();
	}
	



	return 0;
}
// Check whether the seeds shared by two reads reach the ends of the reads.
//
// `first` coordinates are measured against len1 and `second` coordinates
// against len2. A coordinate counts as "at the head" when it is <= 100 and as
// "at the tail" when the distance from the end of the seed (coordinate + len)
// to the read length is <= 100. The extreme `first` values are taken from the
// first and last element of the set; the extreme `second` values are searched
// over the whole set.
//
// Forward seeds pass when some head test and some tail test succeed.
// Reverse seeds pass when both heads, both tails, or the head and tail of the
// same read succeed. Returns true as soon as either set passes.
bool Kmer::filtmatchseed(Matchseed &match, int len1, int len2)
{
	typedef set<pair<int, int> > SeedSet;
	const int edge = 100;

	if(!match.fwmatch.empty()){
		const SeedSet &fw = match.fwmatch;
		const int lo1 = fw.begin()->first;
		const int hi1 = fw.rbegin()->first;
		int lo2 = fw.begin()->second;
		int hi2 = lo2;
		for(SeedSet::const_iterator it = fw.begin(); it != fw.end(); ++it){
			if(it->second < lo2)
				lo2 = it->second;
			if(it->second > hi2)
				hi2 = it->second;
		}

		const bool head1 = lo1 <= edge;
		const bool head2 = lo2 <= edge;
		const bool tail1 = (len1 - (hi1 + len)) <= edge;
		const bool tail2 = (len2 - (hi2 + len)) <= edge;
		// Same as testing the four head/tail combinations one by one.
		if((head1 || head2) && (tail1 || tail2))
			return true;
	}

	if(!match.revmatch.empty()){
		const SeedSet &rv = match.revmatch;
		const int lo1 = rv.begin()->first;
		const int hi1 = rv.rbegin()->first;
		int lo2 = rv.begin()->second;
		int hi2 = lo2;
		for(SeedSet::const_iterator it = rv.begin(); it != rv.end(); ++it){
			if(it->second < lo2)
				lo2 = it->second;
			if(it->second > hi2)
				hi2 = it->second;
		}

		const bool head1 = lo1 <= edge;
		const bool head2 = lo2 <= edge;
		const bool tail1 = (len1 - (hi1 + len)) <= edge;
		const bool tail2 = (len2 - (hi2 + len)) <= edge;
		if((head1 && head2) || (tail1 && tail2) || (head1 && tail1) || (head2 && tail2))
			return true;
	}

	return false;
}



void Kmer::filtmatrix()
{
	int readsize = (int)ReadLenVe.size();
	for(int i = 1; i <= readsize; ++i){
		if(MatchMatrix.find(i) != MatchMatrix.end()){
		
			int len1 = ReadLenVe[i-1];
			vector<int> dele;
			map<int, Matchseed>::iterator Item;
			for(Item = MatchMatrix[i].begin(); Item != MatchMatrix[i].end(); ++Item){
				int j = Item->first;
				int len2 = ReadLenVe[j-1];
				if(!filtmatchseed(Item->second, len1, len2)){
					dele.push_back(j);
			
				}

			}
		
			if(!dele.empty()){
				int dsize = (int)dele.size();
				for(int k = 0; k < dsize; ++k){
					MatchMatrix[i].erase(dele[k]);
				}
			}
			if(MatchMatrix[i].empty())
				MatchMatrix.erase(i);
		}
	
	}
}

// Drain MatchMatrix into one combined matrix and append it to `final`.
//
// Rows are visited by following connections outward from a seed row: first the
// rows whose key is a seed id, then remaining rows that have a seed id among
// their columns; newly reached ids become the next round of seeds.
// Rows are only erased from MatchMatrix right after being copied into
// `matrix1`, and the outer loop runs until MatchMatrix is empty, so every row
// ends up in `matrix1`. `matrix1` is declared outside the loops and pushed to
// `final` once, after them: each call adds exactly one element to `final`.
// MatchMatrix is empty on return. Always returns 0.
int Kmer::getfinalcluster()
{
	// ids that have already been queued as seeds at some point
	set<int> used;
	// accumulates every row taken out of MatchMatrix
	map<int, map<int, Matchseed> > matrix1;
	
	while(!MatchMatrix.empty()){

		// start a new traversal from the first remaining row
		set<int> seed;
		seed.insert(MatchMatrix.begin()->first);
		used.insert(MatchMatrix.begin()->first);
		while(!seed.empty()){
		
			// ids discovered in this round; they become `seed` for the next one
			set<int> al_seed;
			set<int>::iterator ite;
		
			// Step 1: move the rows keyed by a seed id and queue their unseen columns.
			for(ite = seed.begin(); ite != seed.end(); ++ite){
				int id = *ite;
				if(MatchMatrix.find(id) != MatchMatrix.end()){
					matrix1.insert(make_pair(id, MatchMatrix[id]));
					map<int, Matchseed>::iterator item;
					for(item = MatchMatrix[id].begin(); item != MatchMatrix[id].end(); ++item){
						int id2 = item->first;
						if(used.find(id2) == used.end()){
							al_seed.insert(id2);
							used.insert(id2);

						}
					}
				
					MatchMatrix.erase(id);	
				
				}
			}

			if(MatchMatrix.empty())
				break;
		
				// Step 2: scan the remaining rows for ones that list a seed id as a column.
				// `ini` is the key from which the next scan resumes.
				map<int, map<int, Matchseed> >::iterator Item;
				int ini = MatchMatrix.begin()->first;
				while(1){
				
					Item = MatchMatrix.find(ini);
					// state == 1: the row just moved was the last row of MatchMatrix
					int state = 0;
					// tag == 1: a row containing a seed column was found in this scan
					int tag = 0;
					for(; Item != MatchMatrix.end(); ++Item){
					
						map<int, Matchseed>::iterator ite_m;
						for(ite_m = Item->second.begin(); ite_m != Item->second.end(); ++ite_m){
							int id2 = ite_m->first;
							if(seed.find(id2) != seed.end()){
								tag = 1;
							
								break;
							}
						}
						if(tag == 1){
							matrix1.insert(make_pair(Item->first, Item->second));
						
							// queue the row key and all of its columns if not seen before
							if(used.find(Item->first) == used.end()){
								al_seed.insert(Item->first);
								used.insert(Item->first);
							}
							for(ite_m = Item->second.begin(); ite_m != Item->second.end(); ++ite_m){
								int id2 = ite_m->first;
								if(used.find(id2) == used.end()){
									al_seed.insert(id2);
									used.insert(id2);
								}
							}
							// remember where to resume before the erase invalidates Item
							map<int, map<int, Matchseed> >::iterator Item_2 = Item;
							++Item_2;
							if(Item_2 == MatchMatrix.end()){
								state = 1;
								
							}else{
								ini = Item_2->first;
							}
						
							MatchMatrix.erase(Item);
							break;
						}
						

					}
					// stop when the scan reached the end without a hit, or the hit was the last row
					if(state == 1 || tag == 0)
						break;
				}
			
		
			seed = al_seed;
			al_seed.clear();
			if(MatchMatrix.empty())
				break;
		}
	
	
		
	}
	final.push_back(matrix1);

	MatchMatrix.clear();
	matrix1.clear();


	return 0;
}



// Run the matching pipeline; the return values of the steps are ignored.
void Kmer::ma()
{
	getmatrix();		// build MatchMatrix from the k-mer table / temp files
	filtmatrix();		// drop read pairs rejected by filtmatchseed()
	getfinalcluster();	// move what is left into `final`
}

