#include "anamergeoverlap.h"

void splitiPathattile( vector< int > &spl_tile_ve, iPath& ip, vector< iPath > &ipve,  Ass_Mapping &amo, Assembly_Bank &ass_bank )
{
	set< pair<bool, Ctg_id > > last_frg;
	int last_spl = 1;
	for ( size_t i = 0; i < spl_tile_ve.size(); ++i )
	{
		if ( spl_tile_ve[i] == 1 )
			continue;
		if ( spl_tile_ve[i] == last_spl + 1 )
			continue;
		if ( spl_tile_ve[i] > (int)ip.Tile_list.size() )
		{
			cout<<"error spl_tile "<<spl_tile_ve[i]<<" "<<ip.Tile_list.size() <<endl; exit(1);
		}
		if ( spl_tile_ve[i] == (int)ip.Tile_list.size() )
			break;
		iPath tip;
		int begin_pos, end_pos;
		int tile_order = 0;
		for ( list< Tile >::iterator ite = ip.Tile_list.begin(); ite != ip.Tile_list.end(); ++ite )
		{
			++tile_order;
			if ( tile_order < last_spl )
				continue;
			if ( tile_order >= last_spl && tile_order <= spl_tile_ve[i] )
			{
				if ( tile_order == last_spl )
				{
					begin_pos = ite->pos_iPath.first;
				}
				if ( tile_order == spl_tile_ve[i] )
					end_pos = ite->pos_iPath.second;
				tip.Tile_list.push_back( *ite );
			}
			if ( tile_order > spl_tile_ve[i] )
				break;

		}
		int rel_start = begin_pos - ip.Tile_list.front().pos_iPath.first;
		int len = end_pos - begin_pos + 1;
		tip.seq = ip.seq.substr( rel_start, len );
		ipve.push_back( tip );

		last_spl = spl_tile_ve[i];
	}

	if ( last_spl == 1 )
		ipve.push_back( ip );
	else
	{
		iPath tip;
		int begin_pos, end_pos;
		int tile_order = 0;
		for ( list< Tile >::iterator ite = ip.Tile_list.begin(); ite != ip.Tile_list.end(); ++ite )
		{
			++tile_order;
			if ( tile_order < last_spl )
				continue;
			if ( tile_order >= last_spl  )
			{
				if ( tile_order == last_spl )
				{
					begin_pos = ite->pos_iPath.first;
				}
				
				tip.Tile_list.push_back( *ite );
			}

		}
		end_pos = ip.Tile_list.back().pos_iPath.second;
		int rel_start = begin_pos - ip.Tile_list.front().pos_iPath.first;
		int len = end_pos - begin_pos + 1;
		tip.seq = ip.seq.substr( rel_start, len );
		ipve.push_back( tip );

	}
}

void anaoverlap( Ass_Mapping &amo, Assembly_Bank &ass_bank )
{
	ofstream outf("overlap_log");
	vector< iPath > new_iPathve;
	for ( size_t i = 0; i < amo.iPath_ve.size(); ++i )
	{
		if ( amo.iPath_ve[i].seq.empty() )
			continue;

		int tile_order = 0;
		vector< ctgfrg > ctgfrg_ve;
		vector< bool > overlap_tag;
		map< pair<bool, Ctg_id >, size_t > last_occupy_frg;   // < subject_ori, ctgid >, ctgfrg_id
		for ( list< Tile >::iterator ite = amo.iPath_ve[i].Tile_list.begin(); ite != amo.iPath_ve[i].Tile_list.end(); ++ite )
		{
			tile_order += 1;
			
			bool first_subj_ori = ite->subject_ori;
			Ctg_id first_ctg = ite->ctg;
			int first_pos_ctg_begin = ite->pos_ctg.first;
			int first_pos_ctg_end = ite->pos_ctg.second;
			pair< bool, Ctg_id > first_frg = make_pair( first_subj_ori, first_ctg );
			size_t first_k;
			if ( last_occupy_frg.find( first_frg ) != last_occupy_frg.end() )
			{
				
				size_t k = last_occupy_frg[first_frg];
				first_k = k;
				ctgfrg_ve[k].end = first_pos_ctg_end;
				ctgfrg_ve[k].last_occupy_tile = tile_order;
				if ( !ite->match )
				{
					ctgfrg_ve[k].unoverlap_rg.push_back( make_pair( first_pos_ctg_begin, first_pos_ctg_end ) );
					ctgfrg_ve[k].unoverlap_tile.push_back( tile_order );
				}
			} else
			{
				first_k = ctgfrg_ve.size();
				ctgfrg cf;
				cf.subject_ori = first_subj_ori;
				cf.ctg = first_ctg;
				cf.first_occupy_tile = tile_order;
				cf.last_occupy_tile = tile_order;
				cf.start = first_pos_ctg_begin;
				cf.end = first_pos_ctg_end;
				if ( !ite->match )
				{
					cf.unoverlap_rg.push_back( make_pair( first_pos_ctg_begin, first_pos_ctg_end ) );
					cf.unoverlap_tile.push_back( tile_order );
				}
				ctgfrg_ve.push_back( cf );
			}
			map< pair<bool, Ctg_id >, size_t > this_occupy_frg;
			this_occupy_frg.insert( make_pair( make_pair( ite->subject_ori, first_ctg ), first_k ) );

			if ( ite->match )
			{
				bool second_subj_ori = !ite->subject_ori;
				Ctg_id second_ctg;
				int second_pos_ctg_begin;
				int second_pos_ctg_end;
				if ( ite->subject_ori )
				{
					second_ctg = ass_bank.ass_query.ctg_name_map[amo.mcoords[ite->coo].qryname()];
					if ( ite->pos_ctg.first == amo.mcoords[ite->coo].refstart() )   // means same direct
					{
						second_pos_ctg_begin = amo.mcoords[ite->coo].qrystart();
						second_pos_ctg_end = amo.mcoords[ite->coo].qryend();
					} else if ( ite->pos_ctg.first == amo.mcoords[ite->coo].refend() )
					{
						second_pos_ctg_begin = amo.mcoords[ite->coo].qryend();
						second_pos_ctg_end = amo.mcoords[ite->coo].qrystart();
					}
				} else
				{
					second_ctg = ass_bank.ass_subject.ctg_name_map[amo.mcoords[ite->coo].refname()];
					if ( ite->pos_ctg.first == amo.mcoords[ite->coo].qrystart() )   // means same direct
					{
						second_pos_ctg_begin = amo.mcoords[ite->coo].refstart();
						second_pos_ctg_end = amo.mcoords[ite->coo].refend();
					} else if ( ite->pos_ctg.first == amo.mcoords[ite->coo].qryend() )
					{
						second_pos_ctg_begin = amo.mcoords[ite->coo].refend();
						second_pos_ctg_end = amo.mcoords[ite->coo].refstart();
					} 
				}

				pair< bool, Ctg_id > second_frg = make_pair( second_subj_ori, second_ctg );
				size_t second_k;
				if ( last_occupy_frg.find( second_frg ) != last_occupy_frg.end() )
				{
					size_t k = last_occupy_frg[second_frg];
					second_k = k;
					ctgfrg_ve[k].end = second_pos_ctg_end;
					ctgfrg_ve[k].last_occupy_tile = tile_order;
					
				} else
				{
					second_k = ctgfrg_ve.size();
					ctgfrg cf;
					cf.subject_ori = second_subj_ori;
					cf.ctg = second_ctg;
					cf.first_occupy_tile = tile_order;
					cf.last_occupy_tile = tile_order;
					cf.start = second_pos_ctg_begin;
					cf.end = second_pos_ctg_end;
					
					ctgfrg_ve.push_back( cf );
				}
				this_occupy_frg.insert( make_pair( make_pair( second_subj_ori, second_ctg ), second_k ) );
			}

			last_occupy_frg = this_occupy_frg;
		}

		
		set< int > spl_tile_set;
		for ( size_t j = 0; j < ctgfrg_ve.size(); ++j )
		{
			
			if ( ctgfrg_ve[j].end < ctgfrg_ve[j].start )
				ctgfrg_ve[j].direct = false;
			else
				ctgfrg_ve[j].direct = true;
			if ( ctgfrg_ve[j].direct )
			{
				ctgfrg_ve[j].total_len = ctgfrg_ve[j].end - ctgfrg_ve[j].start + 1;
				if ( ctgfrg_ve[j].unoverlap_rg.empty() )
				{
					ctgfrg_ve[j].left_fringe_overlap_len = ctgfrg_ve[j].total_len;
					ctgfrg_ve[j].right_fringe_overlap_len = ctgfrg_ve[j].total_len;
				} else
				{
					ctgfrg_ve[j].left_fringe_overlap_len = ctgfrg_ve[j].unoverlap_rg.front().first - ctgfrg_ve[j].start;
					ctgfrg_ve[j].right_fringe_overlap_len = ctgfrg_ve[j].end - ctgfrg_ve[j].unoverlap_rg.back().second;
				}
			} else
			{
				ctgfrg_ve[j].total_len = ctgfrg_ve[j].start - ctgfrg_ve[j].end + 1;
				if ( ctgfrg_ve[j].unoverlap_rg.empty() )
				{
					ctgfrg_ve[j].left_fringe_overlap_len = ctgfrg_ve[j].total_len;
					ctgfrg_ve[j].right_fringe_overlap_len = ctgfrg_ve[j].total_len;
				} else
				{
					ctgfrg_ve[j].left_fringe_overlap_len = ctgfrg_ve[j].start - ctgfrg_ve[j].unoverlap_rg.front().first;
					ctgfrg_ve[j].right_fringe_overlap_len = ctgfrg_ve[j].unoverlap_rg.back().second - ctgfrg_ve[j].end;
				}
			}

			double left_fringe_r = double(ctgfrg_ve[j].left_fringe_overlap_len)/ctgfrg_ve[j].total_len;
			double right_fringe_r = double(ctgfrg_ve[j].right_fringe_overlap_len)/ctgfrg_ve[j].total_len;
			if ( ctgfrg_ve[j].left_fringe_overlap_len > 0 && ctgfrg_ve[j].left_fringe_overlap_len < 200 && left_fringe_r < 0.5 )
			{
				int tile_order = ctgfrg_ve[j].first_occupy_tile;
				if ( tile_order > 1 )
				{
					spl_tile_set.insert( tile_order );
					outf<<">iPath"<<i<<" ctgfrg"<<j;
					outf<<" "<<ctgfrg_ve[j].subject_ori<<" "<<ctgfrg_ve[j].ctg<<" "<<ctgfrg_ve[j].start<<" "<<ctgfrg_ve[j].end<<" "<<ctgfrg_ve[j].left_fringe_overlap_len
						<<"("<<double(ctgfrg_ve[j].left_fringe_overlap_len)/ctgfrg_ve[j].total_len<<") "<<ctgfrg_ve[j].right_fringe_overlap_len
						<<"("<<double(ctgfrg_ve[j].right_fringe_overlap_len)/ctgfrg_ve[j].total_len<<") "<<endl;
				}
			}
			if ( ctgfrg_ve[j].right_fringe_overlap_len > 0 && ctgfrg_ve[j].right_fringe_overlap_len < 200 && right_fringe_r < 0.5 )
			{
				int tile_order = ctgfrg_ve[j].last_occupy_tile;
				if ( tile_order < (int)amo.iPath_ve[i].Tile_list.size() )
				{
					spl_tile_set.insert( tile_order );
					outf<<">iPath"<<i<<" ctgfrg"<<j;
					outf<<" "<<ctgfrg_ve[j].subject_ori<<" "<<ctgfrg_ve[j].ctg<<" "<<ctgfrg_ve[j].start<<" "<<ctgfrg_ve[j].end<<" "<<ctgfrg_ve[j].left_fringe_overlap_len
						<<"("<<double(ctgfrg_ve[j].left_fringe_overlap_len)/ctgfrg_ve[j].total_len<<") "<<ctgfrg_ve[j].right_fringe_overlap_len
						<<"("<<double(ctgfrg_ve[j].right_fringe_overlap_len)/ctgfrg_ve[j].total_len<<") "<<endl;
				}
			}

			
		}

		if ( spl_tile_set.empty() )
		{
			new_iPathve.push_back( amo.iPath_ve[i] );
		} else
		{
			vector< int > spl_tile_ve;
			for ( set< int >::iterator ite = spl_tile_set.begin(); ite != spl_tile_set.end(); ++ite )
				spl_tile_ve.push_back( *ite );
			vector< iPath > ipve;
			splitiPathattile( spl_tile_ve, amo.iPath_ve[i], ipve, amo, ass_bank );
			new_iPathve.insert( new_iPathve.end(), ipve.begin(), ipve.end() );
			outf<<"new "<<new_iPathve.size()<<endl;
		}
	}

	amo.iPath_ve = new_iPathve;
}


