/*
Promoter scanning module 
Author: Longshu Yang
Latest update: 2013/11/20
Version: 1.0
*/

#include "one.h"
using namespace std;

// Background model estimation.
//
// Counts every character of every sequence in seqs into B, then stores the
// relative frequency of each symbol of Base in pb. A copy of the raw counts is
// kept in O; calcu() restores B from O after every parameter update.
//
// NOTE(review): if seqs holds no characters at all, sum stays 0 and the
// frequencies below are 0/0 -- callers are expected to pass non-empty data.
void ONEEM::back_ground(Ve_Str seqs){
	B = pb = assign(0);
	double sum = 0;
	for(size_t i = 0; i < seqs.size(); i ++){
		for(size_t j = 0; j < seqs[i].size(); j ++){
			B[seqs[i][j]] ++;
			sum ++;
		}
	}
	for(size_t v = 0; v < Base.size(); v ++)
		pb[Base[v]] = B[Base[v]] / sum;
	// One snapshot of the raw counts is enough; it was previously re-copied on
	// every pass of the loop above (and skipped entirely for an empty Base).
	O = B;
	//B = assign(0);
}


// Builds a model seeded with one k-mer and runs a single E-step on data.
//
// signal : supplies reg, wm1 (copied to w1), dis (copied to dist) and min
//          (copied to lower).
// Kmer   : seed word; assign(Kmer) followed by rate(..., pb) yields the initial
//          matrix rwm1 (presumably a probability-to-background ratio per
//          position -- confirm against the helpers in one.h).
// data   : sequences used for the background counts and for the first E-step.
//
// After construction, `last` holds the log-likelihood of the seed model and
// wm1 / C / p0 hold the counts accumulated by that single pass.
ONEEM::ONEEM(sig signal, Str Kmer, Ve_Str data){
	reg = signal.reg;
	w1 = signal.wm1;
	dist = signal.dis;
	lower = signal.min;
	// Number of motif start positions scanned per sequence.
	start = reg - w1;
	// Uniform prior over the `start` positions plus one extra state stored at
	// ps[start] (reported elsewhere as the "no signal" probability).
	ps.resize(start + 1,1.0 / (start + 1));
	max = 0;
	// p0 is accumulated with += inside EM() and is not initialised anywhere
	// else in this file before that first use, so clear it explicitly.
	p0 = 0;
	back_ground(data);
	for(int i = 0; i < w1; i ++)
		wm1.push_back(assign(0));
	rwm1 = assign(Kmer);
	rwm1 = rate(rwm1,pb);
	C.resize(start,0);
	EM(1,data);
	//maxloop = signal.time;
}


//EM iterations: E-step accumulation and log-likelihood
//
// Runs at most t rounds over data. In every round, for each sequence k and each
// start position j < start, the product of rwm1[i][data[k][i + j]] over the w1
// motif columns is weighted by ps[j]; ps[start] is added as the extra
// "no signal" term. The normalised weights are accumulated into C (per
// position), wm1 (per column and symbol) and p0 (extra state), and log(sumk) is
// added to loglike.
//
// A round ends the loop when loglike changed by less than 1e-6 relative to the
// previous round. Otherwise `last` takes the new log-likelihood, loglike is
// cleared and -- unless t == 1 -- calcu() turns the accumulated counts into new
// parameters. With t == 1 the parameters are therefore left untouched and
// `last` simply holds the log-likelihood of the current model.
//
// Every sequence is indexed up to position start + w1 - 2; sequence lengths are
// not checked here.
//
// Returns a status message containing the number of completed rounds.
Str ONEEM::EM(int t, Ve_Str data){
	loglike = last = 0;
	// Start from empty accumulators. calcu() is the only other place that
	// clears them and it is skipped when t == 1, so counts left behind by an
	// earlier call (e.g. the EM(1, ...) issued by the constructor) would
	// otherwise be added a second time in the first round of this call.
	p0 = 0;
	for(size_t j = 0; j < C.size(); j ++)
		C[j] = 0;
	for(size_t i = 0; i < wm1.size(); i ++)
		for(size_t v = 0; v < Base.size(); v ++)
			wm1[i][Base[v]] = 0;
	// Per-sequence scratch weights; every entry is overwritten before it is
	// read, so one buffer serves all rounds.
	Ve_D lam(start,0);
	int loop;
	for(loop = 0; loop < t; loop ++){
		for(size_t k = 0; k < data.size(); k ++){
			double sumk = ps[start];
			for(int j = 0; j < start; j ++){
				double v = 1;
				for(int i = 0; i < w1; i ++)
					v *= rwm1[i][data[k][i + j]];
				lam[j] = v * ps[j];
				//lam[n][j] = v * ps[0] * pd[n] /start;
				sumk += lam[j];
			}
			p0 += ps[start] / sumk;
			//p0 += ps[1] / sumk;
			loglike += log(sumk);
			for(int j = 0; j < start; j ++){
				lam[j] /= sumk;
				C[j] += lam[j];
				//sumc += lam[k][n][j];
				for(int i = 0; i < w1; i ++)
					wm1[i][data[k][i + j]] += lam[j];
			}
		}
		//cout<<loop<<" "<<loglike<<"\t";
		// fabs, not abs: an unqualified abs can bind to the integer overload
		// and truncate both sides of the comparison.
		if(fabs(loglike - last) < fabs(1.0e-6 * loglike))
			break;
		last = loglike;
		loglike = 0;
		if(t != 1)
			calcu();
	}
	//cout<<endl;
	Str Temp;
	convertFromNumber(Temp,loop);
	return "EM iteration with " + Temp + " times has been completed!";
}


//Parameters calculation
//
// Converts the counts accumulated by EM() into new model parameters and clears
// the accumulators for the next round:
//   - pb   : (B minus the counts assigned to the motif columns), normalised
//            over Base; B is then restored from the snapshot O.
//   - rwm1 : each wm1 column normalised to sum to 1 over Base, divided by pb.
//   - ps   : C[j] / (sum(C) + p0) for every start position, with
//            ps[start] = p0 / (sum(C) + p0).
// wm1, C and p0 are reset to zero.
//
// NOTE(review): none of the denominators (sumb, sumw1[i], pb, sum(C) + p0) is
// checked against zero.
//
// Returns a fixed status message.
Str ONEEM::calcu(){
	double sumb = 0;
	Ve_D sumw1(w1,0);
	double sumc = 0;
	for(int j = 0; j < start; j ++)
		sumc += C[j];
	//sum up!!
	for(size_t v = 0; v < Base.size(); v ++){
		// Count of this symbol assigned to the motif, summed over all columns.
		double inMotif = 0;
		for(int i = 0; i < w1; i ++){
			sumw1[i] += wm1[i][Base[v]];
			inMotif += wm1[i][Base[v]];
		}
		B[Base[v]] -= inMotif;
		//B[Base[v]] += sumw[Base[v]];
		sumb += B[Base[v]];
	}
	//Expectation and Clearance
	for(size_t v = 0; v < Base.size(); v ++){
		pb[Base[v]] = B[Base[v]] / sumb;
		//cout<<pb[Base[v]]<<" "<<B[Base[v]]<<endl;
		B[Base[v]] = O[Base[v]];
		//B[Base[v]] = 0;
		for(int i = 0; i < w1; i ++){
			wm1[i][Base[v]] /= sumw1[i];
			rwm1[i][Base[v]] = wm1[i][Base[v]] / pb[Base[v]];
			//cout<<wm1[i][Base[v]]<<"\t";
			wm1[i][Base[v]] = 0;
		}
		//cout<<endl;
	}
	ps.clear();
	ps.resize(start + 1,0);
	//ps.resize(2,0);
	for(int j = 0; j < start; j ++){
		ps[j] = C[j] / (sumc + p0);
		C[j] = 0;
	}
	ps[start] = p0 / (sumc + p0);
	//ps[1] = p0 / (sumc + p0);
	p0 = 0;
	return "Parameter calculation has been completed!";
}


// Trains the TSS model and writes its parameters to the file Out.
//
// 1. kmer(signal.wm1, data) supplies candidate seed words. A model is built
//    from each of them and the seed whose model reports the highest `last`
//    (log-likelihood after one E-step) is kept; every improvement is echoed to
//    stdout.
// 2. A model seeded with the best word is run for up to maxloop EM rounds.
// 3. The max score, background probabilities, start-site probabilities, the
//    "no signal" probability and the weight matrix (as log2 scores and as
//    probabilities) are written to Out.
//
// NC is only used in the final progress message. Returns the trained model.
//
// NOTE(review): if kmer() returns no candidates, Kbest stays empty and the
// final model is built from an empty seed.
ONEEM TSS_pre(sig signal,Ve_Str data, Str NC, Str Out){
	Ofstream out(Out.data());
	if(!out)
		cerr<<"Warning: cannot open output file "<<Out<<endl;
	Ve_Str Kmer1 = kmer(signal.wm1,data);
	double last = 0;
	Str Kbest;
	for(size_t i = 0; i < Kmer1.size(); i ++){
		// The constructor already performs EM(1, data), which leaves the
		// seed's log-likelihood in temp.last. Repeating that call here would
		// recompute the same value, because the parameters do not change
		// when t == 1.
		ONEEM temp(signal,Kmer1[i],data);
		if(i == 0 || temp.last > last){
			cout<<Kmer1[i]<<"\t"<<temp.last<<endl;
			last = temp.last;
			Kbest = Kmer1[i];
		}
	}
	ONEEM best(signal,Kbest,data);
	cout<<best.EM(maxloop,data)<<endl;
	best.max = max_score(best.rwm1, best.pb);
	out<<"Promoter features:"<<"\n";
	out<<"max score: "<<best.max<<"\n";
	out<<"TSS background probability:"<<"\n";
	for(size_t v = 0; v < Base.size(); v ++)
		out<<best.pb[Base[v]]<<"\t";
	out<<"\n"<<"TSS start site probability:"<<"\n";
	// The last entry of ps is the "no signal" state and is printed separately.
	for(size_t k = 0; k + 1 < best.ps.size(); k ++)
		out<<best.ps[k]<<"\t";
	out<<"\n"<<"no signal probability:"<<"\n"<<best.ps[best.ps.size() - 1]<<"\n";
	const double ln2 = log(2.0);
	out<<">weight matrix score:"<<"\n";
	for(size_t k = 0; k < best.wm1.size(); k ++){
		for(size_t v = 0; v < Base.size(); v ++){
			// Tab-separated columns without a trailing tab.
			if(v)
				out<<"\t";
			out<<log(best.rwm1[k][Base[v]]) / ln2;
			//* best.rwm1[k][Base[v]] * best.pb[Base[v]];
		}
		out<<"\n";
	}
	out<<">weight matrix probability:"<<"\n";
	for(size_t k = 0; k < best.wm1.size(); k ++){
		for(size_t v = 0; v < Base.size(); v ++){
			if(v)
				out<<"\t";
			// rwm1 is stored relative to the background; multiplying by pb
			// recovers the column probability.
			out<<best.rwm1[k][Base[v]] * best.pb[Base[v]];
		}
		out<<"\n";
	}
	out.close();
	cout<<"TSSs of " + NC + " have been predicted!"<<endl;
	return best;
}

// Sums log2(r) * r * pb over every matrix position and every symbol of Base,
// where r = sm1[i][symbol] and pb is the matching background probability.
//
// sm1 : per-position matrix; the caller in this file passes the ratio matrix
//       rwm1.
// pb  : background probability per symbol.
//
// Entries with r == 0 contribute nothing (0 * log 0 is taken as 0); evaluating
// them literally gives -inf * 0 = NaN and would turn the whole sum into NaN.
double max_score(Ve_Map_C_D sm1, Map_C_D pb){
	const double ln2 = log(2.0);
	double result = 0;
	for(size_t i = 0; i < sm1.size(); i ++){
		for(size_t j = 0 ; j < Base.size(); j ++){
			const double ratio = sm1[i][Base[j]];
			if(ratio == 0)
				continue;
			result += log(ratio) / ln2 * ratio * pb[Base[j]];
		}
	}
	//result /= sm1.size();
	return result;
}

