#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include "typedefbase.h"
#include "sequencetransform.h"
#include "kmer.h"
#include "getseqs.h"
#include "contig.h"
#include "calignment.h"
#include "output.h"
#include "short_assembly.h"
#include "Trans_to_graph.h"
#include "link.h" 
#include "matrixsol.h"
#include "omp.h"
#include "config.h"

using namespace std;

// Constants
// Banner (program name, version and home page) written to stderr when the
// program is started without any arguments.
const char VERSION_STRING [] = "Metagenome Assembly Program (MAP) 1.0\nhttp://mech.ctb.pku.edu.cn/MAP/";

// Runs the assembly described by the configuration and writes the result files.
void MAP(Config & c);
// Fills the configuration from "-x value" option pairs in argv[1..argc-1].
void parse_command_line(int argc, char* argv[], Config & c);
// Reads options from the first line of c.config_file and applies them to c.
void parse_config_file(Config & c);

// Appends the pieces of s delimited by the character c to v.
void split(const string & s, char c, vector < string > & v);

// Print an optional error message followed by the usage text, then exit(1).
void exit_with_help(const char error[]);
void exit_with_help();


/**
 * Program entry point.
 *
 * Parses the command line, optionally merges the options found in the config
 * file given with -c, runs the assembly and reports the elapsed wall-clock
 * time in seconds on stdout.
 *
 * @return 0 when the run completes. Argument errors terminate the process
 *         with status 1 through exit_with_help().
 */
int main(int argc, char* argv[])
{
	// Keep the full time_t value; narrowing to int is unnecessary here.
	const time_t start = time(NULL);

	Config c;
	parse_command_line(argc, argv, c);
	if(c.config_file.length())
		parse_config_file(c);

	MAP(c);

	cout << time(NULL)-start << " seconds costs" << endl;

	// A completed run must report success to the calling shell; the previous
	// "return 1" made every successful run look like a failure.
	return 0;
}

/**
 * Parses "-x value" option pairs from argv[1..argc-1] into the configuration.
 *
 * Recognised options:
 *   -s  comma separated sequence files (each must be readable)
 *   -q  comma separated quality files (each must be readable)
 *   -m  comma separated matepair files (each must be readable)
 *   -c  config file (must be readable, may be given only once)
 *   -o  output prefix
 *   -k, -l, -d, -t, -n  integer settings stored in kth, len, default_quality,
 *       max_thread_num and kmernum respectively (converted with atoi)
 *   -e  error rate (converted with atof)
 *
 * Any malformed input ends the process through exit_with_help(). With no
 * arguments at all the version banner and the usage text are printed.
 *
 * @param argc number of entries in argv; argv[0] is ignored
 * @param argv option/value strings
 * @param c    configuration that receives the parsed values
 */
void parse_command_line(int argc, char* argv[], Config & c)
{
	if(argc <= 1)
	{
		cerr <<VERSION_STRING <<endl;
		exit_with_help();
	}

	string s;
	for(int i=1; i<argc; i++)
	{
		if(argv[i][0] != '-')
			exit_with_help("Options must start with \'-\'.");

		// Test argv[i][1] before argv[i][2]: for a bare "-" the terminator sits
		// at index 1 and index 2 would be past the end of the argument.
		if(argv[i][1] == '\0' || argv[i][2] != '\0')
			exit_with_help("The option should be exactly one letter.");
		const int option = argv[i][1];

		// Every option takes exactly one value, found in the next argument.
		i++;
		if(i == argc)
			exit_with_help("The last option has no value.");

		switch(option)
		{
		case 's':
			s = argv[i];
			split(s, ',', c.inseqfileve);
			for(unsigned int j=0; j<c.inseqfileve.size(); j++)
			{
				if(access(c.inseqfileve[j].c_str(), R_OK) == -1)
					exit_with_help("The sequence file(s) specified in -s are not accessible.");
			}
			break;
		case 'q':
			s = argv[i];
			split(s, ',', c.qualfileve);
			for(unsigned int j=0; j<c.qualfileve.size(); j++)
			{
				// The message used to name -s, which pointed users at the wrong option.
				if(access(c.qualfileve[j].c_str(), R_OK) == -1)
					exit_with_help("The quality file(s) specified in -q are not accessible.");
			}
			break;
		case 'm':
			s = argv[i];
			split(s, ',', c.matefileve);
			for(unsigned int j=0; j<c.matefileve.size(); j++)
			{
				if(access(c.matefileve[j].c_str(), R_OK) == -1)
					exit_with_help("The matepair file(s) specified in -m are not accessible.");
			}
			break;
		case 'c':
			if(access(argv[i], R_OK) == -1)
				exit_with_help("The config file specified in -c is not accessible.");
			if(c.config_file.length())
				exit_with_help("The config file cannot be specified twice.");
			c.config_file = argv[i];
			break;
		case 'o':
			c.output_prefix = argv[i];
			break;
		case 'k':
			c.kth = atoi(argv[i]);
			break;
		case 'l':
			c.len = atoi(argv[i]);
			break;
		case 'd':
			c.default_quality = atoi(argv[i]);
			break;
		case 'e':
			c.er_rate = atof(argv[i]);
			break;
		case 't':
			c.max_thread_num = atoi(argv[i]);
			break;
		case 'n':
			c.kmernum = atoi(argv[i]);
			break;
		default:
			exit_with_help("Undefined option.");
		}
	}

	// When a config file is in play the sequence files may still come from it,
	// so the "no sequence file" check is skipped here.
	if(c.config_file.length())
		return;

	if(c.inseqfileve.empty())
		exit_with_help("No sequence file(s) specified.");
}

/**
 * Applies the options stored in the config file named by c.config_file.
 *
 * Only the first line of the file is read. It is split on blanks (spaces,
 * tabs, and a trailing carriage return) into tokens, which are then handed to
 * parse_command_line() as if they had been typed on the command line. Runs of
 * blanks count as a single separator, so no empty tokens are produced.
 *
 * After the config options have been merged, at least one sequence file must
 * be known (from the command line or from the config file); otherwise the
 * process ends through exit_with_help().
 *
 * @param c configuration to update; c.config_file names the file to read
 */
void parse_config_file(Config & c)
{
	// Get the first line of the config file.
	ifstream in(c.config_file.c_str());
	if(!in)
		exit_with_help("The config file specified in -c is not accessible.");
	string line;
	getline(in, line);
	in.close();

	// Split the line into blank-separated tokens.
	const char separators[] = " \t\r";
	vector < string > tokens;
	string::size_type pos = line.find_first_not_of(separators);
	while(pos != string::npos)
	{
		string::size_type end = line.find_first_of(separators, pos);
		if(end == string::npos)
			end = line.length();
		tokens.push_back(line.substr(pos, end - pos));
		pos = line.find_first_not_of(separators, end);
	}
	if(tokens.empty())
		exit_with_help("The config file specified in -c has no options on its first line.");

	// Build a NUL-terminated, writable argv. The vectors own the memory, so
	// there is nothing to free by hand and every buffer has room for its
	// terminator. Slot 0 is a placeholder because parse_command_line() starts
	// reading at index 1.
	vector < vector < char > > storage(tokens.size() + 1);
	const char placeholder[] = "config";
	storage[0].assign(placeholder, placeholder + sizeof(placeholder));
	for(vector < string >::size_type i=0; i<tokens.size(); i++)
	{
		storage[i+1].assign(tokens[i].begin(), tokens[i].end());
		storage[i+1].push_back('\0');
	}

	vector < char* > argv(storage.size());
	for(vector < char* >::size_type i=0; i<argv.size(); i++)
		argv[i] = &storage[i][0];

	// Redirect to the command line parser.
	parse_command_line(static_cast<int>(argv.size()), &argv[0], c);

	// parse_command_line() skips this check whenever a config file is set, so
	// it has to be done here once all option sources have been merged.
	if(c.inseqfileve.empty())
		exit_with_help("No sequence file(s) specified.");
}

/**
 * Runs the assembly described by the configuration.
 *
 * Steps, in order:
 *   1. Print the error rate and thread limit.
 *   2. Load every sequence file. Files ending in ".frg" are read with
 *      getReadfromFRG(). Any other file is read with getRead(); its qualities
 *      come from "<file>.qual" when that name was listed with -q, otherwise
 *      from the default quality; mate pairs come from "<file>.mate" when that
 *      name was listed with -m.
 *   3. Run short_last_assembly() and write the ".stat", ".contiginfor",
 *      ".contigs" and ".singlets" files under the output prefix
 *      ("assembly" when none was given).
 *
 * Side effects on c: output_prefix may be defaulted and er_rate is lowered
 * by 0.00025 before the assembly is started.
 *
 * @param c run configuration
 */
void MAP( Config &c )
{
	GetRead Getob;

	cout<<"MAP config:"<<endl;
	cout<<"error rate "<<c.er_rate<<endl;
	cout<<"max thread num "<<c.max_thread_num<<endl;

	// The quality/mate file lists do not change while reading, so build the
	// lookup sets once instead of once per sequence file.
	const set< string > qualfileset( c.qualfileve.begin(), c.qualfileve.end() );
	const set< string > matefileset( c.matefileve.begin(), c.matefileve.end() );

	const string frg_suffix = ".frg";
	for ( size_t i = 0; i < c.inseqfileve.size(); ++i ) {
		string & seqfile = c.inseqfileve[i];

		// Guard the length first: taking the last four characters of a
		// shorter name would make substr() throw std::out_of_range.
		const bool is_frg = seqfile.size() >= frg_suffix.size()
			&& seqfile.compare( seqfile.size() - frg_suffix.size(), frg_suffix.size(), frg_suffix ) == 0;

		if ( is_frg ) {
			Getob.getReadfromFRG( seqfile );
			continue;
		}

		Getob.getRead( seqfile );

		string qualf = seqfile + ".qual";
		if ( qualfileset.find( qualf ) != qualfileset.end() ) {
			Getob.getqualfromfile( qualf );
		} else {
			Getob.getqualfromcons(c.default_quality);
		}

		string matef = seqfile + ".mate";
		if ( matefileset.find( matef ) != matefileset.end() ) {
			Getob.getmatepair(matef);
		}
	}

	if ( c.output_prefix.empty() ) {
		c.output_prefix = "assembly";
	}

	// The length estimate below indexes the first and last read, so stop
	// cleanly when nothing was loaded instead of reading out of bounds.
	if ( Getob.IdSeqVe.size() == 0 ) {
		cerr<<"Error:	No reads could be loaded from the sequence file(s)."<<endl;
		exit(1);
	}

	// Average of the first and the last read length.
	int tig_len = ( (int)Getob.IdSeqVe[0].second.size() + (int)Getob.IdSeqVe[Getob.IdSeqVe.size()-1].second.size() ) / 2;

	Graph_parameter G_para;
	G_para.min_stem_lenthr_opt = 5000;
	G_para.max_path_len_opt = 10000;
	G_para.min_cover_rate_opt = 0.5;

	double er_thr_100 = 0.105;
	double er_thr_50 = 0.165;
	double snprate = 0.00025;
	c.er_rate = c.er_rate - 0.00025;

	M_sol sol_ob;
	sol_ob.getsol();

	short_assembly_bank assbank;
	short_last_assembly( assbank, Getob.IdSeqVe, Getob.Qualve, c.len, c.er_rate, er_thr_100, er_thr_50, snprate, sol_ob, 1, c.kth, c.kth, c.kmernum, 1, c.max_thread_num, Getob.PairMa, tig_len, Getob.mate_len, Getob.mate_len_var, Getob.mate_type, G_para );

	string outstatfile = c.output_prefix + ".stat";
	string outinfofile = c.output_prefix + ".contiginfor";
	string outcontigfile = c.output_prefix + ".contigs";
	string outsinglefile = c.output_prefix + ".singlets";

	cout<<"output.."<<endl;

	Output::outcontig(outcontigfile, outinfofile, outsinglefile, outstatfile, assbank.ConVe, Getob.IdSeqVe);

	cout<<"assembly finished!"<<endl;
//	hi_ass( Get_ob, len, er_thr, symalign, kth, kmernum );	
//	hi_ass2( Get_ob, tig_len, kmernum, len, er_thr, snprate, sol_ob, symalign, kth, kmernum, G_para, max_thread_num );	
}

/**
 * Breaks s into the pieces delimited by the character c and appends them to v.
 *
 * Existing elements of v are kept. Empty pieces are preserved, so an empty
 * input appends exactly one empty string and "a,,b" appends "a", "" and "b".
 *
 * @param s text to split
 * @param c delimiter character
 * @param v receives the pieces, in order of appearance
 */
void split(const string & s, char c, vector < string > & v)
{
	string::size_type piece_begin = 0;

	for(;;)
	{
		const string::size_type delim = s.find(c, piece_begin);
		if(delim == string::npos)
			break;

		v.push_back(string(s, piece_begin, delim - piece_begin));
		piece_begin = delim + 1;
	}

	// Whatever follows the last delimiter (possibly nothing) is the final piece.
	v.push_back(string(s, piece_begin));
}

/**
 * Reports a specific problem on stderr, then prints the usage text.
 *
 * Control does not come back: the parameterless overload ends the process.
 *
 * @param error message written after the "Error:" label
 */
void exit_with_help(const char error[])
{
	std::cerr << "Error:	";
	std::cerr << error;
	std::cerr << std::endl;

	// Prints the option summary and exits.
	exit_with_help();
}

/**
 * Prints the usage text (one line per supported option) to stderr and ends
 * the process with exit status 1.
 *
 * The list mirrors the options handled by parse_command_line(), including
 * -c, which was previously accepted but not shown to the user.
 */
void exit_with_help()
{
	cerr <<"Usage:	assemble [OPTION1] [VALUE1] [[OPTION2] [VALUE2] ...]" <<endl;
	cerr <<"Options:" <<endl;
	cerr <<"-s		Sequence file(s) in FASTA format or frg format, separated by comma" <<endl;
	cerr <<"-q		Quality file(s), separated by comma" <<endl;
	cerr <<"-m		Matepair file(s), separated by comma" <<endl;
	cerr <<"-c		Config file; options are read from its first line" <<endl;
	cerr <<"-o		Output prefix" <<endl;
	cerr <<"-k		Kmer Length" <<endl;
	cerr <<"-l		Minimal overlap length" <<endl;
	cerr <<"-d		Default quality score" <<endl;
	cerr <<"-n              The number of kmer archives to write into the temporary files" <<endl;
	cerr <<"-e      Maximal overlap error rate allowed"<<endl;
	cerr <<"-t      Maximal thread number"<<endl;
	exit(1);
}

