#include <string>
#include <set>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <cstdlib>
using namespace std;

string getid( string &line, bool paired );

// Load the ids of the reads to discard from `infile` (one id per line) and
// classify them by how many times each id occurs.
//
// A trailing ".1" or ".2" is removed from a line before it is counted, so both
// mates of a pair collapse onto the same id.  Ids seen exactly once are added
// to `sgreads`; ids seen more than once are added to `bireads`.  Neither set is
// cleared first.  Reading stops at the first empty line or at end of file.
// If the file cannot be opened a message is printed and the program exits
// with status 1.
void readinsread( string &infile, set<string >& bireads, set<string > &sgreads )
{
	ifstream inf( infile.c_str() );
	if ( !inf.good() )
	{
		cout<<"error can not open file "<<infile<<endl; exit(1);
	}

	map< string, int > readcount;
	string line;
	// Drive the loop with getline itself so a failed read is never processed.
	while ( getline( inf, line ) )
	{
		if ( line.empty() )
			break;

		// The length guard matters: a one-character line such as "1" must not
		// be indexed at size()-2, which would wrap around and read out of bounds.
		const string::size_type len = line.size();
		if ( len >= 2 && line[len-2] == '.' && ( line[len-1] == '1' || line[len-1] == '2' ) )
			line.erase( len-2 );

		// operator[] value-initialises a missing counter to 0 before the increment.
		++readcount[line];
	}

	inf.close();

	for ( map< string, int >::const_iterator ite = readcount.begin(); ite != readcount.end(); ++ite )
	{
		if ( ite->second == 1 )
			sgreads.insert( ite->first );
		else
			bireads.insert( ite->first );
	}
}

// Copy every FASTQ record of `infile` whose id is NOT in `reads` to `outfile`.
//
// A record is taken to be four consecutive lines; the first is the header,
// from which the id is extracted with getid( line, pair_end ).  Processing
// stops at the first empty header line or at end of file.
//
// The output file is opened only after the input has been validated, so a bad
// input path no longer truncates an existing output file.  Failure to open or
// write the output is reported instead of being silently ignored.  Lines are
// terminated with '\n' rather than endl to avoid one flush per line; as a
// consequence, if getid() aborts the program on a malformed header, the most
// recently buffered output may not reach the file.
void filter( string &infile, string &outfile, set<string> &reads, bool pair_end )
{
	ifstream inf( infile.c_str() );
	if ( !inf.good () )
	{
		cout<<"error can not open file "<<infile<<endl; exit(1);
	}

	ofstream outf( outfile.c_str() );
	if ( !outf.good() )
	{
		cout<<"error can not open file "<<outfile<<endl; exit(1);
	}

	string line;
	while ( getline( inf, line ) )
	{
		if ( line.empty() )
			break;

		const string id = getid( line, pair_end );
		const bool keep = ( reads.find( id ) == reads.end() );

		// Header line first, then the three remaining lines of the record.
		// The remaining lines are always consumed so the stream stays aligned
		// on record boundaries even when the record is dropped.
		if ( keep )
			outf<<line<<'\n';
		for ( int k = 0; k < 3; ++k )
		{
			getline( inf, line );
			if ( keep )
				outf<<line<<'\n';
		}
	}

	inf.close();
	outf.close();
	// close() flushes; a failed state here means some data never reached disk.
	if ( outf.fail() )
	{
		cout<<"error can not write file "<<outfile<<endl; exit(1);
	}
}

// Extract the read id from a FASTQ header line.
//
// The line must start with '@'; everything after it is the candidate id.
//   paired == true : trailing characters are ignored until the text ends in
//                    "/1", "/2", ".1" or ".2"; that two-character mate suffix
//                    is dropped and the remainder is returned.
//   paired == false: trailing spaces and tabs are dropped and the remainder is
//                    returned.
// If the line does not start with '@', no mate suffix can be found, or nothing
// but blanks follows the '@', the header is reported on stderr and the program
// exits with status 1.
string getid( string &line, bool paired )
{
    if ( line[0] != '@' )
    {
        cerr << "unexpected fastq head "<<line<<endl;
        exit(1);
    }

    const string name = line.substr(1);

    if ( paired )
    {
        // Walk candidate lengths from the full text downwards; the first
        // length whose last two characters form a mate suffix wins.
        for ( string::size_type len = name.size(); len >= 2; --len )
        {
            const char mate = name[len-1];
            const char sep = name[len-2];
            const bool mate_ok = ( mate == '1' || mate == '2' );
            const bool sep_ok = ( sep == '/' || sep == '.' );
            if ( mate_ok && sep_ok )
                return name.substr( 0, len-2 );
        }
        cerr <<"unexpected fastq head "<<line<<endl;
        exit(1);
    }

    // Single-end: locate the last character that is neither space nor tab.
    const string::size_type last = name.find_last_not_of( " \t" );
    if ( last == string::npos )
    {
        cerr <<"unexpected fastq head "<<line<<endl;
        exit(1);
    }
    return name.substr( 0, last+1 );
}

// Filter two mate FASTQ files in lock-step.
//
// Records are read four lines at a time from `infile1` and `infile2`.  The ids
// of the two header lines (extracted with getid( line, true )) must be equal;
// otherwise the offending pair is printed and the program exits with status 1.
// A pair whose id is NOT in `reads` is copied to `outfile1` / `outfile2`; a
// pair whose id is in `reads` is dropped from both outputs.  Processing stops
// as soon as either input ends or yields an empty header line.
//
// The output files are opened only after both inputs have been validated, so a
// bad input path no longer truncates existing outputs.  Failure to open or
// write an output is reported instead of being silently ignored.  Lines are
// terminated with '\n' rather than endl to avoid one flush per line; if getid()
// aborts the program on a malformed header, the most recently buffered output
// may not reach the files.
void filter( string &infile1, string &infile2, string &outfile1, string &outfile2, set<string> &reads )
{
    ifstream inf1( infile1.c_str() );
    if ( !inf1.good() )
    {
        cerr<<"error can not open file "<<infile1<<endl;
        exit(1);
    }
    ifstream inf2( infile2.c_str() );
    if ( !inf2.good () )
    {
        cerr <<"error can not open file "<<infile2<<endl;
        exit(1);
    }

    ofstream outf1( outfile1.c_str() );
    if ( !outf1.good() )
    {
        cerr<<"error can not open file "<<outfile1<<endl;
        exit(1);
    }
    ofstream outf2( outfile2.c_str() );
    if ( !outf2.good() )
    {
        cerr<<"error can not open file "<<outfile2<<endl;
        exit(1);
    }

    string line1;
    string line2;
    while ( getline( inf1, line1 ) && getline( inf2, line2 ) )
    {
        if ( line1.empty() || line2.empty() )
        {
            break;
        }

        const string id1 = getid( line1, true );
        const string id2 = getid( line2, true );

        if ( id1 != id2 )
        {
            cout<<"unexpected read pair: "<<endl;
            cout<<id1<<endl;
            cout<<id2<<endl;
            cout<<line1<<endl;
            cout<<line2<<endl;
            // exit() does not run the destructors of local streams, so push
            // out what has been written so far before terminating.
            outf1.flush();
            outf2.flush();
            exit(1);
        }

        const bool keep = ( reads.find( id1 ) == reads.end() );

        // Header lines first, then the three remaining lines of each record.
        // The remaining lines are always consumed so both streams stay aligned
        // on record boundaries even when the pair is dropped.
        if ( keep )
        {
            outf1<<line1<<'\n';
            outf2<<line2<<'\n';
        }
        for ( int k = 0; k < 3; ++k )
        {
            getline( inf1, line1 );
            getline( inf2, line2 );
            if ( keep )
            {
                outf1<<line1<<'\n';
                outf2<<line2<<'\n';
            }
        }
    }

    inf1.close();
    inf2.close();
    outf1.close();
    outf2.close();

    // close() flushes; a failed state here means some data never reached disk.
    if ( outf1.fail() )
    {
        cerr<<"error can not write file "<<outfile1<<endl;
        exit(1);
    }
    if ( outf2.fail() )
    {
        cerr<<"error can not write file "<<outfile2<<endl;
        exit(1);
    }
}

// Print the message `s` followed by the usage text on stdout, then terminate
// the program with exit status 1.  This function does not return.
void exit_with_help( const char s[] )
{
	// One entry per output line; adjacent literals are joined by the compiler.
	static const char* const usage[] = {
		"Extract reads which are not in sread file",
		"Usage: prog -r <sreadfile> { -1 <p1> -2 <p2> | -s <sg> } -o <outputprefix>",
		"<sreadfile> file records the reads not needed",
		"<p1>        paired_end fastq file with mate <p2>",
		"<p2>        paired_end fastq file with mate <p1>",
		"<sg>        single read fastq file",
		"<prefix>    prefix of the outputfile, for pair_end reads, output <prefix>_1.fastq and <prefix>_2.fastq;"
		"for single reads, output <prefix>.fastq"
	};
	const size_t usage_lines = sizeof( usage ) / sizeof( usage[0] );

	cout<<s<<endl;
	for ( size_t idx = 0; idx < usage_lines; ++idx )
	{
		cout<<usage[idx]<<endl;
	}
	exit(1);
}

// Entry point.  Parses the command line, loads the list of unwanted reads and
// writes the filtered FASTQ file(s).
//
// Every option is a single letter preceded by '-' and followed by a value:
//   -r <sreadfile>   list of read ids to drop (required)
//   -o <prefix>      output prefix (required)
//   -1 <p1> -2 <p2>  paired-end input; writes <prefix>_1.fastq / <prefix>_2.fastq
//   -s <sg>          single-end input; writes <prefix>.fastq
// When both -1 and -2 are given the paired-end mode is used and -s is ignored.
// Any usage error prints the help text and exits with status 1 (via
// exit_with_help); a successful run returns 0.
int main( int argc, char* argv[] )
{
	bool pair_end = true;
	string p1;
	string p2;
	string sg;
	string outprefix;
	string sreadfile;

	for(int i=1; i<argc; i++)
	{
		if(argv[i][0] != '-')
			exit_with_help("Options must start with \'-\'.");

		// Test argv[i][1] first: for a bare "-" it is the terminator, and
		// argv[i][2] would lie past the end of the argument string.
		if(argv[i][1] == '\0' || argv[i][2] != '\0')
			exit_with_help("The option should be exactly one letter.");
		int option = argv[i][1];

		i++;
		if(i == argc)
			exit_with_help("The last option has no value.");

		switch(option)
		{
		case '1':
			p1 = argv[i]; break;
		case '2':
			p2 = argv[i]; break;
		case 's':
			sg = argv[i]; break;
		case 'o':
			outprefix = argv[i]; break;
		case 'r':
			sreadfile = argv[i]; break;
		default:
			exit_with_help( "Unexpected option");
		}
	}

	if ( sreadfile.empty() )
	{
		exit_with_help( "no sreadfile assigned!");
	}
	if ( outprefix.empty())
	{
		exit_with_help( "no outputprefix assigned!");
	}

	if ( !p1.empty() && !p2.empty() )
	{
		pair_end = true;

	} else if ( !p1.empty() && p2.empty() )
	{
		exit_with_help( "p2 not assigned!");
	} else if ( p1.empty() && !p2.empty() )
		exit_with_help( "p1 not assigned!" );
	else if ( !sg.empty() )
	{
		pair_end = false;
	} else
	{
		exit_with_help("no fastq files assigned!");
	}

	set< string > bireads;
	set< string > sgreads;

	readinsread( sreadfile, bireads, sgreads );

	if ( pair_end )
	{
		// Both mate files go through one call so that a pair is either kept
		// in both outputs or dropped from both.  Paired mode filters against
		// the ids that occurred more than once in the sread file.
		string outfile1 = outprefix + "_1.fastq";
		string outfile2 = outprefix + "_2.fastq";
		filter( p1, p2, outfile1, outfile2, bireads );
	} else
	{
		// Single-end mode filters against the ids that occurred exactly once.
		string outfile = outprefix + ".fastq";
		filter( sg, outfile, sgreads, false );
	}

	// Report success to the shell; every failure path exits with status 1.
	return 0;
}

