#!/usr/bin/env python

#################################
# InteMAP: Integrated Metagenomic Assembly Pipeline
# Binbin Lai
# 2015/2/13
#################################

import argparse
import os
import subprocess
import sys
from string import *


#if len(sys.argv) < 3:
#    print 'Usage: runInteMAP.py read-file libinfo-file [para-spec]'
#    exit()


# Fixed locations used by the rest of the script:
#   workdir   - directory the script was started from
#   scriptdir - directory containing this script
#   paradir   - 'Para' directory located next to this script
workdir = os.getcwd()
scriptdir = os.path.split(os.path.abspath(sys.argv[0]))[0]
paradir = os.path.sep.join((scriptdir, 'Para'))



def readargumentsfromfile( parafiles, seqfile ):
	"""Load pipeline parameters for the file-driven invocation.

	Invocation form: runInteMAP.py read-file libinfo-file para-spec-file

	parafiles -- dict updated in place.  It first receives the defaults
	    (parameter files under the script's Para directory plus the
	    filter thresholds), then any overrides found in the spec file
	    named by sys.argv[3] (only read when exactly three arguments were
	    given), and finally the paths of the generated filter option
	    files under the 'f-Filter-*' keys.
	seqfile -- list that gets sys.argv[1] and sys.argv[2] appended, in
	    that order.

	Spec file lines are whitespace separated "key value" pairs; lines
	that do not split into exactly two fields, or whose key is not
	recognised, are ignored.  Parameter-file paths are resolved against
	the working directory unless they are already absolute.

	Side effects: creates <workdir>/Para if needed and writes the three
	filter option files into it.  Exits with status 1 if the spec file
	cannot be opened.
	"""
	# Keys whose value is a path to a parameter file.
	pathkeys = ('quakeparafile', 'idbahighparafile', 'idbalowparafile',
			'bowtie2parafile', 'cabogspecfile', 'abyssparafile')
	# Keys whose value is stored verbatim as a string.
	valuekeys = ('Filter-idba-low', 'Filter-idba-high', 'Filter-abyss',
			'minHighCovLength', 'clearance', 'output')

	parafiles.update({
			'quakeparafile' : os.path.join(paradir, 'QuakeParaFile'),
			'idbahighparafile' : os.path.join(paradir, 'idbaparafile'),
			'idbalowparafile' : os.path.join(paradir, 'idbaparafile'),
			'bowtie2parafile' : os.path.join(paradir, 'bowtie2parafile'),
			'Filter-idba-low' : '50',
			'Filter-idba-high' : '30',
			'cabogspecfile' : os.path.join(paradir, 'cabogspecfile'),
			'abyssparafile' : os.path.join(paradir, 'abyssparafile'),
			'Filter-abyss' : '20',
			'minHighCovLength' : '1000000',
			'clearance' : '0',
			'output' : 'out.fa'
			})

	##################
	# read in specfile
	if len(sys.argv) == 4:
		specfile = sys.argv[3]
		print(specfile)
		try:
			f = open( specfile )
		except IOError:
			print('error when read in specfile')
			sys.exit(1)
		with f:
			for line in f:
				s = line.split()
				if len(s) != 2:
					continue
				key, value = s
				if key in pathkeys:
					# os.path.join keeps an absolute path from the spec file intact.
					parafiles[key] = os.path.join(workdir, value)
				elif key in valuekeys:
					parafiles[key] = value

	lowcov = parafiles['Filter-idba-low']
	optionfiles = (
		('f-Filter-idba-low', 'op-Filter-idba-low',
			' '.join(['-t 1 -u', lowcov, '-r 1 -L', lowcov, '-C 0 -d 0'])),
		('f-Filter-idba-high', 'op-Filter-idba-high',
			' '.join(['-t 1 -l', parafiles['Filter-idba-high'], '-C 0 -d 0 -r 0'])),
		('f-Filter-abyss', 'op-Filter-abyss',
			' '.join(['-t 1 -l', parafiles['Filter-abyss'], '-C 0 -d 0 -r 0'])),
	)

	localparadir = os.path.join(workdir, 'Para')
	if not os.path.isdir(localparadir):
		os.mkdir(localparadir)

	# Write each option file straight into Para and close it before it is used.
	for key, filename, options in optionfiles:
		path = os.path.join(localparadir, filename)
		with open(path, 'w') as outf:
			outf.write(options)
		parafiles[key] = path

	seqfile.append(sys.argv[1])
	seqfile.append(sys.argv[2])
	#libraryinfofile = os.path.abspath( libraryinfofile )

	
	

def readargumentbycommondline(parafiles, seqfile ):
	"""Parse command-line options and generate the per-run parameter files.

	parafiles -- dict updated in place with the absolute paths of the
	    generated parameter files plus the 'minHighCovLength',
	    'clearance' ("1" or "0") and 'output' settings, all as strings.
	seqfile -- list that gets the read-file and library-info-file
	    arguments appended, in that order.

	Side effects: creates <workdir>/Para if needed and writes the filter
	option files, the ABySS, IDBA-UD, Quake and bowtie2 parameter files
	and the CABOG spec file into it.  The directory is only created once
	the arguments have parsed successfully, so -h or a usage error
	leaves no files behind.
	"""
	parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter,
		epilog='''\
Alternatively you can assign parameters via a file.
Usage: runInteMAP.py read-file libinfo-file para-spec-file
See MANUAL for details.''')
	parser.add_argument('OriReadFile', metavar='sequence_position_file', 
		help='keeps the directories of sequencing reads in the fastq formats')
	parser.add_argument('libraryinfofile', metavar='libinfo_file', 
		help='keeps the information of the library for sequencing reads')
	parser.add_argument('-o', '--outfile', default='out.fa',
		help='Output contig file')
	parser.add_argument('-t', '--thread_num', type=int, default=2,
		help='number of threads, default=2')
	parser.add_argument('-q', '--quality_start', type=int, choices=[33, 64], default=33,
		help='Quality value ascii start, default=33' )
	parser.add_argument('-l', '--minHighCovLength', type=int, default=1000000,
		help="the high coverage length threshold, above which InteMAP will deem that high-coverage species exist in the community, and run IDBA-UD and ABySS on the 'corrected read set', default=1000000")
	parser.add_argument('--k_for_abyss', type=int, default=61, 
		help='kmer size for ABySS, default=61')
	parser.add_argument('--min_k_for_idba', type=int, default=23,
		help='minimal kmer size for IDBA_UD, default=23')
	parser.add_argument('--high_cov_idba', type=int, default=30,
		help='lower bound for IDBA-contigs of high coverage, default=30')
	parser.add_argument('--low_cov_idba', type=int, default=50,
		help='higher bound for IDBA-contigs of low coverage, default=50')
	parser.add_argument('--high_cov_abyss', type=int, default=20,
		help='lower bound for ABySS-contigs of high coverage, default=20')
	parser.add_argument('-c', '--clearance', action='store_true',
		help='make clearance of intermediate output files, default=False')

	args = parser.parse_args()

	localparadir = os.path.join(workdir, 'Para')
	if not os.path.isdir(localparadir):
		os.mkdir(localparadir)

	# t1 is the requested thread count capped at 6; t2 is the uncapped count.
	t1 = min(6, args.thread_num)
	t2 = args.thread_num
	cabogspec = ''.join([
		"utgErrorRate=0.12\novlErrorRate=0.14\ncnsErrorRate=0.14\ncgwErrorRate=0.14\n",
		"doOverlapBasedTrimming=0\nmerSize=14\noverlapper=ovl\novlMinLen=23\n",
		"ovlThreads=%d\nmerOverlapperThreads=%d\nmerOverlapperExtendConcurrency =%d\n" % (t1, t1, t1),
		"merOverlapperSeedConcurrency=%d\novlConcurrency=%d\novlCorrConcurrency=%d\n" % (t1, t2, t2),
		"frgCorrThreads = %d\nfrgCorrConcurrency = %d\ncnsConcurrency = %d\n" % (t2, t2, t2),
		"frgCorrBatchSize=199995\novlCorrBatchSize=199995\n",
		"doExtendClearRanges=1\nunitigger = bogart\ndoToggle=1\n",
	])

	# (file name inside Para, parafiles keys pointing at it, file content)
	generated = [
		('op-Filter-idba-low', ('f-Filter-idba-low',),
			"-t 1 -u %d -r 1 -L %d -C 0 -d 0" % (args.low_cov_idba, args.low_cov_idba)),
		('op-Filter-idba-high', ('f-Filter-idba-high',),
			"-t 1 -l %d -C 0 -d 0 -r 0" % args.high_cov_idba),
		('op-Filter-abyss', ('f-Filter-abyss',),
			"-t 1 -l %d -C 0 -d 0 -r 0" % args.high_cov_abyss),
		('abyssparafile', ('abyssparafile',),
			"k=%d n=5 np=%d" % (args.k_for_abyss, args.thread_num)),
		# Both IDBA-UD runs share one parameter file.
		('idbaparafile', ('idbalowparafile', 'idbahighparafile'),
			"--mink %d --pre_correction" % args.min_k_for_idba),
		('QuakeParaFile', ('quakeparafile',),
			"quality-start %d\nhash_size 2G\nkmer 17\nthreads %d" % (args.quality_start, args.thread_num)),
		('bowtie2parafile', ('bowtie2parafile',),
			"--phred%d" % args.quality_start),
		('cabogspecfile', ('cabogspecfile',), cabogspec),
	]

	# Write each file straight into Para and close it before it is used.
	for filename, keys, content in generated:
		path = os.path.join(localparadir, filename)
		with open(path, 'w') as outf:
			outf.write(content)
		for key in keys:
			parafiles[key] = path

	parafiles['minHighCovLength'] = "%d" % args.minHighCovLength
	if args.clearance:
		parafiles['clearance'] = "1"
	else:
		parafiles['clearance'] = "0"

	parafiles['output'] = args.outfile

	seqfile.append(args.OriReadFile)
	seqfile.append(args.libraryinfofile)

# Shared state filled in by the argument readers and the input parsing below.
parafiles = {}       # parameter name -> value or parameter-file path
pairreads_ve = []    # one [fastq, fastq] pair per line of the read file
libinfo_ve = []      # raw lines of the library info file
seqfile = []         # [read-file, libinfo-file]

# File mode is the three-positional-argument form.  Arguments that start
# with '-' are options, so e.g. "reads lib -c" must go to argparse instead.
if len(sys.argv) == 4 and not any(a.startswith('-') for a in sys.argv[1:]):
    print("@InteMAP: read arguments from files")
    readargumentsfromfile( parafiles, seqfile )
else:
    readargumentbycommondline( parafiles, seqfile )

print("scrptdir: %s" % scriptdir)
print('workdir: %s' % workdir)

OriReadFile = seqfile[0]
libraryinfofile = seqfile[1]

# Open both input files before parsing either, so a missing file is
# reported before any per-line validation starts.
try:
    f_r = open( OriReadFile )
except IOError:
    print('error when read in Read-file %s' % OriReadFile)
    sys.exit(1)
try:
    f_l = open( libraryinfofile )
except IOError:
    f_r.close()
    print('error when read in libinfo-file %s' % libraryinfofile)
    sys.exit(1)

# Read file: every non-blank line must name two existing fastq files.
with f_r:
    for line in f_r:
        s = line.split()
        if len(s) == 2:
            for i in s:
                if not os.path.exists(i):
                    print('%s not found' % i)
                    sys.exit(1)
            pairreads_ve.append( s )
        elif len(s) == 1:
            print('Error read file line: ' + line.strip())
            print('InteMAP only support paired-end fastq files. Please write two paired fastq files in one line, seperated by a space')
            sys.exit(1)

# Library info file: every line must mention all three options.
with f_l:
    for line in f_l:
        for option in ('-libraryname', '-insertsize', '-type'):
            if option not in line:
                print('Error line in '+libraryinfofile+':')
                print('NO %s specified!' % option)
                sys.exit(1)
        libinfo_ve.append(line)

if len(pairreads_ve) != len(libinfo_ve):
    print('Error! The number of read file lines and the number of libraries are not the same: %d : %d'
          % (len(pairreads_ve), len(libinfo_ve)))
    sys.exit(1)

# Echo the effective configuration.
Tag = '@IntegMAP:'
print('%s OriReadFile: %s' % (Tag, OriReadFile))
for i in pairreads_ve:
    print(i)
print('%s LibraryInfoFile %s' % (Tag, libraryinfofile))
for i in libinfo_ve:
    print(i)
print('%s Parameters:' % Tag)
for k, v in parafiles.items():
    print('%s %s %s' % (Tag, k, v))


########################
# pipeline driver
########################
# Each stage is skipped when its output file already exists.  Stages are
# run as argument lists (no shell), so paths containing spaces survive,
# and a non-zero exit status aborts the whole pipeline.

def _run_step(args, failmsg, cwd=None):
    """Echo and run one external command; exit with status 1 if it fails.

    args -- command as a list of strings.
    failmsg -- text printed after the tag when the command cannot be
        started or returns a non-zero status.
    cwd -- directory to run the command in; None keeps the current one.
    """
    print('%s %s' % (Tag, ' '.join(args)))
    try:
        status = subprocess.call(args, cwd=cwd)
    except OSError:
        # The executable itself could not be started.
        status = -1
    if status != 0:
        print('%s %s (exit status %d)' % (Tag, failmsg, status))
        sys.exit(1)


totreadfile = 'tot_read_file'
correadfile = 'cor_read_file'
mapandfilter = os.path.join(scriptdir, 'gomapandfilter.sh')

########################
# run errcor
########################
if not os.path.exists( totreadfile ) and not os.path.exists( correadfile ):
    Errcor = os.path.join(scriptdir, 'runerrcor.py')
    if not os.path.isfile( Errcor ):
        print("error no runerrcor.py found")
        sys.exit(1)
    _run_step(['python', Errcor, OriReadFile, parafiles['quakeparafile']],
              'failed in errcor')

########################
# run idba on tot read set
########################
runidba = os.path.join(scriptdir, 'runidba.py')
if not os.path.isfile( runidba ):
    print('%s error no runidba.py found' % Tag)
    sys.exit(1)
idbalowdir = 'IDBA_UD_low.d'
if not os.path.exists(os.path.join(idbalowdir, 'idba.ctg.fa')):
    _run_step(['python', runidba, parafiles['idbalowparafile'], totreadfile, idbalowdir],
              'failed in runidba low')

##########################
# map and filter idba
if not os.path.exists(os.path.join(workdir, idbalowdir, 'idba.flt.fa')):
    _run_step(['bash', mapandfilter, 'idba', os.path.join(workdir, totreadfile),
               parafiles['bowtie2parafile'], '1', parafiles['f-Filter-idba-low']],
              'failed in map and filter idba low',
              cwd=os.path.join(workdir, idbalowdir))

#########################
# run cabog
########################
runcabog = os.path.join(scriptdir, 'runcabog.py')
cabogdir = 'CABOG.d'
if not os.path.exists(os.path.join(cabogdir, 'cabog.ctg.fa')):
    _run_step(['python', runcabog, parafiles['cabogspecfile'], totreadfile, libraryinfofile],
              'failed in run cabog')

########################
# map cabog contig
########################
if not os.path.exists(os.path.join(cabogdir, 'cabog_0.sam')):
    _run_step(['bash', mapandfilter, 'cabog', os.path.join(workdir, totreadfile),
               parafiles['bowtie2parafile'], '0'],
              'failed in map cabog',
              cwd=os.path.join(workdir, cabogdir))

########################
# decide whether to assemble the high coverage part
########################
# The high coverage stages run unless FilterCtgLength exists and its
# first line holds a length below the minHighCovLength parameter.
HighCov = 1
filterlengthfile = os.path.join(idbalowdir, 'FilterCtgLength')
if os.path.exists( filterlengthfile ):
    with open( filterlengthfile ) as lf:
        firstline = lf.readline().strip()
    try:
        highcovlength = int(firstline)
    except ValueError:
        print('%s error: cannot read a length from %s' % (Tag, filterlengthfile))
        sys.exit(1)
    if highcovlength < int(parafiles['minHighCovLength']):
        HighCov = 0
if HighCov == 1:
    print('%s Assemble High Cov Seq' % Tag)
else:
    print('%s NOT Assemble High Cov Seq' % Tag)

if HighCov == 1:
    ########################
    # run abyss on cor_read_set
    ########################
    runabyss = os.path.join(scriptdir, 'runabyss.py')
    abyssdir = 'ABySS.d'
    if not os.path.exists(os.path.join(abyssdir, 'abyss.ctg.fa')):
        _run_step(['python', runabyss, parafiles['abyssparafile'], correadfile],
                  'failed in run abyss')

    ########################
    # map and filter abyss contigs
    ########################
    if not os.path.exists(os.path.join(abyssdir, 'abyss.flt.fa')):
        _run_step(['bash', mapandfilter, 'abyss', os.path.join(workdir, correadfile),
                   parafiles['bowtie2parafile'], '1', parafiles['f-Filter-abyss']],
                  'failed in map and filter abyss',
                  cwd=os.path.join(workdir, abyssdir))

    ############################
    # run idba on cor_read_set
    ###########################
    idbahighdir = 'IDBA_UD_high.d'
    if not os.path.exists(os.path.join(idbahighdir, 'idba.ctg.fa')):
        _run_step(['python', runidba, parafiles['idbahighparafile'], correadfile, idbahighdir],
                  'failed in run idba high')

    ############################
    # map and filter idba contigs
    ############################
    if not os.path.exists(os.path.join(idbahighdir, 'idba.flt.fa')):
        _run_step(['bash', mapandfilter, 'idba', os.path.join(workdir, correadfile),
                   parafiles['bowtie2parafile'], '1', parafiles['f-Filter-idba-high']],
                  'failed in map and filter idba high',
                  cwd=os.path.join(workdir, idbahighdir))

##########################
# merge the assemblies
#########################
mergeprocess = os.path.join(scriptdir, 'mergeprocess.sh')
if not os.path.exists('out.fa'):
    _run_step(['bash', mergeprocess, parafiles['bowtie2parafile'], str(HighCov)],
              'failed in merge assembly')

# Optional removal of the intermediate directories.
if int(parafiles['clearance']) == 1:
    print("Clearance")
    cleanup = ['rm', '-f', '-r', 'Para', 'ABySS.d', 'CABOG.d', 'IDBA_UD_high.d',
               'IDBA_UD_low.d', 'merge-HighCov-LowCov', 'merge-IDBA-CABOG',
               'merge-IDBAcor-ABySS']
    print(' '.join(cleanup))
    subprocess.call(cleanup)

# Move the merged contigs to the requested output name.
if parafiles['output'] != 'out.fa':
    rename = ['mv', 'out.fa', parafiles['output']]
    print(' '.join(rename))
    subprocess.call(rename)

if os.path.exists(parafiles['output']):
    print('Done!')


sys.exit()


