#!/bin/python

############
# error correction
# requires Quake (Kelley DR et al. 2010) and jellyfish
############

import os
import sys
from string import *


############
def give_help():
    '''Error correction for metagenome data by Quake

Usage:
python err_cor.py specfile readfile1 readfile2 ...

Parameters are specified in specfile:
    quality-start value
    prefix value
    kmer value
    threads value
    hash_size value
    '''
    # The docstring above is the usage text: the script prints
    # give_help.__doc__ when too few arguments are given, so it lists every
    # key the spec file parser accepts (kmer was missing from the list).
    # Calling the function itself prints nothing; it only ends the program.
    sys.exit()

# Default run settings, keyed by the option names the spec file may override.
# Values stay as strings because they are joined straight into the command
# lines built further down in the script.
para = dict([
    ('quality-start', '64'),
    ('prefix', 'Read'),
    ('kmer', '17'),
    ('threads', '1'),
    ('hash_size', '2G'),
])
#########################
def pause_command_from_file( sf ):
    '''Read settings from the spec file sf into the module-level para dict.

    Each line is split on whitespace.  A line is used only when it has
    exactly two fields and the first field is a key already present in para
    (quality-start, prefix, kmer, threads, hash_size); the second field then
    replaces that key's value, kept as a string.  All other lines are
    silently ignored.

    If sf is not an existing regular file, an error is printed and the
    program exits with status 1.
    '''
    if not os.path.isfile( sf ):
        print('error %s not found' % sf)
        sys.exit(1)
    # 'with' guarantees the handle is closed, even if reading fails.
    with open( sf ) as f:
        for line in f:
            s = line.split()
            # Only known option names are accepted; para's keys are the
            # single source of truth for what may be overridden.
            if len(s) == 2 and s[0] in para:
                para[s[0]] = s[1]


##########################
if len(sys.argv) < 3:
    # The spec file plus at least one read file are required.  Stop here:
    # carrying on would either fail on sys.argv[1] or run the whole
    # pipeline with no read files.
    print(give_help.__doc__)
    sys.exit(1)

specfile = sys.argv[1]
print(specfile)
pause_command_from_file( specfile )

# Every remaining argument is a read file; all of them must exist before
# any external tool is started.
readfiles = []
for i in sys.argv[2:]:
    if not os.path.exists( i ):
        print('error %s not found' % i)
        sys.exit(1)
    readfiles.append( i )

# Write the read file names, space separated on one line, to <prefix>-files.
# This file is later handed to the correct command through its -f option.
rdfname = para['prefix']+'-files'
p1 = ' '.join( readfiles )
with open( rdfname, 'w' ) as rdf:
    rdf.write( p1 )
###########################
# run jellyfish count
# revised 2-12-2015 for jellyfish version 2.0
jffile = para['prefix']+'.jf'
#cmmd = ' '.join(['jellyfish count -q -c 8', '--quality-start', para['quality-start'], 
#    '-o', para['prefix']+'.db', '-m', para['kmer'], '-t', para['threads'], '-s', para['hash_size'], p1 ])
# jffile is used for the -o argument so the count output and the later
# dump/clean steps always refer to the same path.
cmmd = ' '.join(['jellyfish count -c 8 -o', jffile, '-m', para['kmer'], '-t', para['threads'], '-s', para['hash_size'], p1])
print(cmmd)
# A failed count leaves nothing usable for the dump step, so stop here
# instead of letting the later commands fail one after another.
if os.system(cmmd) != 0:
    print('error jellyfish count failed')
    sys.exit(1)

###########################
# generates .dbm file
# removed after revision on 2-12-2015 for jellyfish version 2.0
#countoutprefix = para['prefix']+'.db'
#dbmfile = countoutprefix+'m'
#if not os.path.isfile( countoutprefix+'_0' ):
#    print 'error', countoutprefix+'_0', 'not found'
#    exit()
#if not os.path.isfile( countoutprefix+'_1' ):
#    print 'only one .db output file'
#    os.rename( countoutprefix+'_0', dbmfile )
#else:
#    id = 0
#    dbfile = []
#    while True:
#        ss = str(id)
#        if not os.path.isfile( countoutprefix+'_'+ss ):
#            break
#        dbfile.append( countcoutprefix+'_'+ss )
#        id = id+1

#    dbf = ' '.join( dbfile )
#    cmmd = ' '.join([ 'jellyfish qmerge -s', para['hash_size'], '-m', para['kmer'], '-o', dbmfile, dbf ])
#    print cmmd
#    os.system( cmmd )

############################
# jellyfish dump (the pre-2.0 command, kept below for reference, was qdump)
# revised on 2-12-2015 for jellyfish version 2.0
qctsfile = rdfname + '.qcts'
#cmmd = ' '.join([ 'jellyfish qdump -c', dbmfile, '>', qctsfile ])
cmmd = ' '.join(['jellyfish dump -c -o', qctsfile, jffile])
print(cmmd)
# Check the exit status as well as the file: a counts file left over from
# an earlier run would otherwise hide a failed dump.
dump_status = os.system( cmmd )
if dump_status != 0 or not os.path.isfile( qctsfile ):
    # The correct step needs this counts file, so there is no point going on.
    print('error failed generating file %s' % qctsfile)
    sys.exit(1)


############################
# correct
# Runs Quake's correct on the files listed in rdfname, using the k-mer
# counts in qctsfile.
cmmd = ' '.join([ 'correct -c 1 -u --log -f', rdfname, '-m', qctsfile, '-k',
        para['kmer'], '-q', para['quality-start'], '-p', para['threads'] ])
print(cmmd)

# Keep the status instead of discarding it so the script's own exit code
# can report a failed correction; cleanup below still runs either way.
correct_status = os.system( cmmd )
if correct_status != 0:
    print('error correct failed')

# clean
# Remove the intermediate counts dump and the jellyfish database.
cmmd = ' '.join(['rm -f', qctsfile, jffile ])
print(cmmd)
os.system(cmmd)

# Exit non-zero when correct failed so calling pipelines can detect it.
sys.exit(1 if correct_status != 0 else 0)







    
