#!/usr/local/bin/python

import os, sys, datetime, argparse, pysam;
# Command-line interface.  Every option is mandatory: each one is consumed
# by a later pipeline stage, so a missing or non-numeric value is rejected
# here with a usage message instead of failing halfway through the run.
parser = argparse.ArgumentParser(
    description="Detect microinversions from the unmapped reads of a BAM file.")
parser.add_argument('-a', '--unmapped', required=True,
                    help="BAM file whose unmapped reads are analysed")
parser.add_argument('-r', '--reference', required=True,
                    help="reference genome in FASTA format")
parser.add_argument('-i', '--index', required=True,
                    help="bowtie index the read anchors are aligned against")
parser.add_argument('-v', '--erranchor', required=True, type=int,
                    help="value passed to bowtie -v for the anchor alignment")
parser.add_argument('-p', '--parallel', required=True, type=int,
                    help="value passed to bowtie -p")
parser.add_argument('-s', '--anchor', required=True, type=int,
                    help="number of bases taken from each end of a read as anchor")
parser.add_argument('-k', '--kmer', required=True, type=int,
                    help="SUB_STR_LEN value passed to detect.o")
parser.add_argument('-m', '--matchnum', required=True, type=int,
                    help="MATCH_MIN_LEN value passed to detect.o")
parser.add_argument('-e', '--errkmer', required=True, type=int,
                    help="ERROR_NUM value passed to detect.o")
parser.add_argument('-g', '--mergenum', required=True, type=int,
                    help="MERGE_DEVIATION value passed to detect.o")
parser.add_argument('-c', '--cutsize', required=True, type=int,
                    help="bases trimmed from both ends of every read (0 = no trimming)")
args = parser.parse_args(sys.argv[1:])
############################################
##################################################
now_time = datetime.datetime.now()
print(now_time)
print("Starting with the bam file!")

# Prefix for every intermediate file; later stages build their paths from it.
DIR = "./"

# Only unmapped reads longer than this many bases are extracted.
MIN_READ_LENGTH = 70

# Take the values from the parsed options instead of fixed sys.argv
# positions, so the options may be given in any order or as --name=value.
anchor = int(args.anchor)
cutsize = int(args.cutsize)
if anchor <= 0 or cutsize < 0:
    sys.exit("anchor must be positive and cutsize must not be negative")


def _output_names(read):
    """Return the record names under which *read* has to be written.

    A single-end read keeps its name.  A paired read is only written when
    its mate is unmapped as well, with "-A" appended for the first mate and
    "-B" for the second.  An empty list means the read is skipped.
    """
    base = "@" + read.qname
    if not read.is_paired:
        return [base]
    names = []
    if read.mate_is_unmapped:
        if read.is_read1:
            names.append(base + "-A")
        if read.is_read2:
            names.append(base + "-B")
    return names


def _trim_ends(seq, qual, cut):
    """Drop *cut* characters from both ends of *seq* and *qual*."""
    if cut > 0:
        return seq[cut:-cut], qual[cut:-cut]
    return seq, qual


def _write_read(name, seq, qual, anchor_len, read_out, head_out, tail_out):
    """Write one trimmed read and its two anchors.

    read_out gets "name<TAB>sequence"; head_out and tail_out get FASTQ
    records named "name/1" and "name/2" holding the first and the last
    *anchor_len* bases.  Both anchors are sliced from the already trimmed
    sequence so that sequence and quality lines always have equal length.
    """
    read_out.write(name + "\t" + seq + "\n")
    head_out.write(name + "/1" + "\n" + seq[:anchor_len] + "\n"
                   + "+" + "\n" + qual[:anchor_len] + "\n")
    tail_out.write(name + "/2" + "\n" + seq[-anchor_len:] + "\n"
                   + "+" + "\n" + qual[-anchor_len:] + "\n")


bamfile = pysam.Samfile(args.unmapped, "rb")
try:
    with open("%stemp.read" % DIR, "w") as read_out, \
            open("%stemp.head" % DIR, "w") as head_out, \
            open("%stemp.tail" % DIR, "w") as tail_out:
        for alignedread in bamfile.fetch(until_eof=True):
            if not alignedread.is_unmapped:
                continue
            read_seq = alignedread.seq
            # A record without a stored sequence cannot pass the length filter.
            if read_seq is None or len(read_seq) <= MIN_READ_LENGTH:
                continue
            record_names = _output_names(alignedread)
            if not record_names:
                continue
            read_seq, read_qual = _trim_ends(read_seq, alignedread.qual, cutsize)
            for record_name in record_names:
                _write_read(record_name, read_seq, read_qual, anchor,
                            read_out, head_out, tail_out)
finally:
    bamfile.close()

################################################
now_time = datetime.datetime.now()
print(now_time)
print("Using bowtie for anchored alignment.")

# Read the settings from the parsed options rather than from fixed
# sys.argv positions, so the option order on the command line is free.
index = args.index
erroralign = int(args.erranchor)
parallel = int(args.parallel)

# Align the head (-1) and tail (-2) anchors as a pair; the SAM output is
# redirected into temp.co for the pre-processing stage.
bowtie_status = os.system("bowtie --sam --quiet --ff -v %d -p %d -k 1 -m 1 -I 60 -X 120 %s -1 %stemp.head -2 %stemp.tail>%stemp.co"%(erroralign,parallel,index,DIR,DIR,DIR))
if bowtie_status != 0:
    # Stop here instead of feeding an empty or partial alignment to the
    # following stages; the anchor files are left in place for inspection.
    sys.exit("bowtie alignment failed (status %d)" % bowtie_status)
os.system("rm %stemp.head %stemp.tail"%(DIR,DIR))
################################################
now_time = datetime.datetime.now()
print(now_time)
print("Start pre-processing.")

samfile = pysam.Samfile("%stemp.co"%(DIR), "r")
f_file = open("%stemp.forward"%(DIR), "w")
r_file = open("%stemp.reverse"%(DIR), "w")

# SAM flag -> (destination file, strand symbol).
#   67  = paired + proper pair + first in pair
#   179 = paired + proper pair + read reverse + mate reverse + second in pair
# Records with any other flag are ignored.
strand_targets = {
    67: (f_file, "+"),
    179: (r_file, "-"),
}

for alignedread in samfile.fetch(until_eof = True):
    if not (alignedread.is_paired and alignedread.is_proper_pair):
        continue
    target = strand_targets.get(alignedread.flag)
    if target is None:
        continue
    out_file, strand = target
    ref = samfile.getrname(alignedread.tid)
    pos_s = alignedread.pos
    # The reported interval ends one anchor length past the mate's position.
    pos_e = alignedread.pnext + anchor
    fields = ["@" + alignedread.qname, ref, str(pos_s), str(pos_e), strand]
    out_file.write("\t".join(fields) + "\n")

samfile.close()
f_file.close()
r_file.close()

# Shell steps that attach the read sequences to the aligned intervals.
# They are executed strictly in the order listed.
_join_steps = [
    # Sort all three tables on their first field (the read name), as the
    # join calls below require.
    "sort -k1b,1 -o %stemp.forward %stemp.forward"%(DIR,DIR),
    "sort -k1b,1 -o %stemp.reverse %stemp.reverse"%(DIR,DIR),
    "sort -k1b,1 -o %stemp.read %stemp.read"%(DIR,DIR),
    # Append each read's sequence to its interval line and drop duplicates.
    "join -a1 %stemp.forward %stemp.read | sort -k1b,1 | uniq > %stempread.forward"%(DIR,DIR,DIR),
    "join -a1 %stemp.reverse %stemp.read | sort -k1b,1 | uniq > %stempread.reverse"%(DIR,DIR,DIR),
    # External helper; a previously disabled build step compiled it with
    #   g++ -o <DIR>change <DIR>change.cpp
    # NOTE(review): presumably it turns tempread.reverse into
    # tempread.s.reverse -- confirm against change.cpp.
    "%schange.o"%(DIR),
    # Merge the converted reverse-strand records into the forward table.
    "cat %stempread.s.reverse >> %stempread.forward"%(DIR,DIR),
    # Remove the intermediates that are no longer needed.
    "rm %stemp.co %stemp.forward %stemp.reverse %stemp.read"%(DIR,DIR,DIR,DIR),
]
for _join_step in _join_steps:
    os.system(_join_step)
######get reference file###################################
now_time = datetime.datetime.now()
print(now_time)
print("Getting reference file.")

from Bio import SeqIO

# Load every reference record, keyed by its FASTA identifier.  The path
# comes from the parsed options rather than a fixed sys.argv position.
seqs = {}
with open(args.reference) as ref_handle:
    for seq_record in SeqIO.parse(ref_handle, "fasta"):
        seqs[seq_record.id] = seq_record

# Append the matching reference slice to every joined read record.  The
# output is opened once (still in append mode) and is always closed, even
# when the input is empty or a line turns out to be malformed.
with open("%stempread.forward" % DIR) as joined_reads, \
        open("%stempco" % DIR, "a") as fr_file:
    for line in joined_reads:
        # Expected fields: name, reference id, start, end, strand, sequence.
        # A line with a different field count raises ValueError here.
        qname, ref_id, start, end, flag, seq = line.split()
        ref_slice = str(seqs[ref_id].seq[int(start):int(end)])
        fr_file.write(qname + "\t" + ref_id + "\t" + start + "\t" + end
                      + "\t" + seq + "\t" + ref_slice + "\t" + flag + "\n")

os.system("rm  %stempread.forward %stempread.reverse %stempread.s.reverse"%(DIR,DIR,DIR))
######################################################
now_time = datetime.datetime.now()
print(now_time)
print("Start detecting.")

# Detection parameters, taken from the parsed options rather than from
# fixed sys.argv positions so the option order on the command line is free.
kmer = int(args.kmer)
matchnum = int(args.matchnum)
errkmer = int(args.errkmer)
mergenum = int(args.mergenum)

# Hand over to the external detector; the anchor length is passed as SKIP_LEN.
os.system("%sdetect.o SKIP_LEN %d SUB_STR_LEN %d MATCH_MIN_LEN %d ERROR_NUM %d MERGE_DEVIATION %d"%(DIR,anchor,kmer,matchnum,errkmer,mergenum))
##########################################################
now_time = datetime.datetime.now()
print(now_time)
print("Detection ends.")

# Post-processing, executed in this order: sort the detector output in
# place, run the external rmrm.o helper, then delete the detector input.
_final_steps = [
    "sort -k2,2 -k3n,4 -o %soutput_inv %soutput_inv"%(DIR,DIR),
    "%srmrm.o"%(DIR),
    "rm %stempco"%(DIR),
]
for _final_step in _final_steps:
    os.system(_final_step)

now_time = datetime.datetime.now()
print(now_time)
# NOTE(review): the messages mention "output_i" and "o_inv" while the sort
# above works on "output_inv" -- confirm the real output file names.
print("File output_i is the MAF file.")
print("File o_inv is the list of microinversions.")
#########################################################
