#!/usr/bin/env python
# -*- coding:UTF-8 -*-
'''
Author: Li Fajin

'''
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from .__init__ import __version__
import sys
import os
import pysam
import itertools
from itertools import groupby
import numpy as np
import pandas as pd
from Bio.Seq import translate
from optparse import OptionParser


class bam_file_attr(object):
	"""Plain record describing one BAM input.

	The four constructor arguments are stored unchanged as the attributes
	bamName, bamLen, bamOffset and bamLegend.
	"""
	def __init__(self,bamName,bamLen,bamOffset,bamLegend):
		self.bamName,self.bamLen=bamName,bamLen
		self.bamOffset,self.bamLegend=bamOffset,bamLegend

class fasta_attrbution(object):
	"""Plain record describing one fasta input.

	Keeps the constructor arguments unchanged as the attributes
	fastaName and fastaLegend.
	"""
	def __init__(self,fastaName,fastaLegend):
		self.fastaName,self.fastaLegend=fastaName,fastaLegend

def parse_gtfFile(gtfFile):
	'''Extract protein-coding transcript information from a standard GTF file.

	Only rows whose feature column is "transcript" and whose text contains
	transcript_biotype "protein_coding" or transcript_type "protein_coding"
	are recorded. Blank lines and lines starting with "#" are skipped.

	Returns a tuple of eight dicts, in this order:
	  transID2GeneIDDict     transcript id -> gene id
	  geneIDNameDict         gene id -> gene name (gene id when gene_name is absent)
	  proteinCodingGeneDict  gene id -> set of its protein-coding transcript ids
	  transWithStrand        transcript id -> strand column
	  transID_start_coor     transcript id -> start column (string)
	  transID_stop_coor      transcript id -> end column (string)
	  geneID_Biotype_Dict    gene id -> biotype of the last transcript seen
	  transID2ChromDict      transcript id -> chromosome column

	Raises KeyError when a data row has fewer than 9 tab-separated fields.
	'''
	transID2GeneIDDict={}
	geneIDNameDict={}
	proteinCodingGeneDict={}
	transWithStrand={}
	transID_start_coor={}
	transID_stop_coor={}
	geneID_Biotype_Dict={}
	transID2ChromDict={}
	with open(gtfFile,'r') as f:
		for line in f:
			record=line.strip()
			## the emptiness test must come first: indexing an empty string raises IndexError
			if (not record) or record[0] == "#":
				continue
			fields=record.split("\t")
			## validate the column count before any column is indexed
			if len(fields) < 9:
				raise KeyError("Sorry, you get a unregular GTF file. Please check it again.")
			if fields[2] != 'transcript':
				continue
			hasBiotype='transcript_biotype "protein_coding"' in record
			hasType='transcript_type "protein_coding"' in record
			if not (hasBiotype or hasType):
				continue
			## attributes are only parsed for the rows that are actually used
			KeyDesc={i.strip().split(" ")[0]:i.strip().split(" ")[1].strip('"') for i in fields[8].strip(';').split('; ')}
			geneID=KeyDesc['gene_id']
			transID=KeyDesc['transcript_id']
			transID2GeneIDDict[transID]=geneID
			transWithStrand[transID]=fields[6]
			transID_start_coor[transID]=str(fields[3])
			transID_stop_coor[transID]=str(fields[4])
			transID2ChromDict[transID]=fields[0]
			## some genes have no gene_name; fall back to the gene id
			geneIDNameDict[geneID]=KeyDesc.get('gene_name',geneID)
			## transcript_biotype takes precedence when both spellings are present
			if hasBiotype:
				geneID_Biotype_Dict[geneID]=KeyDesc['transcript_biotype']
			else:
				geneID_Biotype_Dict[geneID]=KeyDesc['transcript_type']
			proteinCodingGeneDict.setdefault(geneID,set()).add(transID)
	return transID2GeneIDDict,geneIDNameDict,proteinCodingGeneDict,transWithStrand,transID_start_coor,transID_stop_coor,geneID_Biotype_Dict,transID2ChromDict

def parse_coorFile(coorFile):
	'''Read start/stop codon coordinates from a tab-separated coordinate file.

	Each non-blank line is split on tabs: column 1 is used as the transcript id,
	column 2 as the start codon coordinate and column 3 as the stop codon
	coordinate. Coordinates are kept as strings. Blank lines are skipped.

	Returns (startCodonCoorDict, stopCodonCoorDict), both keyed by transcript id.
	Raises IndexError when a non-blank line has fewer than three columns.
	'''
	startCodonCoorDict={}
	stopCodonCoorDict={}
	with open(coorFile,'r') as f:
		for coors in f:
			record=coors.strip()
			## a blank (often trailing) line would otherwise fail on columns[1]
			if not record:
				continue
			columns=record.split('\t')
			startCodonCoorDict[columns[0]]=columns[1]
			stopCodonCoorDict[columns[0]]=columns[2]
	return startCodonCoorDict,stopCodonCoorDict

def get_trans_length_dict(transcriptFile):
	'''
	Build a dict mapping each transcript id in a fasta file to its sequence length.

	A line whose first non-blank character is ">" starts a new record; the id is
	the header text up to the first space, with the ">" and surrounding
	whitespace (including the line break) removed. The length is the summed
	length of the stripped sequence lines that follow. Blank lines and any lines
	before the first header are ignored. When an id occurs more than once the
	last record wins.
	'''
	trans_length_dict={}
	trans_id=None
	## "with" guarantees the handle is closed, also when parsing fails
	with open(transcriptFile,'r') as transFile:
		for line in transFile:
			record=line.strip()
			if not record:
				continue
			if record[0] == ">":
				## stripping first keeps the newline out of ids that have no description
				trans_id=record.strip(">").split(" ")[0]
				trans_length_dict[trans_id]=0
			elif trans_id is not None:
				trans_length_dict[trans_id]+=len(record)
	return trans_length_dict

def get_longest_transcripts_information(coorFile,transcriptFile,gtfFile,longestTransFile):
	'''
	Write a tab-separated table describing the longest isoform of each protein coding gene.

	Steps:
	1) parse the GTF (parse_gtfFile), the coordinate file (parse_coorFile) and
	   the transcript fasta (get_trans_length_dict);
	2) for every protein coding gene pick the isoform with the largest sequence
	   length (isoforms missing from the fasta rank lowest);
	3) keep only the picked isoforms that have a start codon, a stop codon and a
	   fasta entry: selectTrans;
	4) write one row per transcript in selectTrans, sorted by transcript id, to
	   longestTransFile under a 14-column header.

	The number of selected transcripts is reported on stderr. Returns None.
	'''
	(transID2GeneIDDict,geneIDNameDict,proteinCodingGeneDict,transWithStrand,
		transID_start_coor,transID_stop_coor,geneID_Biotype_Dict,transID2ChromDict)=parse_gtfFile(gtfFile)
	startCodonCoorDict,stopCodonCoorDict=parse_coorFile(coorFile)
	transLengthDict=get_trans_length_dict(transcriptFile)
	## select the longest transcript for each gene; .get() avoids a KeyError for
	## isoforms that are annotated in the GTF but absent from the fasta
	selectLongestTrans=set()
	for trans_ids in proteinCodingGeneDict.values():
		ranked=sorted(trans_ids,key=lambda x: transLengthDict.get(x,-1))
		selectLongestTrans.add(ranked[-1])
	## select transcripts with both start codon and stop codon
	transWithBothStartAndStopCodon=set(startCodonCoorDict).intersection(stopCodonCoorDict)
	selectTrans=transWithBothStartAndStopCodon.intersection(transLengthDict)
	selectTrans=selectTrans.intersection(selectLongestTrans)
	print(str(len(selectTrans))+'  transcripts will be used in the follow analysis.\n', file=sys.stderr)
	## output the longest transcript of each gene
	header=("chrom","trans_id","strand","gene_id","gene_name","transcript_biotype","gene_start","gene_stop","CDS_start","CDS_stop","CDS_length","5UTR_length","3UTR_length","transcript_length")
	with open(longestTransFile,'w') as fout:
		fout.write("\t".join(header)+"\n")
		## iterate the filtered set: every id here is guaranteed to have
		## start/stop coordinates and a length, so no lookup below can fail
		for ltid in sorted(selectTrans):
			geneID=transID2GeneIDDict[ltid]
			startCoor=int(startCodonCoorDict[ltid])
			stopCoor=int(stopCodonCoorDict[ltid])
			transLength=int(transLengthDict[ltid])
			cds_length=stopCoor-startCoor+1
			Five_UTR_length=startCoor-1
			Three_UTR_length=transLength-stopCoor
			row=(transID2ChromDict[ltid],ltid,transWithStrand[ltid],geneID,geneIDNameDict[geneID],geneID_Biotype_Dict[geneID],
				transID_start_coor[ltid],transID_stop_coor[ltid],startCodonCoorDict[ltid],stopCodonCoorDict[ltid],
				cds_length,Five_UTR_length,Three_UTR_length,transLengthDict[ltid])
			fout.write("\t".join(str(value) for value in row)+"\n")

def get_all_transcripts_information(coorFile,transcriptFile,gtfFile,allTranscriptsInfo):
	'''
	Write a tab-separated table describing every transcript listed in coorFile.

	"transcript" rows of the GTF supply chromosome, strand, start/end, gene id,
	gene name (gene id when gene_name is absent) and biotype (transcript_biotype,
	else transcript_type). Start/stop codon coordinates come from parse_coorFile
	and transcript lengths from get_trans_length_dict. One row per transcript id
	of coorFile, sorted by id, is written to allTranscriptsInfo under a
	14-column header. Returns None.

	Raises KeyError for a GTF data row with fewer than 9 fields, or when a
	transcript of coorFile is missing from the GTF or the fasta; raises IOError
	when a transcript row carries neither biotype attribute.
	'''
	transID_geneID_Dict={}
	geneID_Name_Dict={}
	geneID_Biotype_Dict={}
	transID_strand_Dict={}
	transID_start_coor={}
	transID_stop_coor={}
	transID_chrom_dict={}
	with open(gtfFile) as fin:
		for line in fin:
			record=line.strip()
			if (not record) or record[0] == "#":
				continue
			fields=record.split("\t")
			## validate the column count before any column is indexed
			if len(fields) < 9:
				raise KeyError("Sorry, you get a unregular GTF file. Please check it again.")
			if fields[2] != 'transcript':
				continue
			## attributes are only parsed for the rows that are actually used
			KeyDesc={i.strip().split(" ")[0]:i.strip().split(" ")[1].strip('"') for i in fields[8].strip(';').split('; ')}
			transID=KeyDesc['transcript_id']
			geneID=KeyDesc['gene_id']
			transID_geneID_Dict[transID]=geneID
			transID_strand_Dict[transID]=fields[6]
			transID_start_coor[transID]=str(fields[3])
			transID_stop_coor[transID]=str(fields[4])
			transID_chrom_dict[transID]=fields[0]
			## some genes dont have gene_name but everyone has the gene_id
			geneID_Name_Dict[geneID]=KeyDesc.get('gene_name',geneID)
			if 'transcript_biotype' in KeyDesc:
				biotype=KeyDesc['transcript_biotype']
			elif 'transcript_type' in KeyDesc:
				biotype=KeyDesc['transcript_type']
			else:
				raise IOError("There is no transcript_biotype annotation in your GTF file!")
			geneID_Biotype_Dict[geneID]=biotype
	## start_codon / stop_codon coordinates on the transcript
	startCodonCoorDict,stopCodonCoorDict=parse_coorFile(coorFile)
	## transcript lengths
	transLengthDict=get_trans_length_dict(transcriptFile)
	## write the collected information
	header=("chrom","trans_id","strand","gene_id","gene_name","transcript_biotype","gene_start","gene_stop","CDS_start","CDS_stop","CDS_length","5UTR_length","3UTR_length","transcript_length")
	with open(allTranscriptsInfo,"w") as fout:
		fout.write("\t".join(header)+"\n")
		for fn in sorted(startCodonCoorDict.keys()):
			chrom=transID_chrom_dict[fn]
			geneID=transID_geneID_Dict[fn]
			startCoor=int(startCodonCoorDict[fn])
			stopCoor=int(stopCodonCoorDict[fn])
			cds=stopCoor-startCoor+1
			Five_UTR=startCoor-1
			Three_UTR=int(transLengthDict[fn])-stopCoor
			row=(chrom,fn,transID_strand_Dict[fn],geneID,geneID_Name_Dict[geneID],geneID_Biotype_Dict[geneID],
				transID_start_coor[fn],transID_stop_coor[fn],startCodonCoorDict[fn],stopCodonCoorDict[fn],
				cds,Five_UTR,Three_UTR,transLengthDict[fn])
			fout.write("\t".join(str(value) for value in row)+"\n")


def reload_transcripts_information(longestTransFile):
	'''
	Load a 14-column transcript table back into lookup structures.

	Blank lines and the header line (first column equal to 'chrom') are skipped.
	From each remaining row the chromosome (col 1), transcript id (col 2),
	gene id (col 4), gene name (col 5), start codon (col 9), stop codon (col 10),
	CDS length (col 11) and transcript length (col 14) are read; the last four
	are converted to int.

	Returns (selectTrans, transLengthDict, startCodonCoorDict, stopCodonCoorDict,
	transID2geneID, transID2geneName, cdsLengthDict, transID2ChromDict), where
	selectTrans is the set of transcript ids and every dict is keyed by it.
	The number of transcripts is reported on stderr.
	'''
	selectTrans=set()
	transLengthDict,cdsLengthDict={},{}
	startCodonCoorDict,stopCodonCoorDict={},{}
	transID2geneID,transID2geneName,transID2ChromDict={},{},{}
	with open(longestTransFile,'r') as handle:
		for raw in handle:
			stripped=raw.strip()
			if stripped=='':
				continue
			cols=stripped.split("\t")
			if cols[0] == 'chrom':
				continue
			## columns are read in the same order as they are stored below
			chrom,transID,geneID,geneName=cols[0],cols[1],cols[3],cols[4]
			startCodon=int(cols[8])
			stopCodon=int(cols[9])
			cds_length=int(cols[10])
			transLength=int(cols[13])
			selectTrans.add(transID)
			transLengthDict[transID]=transLength
			startCodonCoorDict[transID]=startCodon
			stopCodonCoorDict[transID]=stopCodon
			transID2geneID[transID]=geneID
			transID2geneName[transID]=geneName
			cdsLengthDict[transID]=cds_length
			transID2ChromDict[transID]=chrom
	print(str(len(selectTrans))+'  transcripts will be used in the follow analysis.\n', file=sys.stderr)
	return selectTrans,transLengthDict,startCodonCoorDict,stopCodonCoorDict,transID2geneID,transID2geneName,cdsLengthDict,transID2ChromDict



def getWindowsVector(upLength,downLength,transVector,inCoor):
	"""
	Cut a fixed-size window around inCoor out of one transcript vector.

	The window has upLength+downLength+1 slots, with inCoor (0-based index into
	transVector) mapped to slot upLength. Slots that fall outside the transcript
	stay zero.

	Returns (windowsVector, posVector): the float64 window holding the copied
	values, and an int64 vector that is 1 where the window overlaps the
	transcript and 0 elsewhere.
	"""
	windowSize=int(upLength+downLength+1)
	windowsVector=np.zeros(windowSize,dtype="float64")
	posVector=np.zeros(windowSize,dtype="int64")
	transLen=len(transVector)

	## left edge: clip at the transcript start when the upstream part is too short
	if inCoor <= upLength:
		srcStart,dstStart=0,upLength-inCoor
	else:
		srcStart,dstStart=inCoor-upLength,0

	## right edge: clip at the transcript end when the downstream part is too short
	## (slice ends are exclusive, hence transLen rather than transLen-1)
	if transLen-(inCoor+1) <= downLength:
		srcEnd,dstEnd=transLen,upLength-inCoor+transLen
	else:
		srcEnd,dstEnd=inCoor+1+downLength,len(windowsVector)

	windowsVector[dstStart:dstEnd]+=transVector[srcStart:srcEnd]
	posVector[dstStart:dstEnd]+=1
	return (windowsVector,posVector)
def lengths_offsets_split(value):
	'''Turn a comma separated string such as "28,29,30" into a list of ints.

	Raises ValueError when an item cannot be converted by int().
	'''
	return [int(item) for item in value.split(',')]

def get_trans_frame_counts(ribo_fileobj, transcript_name, read_lengths, read_offsets, transLength, startCoor, stopCoor):
	"""For each mapped read of the given transcript in the BAM file, get the P-site and codon unit reads density.

	ribo_fileobj -- BAM file object exposing fetch(name) (e.g. pysam AlignmentFile)
	transcript_name -- name of the transcript to get counts for
	read_lengths -- "ALL" to count every read at its 5' position, otherwise a
	                comma separated string of read lengths to keep
	read_offsets -- comma separated offsets, paired by position with read_lengths
	                (ignored when read_lengths is "ALL")
	transLength -- the length of the transcript
	startCoor -- the coordinate of the first base of start codon, 0-based
	stopCoor -- the coordinate of the first base of stop codon, 0-based

	Records whose flag is exactly 16 or 272 (reverse strand, without/with the
	secondary bit) are skipped.

	Returns (read_counts, read_counts_frameSum, total_reads, cds_reads):
	per-nucleotide counts over the transcript, per-codon counts between
	startCoor and stopCoor, the number of accepted reads and the sum of the
	per-codon counts. A read whose P-site falls outside the transcript is
	reported on stderr and left out of read_counts, but still counted in
	total_reads.
	"""
	read_counts = np.zeros(transLength,dtype="int64")
	total_reads = 0
	if read_lengths == "ALL" : ## RNA
		for record in ribo_fileobj.fetch(transcript_name):
			if record.flag == 16 or record.flag == 272:
				continue
			total_reads += 1
			read_counts[record.pos]+=1
	else:
		## parse the length/offset pairs once instead of once per read
		length_offset_pairs=list(zip(lengths_offsets_split(read_lengths),lengths_offsets_split(read_offsets)))
		for record in ribo_fileobj.fetch(transcript_name):
			if record.flag == 16 or record.flag == 272:
				continue
			for R_length, R_offset in length_offset_pairs:
				if record.query_length != R_length:
					# ignore other reads/lengths
					continue
				position = record.pos + R_offset ## transform into the position of P-site
				total_reads += 1
				## numpy raises IndexError (not KeyError) past the end and silently
				## wraps negative indices, so the range is checked explicitly
				if 0 <= position < transLength:
					read_counts[position]+=1
				else:
					sys.stderr.write("Dont has this position after offset : transcript_name -> position"+" "+transcript_name+" -> "+str(position)+"\n")
	#get trans counts for each 3 frames
	read_counts_frame0=read_counts[(startCoor+0):(stopCoor-2):3]
	read_counts_frame1=read_counts[(startCoor+1):(stopCoor-1):3]
	read_counts_frame2=read_counts[(startCoor+2):(stopCoor-0):3]
	read_counts_frameSum=read_counts_frame0+read_counts_frame1+read_counts_frame2
	cds_reads=sum(read_counts_frameSum)
	return read_counts,read_counts_frameSum,total_reads,cds_reads


def RPKM_of_all_genes(in_bamFile,in_selectTrans,in_transLengthDict,in_startCodonCoorDict,in_stopCodonCoorDict,in_readLengths,in_readOffset,Type):
	'''Calculate the RPKM values for the CDS region or the whole transcript.

	in_bamFile -- path of the BAM file, opened with pysam
	in_selectTrans -- transcript ids to report; ids absent from the BAM are dropped
	in_transLengthDict -- transcript id -> transcript length
	in_startCodonCoorDict / in_stopCodonCoorDict -- transcript id -> codon
	        coordinate; they are shifted by -1 and -3 before being passed to
	        get_trans_frame_counts
	in_readLengths / in_readOffset -- passed through to get_trans_frame_counts
	Type -- 'CDS', or one of 'TRANSCRIPT', 'TRANS', 'TRANSCRIPTS', 'EXON',
	        'EXONS' (case-insensitive)

	The normalisation total is summed over every transcript of
	in_startCodonCoorDict that is present in the BAM. For 'CDS' the value is
	10**9 * cds_reads / (total * number of codon positions); otherwise it is
	10**9 * total_reads / (total * transcript length).

	Returns a dict transcript id -> RPKM. Raises IOError for an unknown Type.
	'''
	region=Type.upper()
	use_cds=region in ['CDS']
	## the same spellings are accepted for both passes (the original rejected
	## EXON/EXONS while summing but accepted them while normalising)
	if (not use_cds) and region not in ['TRANSCRIPT','TRANS','TRANSCRIPTS','EXON','EXONS']:
		raise IOError("please choose your type of RPKM.[CDS or exon]")
	pysamFile=pysam.AlignmentFile(in_bamFile,'rb')
	try:
		bam_trans=set(pysamFile.references)
		in_selectTrans=bam_trans.intersection(in_selectTrans)
		per_trans={} ## transcript id -> (reads, region length), kept for the second pass
		all_counts=0
		for trans in in_startCodonCoorDict.keys():
			## transcripts missing from the BAM carry no reads and cannot be fetched
			if trans not in bam_trans:
				continue
			leftCoor =int(in_startCodonCoorDict[trans])-1
			rightCoor=int(in_stopCodonCoorDict[trans])-3
			(trans_counts,read_counts_frameSum,total_reads,cds_reads)=get_trans_frame_counts(pysamFile, trans, in_readLengths, in_readOffset, in_transLengthDict[trans], leftCoor, rightCoor)
			if use_cds:
				reads,region_length=cds_reads,len(read_counts_frameSum)
			else:
				reads,region_length=total_reads,len(trans_counts)
			all_counts+=reads
			if trans in in_selectTrans:
				## only scalars are cached, so the BAM is read once per transcript
				per_trans[trans]=(reads,region_length)
		RPKM={}
		for trans in in_selectTrans:
			## KeyError here means a selected transcript has no codon coordinates
			reads,region_length=per_trans[trans]
			RPKM[trans]=10**9*(reads/(all_counts*region_length))
	finally:
		pysamFile.close()
	return RPKM

def write_bam_file_density_dataframe(inBamAttr,outFile):
	'''Write the start/stop density of every sample to one tab-separated file.

	inBamAttr -- iterable of objects with the attributes bamLegend,
	             start_density and stop_density
	outFile -- path of the table to write

	The table has the columns sample, start_density and stop_density, with one
	row per position of each sample and the samples stacked in input order.
	Raises EOFError when inBamAttr yields no sample.
	'''
	data=[]
	for bms in inBamAttr:
		k=pd.DataFrame([bms.bamLegend]*len(bms.start_density))
		start=pd.DataFrame(bms.start_density)
		stop=pd.DataFrame(bms.stop_density)
		density=pd.merge(start,stop,how="left",left_index=True,right_index=True)
		density=pd.merge(k,density,how="left",left_index=True,right_index=True)
		data.append(density)
	## check for emptiness before data[0] is touched, otherwise an IndexError
	## is raised and the intended EOFError can never be reached
	if len(data) < 1:
		raise EOFError("Empty file, there is nothing in the file.")
	if len(data) == 1:
		temp=data[0]
		temp.columns=['sample','start_density','stop_density']
	else:
		## one vstack over all samples instead of re-copying the growing array
		temp=pd.DataFrame(np.vstack(data),columns=["sample","start_density","stop_density"])
	temp.to_csv(outFile,sep="\t",index=0)

def write_bam_file_read_counts_dataframe(inBamAttr,outFile):
	'''Write the RPKM attribute of every sample to one tab-separated table.

	Each item of inBamAttr contributes its RPKM attribute as one column, named
	by its bamLegend attribute; the frame is built with one row per sample and
	then transposed before it is written to outFile.
	'''
	samples=[(bms.RPKM,bms.bamLegend) for bms in inBamAttr]
	values=[rpkm for rpkm,_ in samples]
	legends=[legend for _,legend in samples]
	table=pd.DataFrame(values,index=legends).T
	table.to_csv(outFile,sep="\t")

def parse_bamListFile(bamListFile):
	'''Read a tab-separated BAM list file.

	The first line is always skipped. From every following line the first four
	columns are taken as bam file, read lengths, offsets and legend.

	Returns (bamFileList, readLengthsList, OffsetsList, bamLegendsList), four
	parallel lists of strings in file order.
	'''
	bamFileList,readLengthsList,OffsetsList,bamLegendsList=[],[],[],[]
	with open(bamListFile,'r') as handle:
		for lineNo,row in enumerate(handle):
			if lineNo == 0:
				## header line
				continue
			cols=row.strip().split("\t")
			bamFile,readLengths,Offsets,bamLegends=cols[0],cols[1],cols[2],cols[3]
			bamFileList.append(bamFile)
			readLengthsList.append(readLengths)
			OffsetsList.append(Offsets)
			bamLegendsList.append(bamLegends)
	return bamFileList,readLengthsList,OffsetsList,bamLegendsList

def fastaIter(transcriptFile):
	'''
	Build a dict mapping each sequence name in a fasta file to its sequence.

	A line whose first non-blank character is ">" starts a new record; the name
	is the header text up to the first space, with the ">" and surrounding
	whitespace (including the line break) removed. The sequence is the
	concatenation of the stripped lines that follow. Blank lines and lines
	before the first header are ignored.

	Records whose sequence contains one of the upper-case ambiguity letters
	I, K, M, R, S, W, Y, B, D, H, V, N or X are reported on stdout and left out.
	'''
	ambiguous=('I','K','M','R','S','W','Y','B','D','H','V','N','X')
	records=[] ## (name, list of sequence lines) in file order
	## "with" guarantees the handle is closed, also when parsing fails
	with open(transcriptFile,'r') as f:
		for line in f:
			record=line.strip()
			if not record:
				continue
			if record[0] == ">":
				## stripping first keeps the newline out of names that have no description
				records.append((record.strip(">").split(" ")[0],[]))
			elif records:
				records[-1][1].append(record)
	fastaDict={}
	for geneName,parts in records:
		seq=''.join(parts)
		found=[nt for nt in ambiguous if nt in seq]
		if found:
			## the last matching letter is the one reported
			print(geneName+" filtered"+"--"+"There is a ambiguous nucleotide",found[-1],"in your sequence")
			continue
		fastaDict[geneName]=seq
	return fastaDict

def translation(seq,table=1,cds=True):
	"""
	Translate a nucleotide sequence with Bio.Seq.translate.

	When the length of seq is not a multiple of 3 a warning is written to
	stderr and the trailing 1 or 2 bases are dropped before translating.
	table and cds are handed to translate unchanged; table = 1 is the standard
	table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
	Code from RiboCode.[Xiao,et al. NAR. 2018]
	"""
	overhang=len(seq) % 3
	if overhang:
		sys.stderr.write("Warning: sequence is not divisible by 3\n")
		seq = seq[:len(seq)-overhang]
	return translate(seq,table=table,cds=cds)

def flatten(xs):
	'''Lazily yield the items of xs, expanding nested tuples depth-first.

	Only tuple instances are expanded; lists and other containers are yielded
	as they are.
	'''
	pending=[iter(xs)] ## stack of iterators, innermost tuple on top
	while pending:
		for item in pending[-1]:
			if isinstance(item,tuple):
				## descend; the parent iterator resumes once this one is exhausted
				pending.append(iter(item))
				break
			yield item
		else:
			pending.pop()

## create parser for different scripts
def create_parser_for_cAI():
	'''Build the OptionParser for the cAI script.

	Registers -i/--input, -o/--otput_prefix, -t/--trans_file_legend,
	-u/--upstream_codon, -d/--downstream_codon, --reference, --RSCU, --weight
	and --table, and returns the parser.
	'''
	parser=OptionParser(usage="usage: python %prog [options]",version=__version__)
	add=parser.add_option
	add("-i","--input",dest="transcriptFiles",action="store",type="string",
		help="Input file(s) in fasta format. All files should be split by comma e.g. 1.fasta,2.fasta,3.fasta[required]\n. Note: input sequence must be cds sequences which could be generated by GetProteinCodingSequence.py.")
	add("-o","--otput_prefix",dest="output_prefix",action="store",type="string",
		help="Prefix of output files.[required]")
	add("-t","--trans_file_legend",dest="trans_file_legend",action="store",type="string",
		help="The legend of each fasta file.[required]")
	add("-u","--upstream_codon",dest="upstream_codon",action="store",type="int",default=0,
		help="Upstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the downstream codon.default=%default")
	add("-d","--downstream_codon",dest="downstream_codon",action="store",type="int",default=500,
		help="Downstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the upstream codon.default=%default")
	add('--reference',dest='reference',action='store',type='string',default=None,
		help="A reference cds sequences used for calculating the relative synonymous codon usage [RSCU].")
	add("--RSCU",dest="RSCUs",action="store",type="string",default=None,
		help="A RSCU files used for calculating each codon weight. default=%default.")
	add("--weight",dest="weights",action="store",type="string",default=None,
		help="A weight file used for calculating the cAI.")
	add("--table",dest="genetic_table",action="store",type="int",default=1,
		help="The genetic code to used. default=1. table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")
	return parser

def create_parser_for_enrichment_analysis():
	'''Build the OptionParser for the enrichment analysis script.

	The usage text embeds the module docstring (__doc__). Registers --ctrl,
	--treat, -c, -o, -U, -M, -S, -l, -n, -m, -u, -d, -e, --CI and --id-type,
	and returns the parser.
	'''
	usage="\n".join(["usage: %prog [options]",__doc__,""])
	parser=OptionParser(usage=usage,version=__version__)
	add=parser.add_option
	add("--ctrl",dest="ctrlDensity",action="store",type="string",default=None,
		help="Control density file generated by RiboDensityAtEachPosition.py.[required].")
	add("--treat",dest="treatDensity",action="store",type="string",default=None,
		help="Treat density file generated by RiboDensityAtEachPosition.py.[required].")
	add("-c","--coordinateFile",dest="coorFile",action="store",type="string",
		help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	add("-o","--otput_prefix",dest="output_prefix",action="store",type="string",
		help="Prefix of output files.[required]")
	add("-U","--unit_type",dest="unit",action="store",type="string",default="codon",
		help="Unit type we used for metagene analysis. Either 'nt' or 'codon'. default=%default")
	add("-M","--filter_mode",dest="mode",action="store",type="string",default='counts',
		help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")
	add('-S','--select_trans_list',dest='in_selectTrans',action="store",type='string',
		help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	add("-l","--minimum_cds_codon",dest="min_cds_codon",action="store",type="int",default=150,
		help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")
	add("-n","--minimum_cds_counts",dest="min_cds_counts",action="store",type="int",default=128,
		help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	add("-m","--minimum_norm_region_counts",dest="min_norm_region_counts",action="store",type="int",default=64,
		help="Minimum counts in normalization region . Counts in norm region smaller than \"minimum_norm_region_counts\" will be skipped. default=%default")
	add("-u","--upstream_codon",dest="upstream_codon",action="store",type="int",default=0,
		help="Upstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the downstream codon.")
	add("-d","--downstream_codon",dest="downstream_codon",action="store",type="int",default=500,
		help="Downstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the upstream codon.")
	add("-e","--normalize_exclude_codon",dest="norm_exclude_codon",action="store",type="int",default=30,
		help="The region of the normalization for each genes should exclude the first \"normalize_exclude_codon\". default=%default")
	add("--CI",dest="confidence",action="store",type='float',default=0.95,
		help="The confidence intervals of mean on each position based on t distribution. default=%default")
	add('--id-type',dest="id_type",action="store",type="string",default="transcript_id",
		help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return parser

def create_parser_for_single_ratio_plot():
	'''Build the OptionParser for the single-transcript ratio plot script.

	Registers -i/--input, -o/--otput_prefix, -c/--coordinateFile,
	-s/--single-trans, -S/--selected-trans-list, --ymin, --ymax, --unit,
	--axvline, --axhline, --slide-window, --start, --window, --step and
	--id-type, and returns the parser.
	'''
	usage="usage: python %prog [options]" + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-i","--input", action="store",type="string",dest="ratioFile",
			help="All ratio file generated by EnrichmentAnalysis.py.")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option('-s','--single-trans',action="store",type='string',dest='singleTrans',
			help="A single selected transcript.")
	parser.add_option('-S','--selected-trans-list',action="store",type="string",dest="in_selectTrans",
			help="Selected transcripts list. The first column of this file must be transcript id or gene identifier with a column name.")
	## --ymin sets the lower limit; the help text used to say "max"
	parser.add_option("--ymin",action="store",type="float",dest="ymin",default=None,help="The min of ylim. default=%default")
	parser.add_option("--ymax",action="store",type="float",dest="ymax",default=None,help="The max of ylim. default=%default")
	parser.add_option("--unit",action="store",type="string",dest="unit",default='codon',help="Unit for density calculation.[codon or nt]")
	parser.add_option("--axvline",action="store",type="float",dest="axvline",default=None,help="Position to plot a vertical line in x axis. default=%default")
	## axhline draws a horizontal line; the help text used to say "vertical"
	parser.add_option("--axhline",action="store",type="float",dest="axhline",default=None,help="Position to plot a horizontal line in y axis. default=%default")
	## "%default=default" rendered as "None=default"; the placeholder belongs on the right
	parser.add_option("--slide-window",action="store",type="string",dest="slideWindow",default=None,help="Using slide window to average the density.Input a	true strings such as yes, y or 1. default=%default")
	parser.add_option("--start",action="store",type="int",dest="start_position",default=5,help="The start position need to be averaged.default=%default")
	parser.add_option("--window",action="store",type="int",dest="window",default=7,help="The length of silde window. default=%default")
	parser.add_option("--step",action="store",type='int',dest="step",default=1,help="The step length of slide window. default=%default")
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return parser

def create_parser_for_sequence_extraction():
	'''Build the OptionParser for the sequence extraction script.

	Registers -i/--input, -c/--coordinateFile, -S/--select_trans_list,
	-l/--left_coor, -r/--right_coor, -o/--otput_prefix, --mode, --table and
	--id-type, and returns the parser.
	'''
	parser=OptionParser(usage="usage: python %prog [options]",version=__version__)
	add=parser.add_option
	add("-i","--input",dest="transcriptFile",action="store",type="string",
		help="Input file(s) in fasta format. generated by RiboCode")
	add("-c","--coordinateFile",dest="coorFile",action="store",type="string",
		help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	add('-S','--select_trans_list',dest='in_selectTrans',action="store",type='string',
		help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	add("-l","--left_coor",dest="left_position",action="store",type="int",
		help="The left position (codon level) of the sequence you  wanna to extract.[required]")
	add("-r","--right_coor",dest="right_position",action="store",type="int",
		help=" The right position (codon level) of the sequence you  wanna to extract[required]")
	add("-o","--otput_prefix",dest="output_prefix",action="store",type="string",
		help="Prefix of output files.[required]")
	add("--mode",dest="mode",action="store",type="string",default="whole",
		help="Mode for sequence extraction.[whole or local]")
	add("--table",dest="geneticCode",action="store",type="int",default=1,
		help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")
	add('--id-type',dest="id_type",action="store",type="string",default="transcript_id",
		help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return parser

def create_parser_for_UTR_sequence_extraction():
	'''Build the OptionParser for the UTR sequence extraction script.

	Registers -i/--input, -c/--coordinateFile and -o/--otput_prefix, and
	returns the parser.
	'''
	parser=OptionParser(usage="usage: python %prog [options]",version=__version__)
	add=parser.add_option
	add("-i","--input",dest="transcriptFile",action="store",type="string",
		help="Input file(s) in fasta format. All transcript sequences are from the longest transcripts of protein coding genes which may be generated by GetProteinCodingSequence.py")
	add("-c","--coordinateFile",dest="coorFile",action="store",type="string",
		help="The file should contain the coordinate of start and stop codon. Generated by RiboCode.[required]")
	add("-o","--otput_prefix",dest="output_prefix",action="store",type="string",
		help="Prefix of output files.[required]")
	return parser

def create_parser_for_hydropathy_or_charge():
	'''Build the OptionParser for the hydropathy / charge script.

	The usage text embeds the module docstring (__doc__). Registers -i/--input,
	-o/--otput_prefix, -t/--trans_file_legend, --index, -u/--upstream_codon,
	-d/--downstream_codon and --table, and returns the parser.
	'''
	usage="\n".join(["usage: %prog [options]",__doc__,""])
	parser=OptionParser(usage=usage,version=__version__)
	add=parser.add_option
	add("-i","--input",dest="transcriptFiles",action="store",type="string",
		help="Input file(s) in fasta format. All files should be split by comma e.g. 1.fasta,2.fasta,3.fasta[required]\n. Note: input sequence must be cds sequences which could be generated by GetProteinCodingSequence.py.")
	add("-o","--otput_prefix",dest="output_prefix",action="store",type="string",
		help="Prefix of output files.[required]")
	add("-t","--trans_file_legend",dest="trans_file_legend",action="store",type="string",
		help="The legend of each fasta files, comma split. e.g. 'condition1,condition2,condition3' [required]")
	add("--index",dest='index',action='store',type='string',
		help="Input the hydropathy index or charge index of each amino acids.")
	add("-u","--upstream_codon",dest="upstream_codon",action="store",type="int",default=0,
		help="Upstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the downstream codon.")
	add("-d","--downstream_codon",dest="downstream_codon",action="store",type="int",default=500,
		help="Downstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the upstream codon.")
	add("--table",dest="geneticCode",action="store",type="int",default=1,
		help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")
	return parser

def create_parser_for_metagene_analysis():
	'''Build the command-line parser for the metagene analysis.

	The usage banner embeds the module docstring when one is available.
	Bam inputs are described either by a list file (-f) or by the
	-i/-r/-s/-t group; the help texts state that the two are mutually
	exclusive, but this parser does not enforce it.

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	# __doc__ is None when Python runs with -OO; fall back to an empty string
	# so that building the usage text cannot raise a TypeError.
	usage="usage: python %prog [options]" + '\n' + (__doc__ or '') + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	parser.add_option("-U","--unit_type",action="store",type="string",dest="unit", default="codon",
			help="Unit type we used for metagene analysis. Either 'nt' or 'codon'. default=%default")
	parser.add_option("-M","--filter_mode",action="store",type="string",dest="mode",default='counts',
			help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	parser.add_option("-l","--minimum_cds_codon",action="store",type="int",default=150,dest="min_cds_codon",
			help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")
	parser.add_option("-n","--minimum_cds_counts",action="store",type="int",default=128,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	parser.add_option("-m","--minimum_norm_region_counts",action="store",type="int",default=64,dest="min_norm_region_counts",
			help="Minimum counts in normalization region . Counts in norm region smaller than \"minimum_norm_region_counts\" will be skipped. default=%default")
	parser.add_option("-u","--upstream_codon",action="store",type="int",default=0,dest="upstream_codon",
			help="Upstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the downstream codon.")
	parser.add_option("-d","--downstream_codon",action="store",type="int",default=500, dest="downstream_codon",
			help="Downstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the upstream codon.")
	parser.add_option("-e","--normalize_exclude_codon",action="store",type="int",default=30, dest="norm_exclude_codon",
			help="The region of the normalization for each genes should exclude the first \"normalize_exclude_codon\". default=%default")
	parser.add_option("-y","--normalize_codon_density_num",action="store",type="int",default=100, dest="norm_codon_density_num",
			help="Output the first \"normalize_codon_density_num\" from the start codon for each qualified genes in each bam files. Pay attention that this parameter should not bigger than -l parameter. default=%default")
	parser.add_option("--type",action='store',type='string',dest='type',default='CDS',help='Type of counts statistics.[CDS/cds or UTR/utr].default=%default')
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	parser.add_option("--CI",action="store",type='float',dest="confidence",default=0.95,
			help="The confidence intervals of mean on each position based on t distribution. default=%default")
	parser.add_option("--norm",action="store",type="string",dest="norm",default='yes',
			help="Use density beyound the first 30 (-e) codons for normalization or not. [yes/no]")
	parser.add_option("--plot",action="store",type="string",dest="plot",default='yes',help="Output a metagene plot or not. default=%default")
	return parser

def create_parser_for_metagene_analysis_for_the_whole_regions():
	'''Build the command-line parser for the whole-region metagene analysis.

	The usage banner embeds the module docstring when one is available.
	Besides the bam input options (-f, or -i/-r/-s/-t), the parser takes the
	bin sizes (-b) used to scale the 5'UTR, CDS and 3'UTR, plus the transcript
	filtering thresholds (-l, -n, -m, -e).

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	# __doc__ is None when Python runs with -OO; fall back to an empty string
	# so that building the usage text cannot raise a TypeError.
	usage="usage: python %prog [options]" + '\n' + (__doc__ or '') + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	parser.add_option("-b","--bins",action='store',type='string',dest="bins",default='15,90,60',
			help="Bins to scale the transcript length.e.g.'15,90,60'. bins must be separated by comma, namely '5UTRBins,CDSBins,3UTRBins'. default=%default")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	parser.add_option("-l","--minimum_cds_codon",action="store",type="int",default=150,dest="min_cds_codon",
			help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")
	parser.add_option("-n","--minimum_cds_counts",action="store",type="int",default=128,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	parser.add_option("-m","--minimum_norm_region_counts",action="store",type="int",default=64,dest="min_norm_region_counts",
			help="Minimum counts in normalization region . Counts in norm region smaller than \"minimum_norm_region_counts\" will be skipped. default=%default")
	parser.add_option("-e","--normalize_exclude_codon",action="store",type="int",default=30, dest="norm_exclude_codon",
			help="The region of the normalization for each genes should exclude the first \"normalize_exclude_codon\". default=%default")
	# optparse substitutes the literal token "%default" with the option's
	# default value, so the token must come after "default=".
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	parser.add_option("--plot",action="store",type="string",dest="plot",default='yes',
			help="Output a metagene plot or not.default=%default")
	return parser

def create_parser_for_output_transInfo():
	'''Build the command-line parser for exporting transcript information.

	The usage banner embeds the module docstring when one is available.
	Inputs are a coordinate file (-c), a GTF annotation (-g) and a transcript
	fasta file (-f); outputs are the longest-transcript table (-o) and the
	all-transcript table (-O).

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	# __doc__ is None when Python runs with -OO; fall back to an empty string
	# so that building the usage text cannot raise a TypeError.
	usage="usage: python %prog [options]" + '\n' + (__doc__ or '') + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon of each transcript. Generated by RiboCode.[required]")
	parser.add_option("-g","--gtfFile",action="store",type="string",dest="gtfFile",
			help="Reference gene annotation in gtf format.[required]")
	parser.add_option("-f","--transcriptFile",action="store",type="string",dest="transcriptFile",
			help="transcript file with fasta format generated by RiboCode.[required]")
	parser.add_option("-o","--output-longest-transcrpts",action="store",type="string",dest="longestTransInfo",
			help="Longest transcripts information.")
	parser.add_option("-O","--output-all-transcrpts",action="store",type="string",dest="allTransInfo",
			help="All transcripts information.")
	return parser

def create_parse_for_pausing_score_calculation():
	'''Assemble the option parser used by the pausing score calculation.

	Each option is declared as a (flags, settings) entry in a table and
	registered in table order, so the help listing follows the table.

	Returns:
		optparse.OptionParser: parser with every pausing-score option attached.
	'''
	option_parser=OptionParser(usage="usage: python %prog [options]" + "\n",version=__version__)
	option_table=(
		(("-f","--bamListFile"),dict(type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")),
		(("-i","--input"),dict(type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")),
		(("-c","--coordinateFile"),dict(type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")),
		(("-o","--otput_prefix"),dict(type="string",dest="output_prefix",
			help="Prefix of output files.[required]")),
		(("-r","--specific_reads_length"),dict(type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")),
		(("-s","--offset"),dict(type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")),
		(("-t","--bam_file_legend"),dict(type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")),
		(("-M","--filter_mode"),dict(type="string",dest="mode",default='counts',
			help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")),
		(('-S','--select_trans_list'),dict(type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")),
		(("-l","--minimum_cds_codon"),dict(type="int",default=150,dest="min_cds_codon",
			help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")),
		(("-n","--minimum_cds_counts"),dict(type="int",default=64,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")),
		(("-L","--Left_position"),dict(type="int",dest="left_position",default=None,
			help="Left position of the interesting region [codon level]. default=%default")),
		(("-R","--Right_position"),dict(type="int",dest="right_position",default=None,
			help="Right position of the interesting region [codon level]. default=%default")),
		(("-F","--transcript_fasta"),dict(type="string",dest="transcript_fasta",
			help="Input the transcript fasta file used for motif detection and codon density calculation. cds sequences related with coorFile (longest.trans.info.txt) generated by GetProteinSequence.py")),
		(("--table",),dict(type="int",dest="geneticCode",default=1,
			help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")),
		(('--id-type',),dict(type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")),
	)
	# Every option in this parser stores its argument, so the action is
	# supplied once here instead of being repeated in each table entry.
	for flags,settings in option_table:
		option_parser.add_option(*flags,action="store",**settings)
	return option_parser

def create_parser_for_periodicity():
	'''Build the command-line parser for the periodicity analysis.

	Registered options: the bam input (-i), the annotation directory (-a),
	the coordinate file (-c), the output prefix (-o), the read length range
	to consider (-L/-R, defaults 25 and 35), an optional transcript list (-S)
	and the id type of that list (--id-type).

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	usage="usage: python %prog [options]" + "\n"
	parser=OptionParser(usage=usage)
	parser.add_option("-i","--input", action="store",type="string",dest="bamFile",
			help="Input file(s) in bam format.")
	parser.add_option("-a","--annot_dir",action="store",type="string",dest="annot_dir",
			help="transcripts annotation directory, generated by prepare_transcripts, a function of RiboCode.")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-L","--left_length",action="store",type="int",default=25,dest="left_length",help="The left range of read length we will consider.")
	parser.add_option("-R","--right_length",action="store",type="int",default=35,dest="right_length",help="The right range of read length we will consider.")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	# optparse substitutes the literal token "%default" with the option's
	# default value, so the token must come after "default=".
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return parser

def create_parser_for_polarity_calculation():
	'''Build the command-line parser for the polarity calculation.

	Registered options: the bam inputs (-f, or -i/-r/-s/-t), the coordinate
	file (-c), the output prefix (-o), an optional transcript list (-S) with
	its id type (--id-type), the minimum CDS counts filter (-n) and the
	plot switch (--plot).

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	# A space separates the program name from "[options]" in the banner.
	usage='usage: python %prog [options]'
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	parser.add_option("-n","--minimum_cds_counts",action="store",type="int",default=64,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	# optparse substitutes the literal token "%default" with the option's
	# default value, so the token must come after "default=".
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	parser.add_option("--plot",action="store",type="string",dest="plot",default='yes',
			help="Output a polarity plot or not.default=%default")
	return parser

def create_parser_for_triplete_AA_motif():
	'''Assemble the option parser for the amino acid motif analysis.

	Options are registered through a small local helper that always uses
	the "store" action; the registration order below is the order shown in
	the help listing.

	Returns:
		optparse.OptionParser: parser with every motif-analysis option attached.
	'''
	motif_parser=OptionParser(usage="usage: python %prog [options]",version=__version__)

	def register(*flags,**settings):
		# All options of this parser simply store their argument.
		motif_parser.add_option(*flags,action="store",**settings)

	register("-f","--bamListFile",type="string",default=None,dest="bamListFile",
		help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	register("-i","--input",type="string",default=None,dest="bam_files",
		help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	register("-c","--coordinateFile",type="string",dest="coorFile",
		help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	register("-o","--otput_prefix",type="string",dest="output_prefix",
		help="Prefix of output files.[required]")
	register("-r","--specific_reads_length",type="string",dest="read_length",
		help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	register("-s","--offset",type="string",dest="read_offset",
		help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	register("-t","--bam_file_legend",type="string",dest="bam_file_legend",
		help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	register("-M","--filter_mode",type="string",dest="mode",default='counts',
		help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")
	register('-S','--select_trans_list',type='string',dest='in_selectTrans',
		help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	register('--type1',type='string',dest='type1',default=None,
		help="The first motif you want to find. It must be a string with length equal to 2. e.g. PP,DD,KK motifs. default=%default.")
	register('--type2',type='string',dest='type2',default=None,
		help="The second motif you want to find. It must be a string with length equal to 3. e.g. PPP,DDD,KKK motifs. default=%default.")
	register('--motifList1',type='string',dest='motifList1',default=None,
		help="The first motif list you want to find. Mutually exclusive with '--type1' '--type2'. default=%default")
	register('--motifList2',type='string',dest='motifList2',default=None,
		help="The second motif list you want to find. Mutually exclusive with '--type1' '--type2'. default=%default")
	register("-l","--minimum_cds_codon",type="int",default=150,dest="min_cds_codon",
		help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")
	register("-n","--minimum_cds_counts",type="int",default=64,dest="min_cds_counts",
		help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	register("-F","--transcript_fasta",type="string",dest="transcript_fasta",
		help="Input the transcript fasta file used for motif detection and codon density calculation. cds sequences related with coorFile (longest.trans.info.txt) generated by GetProteinSequence.py")
	register("--table",type="int",dest="geneticCode",default=1,
		help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")
	register('--id-type',type="string",dest="id_type",default="transcript_id",
		help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return motif_parser

def creat_parser_for_calculation_of_codon_density():
	'''Assemble the option parser for the codon density calculation.

	The options live in a (flags, settings) table and are registered in
	table order, which is also the order shown in the help listing.

	Returns:
		optparse.OptionParser: parser with every codon-density option attached.
	'''
	density_parser=OptionParser(usage="usage: %prog [options]",version=__version__)
	option_table=[
		(("-f","--bamListFile"),dict(type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")),
		(("-i","--input"),dict(type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")),
		(("-c","--coordinateFile"),dict(type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")),
		(("-o","--otput_prefix"),dict(type="string",dest="output_prefix",
			help="Prefix of output files.[required]")),
		(("-r","--specific_reads_length"),dict(type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")),
		(("-s","--offset"),dict(type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")),
		(("-t","--bam_file_legend"),dict(type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")),
		(("-M","--filter_mode"),dict(type="string",dest="mode",default='counts',
			help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")),
		(('-S','--select_trans_list'),dict(type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")),
		(("-l","--minimum_cds_codon"),dict(type="int",default=150,dest="min_cds_codon",
			help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")),
		(("-n","--minimum_cds_counts"),dict(type="int",default=64,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")),
		(("-u","--upstream_position"),dict(type="int",default=None,dest="upstream_position",
			help="The left position (codon level) you want to chose.[upstream position]. default=%default")),
		(("-d","--downstream_position"),dict(type="int",default=None,dest="downstream_position",
			help="The right position (codon level) you want to chose.[downstream position]. default=%default")),
		(("-F","--transcript_fasta"),dict(type="string",dest="transcript_fasta",
			help="Input the transcript fasta file used for motif detection and codon density calculation. cds sequences related with coorFile (longest.trans.info.txt) generated by GetProteinSequence.py")),
		(("--table",),dict(type="int",dest="geneticCode",default=1,
			help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")),
		(('--id-type',),dict(type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")),
	]
	# Every option in this parser stores its argument, so the action is
	# supplied once here instead of being repeated in each table entry.
	for flags,settings in option_table:
		density_parser.add_option(*flags,action="store",**settings)
	return density_parser


def create_parser_for_riboDensity_atEachPosition():
	'''Build the command-line parser for the per-position ribosome density output.

	Registered options: the bam inputs (-f, or -i/-r/-s/-t), the coordinate
	file (-c), the output prefix (-o), an optional transcript list (-S) with
	its id type (--id-type) and the unit type (-U, 'nt' or 'codon').

	Returns:
		optparse.OptionParser: the configured parser.
	'''
	usage="usage: python %prog [options]"
	parser=OptionParser(usage=usage,version=__version__)
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
		help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",dest="bam_files",
		help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
		help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
		help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
		help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
		help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
		help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
		help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	parser.add_option("-U","--unit_type",action="store",type="string",dest="unit", default="codon",
		help="Unit type we used for metagene analysis. Either 'nt' or 'codon'. default=%default")
	# optparse substitutes the literal token "%default" with the option's
	# default value, so the token must come after "default=".
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
		help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")

	return parser

def creat_parser_for_specific_region():
	'''Assemble the option parser for the specific-region analysis.

	Options are registered through a small local helper that always uses
	the "store" action; the registration order below is the order shown in
	the help listing.

	Returns:
		optparse.OptionParser: parser with every specific-region option attached.
	'''
	region_parser=OptionParser(usage="usage: python %prog [options]" + "\n",version=__version__)

	def register(*flags,**settings):
		# All options of this parser simply store their argument.
		region_parser.add_option(*flags,action="store",**settings)

	register("-f","--bamListFile",type="string",default=None,dest="bamListFile",
		help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	register("-i","--input",type="string",default=None,dest="bam_files",
		help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	register("-c","--coordinateFile",type="string",dest="coorFile",
		help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	register("-o","--otput_prefix",type="string",dest="output_prefix",
		help="Prefix of output files.[required]")
	register("-r","--specific_reads_length",type="string",dest="read_length",
		help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	register("-s","--offset",type="string",dest="read_offset",
		help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	register("-t","--bam_file_legend",type="string",dest="bam_file_legend",
		help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	register("-U","--unit_type",type="string",dest="unit",default="codon",
		help="Unit type we used for metagene analysis. Either 'nt' or 'codon'. default=%default")
	register("-M","--filter_mode",type="string",dest="mode",default='counts',
		help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")
	register('-S','--select_trans_list',type='string',dest='in_selectTrans',
		help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	register("-L","--Left_position",type="int",dest="left_position",
		help="Left position of the interesting region")
	register("-R","--Right_position",type="int",dest="right_position",
		help="Right position of the interesting region")
	register('--id-type',type="string",dest="id_type",default="transcript_id",
		help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return region_parser

def create_parser_for_frame_density():
	'''Build the command line option parser for the frame density analysis.

	The usage text is the generic usage line followed by this module's
	docstring. The parser version is taken from the package ``__version__``.

	Registered options (dest in parentheses):
		-f/--bamListFile (bamListFile), -i/--input (bam_files),
		-c/--coordinateFile (coorFile), -o/--otput_prefix (output_prefix),
		-r/--specific_reads_length (read_length), -s/--offset (read_offset),
		-t/--bam_file_legend (bam_file_legend),
		-S/--select_trans_list (in_selectTrans),
		--id-type (id_type, default "transcript_id"),
		--plot (plot, default 'yes').

	Returns:
		optparse.OptionParser: the configured parser; parsing is left to the caller.
	'''
	# The module docstring is None when the interpreter discards docstrings
	# (python -OO); fall back to an empty string so building the usage text
	# does not fail with "str + None".
	usage="usage: python %prog [options]" + '\n' + (__doc__ or '') + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	# Input selection: either a bam list file (-f) or the -i/-r/-s/-t quartet.
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	# NOTE: the long flag is spelled "--otput_prefix"; it is kept as is so
	# existing command lines keep working.
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	# Optional transcript subset and the kind of identifier it contains.
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	parser.add_option("--plot",action="store",type="string",dest="plot",default='yes',
			help="Output a  plot or not.default=%default")
	return parser

def create_parser_for_RPFdist():
	'''Build the command line option parser for the RPF distribution analysis.

	The usage text is the generic usage line followed by this module's
	docstring. The parser version is taken from the package ``__version__``.

	Registered options (dest in parentheses):
		-f/--bamListFile (bamListFile), -i/--input (bam_files),
		-c/--coordinateFile (coorFile), -o/--otput_prefix (output_prefix),
		-r/--specific_reads_length (read_length), -s/--offset (read_offset),
		-t/--bam_file_legend (bam_file_legend),
		-M/--filter_mode (mode, default 'counts'),
		-S/--select_trans_list (in_selectTrans),
		-l/--minimum_cds_codon (min_cds_codon, int, default 150),
		-n/--minimum_cds_counts (min_cds_counts, int, default 128),
		-m/--minimum_norm_region_counts (min_norm_region_counts, int, default 64),
		-e/--normalize_exclude_codon (norm_exclude_codon, int, default 30),
		--id-type (id_type, default "transcript_id").

	Returns:
		optparse.OptionParser: the configured parser; parsing is left to the caller.
	'''
	# The module docstring is None when the interpreter discards docstrings
	# (python -OO); fall back to an empty string so building the usage text
	# does not fail with "str + None".
	usage="usage: python %prog [options]" + '\n' + (__doc__ or '') + "\n"
	parser=OptionParser(usage=usage,version=__version__)
	# Input selection: either a bam list file (-f) or the -i/-r/-s/-t quartet.
	parser.add_option("-f","--bamListFile",action="store",type="string",default=None,dest="bamListFile",
			help="Bam file list, containing 4 columns.Namely bamFiles,readLength, offSet, bamLegend. '-f' and '-i, -r, -s, -t' parameters are mutually exclusive.default=%default.")
	parser.add_option("-i","--input", action="store",type="string",default=None,dest="bam_files",
			help="Input file(s) in bam format. All files should be split by comma e.g. 1.bam,2.bam,3.bam[required]. '-i' and '-f' are mutually exclusive. default=%default")
	parser.add_option("-c","--coordinateFile",action="store",type="string",dest="coorFile",
			help="The file should contain the coordinate of start and stop codon. Generated by OutputTranscriptInfo.py.[required]")
	# NOTE: the long flag is spelled "--otput_prefix"; it is kept as is so
	# existing command lines keep working.
	parser.add_option("-o","--otput_prefix",action="store",type="string",dest="output_prefix",
			help="Prefix of output files.[required]")
	parser.add_option("-r","--specific_reads_length",action="store",type="string",dest="read_length",
			help="Specific the lenght to do analysis, comma split. e.g. '28,29,30'.If use all length set 'All'. Bam files diff length select split by '_' e.g. '28,29,30_ALL_27,28' [required]. '-r' and '-f' are mutually exclusive.")
	parser.add_option("-s","--offset",action="store",type="string",dest="read_offset",
			help="Specific the offset corresponding to read length, comma split. e.g. '12,13,13'. No offset set 0. Bam files diff offset select split by '_' e.g. '12,13,13_0_12,12' [required]. '-s' and '-f' are mutually exclusive.")
	parser.add_option("-t","--bam_file_legend",action="store",type="string",dest="bam_file_legend",
			help="The legend of each bam files, comma split. e.g. 'condition1,condition2,condition3' [required]. '-t' and '-f' are mutually exclusive.")
	# Transcript filtering mode and optional transcript subset.
	parser.add_option("-M","--filter_mode",action="store",type="string",dest="mode",default='counts',
			help="Mode for filtering transcripts. Either 'counts' or 'RPKM'. default=%default.")
	parser.add_option('-S','--select_trans_list',action="store",type='string',dest='in_selectTrans',
			help="Selected transcript list used for metagene analysis.This files requires the first column must be the transcript ID  with a column name.")

	# Integer thresholds; each carries its own default.
	parser.add_option("-l","--minimum_cds_codon",action="store",type="int",default=150,dest="min_cds_codon",
			help="Minimum CDS codon (codon unit). CDS codons smaller than \"minimum_cds_codon\" will be skipped. default=%default")
	parser.add_option("-n","--minimum_cds_counts",action="store",type="int",default=128,dest="min_cds_counts",
			help="Minimum CDS counts. CDS counts smaller than \"min_cds_counts\" will be skipped. default=%default")
	parser.add_option("-m","--minimum_norm_region_counts",action="store",type="int",default=64,dest="min_norm_region_counts",
			help="Minimum counts in normalization region . Counts in norm region smaller than \"minimum_norm_region_counts\" will be skipped. default=%default")
	parser.add_option("-e","--normalize_exclude_codon",action="store",type="int",default=30, dest="norm_exclude_codon",
			help="The region of the normalization for each genes should exclude the first \"normalize_exclude_codon\". default=%default")
	parser.add_option('--id-type',action="store",type="string",dest="id_type",default="transcript_id",
			help="define the id type users input. the default is transcript id, if not, will be transformed into transcript id. default=%default")
	return parser

def create_parser_for_tAI():
	'''Build the command line option parser for the tAI tool.

	Options are declared in a table of (flags, keyword settings) pairs and
	registered in that order, so the help output lists them in table order.
	The usage text is the generic usage line only; the parser version is
	taken from the package ``__version__``.

	Returns:
		optparse.OptionParser: the configured parser; parsing is left to the caller.
	'''
	option_table=(
		# fasta inputs, tRNA copy number table, output prefix and legends
		(("-i","--input"),
			dict(action="store",type="string",dest="transcriptFiles",
				help="Input file(s) in fasta format. All files should be split by comma e.g. 1.fasta,2.fasta,3.fasta[required]\n. Note: input sequence must be cds sequences which could be generated by GetProteinCodingSequence.py.")),
		(("-N","--tRNA_copy_numbers"),
			dict(action="store",type="string",dest="copy_numbers",
				help="tRNA copy numbers file.")),
		(("-o","--otput_prefix"),
			dict(action="store",type="string",dest="output_prefix",
				help="Prefix of output files.[required]")),
		(("-t","--trans_file_legend"),
			dict(action="store",type="string",dest="trans_file_legend",
				help="The legend of each fasta files, comma split. e.g. 'condition1,condition2,condition3' [required]")),
		# integer options, each with a default
		(("-u","--upstream_codon"),
			dict(action="store",type="int",default=0,dest="upstream_codon",
				help="Upstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the downstream codon.default=%default")),
		(("-d","--downstream_codon"),
			dict(action="store",type="int",default=500,dest="downstream_codon",
				help="Downstream codon corresponding to start codon (codon unit). While corresponding to stop codon, it is the upstream codon.default=%default")),
		(("--table",),
			dict(action="store",type="int",dest="geneticCode",default=1,
				help="Which genetic code to use. default is the standard code.table = 1, is the standard table, ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi")),
	)
	tai_parser=OptionParser(usage="usage: python %prog [options]",version=__version__)
	for flag_names,settings in option_table:
		tai_parser.add_option(*flag_names,**settings)
	return tai_parser