#!/usr/bin/runAmos -C
#--------------------------------------- USAGE --------------------------------#

#?
#? Minimo is a de novo assembler based on the AMOS infrastructure. Minimo uses a
#? conservative overlap-layout-consensus algorithm to avoid mis-assemblies and
#? can be applied to short reads. The input is a FASTA file and there are options
#? to control the stringency of the assembly and the processing of the quality
#? scores. By default, the results are in the AMOS format and written to the
#? directory where the input FASTA file is located.
#?
#? Usage:
#?     Minimo FASTA_IN [options]
#?
#? Options:
#?     -D QUAL_IN=<file>   Input quality score file (in Phred format)
#?     -D GOOD_QUAL=<n>    Quality score to set for bases within the clear
#?                           range if no quality file was given (default: 30)
#?     -D BAD_QUAL=<n>     Quality score to set for bases outside clear range
#?                           if no quality file was given (default: 10). If your
#?                           sequences are trimmed, try the same value as GOOD_QUAL.
#?     -D MIN_LEN=<n>      Minimum contig overlap length (at least 20 bp, 
#?                           default: 35)
#?     -D MIN_IDENT=<d>    Minimum contig overlap identity percentage (between 0
#?                           and 100 %, default: 98)
#?     -D ALN_WIGGLE=<d>   Alignment wiggle value (from 2 for short reads to 15 for
#?                           long reads, default: 2)
#?     -D FASTA_EXP=<n>    Export results in FASTA format (0:no 1:yes, default: 0)
#?     -D ACE_EXP=<n>      Export results in ACE format (0:no 1:yes, default: 0)
#?     -D OUT_PREFIX=<s>   Prefix to use for the output file path and name
#?
#? Minimo v1.6. Copyright Florent Angly 2010. Under the GPL v3 open-source license.
#?

# Directory holding the AMOS executables called by the numbered steps below
BINDIR=/usr/bin

# --------------------------------------- ARGUMENTS AND DEFAULTS ---------------#

# Expecting one mandatory argument, the FASTA filename
EXPECT 1
# The FASTA filename is taken from PREFIX
# NOTE(review): presumably runAmos stores the mandatory argument in PREFIX -- confirm
FASTA_IN   = $(PREFIX)

# Default parameters (see the usage text at the top of this file for their
# meaning and for how to override them with -D)
# QUAL_IN and OUT_PREFIX default to an empty string; steps further down compare
# them against "" to decide whether the user supplied a value
QUAL_IN    = ""
GOOD_QUAL  = 30
BAD_QUAL   = 10
MIN_LEN    = 35
MIN_IDENT  = 98
ALN_WIGGLE = 2
FASTA_EXP  = 0
ACE_EXP    = 0
OUT_PREFIX = ""

# Verbosity level handed to hash-overlap and make-consensus through their -v
# option (0 by default; the upper bound is not documented in this file)
VERBOSE    = 0

# File directory and basename (strip FASTA filename of its extensions)
# The basename is cut at its first '.', so everything after that dot is dropped
# (for instance "reads.v2.fa" gives "reads"); the directory part is kept as is.
# Inside the shell calls of this section the closing parenthesis of a variable
# reference is written "\)".
# NOTE(review): presumably this stops runAmos from taking it as the end of the
# shell call -- confirm against the runAmos documentation
FASTA_BASE = $(shell /bin/echo -n `dirname $(FASTA_IN\)`/`basename $(FASTA_IN\) | cut -d'.' -f1`)

# Output prefix determined from user input if specified, from the fasta file otherwise
BASE       = $(shell if [ "$(OUT_PREFIX\)" != "" ]; then /bin/echo -n $(OUT_PREFIX\); else /bin/echo -n $(FASTA_BASE\); fi)

# Convert percentage of identity between overlaps from % to fractional
# Computed by bc with 3 decimal places as 1 - MIN_IDENT / 100, i.e. the maximum
# fraction of mismatches allowed in an overlap (0.02 for the default of 98 %)
MAX_OVL_ERROR  = $(shell /bin/echo -n `/bin/echo "scale=3; 1 - $(MIN_IDENT\) / 100" | bc`)

# The error in the consensus sequence is twice the overlap error to allow
# for independent errors from different sequences
MAX_CONS_ERROR = $(shell /bin/echo -n `/bin/echo "scale=3; $(MAX_OVL_ERROR\) * 2" | bc`)

#------------------------------------ FILENAMES ------------------------------#

# All file names are built from BASE (output prefix) plus a fixed suffix
# AMOS bank holding the reads and the assembly (tested as a directory in step 30)
BANK        = $(BASE).bnk
# List of singleton read IDs, written in step 91 and read in steps 92 and 93
SINGLE_LST  = $(BASE).lst
# Contigs and singletons in FASTA format (only written when FASTA_EXP is 1)
FASTA_OUT   = $(BASE)-contigs.fa
# Quality scores matching FASTA_OUT (only written when FASTA_EXP is 1)
QUAL_OUT    = $(BASE)-contigs.qual
# Assembly in AMOS message format (always written, step 80)
AMOS_OUT    = $(BASE)-contigs.afg
# Assembly in ACE format (only written when ACE_EXP is 1)
ACE_OUT     = $(BASE)-contigs.ace

# Step 10 echoes the effective value of every user-settable option and of the
# values derived from them, so that a log records exactly how the run was set up.
# ACE_EXP and OUT_PREFIX are listed along with the other options.
# NOTE(review): whether the "\n" sequences are rendered as line breaks depends on
# the /bin/echo implementation -- confirm on the target platform
## Printing parameters
10: /bin/echo "Running Minimo with:\nFASTA_IN='$(FASTA_IN)'\nQUAL_IN='$(QUAL_IN)'\nGOOD_QUAL='$(GOOD_QUAL)'\nBAD_QUAL='$(BAD_QUAL)'\nMIN_LEN='$(MIN_LEN)'\nMIN_IDENT='$(MIN_IDENT)'\nMAX_OVL_ERROR='$(MAX_OVL_ERROR)'\nMAX_CONS_ERROR='$(MAX_CONS_ERROR)'\nALN_WIGGLE='$(ALN_WIGGLE)'\nFASTA_EXP='$(FASTA_EXP)'\nACE_EXP='$(ACE_EXP)'\nOUT_PREFIX='$(OUT_PREFIX)'\nBASE='$(BASE)'\nFASTA_OUT='$(FASTA_OUT)'\nQUAL_OUT='$(QUAL_OUT)'\nAMOS_OUT='$(AMOS_OUT)'\nACE_OUT='$(ACE_OUT)'\nVERBOSE='$(VERBOSE)'"

# File lists grouped by role.
# NOTE(review): INPUTS, TEMPS and OUTPUTS are presumably names that runAmos
# itself inspects (e.g. for input checks or clean-up options) -- confirm against
# the runAmos documentation. Within this file only TEMPS is referenced (step 110).

# Required input files
# The optional quality file QUAL_IN is not part of this list
INPUTS      = $(FASTA_IN)

# Temp files
TEMPS       = $(BANK) $(SINGLE_LST)

# Output files
# FASTA_OUT, QUAL_OUT and ACE_OUT are listed even though they are only written
# when FASTA_EXP or ACE_EXP is set to 1
OUTPUTS     = $(FASTA_OUT) $(QUAL_OUT) $(AMOS_OUT) $(ACE_OUT)

#----------------------------------- IMPORT SEQUENCES -------------------------#

# Step 20 makes sure the directory part of BASE exists before anything is written.
# mkdir -p creates missing parent directories as well (so an output prefix such
# as a/b/c/name works when neither a nor a/b exists yet) and succeeds silently
# when the directory is already there, so no existence test is needed.
## Creating output directory
20: mkdir -p `dirname $(BASE)`

# Step 30 only informs the user: when a bank directory with the target name is
# already present, a warning is printed; nothing is removed or changed here.
## Warning about pre-existing AMOS bank
30: if test -d $(BANK); then /bin/echo "Warning: overwriting existing AMOS bank $(BANK)"; fi

# Step 40 converts the input to AMOS messages with toAmos and pipes them straight
# into bank-transact, which writes them to BANK.
# The two branches differ only in the -q option: it is passed to toAmos when
# QUAL_IN is non-empty and left out otherwise. GOOD_QUAL and BAD_QUAL are passed
# in both branches.
# NOTE(review): in a plain sh pipeline the exit status is that of the last
# command, so a toAmos failure may go unnoticed if bank-transact still succeeds
## Importing the FASTA and QUAL files to an AMOS bank
40: if [ "$(QUAL_IN)" != "" ]; then $(BINDIR)/toAmos -s $(FASTA_IN) -q $(QUAL_IN) -gq $(GOOD_QUAL) -bq $(BAD_QUAL) -o - | $(BINDIR)/bank-transact -c -z -f -b $(BANK) -m -; else $(BINDIR)/toAmos -s $(FASTA_IN) -gq $(GOOD_QUAL) -bq $(BAD_QUAL) -o - | $(BINDIR)/bank-transact -c -z -f -b $(BANK) -m -; fi

#----------------------------------- ASSEMBLE ---------------------------------#

## Calculating overlaps between reads
# The minimizers (approximate k-mers) considered are 20 bp long.
# The defaults quoted here are presumably those of hash-overlap itself (TODO
# confirm): minimum overlap length 40 bp, maximum overlap error 0.06 (6 %).
# They are not used by this script, which always passes its own MIN_LEN (-o)
# and MAX_OVL_ERROR (-x).
50: $(BINDIR)/hash-overlap -B -o $(MIN_LEN) -x $(MAX_OVL_ERROR) -v $(VERBOSE) $(BANK)

## Forming contigs
60: $(BINDIR)/tigger -b $(BANK)

## Determining contig consensus sequences
# The defaults quoted here are presumably those of make-consensus itself (TODO
# confirm): fraction of errors allowed in alignments 0.06 (6 %), minimum number
# of bases by which strings in the multialignment must overlap 5 bp.
# They are not used by this script, which always passes its own MIN_LEN (-o),
# MAX_CONS_ERROR (-e) and ALN_WIGGLE (-w).
70: $(BINDIR)/make-consensus -b -B -o $(MIN_LEN) -e $(MAX_CONS_ERROR) -w $(ALN_WIGGLE) -v $(VERBOSE) $(BANK)

#----------------------------------- EXPORT SEQUENCES AND CONTIGS -------------#

## Exporting results to an AMOS file
#...all objects are exported: universal UNV, contig link CTL, contig edge CTE, fragment FRG, k-mer KMR,  library LIB, matepair MTP, overlap OVL, scaffold SCF, sequence SEQ, contig CTG, read RED, distribution DST, tile TLE
#...this export always runs (it has no switch); the resulting file is also the input of the ACE export in step 100
80: $(BINDIR)/bank-report -b $(BANK) > $(AMOS_OUT)

## Exporting contigs to a FASTA and QUAL file
# Steps 90 to 93 all run only when FASTA_EXP is 1.
# Contigs are named by their IIDs (contigs have no EIDs defined) and the FASTA header line carries the contig details (num reads, coverage).
90: if test $(FASTA_EXP) -eq 1; then $(BINDIR)/bank2fasta -b $(BANK) -iid -q $(QUAL_OUT) -d > $(FASTA_OUT); fi
## Listing singletons IDs
91: if test $(FASTA_EXP) -eq 1; then $(BINDIR)/listReadPlacedStatus -S -I $(BANK) > $(SINGLE_LST); fi
## Exporting singletons to the FASTA file
# Singleton sequences are appended in full range and named by their EIDs, so they cannot clash with the contig IIDs already in the file.
92: if test $(FASTA_EXP) -eq 1; then $(BINDIR)/dumpreads -r -e -I $(SINGLE_LST) $(BANK) >> $(FASTA_OUT); fi
## Exporting singletons to the QUAL file
# Singleton qualities are appended in full range and named by their EIDs, for the same reason as in the FASTA file.
93: if test $(FASTA_EXP) -eq 1; then $(BINDIR)/dumpreads -r -e -q -I $(SINGLE_LST) $(BANK) >> $(QUAL_OUT); fi

# Step 100 runs only when ACE_EXP is 1; it converts the AMOS file written in
# step 80 (AMOS_OUT) into ACE_OUT.
## Export results to an ACE file
100: if test $(ACE_EXP) -eq 1; then $(BINDIR)/amos2ace $(AMOS_OUT) -o $(ACE_OUT); fi

#----------------------------------- CLEAN UP ---------------------------------#

# Step 110 deletes everything listed in TEMPS, i.e. the AMOS bank and the
# singleton list. The -f flag keeps rm quiet when the singleton list was never
# written (it is only produced when FASTA_EXP is 1); -r is needed because the
# bank is a directory.
## Removing temporary files and folders
110: rm -rf $(TEMPS)
