#!/usr/bin/runAmos -C
## Validate an assembly in AMOS bank format

#?
#? Amosvalidate is a pipeline to automatically detect mis-assemblies. It takes
#? an AMOS bank as input and calculates a battery of known and novel assembly
#? quality metrics such as matepair happiness, correlated SNP detection, read
#? coverage, singleton breakpoint analysis and kmer composition. The output can
#? be visualized with Hawkeye.
#?
#? Usage:
#?     amosvalidate AMOS_BANK [options]
#?
#? Options:
#?     -D CLEAR_RANGE=<n>   Use the clear range of the reads? (0:no 1:yes,
#?                            default: 1)
#?

# Directory holding the AMOS executables invoked by the numbered steps below.
BINDIR=/usr/bin
# Aligner run by step 620. This must name a real executable: with the shell
# no-op ":" here, step 620 succeeds without running anything and the
# $(PREF).delta file read by step 700 is never created. "nucmer" is resolved
# through PATH, so a missing aligner now fails at step 620 instead of being
# silently skipped.
# NOTE(review): the help text shows -D VAR=value overrides for CLEAR_RANGE;
# presumably -D NUCMER=/path/to/nucmer works the same way - confirm.
NUCMER=nucmer

# One mandatory argument is expected
EXPECT 1

# PREF is PREFIX passed through strip with ".bnk" as the first argument, and
# BANK appends ".bnk" to PREF again.
# NOTE(review): presumably PREFIX is the AMOS_BANK command-line argument and
# strip removes a trailing ".bnk", so the bank may be given with or without
# the suffix - confirm against the runAmos documentation.
PREF   = $(strip .bnk PREFIX)
BANK   = $(PREF).bnk
# The bank is the only declared input of the pipeline.
INPUTS = $(BANK)

# Default parameters
# CLEAR_RANGE is the value the help text documents as settable with
# -D CLEAR_RANGE=<n>; it is only consulted when CR_CMD is computed for step 610.
CLEAR_RANGE = 1

# Step 100 runs bank2fasta on the bank and redirects its standard output to
# $(PREF).fasta. That file is read again by step 620 (alignment reference)
# and step 810 (standard input of kmer-cov).
## Creating FastA
100: $(BINDIR)/bank2fasta -b $(BANK) > $(PREF).fasta

# Steps 300 and 310 run the same asmQC command twice (-recompute -update,
# -numsd 2); step 320 runs asmQC once more with -feat and -numsd 3.
# NOTE(review): the repeated 300/310 invocation looks intentional (a second
# pass over values updated by the first) - confirm before removing either one.
# Step 330 writes the cestat-cov output to $(PREF).ce.feat, and step 340 loads
# that file into the bank with loadFeatures.
## Analyzing mate-pairs
300: $(BINDIR)/asmQC -b $(BANK) -scaff -recompute -update -numsd 2
310: $(BINDIR)/asmQC -b $(BANK) -scaff -recompute -update -numsd 2
320: $(BINDIR)/asmQC -b $(BANK) -scaff -feat -numsd 3 -shortcvg -1 -longcvg -1
330: $(BINDIR)/cestat-cov -i -f 4 $(BANK) > $(PREF).ce.feat
340: $(BINDIR)/loadFeatures -i $(BANK) $(PREF).ce.feat

# Three-stage chain: step 400 writes the analyzeSNPs output to $(PREF).snps,
# step 410 feeds that file to clusterSnps and writes $(PREF).snp.feat, and
# step 420 loads the resulting feature file into the bank.
## Analyzing SNPs
400: $(BINDIR)/analyzeSNPs -i -b $(BANK) -S -cumqv 40 -minsnps 2 -r -H > $(PREF).snps
410: $(BINDIR)/clusterSnps $(PREF).snps > $(PREF).snp.feat
420: $(BINDIR)/loadFeatures -i $(BANK) $(PREF).snp.feat

# Step 500 writes the analyze-read-depth output (run with -c 1000 -x 3) to
# $(PREF).depth.feat; step 510 loads that feature file into the bank.
## Analyzing read coverage
500: $(BINDIR)/analyze-read-depth -i $(BANK) -c 1000 -x 3 > $(PREF).depth.feat
510: $(BINDIR)/loadFeatures -i $(BANK) $(PREF).depth.feat

# Step 600 writes the listReadPlacedStatus output to $(PREF).singletons.
# CR_CMD is computed by a shell snippet: it becomes "-r" when CLEAR_RANGE
# equals 0 and is empty otherwise, and is passed to dumpreads in step 610,
# which writes $(PREF).singletons.seq.
# NOTE(review): the backslash in "$(CLEAR_RANGE\)" looks like deliberate
# escaping for the runAmos parser rather than a typo - confirm before changing.
# Step 620 runs $(NUCMER) (defined near the top of this file) with
# --prefix=$(PREF) on the step-100 FASTA and the singleton sequences.
## Align singleton reads
600: $(BINDIR)/listReadPlacedStatus -S -E $(BANK) > $(PREF).singletons
CR_CMD = $(shell if [ $(CLEAR_RANGE\) -eq 0 ]; then /bin/echo -n '-r'; fi)
610: $(BINDIR)/dumpreads $(CR_CMD) -E $(PREF).singletons $(BANK) > $(PREF).singletons.seq
620: $(NUCMER) --prefix=$(PREF) $(PREF).fasta $(PREF).singletons.seq

# Step 700 runs casm-breaks on $(PREF).delta and names $(PREF).break.fea as
# its -F argument; step 710 passes that file to bank-transact with -m.
# NOTE(review): no step names $(PREF).delta as an output explicitly;
# presumably it is produced by the step-620 run with --prefix=$(PREF) -
# confirm. If step 620 does not actually align, this step has no input.
## Analyzing singleton alignment breakpoints
700: $(BINDIR)/casm-breaks -b $(BANK) -t 100 -c 2 -F $(PREF).break.fea $(PREF).delta 
710: $(BINDIR)/bank-transact -b $(BANK) -m $(PREF).break.fea

# Step 800 writes the count-kmers output (-m 22) to $(PREF).22.n22mers.
# Step 810 runs kmer-cov with that file as an argument and the step-100 FASTA
# on standard input, writing $(PREF).nkmer.feat. Step 820 loads the features.
# NOTE(review): step 820 is the only loadFeatures call that also passes -u;
# the meaning of that flag is not visible in this file - confirm it is intended.
## Analyzing kmer-coverage
800: $(BINDIR)/count-kmers -n $(BANK) -m 22 > $(PREF).22.n22mers
810: $(BINDIR)/kmer-cov -A -F -L 1000 $(PREF).22.n22mers < $(PREF).fasta > $(PREF).nkmer.feat
820: $(BINDIR)/loadFeatures -u -i $(BANK) $(PREF).nkmer.feat

# Step 1000 dumps the features stored in the bank, sorts them, and writes
# $(PREF).all.feat. Step 1010 derives $(PREF).suspicious.feat from that file
# with suspiciousfeat2region, and step 1020 loads the result into the bank.
# NOTE(review): in "sort -k1 -nk3" the -n is a standalone option, so it
# presumably applies numeric comparison to both keys (field 1 onward, then
# field 3 onward) - confirm this matches the order suspiciousfeat2region expects.
## Combine features
1000: $(BINDIR)/dumpFeatures -i $(BANK) | sort -k1 -nk3 > $(PREF).all.feat
1010: $(BINDIR)/suspiciousfeat2region $(PREF).all.feat > $(PREF).suspicious.feat
1020: $(BINDIR)/loadFeatures -i $(BANK) $(PREF).suspicious.feat
