###############################################################################
#
# ______ ____ ____ ____ ____ ______ __ __
# | || \ | || \ | || || | |
# | || D ) | | | _ | | | | || | |
# |_| |_|| / | | | | | | | |_| |_|| ~ |
# | | | \ | | | | | | | | | |___, |
# | | | . \ | | | | | | | | | | |
# |__| |__|\_||____||__|__||____| |__| |____/
#
###############################################################################
#
# Required:
#
# --seqType <string> :type of reads: ( cfa, cfq, fa, or fq )
# --kmer_method <string> :method for k-mer counting. Choose among
# jellyfish, meryl, or inchworm. (and see
# method-specific opts below)
# All kmer counting methods are functionally equivalent, but they
# differ in speed according to: jellyfish > meryl >> inchworm
#
# If paired reads:
# --left <string> :left reads
# --right <string> :right reads
#
# Or, if unpaired reads:
# --single <string> :single reads
#
####################################
## Misc: #########################
#
# --SS_lib_type <string> :Strand-specific RNA-Seq read orientation.
# if paired: RF or FR,
# if single: F or R. (dUTP method = RF)
# See web documentation.
# --output <string> :name of directory for output (will be
# created if it doesn't already exist)
# default( "trinity_out_dir" )
# --CPU <int> :number of CPUs to use, default: 2
# --min_contig_length <int> :minimum assembled contig length to report
# (def=200)
# --jaccard_clip :option, set if you have paired reads and
# you expect high gene density with UTR
# overlap (use FASTQ input file format
# for reads).
# (note: jaccard_clip is an expensive
# operation, so avoid using it unless
# necessary due to finding excessive fusion
# transcripts w/o it.)
# --cite :get the Trinity literature citation and those of tools leveraged within.
#
####################################################
# Inchworm and K-mer counting-related options: #####
#
# --min_kmer_cov <int> :min count for K-mers to be assembled by
# Inchworm (default: 1)
# Jellyfish:
# --max_memory <string> :number of GB of system memory to use for
# k-mer counting by jellyfish (e.g. 10G; be sure to include the 'G' char)
#
# Meryl:
# --meryl_opts <string> :pass on any meryl-specific options
# directly to meryl. Meryl documentation:
# http://sourceforge.net/apps/mediawiki/kmer/index.php?title=Getting_Started_with_Meryl
#
###################################
# Chrysalis-related options: ######
#
# --min_glue <int> :min number of reads needed to glue two inchworm contigs
# together. (default: 2)
# --min_iso_ratio <float> :min fraction of average kmer coverage between two iworm contigs
# required for gluing. (default: 0.05)
# --glue_factor <float> :fraction of max (iworm pair coverage) for read glue support (default: 0.05)
# --max_reads_per_graph <int> :maximum number of reads to anchor within
# a single graph (default: 20000000)
# --max_reads_per_loop <int> :maximum number of reads to read into
# memory at once (default: 1000000)
# --min_pct_read_mapping <int> :minimum percent of a read's kmers that must map to an
# inchworm bundle (aka. component) default: 0
#
# --no_run_chrysalis :stop Trinity after Inchworm and before
# running Chrysalis
# --no_run_quantifygraph :stop Trinity just before running the
# parallel QuantifyGraph computes, to
# leverage a compute farm and massively
# parallel execution.
#####################################
### Butterfly-related options: ####
#
# --bfly_opts <string> :additional parameters to pass through to butterfly
# (see butterfly documentation).
# --max_number_of_paths_per_node <int> :only most supported (N) paths are extended from node A->B,
# mitigating combinatoric path explorations. (default: 10)
# --group_pairs_distance <int> :maximum length expected between fragment pairs (default: 500)
#
# --path_reinforcement_distance <int> :minimum overlap of reads with growing transcript
# path (default: 75)
#
# --lenient_path_extension :require minimal read overlap to allow for path extensions.
# (equivalent to --path_reinforcement_distance=1)
#
# --bflyHeapSpaceMax <string> :java max heap space setting for butterfly
# (default: 20G) => yields command
# 'java -Xmx20G -jar Butterfly.jar ... $bfly_opts'
# --bflyHeapSpaceInit <string> :java initial heap space setting for
# butterfly (default: 1G) => yields command
# 'java -Xms1G -jar Butterfly.jar ... $bfly_opts'
# --bflyGCThreads <int> :threads for garbage collection
# (default, not specified, so java decides)
# --bflyCPU <int> :CPUs to use (default will be normal
# number of CPUs; e.g., 2)
# --bflyCalculateCPU :Calculate CPUs based on 80% of max_memory
# divided by bflyHeapSpaceMax
# --no_run_butterfly :stops after the Chrysalis stage. You'll
# need to run the Butterfly computes
# separately, such as on a computing grid.
# Then, concatenate all the Butterfly assemblies by running:
# 'find trinity_out_dir/ -name "*allProbPaths.fasta"
# -exec cat {} + > trinity_out_dir/Trinity.fasta'
#
#################################
# Grid-computing options: #######
#
# --grid_computing_module <string> : Perl module in /Users/bhaas/sVN/trinityrnaseq/trunk/PerlLibAdaptors/
# that implements 'run_on_grid()'
# for naively parallel cmds. (eg. 'BroadInstGridRunner')
#
#
###############################################################################
#
# *Note, a typical Trinity command might be:
# Trinity.pl --seqType fq --kmer_method jellyfish --max_memory 100G --left reads_1.fq --right reads_2.fq --CPU 6
# Or, using Meryl
# Trinity.pl --seqType fq --kmer_method meryl --left reads_1.fq --right reads_2.fq --CPU 6
#
# see: /Users/bhaas/sVN/trinityrnaseq/trunk/sample_data/test_Trinity_Assembly/
# for sample data and 'runMe.sh' for example Trinity execution
# For more details, visit: http://trinityrnaseq.sf.net
#
###############################################################################