==Parameter settings for variant calling== In general, we use the best-practice parameters recommended for each algorithm. Here are example commands for the Illumina HiSeq 2000 platform: -------- Aligning For BWA-mem bwa mem -M -t (#ofProcessor) -R 'Read group' -p human_g1k_v37.fasta DataSet_1.fastq DataSet_2.fastq > DataSet.sam For Bowtie2 bowtie2 -p (#ofProcessor) -x human_g1k_v37 -1 DataSet_1.fastq -2 DataSet_2.fastq -S DataSet.sam --rg-id Readgroup ID --rg XX:ReadGroups For novoalign novoalign -d human_g1k_v37.nix -f DataSet_1.fastq DataSet_2.fastq -o SAM -c (#ofProcessor) -t 20,3.5 -H 15 --hlimit 8 --------- BQSR *Bamfile index java -jar -Xmx16g picard.jar BuildBamIndex $bamindexJar INPUT=bamfile *Markduplicate java -jar -Xmx16g picard.jar MarkDuplicates INPUT=bamfile OUTPUT='Markduplicated Bam file' METRICS_FILE=metrics.txt REMOVE_DUPLICATES=true CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT *indel realigner java $Memopt -jar picard.jar ReorderSam INPUT='Markduplicated Bam file' OUTPUT=$orderfile REFERENCE=human_g1k_v37.fasta java -jar -Xmx16g picard.jar BuildBamIndex INPUT=$orderfile java -jar -Xmx16g GenomeAnalysisTK.jar -T RealignerTargetCreator -R human_g1k_v37.fasta -I $orderfile -known 1000G_phase1.indels.b37.vcf -known Mills_and_1000G_gold_standard.indels.b37.vcf -o $intervalfile java -jar -Xmx16g GenomeAnalysisTK.jar -T IndelRealigner -R human_g1k_v37.fasta -I $orderfile -targetIntervals $intervalfile -known 1000G_phase1.indels.b37.vcf -known Mills_and_1000G_gold_standard.indels.b37.vcf -o $realignedbam *base recalibrator java -jar -Xmx16g GenomeAnalysisTK.jar -T BaseRecalibrator -R human_g1k_v37.fasta -I $realignedbam -knownSites dbsnp_138.b37.vcf -knownSites 1000G_phase1.indels.b37.vcf -knownSites Mills_and_1000G_gold_standard.indels.b37.vcf -o $recaldata --solid_nocall_strategy PURGE_READ --solid_recal_mode SET_Q_ZERO_BASE_N java -jar -Xmx16g GenomeAnalysisTK.jar -T BaseRecalibrator -R human_g1k_v37.fasta -I $realignedbam -knownSites 
dbsnp_138.b37.vcf -knownSites 1000G_phase1.indels.b37.vcf -knownSites Mills_and_1000G_gold_standard.indels.b37.vcf -BQSR $recaldata -o $postrecaldata --solid_nocall_strategy PURGE_READ --solid_recal_mode SET_Q_ZERO_BASE_N java -jar -Xmx16g GenomeAnalysisTK.jar -T AnalyzeCovariates -R human_g1k_v37.fasta -before $recaldata -after $postrecaldata -plots $recalpdf java -jar $Memopt GenomeAnalysisTK.jar -T PrintReads -R human_g1k_v37.fasta -I $realignedbam -BQSR $recaldata -o 'BQSRed Bam file' --------- Variant calling For Samtools samtools mpileup -ugf human_g1k_v37.fasta 'BQSRed Bam file' | bcftools call -vmO z -o Output.vcf For Freebayes freebayes --fasta-reference human_g1k_v37.fasta 'Markduplicated Bam file' > Output.vcf For GATK HC java -Xmx16g -jar $GATKjar -R human_g1k_v37.fasta -T HaplotypeCaller -I 'BQSRed Bam file' --genotyping_mode DISCOVERY --dbsnp dbsnp_138.b37.vcf -o Output.vcf