欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

NGS 分析流程 (二)

程序员文章站 2024-03-02 22:38:16
...

一、Call Somatic

1.将bed分割,以提高效率

代码如下(示例):

ref_fa=$1 # b37_Homo_sapiens_assembly19.fasta
target_bed=$2  
split_region_count=$3  # 5
out_path=$4  #  split_interval

gatk SplitIntervals -R ${ref_fa} -L ${target_bed} \
-scatter ${split_region_count} -O ${out_path}

2. Mutect2: Call Somatic

根据上一个的 interval_list 的个数,对 $8 进行赋值;
或者,外层使用 for 循环(示例):

tumor_recal_bam=$1   
ctl_recal_bam=$2 
normal_name=$3  
ref_fa=$4  
out_raw_vcf=$5  
f1r2_stat=$6    
pileup_stat=$7
splited_interval_list=$8   # *.bed  "-L '../data_test/split_interval/0000-scattered.interval_list' -L '../data_test/split_interval/0001-scattered.interval_list' -L '../data_test/split_interval/0002-scattered.interval_list' -L '../data_test/split_interval/0003-scattered.interval_list' -L '../data_test/split_interval/0004-scattered.interval_list']"
task_cpus=$9
core_hotspots_conf='core_hotspots_v1.0.vcf'
b37_af-only-gnomad.raw.sites.vcf='b37_af-only-gnomad.raw.sites.vcf'
b37_custom_pon.vcf='b37_custom_pon.vcf'
b37_small_exac_common_3.vcf='b37_small_exac_common_3.vcf'


gatk Mutect2 -I ${tumor_recal_bam} -I ${ctl_recal_bam} -normal ${normal_name} \
-R ${ref_fa} -alleles ${core_hotspots_conf} \
-germline-resource ${b37_af-only-gnomad.raw.sites.vcf} \
-pon ${b37_custom_pon.vcf} \
--f1r2-tar-gz ${f1r2_stat} -L ${splited_interval_list} \
--native-pair-hmm-threads ${task_cpus} -O ${out_raw_vcf} \
--interval-padding 20 \
--max-reads-per-alignment-start 500 \
--dont-use-soft-clipped-bases true

gatk GetPileupSummaries -I ${tumor_recal_bam} -V ${b37_small_exac_common_3.vcf} \
--interval-set-rule UNION -L $splited_interval_list -O ${pileup_stat}

3.整合过滤

代码如下(示例):

sample_id=$1   
path=$2    #/home/data_test/gatk4/mutect2/
ref_fa=$3   #b37_Homo_sapiens_assembly19.fasta
ref_fa_dict="b37_Homo_sapiens_assembly19.dict"

vcf_items=''
stats_items=''
f1r2_items=""     
pileup_items=""   
for i in `ls ${path}/*vcf | grep -v _merged`
do
    name=${i%.*}
    vcf_items="$vcf_items -I $i"
    stats_items="$stats_items -stats ${i}.stats"
    f1r2_items="$f1r2_items -I ${name}.f1r2_stat.tar.gz"
    pileup_items="$pileup_items -I ${name}.pileup.table"

done

gatk MergeVcfs  ${vcf_items} -O ${path}/${sample_id}.mutect2_merged.vcf
gatk MergeMutectStats  ${stats_items} -O ${path}/${sample_id}.mutect2_merged.vcf.stats
gatk LearnReadOrientationModel  ${f1r2_items} -O ${path}/${sample_id}.read-orientation-model.tar.gz
gatk GatherPileupSummaries  ${pileup_items} --sequence-dictionary ${b37_Homo_sapiens_assembly19.dict} -O ${path}/${sample_id}.getpileupsummaries.table
 
gatk CalculateContamination -I ${path}/${sample_id}.getpileupsummaries.table \
           -tumor-segmentation ${path}/${sample_id}.segments.table \
           -O ${path}/${sample_id}.contamination.table

gatk FilterMutectCalls -R ${ref_fa} -V ${path}/${sample_id}.mutect2_merged.vcf 
--tumor-segmentation ${path}/${sample_id}.segments.table \
--contamination-table ${path}/${sample_id}.contamination.table \
--ob-priors ${path}/${sample_id}.read-orientation-model.tar.gz \
--f-score-beta 1.5 \
--threshold-strategy OPTIMAL_F_SCORE \
-O ${path}/${sample_id}.mutect2_filter_marked.vcf

二、Call Germline

代码如下(示例):

ctl_recal_bam=$1
ref_fa=$2
target_bed=$3
germline_vcf=$4

dbsnp_138.b37.vcf='dbsnp_138.b37.vcf'
chemotherapy_rs_vcf="chemotherapy_rs_v1.0.vcf"


gatk HaplotypeCaller -I ${ctl_recal_bam} -R ${ref_fa} \
-D ${dbsnp_138.b37.vcf} --alleles ${chemotherapy_rs_vcf} \
-L ${target_bed} --dont-use-soft-clipped-bases true -stand-call-conf 10 \
-A AlleleFraction -O ${germline_vcf}


相关标签: NGS