Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...


If you have paired-end UMI-tagged FASTQs, you can run the ACCESS FASTQ-to-BAM workflow with the following steps:
#python3-conda-virtualenv
conda create --name my_project python=3.9
conda activate my_project

#python3-conda-virtualenv
conda create --name my_project python=3.9
conda activate my_project

#bash-prompt-example
(my_project)[server]$

git clone --recursive --branch 3.0.4 https://github.com/msk-access/chip-var.git

#python3
cd chip-var
pip3 install -r requirements.txt

module load singularity

conda install -c conda-forge nodejs

$ cwltool --make-template chip-var.cwl > inputs.yaml

cwltool chip-var.cwl inputs.yaml

usage: chip-var.cwl [-h] --reference_fasta REFERENCE_FASTA --input_bam_case INPUT_BAM_CASE
[--bedfile BEDFILE] --sample_name SAMPLE_NAME
[--vardict_allele_frequency_threshold VARDICT_ALLELE_FREQUENCY_THRESHOLD]
[--retain_info RETAIN_INFO] --concat_output_name CONCAT_OUTPUT_NAME
[--vardict_output_vcf_name VARDICT_OUTPUT_VCF_NAME]
--input_cosmicprevalenceDB_vcf INPUT_COSMICPREVALENCEDB_VCF
--input_cosmicCountDB_vcf INPUT_COSMICCOUNTDB_VCF
[--snpsift_prevalOpName SNPSIFT_PREVALOPNAME]
[--snpsift_countOpName SNPSIFT_COUNTOPNAME] --input_complexity_bed
INPUT_COMPLEXITY_BED
[--output_complexity_filename OUTPUT_COMPLEXITY_FILENAME]
[--column_name_complexity COLUMN_NAME_COMPLEXITY] --oncoKbApiToken
ONCOKBAPITOKEN --opOncoKbMafName OPONCOKBMAFNAME
[--output_vcf2mafName OUTPUT_VCF2MAFNAME] --input_mappability_bed
INPUT_MAPPABILITY_BED
[--output_mappability_filename OUTPUT_MAPPABILITY_FILENAME]
[--column_name_mappability COLUMN_NAME_MAPPABILITY]
--input_47kchpd_tsv_file INPUT_47KCHPD_TSV_FILE --input_hotspot_tsv_file
INPUT_HOTSPOT_TSV_FILE --input_panmeloid_tsv_file INPUT_PANMELOID_TSV_FILE
[job_order]
chip-var
positional arguments:
job_order Job input json file
options:
-h, --help show this help message and exit
--reference_fasta REFERENCE_FASTA
--input_bam_case INPUT_BAM_CASE
--bedfile BEDFILE
--sample_name SAMPLE_NAME
--vardict_allele_frequency_threshold VARDICT_ALLELE_FREQUENCY_THRESHOLD
--retain_info RETAIN_INFO
--concat_output_name CONCAT_OUTPUT_NAME
--vardict_output_vcf_name VARDICT_OUTPUT_VCF_NAME
--input_cosmicprevalenceDB_vcf INPUT_COSMICPREVALENCEDB_VCF
--input_cosmicCountDB_vcf INPUT_COSMICCOUNTDB_VCF
--snpsift_prevalOpName SNPSIFT_PREVALOPNAME
--snpsift_countOpName SNPSIFT_COUNTOPNAME
--input_complexity_bed INPUT_COMPLEXITY_BED
--output_complexity_filename OUTPUT_COMPLEXITY_FILENAME
--column_name_complexity COLUMN_NAME_COMPLEXITY
--oncoKbApiToken ONCOKBAPITOKEN
--opOncoKbMafName OPONCOKBMAFNAME
--output_vcf2mafName OUTPUT_VCF2MAFNAME
--input_mappability_bed INPUT_MAPPABILITY_BED
--output_mappability_filename OUTPUT_MAPPABILITY_FILENAME
--column_name_mappability COLUMN_NAME_MAPPABILITY
--input_47kchpd_tsv_file INPUT_47KCHPD_TSV_FILE
--input_hotspot_tsv_file INPUT_HOTSPOT_TSV_FILE
--input_panmeloid_tsv_file INPUT_PANMELOID_TSV_FILE

toil-cwl-runner chip-var.cwl inputs.yaml

TMPDIR=$PWD
TOIL_LSF_ARGS='-W 3600 -P test_nucleo -app anyOS -R select[type==CentOS7]'
_JAVA_OPTIONS='-Djava.io.tmpdir=/scratch/'
SINGULARITY_BINDPATH='/scratch:/scratch:rw'
toil-cwl-runner \
--singularity \
--logFile ./example.log \
--jobStore ./example_jobStore \
--batchSystem lsf \
--workDir ./example_working_directory/ \
--outdir $PWD \
--writeLogs ./example_log_folder/ \
--logLevel DEBUG \
--stats \
--retryCount 2 \
--disableCaching \
--disableChaining \
--preserve-environment TOIL_LSF_ARGS TMPDIR \
--maxLogFileSize 20000000000 \
--cleanWorkDir onSuccess \
chip-var.cwl \
inputs.yaml \
> toil.stdout \
2> toil.stderr &

reference_fasta:
class: File
path: >-
/juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta
bedfile:
class: File
path: >-
/work/bergerm1/bergerlab/charalk/projects/nucleo_qc/qc_generation_testing/CH_target_3bp.bed
input_bam_case:
class: File
path: >-
/path/to/bam/file.bam
input_cosmicCountDB_vcf:
class: File
path: >-
/work/cch/production/resources/cosmic/versions/v96/CosmicCodingMuts.vcf.gz
input_cosmicprevalenceDB_vcf:
class: File
path: >-
/work/cch/production/resources/cosmic/versions/v96/CosmicCodingMuts_GRCh37_processed.vcf.gz
input_complexity_bed:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/rmsk_mod.bed
input_mappability_bed:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/wgEncodeDacMapabilityConsensusExcludable_4cols.bed
oncoKbApiToken:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/apiToken.txt
input_47kchpd_tsv_file:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/chpd47k_prevalence.tsv
input_hotspot_tsv_file:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/hotspots_47kchpd.tsv
input_panmeloid_tsv_file:
class: File
path: >-
/work/bergerm1/bergerlab/sivaprk/chipvar_resources/pan_myeloid_ks.tsv
opOncoKbMafName: sampleName_oncokb.maf
output_complexity_filename: sampleName_complexity.maf
output_mappability_filename: sampleName_mappability.maf
output_vcf2mafName: sampleName_vcf2maf.maf
concat_output_name: sampleName_concat.vcf.gz
retain_info: CNT,TUMOR_TYPE
sample_name: sampleName
vardict_allele_frequency_threshold: 0
vardict_output_vcf_name: sampleName_vardict.vcf
snpsift_countOpName: sampleName_snpsift_cosmic.vcf
snpsift_prevalOpName: sampleName_snpsift_preval.vcf
column_name_complexity: complexity
column_name_mappability: mapability

Files present after workflow is finished
/usr/bin/vardict/bin/VarDict -f "0" \
-c "1" \
-g "5" \
-E "3" \
-S "2" \
-G /path/to/reference_fasta.ext \
-N sample_name \
-b /path/to/bam/file.bam \
-bedfile /path/to/bedfile.ext \
> vardict_app_output.vcf

Rscript /usr/bin/vardict/bin/teststrandbias.R > output_teststrandbias.var \
< /path/to/input_vardict.var

perl /usr/bin/vardict/bin/var2vcf_valid.pl \
-N sample_name \
-f 0 \
> output_vcf_name \
< /path/to/input_vardict.var

pv vardict single filter -i /path/to/input_vardict.vcf \
--tsampleName sample-name \
-ad 1 \
-fg false \
-mq 0 \
-tnr 1 \
-dp 20 \
-vf 5e-05

bgzip -c /path/to/input_single_filter.vcf > sample_name.vcf.gz

tabix -p vcf sample_name.vcf.gz

#BCFTOOLS SORT
bcftools sort -O z -o sample_name_sorted.vcf.gz sample_name.vcf.gz

bcftools norm --check-ref s \
-m + \
-O z \
-o sample_name_norm.vcf.gz \
-f /path/to/ref_fasta.fa \
sample_name_sorted.vcf.gz

bcftools concat -a -O z \
-o sample_name_merged.vcf \
sample_name_sorted.vcf.gz sample_name_complex_sorted.vcf.gz

java -jar /snpEff/SnpSift.jar annotate -c snpeff.config \
/cosmicData/CosmicCodingMuts.vcf.gz sample_name_merged.vcf > sample_name_prevalence.vcf

perl /opt/vcf2maf-1.6.21/vcf2maf.pl \
--output-maf sample_name_vcf2maf.maf \
--custom-enst /regions_of_interest/current/dmp_ACCESS-panelA-v1-isoform-overrides \
--maf-center mskcc.org \
--min-hom-vaf 0.7 \
--ncbi-build GRCh37 \
--ref-fasta /path/to/ref_fasta.fa \
--retain-info "set,TYPE,FAILURE_REASON" \
--species homo_sapiens_merged \
--tumor-id sample_name \
--vcf-tumor-id sample_name \
--vep-path /usr/local/bin/ \
--vep_data /.vep/ \
--input-vcf sample_name_snpsift.vcf

python3 /oncokb/MafAnnotator.py -o sample_name_oncokb.maf \
-b /path/to/API_token.txt \
-a TRUE \
-i sample_name_vcf2maf.maf

pv maf annotate mafbybed -m sample_name_oncokb.maf \
-b /path/to/bed_file.bed \
-c "column_name" \
-o sample_name_mafbybed.maf

pv maf annotate mafbytsv -m sample_name_mafbybed.maf \
-b /path/to/tsv_file.tsv \
-c "column_name" \
-o sample_name_mafbytsv.maf

pv maf tag cmo_ch -m sample_name_mafbytsv.maf \
-o sample_name_maf_tagged.maf

pv maf filter cmo_ch -m sample_name_maf_tagged.maf \
-o sample_name_maf_filter.maf

Versions of tools in order of process