/data/aryee/pub/genomes/bowtie2_index_hg38/hg38
/data/aryee/pub/genomes/bowtie2_index/hg19
/data/aryee/pub/genomes/mm10/bt2/mm10
/data/aryee/pub/genomes/bowtie2_index_mm9/mm9
hg19 reference genome with the rCRS mitochondrial genome sequence:
/data/aryee/pub/genomes/cellranger/refdata-cellranger-atac-hg19-1.0.0/fasta/hg19_10X
/data/aryee/pub/genomes/star/hg19_chr/
/data/aryee/pub/genomes/mm10/STAR/
/data/molpath/genomes/mm9/STAR_index_mm9/
Run each sample as a single, multi-threaded job:
# bsub's own options (-q, -n 8) come first; everything from `proatac`
# onward is the command that the submitted job runs.
bsub -q big-multi -n 8 \
  proatac bulk \
  -i fastq \
  -n TF1-BC9-Proatac \
  -o TF1-BC9-Proatac \
  -bi /data/aryee/pub/genomes/cellranger/refdata-cellranger-atac-hg19-1.0.0/fasta/hg19_10X \
  -rg hg19 \
  -c 8
Run samples through multiple jobs
# The outer bsub only launches proatac; proatac is handed its own
# submission command via --cluster and a job limit via --jobs.
bsub -q big \
  proatac bulk \
  -i fastq \
  -n CLL-BulkATAC_Patient1 \
  -o CLL-BulkATAC_Patient1_11Dec2018 \
  -bi /data/aryee/pub/genomes/cellranger/refdata-cellranger-atac-hg19-1.0.0/fasta/hg19_10X \
  --cluster "bsub -q big -n 4 -o /dev/null" \
  --jobs 6 \
  -rg hg19 \
  -c 24
# Single-end alignment: bowtie2 -> BAM -> coordinate-sorted BAM.
# The subshell groups the pipeline so stderr from all three stages
# lands in one log file.
(
  bowtie2 -p 4 -x /data/aryee/pub/genomes/bowtie2_index/hg19 -U SRR1593986_1.fastq.gz \
    | samtools view -bS - \
    | samtools sort -@ 4 - -o SRR1593986.all.sorted.bam
) 2> SRR1593986.log
# Paired-end alignment of the trimmed reads for one sample:
# bowtie2 -> BAM -> sorted BAM, with stderr of every stage collected in
# "${sample}.log". Expects `sample` to hold the sample ID already.
# Refuse to run with an unset/empty ID; otherwise the command would
# silently look for "_1.trim.fastq.gz" and write ".log".
: "${sample:?set sample to the sample ID first}"
(
  bowtie2 -X 2000 -p 4 -x /data/aryee/pub/genomes/bowtie2_index/hg19 \
    --rg-id "$sample" \
    -1 "${sample}_1.trim.fastq.gz" -2 "${sample}_2.trim.fastq.gz" \
    | samtools view -bS - \
    | samtools sort -@ 4 - -o "${sample}.all.sorted.bam"
) 2> "${sample}.log"
#!/bin/bash
# Align one paired-end sample with STAR in two-pass mode with per-gene
# read counting, then delete the SAM alignment and STAR's intermediate
# directories.
#
# Usage: <script> <fq1> <fq2> <outname>
#   fq1      gzip-compressed FASTQ, first mates (STAR reads it via zcat)
#   fq2      gzip-compressed FASTQ, second mates
#   outname  sample label; all STAR output is written as counts/<outname>*
#
# Exits non-zero if arguments are missing or STAR fails.
if [[ $# -lt 3 ]]; then
  printf 'Usage: %s <fq1> <fq2> <outname>\n' "${0##*/}" >&2
  exit 2
fi
fq1=$1
fq2=$2
outname=$3

# The output prefix points into counts/, so make sure it exists.
mkdir -p counts || exit 1

# Stop here on failure: running the cleanup afterwards would make a
# broken run look successful (rm -rf always returns 0).
STAR --runMode alignReads \
  --readFilesCommand zcat \
  --outFilterMultimapNmax 1 \
  --outFilterMatchNmin 35 \
  --quantMode GeneCounts \
  --twopassMode Basic \
  --outFileNamePrefix "counts/${outname}" \
  --genomeDir /PHShome/ma695/work/projects/sc_dnmt_ko_RNAalign/refdata-cellranger-GRCh38-1.2.0/star \
  --sjdbGTFfile /PHShome/ma695/work/projects/sc_dnmt_ko_RNAalign/refdata-cellranger-GRCh38-1.2.0/genes/genes.gtf \
  --readFilesIn "$fq1" "$fq2" || {
    printf 'STAR failed for %s\n' "$outname" >&2
    exit 1
  }

# Drop the bulky alignment and STAR's scratch directories; the remaining
# counts/<outname>* files are kept.
rm -- "counts/${outname}Aligned.out.sam"
rm -rf -- "counts/${outname}_STARgenome"
rm -rf -- "counts/${outname}_STARpass1"
For quantifying hg19 genes and extracting mitochondrial reads:
#!/bin/bash
# For every sample ID in a list file: align the paired-end reads to hg19
# with STAR (with per-gene counting), pull the reads mapped to chrM into
# a sorted, indexed BAM, and remove the bulky intermediates.
#
# Usage: <script> <id_list_file>
#   id_list_file  whitespace/newline-separated sample IDs; each ID needs
#                 fastq/<ID>_R1_001.fastq.gz and fastq/<ID>_R2_001.fastq.gz
#
# Outputs: mito_bam/<ID>.mito.bam (+ index), starlogs/<ID>Log.final.out.
# NOTE(review): counts/ is created but nothing is written into it; STAR's
# other <ID>-prefixed outputs stay in the working directory -- confirm
# whether the gene-count table was meant to be moved there.
#
# Exits non-zero if any sample failed.

# Without pipefail a failing `samtools view` is hidden by `samtools sort`.
set -o pipefail

if [[ $# -lt 1 || ! -r $1 ]]; then
  printf 'Usage: %s <id_list_file>\n' "${0##*/}" >&2
  exit 2
fi

# Split the list on any whitespace without glob-expanding the IDs.
# read returns non-zero at end of input even though the array is filled.
read -r -d '' -a SRR_IDS < "$1" || true

mkdir -p starlogs mito_bam counts || exit 1

status=0
for SRR in "${SRR_IDS[@]}"; do
  printf '%s\n' "$SRR"

  # On failure, skip the sample and leave STAR's files in place for
  # inspection instead of running samtools/rm on missing output.
  if ! STAR --runMode alignReads \
    --readFilesCommand zcat \
    --quantMode GeneCounts \
    --genomeDir /data/aryee/pub/genomes/star/hg19_chr/ \
    --sjdbGTFfile /data/aryee/caleb/greenleaf_scRNAseq/refdata-cellranger-hg19-1.2.0/genes/gencode.v19.annotation.gtf \
    --readFilesIn "fastq/${SRR}_R1_001.fastq.gz" "fastq/${SRR}_R2_001.fastq.gz" \
    --outFileNamePrefix "${SRR}"; then
    printf 'STAR failed for %s; skipping\n' "$SRR" >&2
    status=1
    continue
  fi

  # Extract mitochondria: SAM header first, then every record whose
  # reference name (column 3) is chrM.
  samtools view -H "${SRR}Aligned.out.sam" > "${SRR}.sam"
  awk '$3 == "chrM"' "${SRR}Aligned.out.sam" >> "${SRR}.sam"
  if samtools view -Sb "${SRR}.sam" | samtools sort > "mito_bam/${SRR}.mito.bam"; then
    samtools index "mito_bam/${SRR}.mito.bam" || status=1
  else
    printf 'chrM extraction failed for %s\n' "$SRR" >&2
    status=1
  fi
  #samtools view "mito_bam/${SRR}.mito.bam" | wc -l > "mito_bam/${SRR}.mitoreads.txt"

  # Keep STAR's summary log; remove the alignment and the other logs.
  mv -- "${SRR}Log.final.out" "starlogs/${SRR}Log.final.out"
  rm -- "${SRR}Aligned.out.sam"
  rm -- "${SRR}.sam"
  rm -- "${SRR}Log.out"
  rm -- "${SRR}SJ.out.tab"
  rm -- "${SRR}Log.progress.out"
done

exit "$status"
Working directory: /data/aryee/caleb/japan_treg/rna
#!/bin/bash
# Align one single-end sample to mm9 with STAR in two-pass mode with
# per-gene read counting, then delete the SAM alignment and STAR's
# intermediate directories.
#
# Usage: <script> <fq1> <outname>
#   fq1      gzip-compressed FASTQ (STAR reads it via zcat)
#   outname  sample label; all STAR output is written as counts/<outname>*
#
# Exits non-zero if arguments are missing or STAR fails.
if [[ $# -lt 2 ]]; then
  printf 'Usage: %s <fq1> <outname>\n' "${0##*/}" >&2
  exit 2
fi
fq1=$1
outname=$2

# The output prefix points into counts/, so make sure it exists.
mkdir -p counts || exit 1

# Stop here on failure: running the cleanup afterwards would make a
# broken run look successful (rm -rf always returns 0).
STAR --runMode alignReads \
  --readFilesCommand zcat \
  --outFilterMultimapNmax 1 \
  --outFilterMatchNmin 35 \
  --quantMode GeneCounts \
  --twopassMode Basic \
  --outFileNamePrefix "counts/${outname}" \
  --genomeDir /data/aryee/pub/genomes/star/mm9 \
  --sjdbGTFfile /data/aryee/pub/genomes/star/mm9/mm9.gtf \
  --readFilesIn "$fq1" || {
    printf 'STAR failed for %s\n' "$outname" >&2
    exit 1
  }

# Drop the bulky alignment and STAR's scratch directories; the remaining
# counts/<outname>* files are kept.
rm -- "counts/${outname}Aligned.out.sam"
rm -rf -- "counts/${outname}_STARgenome"
rm -rf -- "counts/${outname}_STARpass1"
Align multiple single-end read files to the bowtie2 mm10 reference index and produce BAM files:
# Space-separated sample IDs; each needs <ID>_1.fastq.gz in the current
# directory and produces an unsorted <ID>.bam next to it.
SAMPLES="SRR185879 SRR185880"
for SAMPLE in $SAMPLES; do
  bowtie2 -x /data/aryee/pub/genomes/mm10/bt2/mm10 -U "${SAMPLE}_1.fastq.gz" \
    | samtools view -bS - \
    > "${SAMPLE}.bam"
done
Note: You can't just bsub the alignment with bowtie, as some of the arguments will be parsed; stick this puppy in a shell script.