# Values substituted for the {cores} wildcard in every target path below.
cores_choices = [1, 2, 4]


def _per_core(template):
    """Return `template` expanded once for each entry of `cores_choices`."""
    return expand(template, cores=cores_choices)


# Target lists, one per benchmarked tool; each holds one path per core count.
chromap = _per_core("output/result.chromap.{cores}.pairs")
juicer = _per_core("output/result.juicer.{cores}.pairs")
hicexplorer = _per_core("output/result.hicexplorer.{cores}.cool")
fanc_bwa = _per_core("output/result.fanc_bwa.{cores}.pairs")
fanc_bowtie = _per_core("output/result.fanc_bowtie2.{cores}.pairs")
hicpro = _per_core("output/result.hicpro.{cores}.pairs")
tadbit = _per_core("output/result.tadbit.{cores}.reads")
tadbit_bowtie = _per_core("output/result.tadbit_bowtie2.{cores}.reads")
pairtools = _per_core("output/result.pairtools.{cores}.pairs")
pairtools_bwamem2 = _per_core("output/result.pairtools_bwamem2.{cores}.pairs")

# mapping only:
bowtie = _per_core("output/result.bowtie.{cores}.sam")
bwamem = _per_core("output/result.bwamem.{cores}.sam")
bwamem2 = _per_core("output/result.bwamem2.{cores}.sam")

# Default target: requests the result files of every currently enabled tool.
# Each name below is a list of paths; Snakemake flattens positional list inputs,
# so the resulting input set and its order match a plain concatenation.
# `bwamem` and `juicer` are not part of the requested set.
rule all:
    input:
        tadbit,
        tadbit_bowtie,
        bowtie,
        bwamem2,
        pairtools,
        pairtools_bwamem2,
        chromap,
        hicpro,
        fanc_bowtie,
        fanc_bwa,
        hicexplorer,
    # Target groups that can be added to / removed from the list above:
    # + bowtie + bwamem + bwamem2
    # + juicer
    # + pairtools + pairtools_bwamem2 + chromap + hicpro + fanc_bowtie + fanc_bwa + hicexplorer

# hicexplorer # heavy because it creates coolers
# juicer # run separately with the number of cores equal to the tested value, because multiple juicer runs cannot share the same path

# Run a single benchmarked tool invocation, selected by the {mode} wildcard.
#
# Wildcards:
#   mode   - which tool/pipeline branch of the `run` block to execute.
#   cores  - thread count; declared as the rule's `threads` and passed to the
#            tool's own threading option in every branch.
#   format - extension of the produced output file.
#
# Every job is benchmarked 5 times (see `repeat(..., 5)`); timings are written
# to benchmarks/result.{mode}.{cores}.{format}.benchmark.
# An unrecognised {mode} raises ValueError instead of silently doing nothing.
rule test:
    input:
        fastq1="data/SRR6107789_1.fastq.gz",
        fastq2="data/SRR6107789_2.fastq.gz",
        genomefile="data/hg38/hg38.fa",
        chromsizes="data/hg38/hg38.fa.sizes",
        genome_index_bwa="data/hg38/index/bwa/hg38.fa",
        genome_index_chromap="data/hg38/index/chromap/hg38",
        genome_index_bwamem2="data/hg38/index/bwa-mem2/hg38",
        genome_index_bowtie2="data/hg38/index/bowtie2/hg38",
        genome_index_gem="data/hg38/index/gem/hg38.gem",
        genome_rsites="data/hg38/hg38.DpnII.bed",
    threads: lambda wildcards: int(wildcards.cores),
    output:
        file="output/result.{mode}.{cores}.{format}",
    benchmark:
        repeat(
            "benchmarks/result.{mode}.{cores}.{format}.benchmark",
            5,
        )
    run:
        if wildcards.mode == "pairtools_bwamem2":
            # bwa-mem2 alignment piped through pairtools parse | sort | dedup.
            shell("""
                soft/bwa-mem2/bwa-mem2 mem -t {wildcards.cores} -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} | \
                    soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \
                    soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \
                    soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \
                    -o {output.file}
                """)
        elif wildcards.mode == "pairtools":
            # Same pipeline as above, but aligned with bwa mem.
            shell("""
                soft/pairtools1.0.2/bin/bwa mem -t {wildcards.cores} -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} | \
                    soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \
                    soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \
                    soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \
                    -o {output.file}
                """)
        elif wildcards.mode == "chromap":
            shell("""
                soft/chromap/bin/chromap --preset hic \
                  -t {wildcards.cores} -x {input.genome_index_chromap} -r {input.genomefile} \
                  -1 {input.fastq1} -2 {input.fastq2} -o {output.file}
                """)
        elif wildcards.mode == "fanc_bwa":
            # Each mate is mapped and name-sorted separately, then paired.
            # `mktemp -u` only generates names; the tools create the files.
            shell("""
                TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam)
                TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam)
                soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bwa} $TMP_FILE1
                samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam
                soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bwa} $TMP_FILE2
                samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam
                soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file}
                rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam
                """)
        elif wildcards.mode == "fanc_bowtie2":
            # Same as fanc_bwa, but mapping against the bowtie2 index.
            shell("""
                TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam)
                TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam)
                soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bowtie2} $TMP_FILE1
                samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam
                soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bowtie2} $TMP_FILE2
                samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam
                soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file}
                rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam
                """)
        elif wildcards.mode == "hicpro":
            # HiC-Pro runs from inside its own directory, hence the `cd` and the
            # `../../../` prefix when copying the result back.
            # NOTE(review): the sed below only takes effect if config-hicpro.txt
            # contains the literal text 'N_CPU = 4' - confirm.
            shell("""
                cd soft/HiC-Pro_env/HiC-Pro/
                mkdir -p output
                TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX)
                TMP_CONFIG=$(mktemp -u output/tmp.XXXXXXXX.config)
                cp config-hicpro.txt $TMP_CONFIG

                sed -i 's/N_CPU = 4/N_CPU = {wildcards.cores}/' $TMP_CONFIG
                bin/HiC-Pro -i rawdata/ -o $TMP_DIR -c $TMP_CONFIG

                # Cleanup:
                cp $TMP_DIR/hic_results/data/sample1/sample1.allValidPairs ../../../{output.file}
                rm -r $TMP_DIR; rm $TMP_CONFIG
                """)
        elif wildcards.mode == "juicer":
            # Note that this process is not guaranteed to work well in parallel mode;
            # recommended to run separately
            shell("""
                soft/juicer-1.6/CPU/juicer.sh -g hg38 -d data/4juicer/ -s DpnII -S early \
                    -p {input.chromsizes} -y {input.genome_rsites} -z {input.genome_index_bwa} -t {wildcards.cores} -D soft/juicer-1.6/CPU

                # Cleanup:
                mv data/4juicer/aligned/merged_nodups.txt {output.file}
                rm -rf data/4juicer/aligned; rm -rf data/4juicer/splits/[^S]*
                """)
        elif wildcards.mode == "hicexplorer":
            # Both mates are aligned on the fly and streamed to hicBuildMatrix
            # through process substitution.
            shell("""
                TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX)

                soft/hicexplorer/bin/hicBuildMatrix --samFiles \
                    <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq1} | samtools view -@ {wildcards.cores} -Shb -) \
                    <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq2} | samtools view -@ {wildcards.cores} -Shb -) \
                     --restrictionSequence GATC \
                     --danglingSequence GATC \
                     --restrictionCutFile {input.genome_rsites}  \
                     --threads {wildcards.cores} \
                     --inputBufferSize 1000000 \
                     --QCfolder $TMP_DIR \
                     -o {output.file}

                # Cleanup:
                rm -r $TMP_DIR
                """)
        elif wildcards.mode == "tadbit":
            # Each tadbit step is followed by `|| true`, so its exit status is
            # ignored; a missing result then surfaces at the final `mv`.
            shell("""
                TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX)

                soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq1} --read 1 --index {input.genome_index_gem} --renz DpnII || true
                soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq2} --read 2 --index {input.genome_index_gem} --renz DpnII || true
                soft/tadbit/bin/tadbit parse $TMP_DIR --genome {input.genomefile} || true
                soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true

                mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file}
                rm -r $TMP_DIR
                """)
        elif wildcards.mode == "tadbit_bowtie2":
            # Same as the tadbit branch, but mapping with bowtie2.
            shell("""
                TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX)

                soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq1} --read 1 --index {input.genome_index_bowtie2} --renz DpnII || true
                soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq2} --read 2 --index {input.genome_index_bowtie2} --renz DpnII || true
                soft/tadbit/bin/tadbit parse $TMP_DIR --genome {input.genomefile} || true
                soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true

                mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file}
                rm -r $TMP_DIR
                """)
        # Mapping-only modes. The thread count follows the cores wildcard, like
        # every other branch, so each requested core count is actually measured.
        elif wildcards.mode == "bowtie":
            shell("""
                soft/tadbit/bin/bowtie2 -p {wildcards.cores} -x {input.genome_index_bowtie2} -1 {input.fastq1} -2 {input.fastq2} -S {output.file}
                """)
        elif wildcards.mode == "bwamem":
            # NOTE(review): this uses the bwa binary under soft/pairtools0.3.0,
            # while the "pairtools" mode uses soft/pairtools1.0.2 - confirm that
            # the difference is intended.
            shell("""
                soft/pairtools0.3.0/bin/bwa mem -t {wildcards.cores} -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} > {output.file}
                """)
        elif wildcards.mode == "bwamem2":
            shell("""
                soft/bwa-mem2/bwa-mem2 mem -t {wildcards.cores} -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} > {output.file}
                """)
        else:
            # Fail loudly rather than finishing without producing the output.
            raise ValueError(f"Unknown benchmark mode: {wildcards.mode!r}")
