@prefix dcterms: . @prefix np: . @prefix npx: . @prefix ns1: . @prefix orcid: . @prefix prov: . @prefix rdfs: . @prefix schema: . @prefix sub: . @prefix this: . @prefix xsd: . sub:Head { this: a np:Nanopublication; np:hasAssertion sub:assertion; np:hasProvenance sub:provenance; np:hasPublicationInfo sub:pubinfo . } sub:assertion { sub:config-1 dcterms:identifier "config.yml"; schema:text """samples: samples.csv ref: species: \"Drosophila melanogaster\" genome: \"\" annotation: \"\" accession: \"GCF_000001215.4\" ensembl_species: \"\" # e.g., \"homo_sapiens\" build: \"\" # e.g., \"GRCh38\" release: \"\" # e.g., \"105\" read_filter: min_length: 200 minimap2: index_opts: \"\" opts: \"\" maximum_secondary: 100 secondary_score_ratio: 1.0 samtools: samtobam_opts: \"-b\" bamsort_opts: \"\" bamindex_opts: \"\" bamstats_opts: \"\" quant: salmon_libtype: \"U\" deseq2: fit_type: \"\" design_factors: - \"condition\" lfc_null: 1.0 alt_hypothesis: \"greaterAbs\" point_width: 20 mincount: 10 alpha: 0.05 threshold_plot: 10 colormap: \"Blues\" figtype: \"png\" batch_effect: - \"\" isoform_analysis: FLAIR: true qscore: 1 exp_thresh: 10 col_opts: \"--annotation_reliant generate --generate_map --stringent\" protein_annotation: lambda: false uniref: \"https://ftp.imp.fu-berlin.de/pub/lambda/index/lambda3/gen_0/uniref50_20230713.lba.gz\" num_matches: 3""" . sub:dataset a schema:Dataset; ns1:describesWorkflow "RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE"; ns1:description """This workflow performs differential expression analysis of RNA-seq data obtained from Oxford Nanopore long-read sequencing technology. First a transcriptome FASTA is constructed using gffread [https://github.com/gpertea/gffread]. Reads are then mapped to the transcriptome with the long-read optimized alignment tool minimap2 [https://github.com/lh3/minimap2]. Next quantification is performed using salmon [https://github.com/COMBINE-lab/salmon] before normalization and differential expression analysis are conducted by PyDESeq2 [https://github.com/owkin/PyDESeq2]. The workflow can optionally analyze splice-isoforms through integrating the FLAIR [https://github.com/BrooksLabUCSC/flair] workflow. Additionaly, NanoPlot [https://github.com/wdecoster/NanoPlot] is employed to analyze initial sequencing data and QualiMap [https://github.com/EagleGenomics-cookbooks/QualiMap] is used to evaluate mapping results."""; ns1:generatedAt "2026-05-05T17:41:32.587957+00:00"^^xsd:dateTime; ns1:hasConfigurationSection sub:workflow-configuration; ns1:hasRuleSection sub:workflow-rules . sub:rule-alignment_qa ns1:hasInput "sorted_alignments/{sample}_sorted.bam"; ns1:hasOutput "QC/qualimap/{sample}"; ns1:hasSoftwarePackage "v4.4.0" . sub:rule-alignment_qa_report ns1:hasInput "QC/qualimap/{sample}"; ns1:hasOutput "qualimap/{sample}/qualimapReport.html"; ns1:hasSoftwarePackage "python>=3.12.4" . sub:rule-bam_index ns1:hasInput "sorted_alignments/{sample}_sorted.bam"; ns1:hasOutput "sorted_alignments/{sample}_sorted.bam.bai"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-bam_sort ns1:hasInput "alignments/{sample}.bam"; ns1:hasOutput "sorted_alignments/{sample}_sorted.bam"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-bam_stats ns1:hasInput "alignments/{sample}.bam"; ns1:hasOutput "QC/bamstats/{sample}.txt"; ns1:hasSoftwarePackage "v3.13.4" . sub:rule-bam_to_bed ns1:hasInput "sorted_alignments/{sample}_sorted.bam", "sorted_alignments/{sample}_sorted.bam.bai"; ns1:hasOutput "iso_analysis/beds/{sample}.bed"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-build_flair_genome_index ns1:hasInput "references/genomic.fa"; ns1:hasOutput "index/flair_genome_index.mmi"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-build_minimap_index ns1:hasInput "transcriptome/corrected_transcriptome.fa"; ns1:hasOutput "index/transcriptome_index.mmi"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-concatenate_beds ns1:hasInput "iso_analysis/beds/barcode10.bed", "iso_analysis/beds/barcode11.bed", "iso_analysis/beds/barcode12.bed", "iso_analysis/beds/barcode13.bed", "iso_analysis/beds/barcode15.bed", "iso_analysis/beds/barcode16.bed"; ns1:hasOutput "iso_analysis/beds/all_samples.bed"; ns1:hasSoftwarePackage "python>=3.12.4" . sub:rule-correct_transcriptome ns1:hasInput "transcriptome/transcriptome.fa"; ns1:hasOutput "transcriptome/corrected_transcriptome.fa"; ns1:hasSoftwarePackage "gffread>=0.12.7" . sub:rule-count_reads ns1:hasInput "alignments/{sample}.bam", "transcriptome/corrected_transcriptome.fa"; ns1:hasOutput "counts/{sample}_salmon/quant.sf"; ns1:hasSoftwarePackage "salmon>=1.10.3" . sub:rule-deseq2 ns1:hasInput "de_analysis/all.rds"; ns1:hasOutput "de_analysis/{factor}_{prop_a}_vs_{prop_b}_MA_plot.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_count_heatmap.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_dispersion_plot.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_l2fc.tsv", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_sample_heatmap.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_top_count_heatmap.svg"; ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap", "r-rcolorbrewer", "r-stringr =1.5.1" . sub:rule-deseq2_init ns1:hasInput "/fshpc/meesters/projects/snakemake-workflows/rna-longseq-de-isoform/config/demo/samples.csv", "merged/all_counts_gene.tsv"; ns1:hasOutput "de_analysis/all.rds", "de_analysis/normcounts.tsv"; ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap", "r-rcolorbrewer", "r-stringr =1.5.1" . sub:rule-download_ensembl_annotation ns1:hasOutput "references/ensembl_annotation.gff3"; ns1:hasSoftwarePackage "v7.5.0" . sub:rule-download_ensembl_genome ns1:hasOutput "references/ensembl_genome.fa"; ns1:hasSoftwarePackage "v7.5.0" . sub:rule-download_ncbi_annotation ns1:hasOutput "references/ncbi_dataset_annotation.zip"; ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" . sub:rule-download_ncbi_genome ns1:hasOutput "references/ncbi_dataset_genome.zip"; ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" . sub:rule-filter_reads ns1:hasOutput "filter/{sample}_filtered.fq"; ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" . sub:rule-flair_align ns1:hasInput "filter/barcode10_filtered.fq", "filter/barcode11_filtered.fq", "filter/barcode12_filtered.fq", "filter/barcode13_filtered.fq", "filter/barcode15_filtered.fq", "filter/barcode16_filtered.fq", "index/flair_genome_index.mmi", "references/genomic.fa"; ns1:hasOutput "iso_analysis/align/flair.bam", "iso_analysis/align/flair.bam.bai", "iso_analysis/align/flair.bed"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-flair_collapse ns1:hasInput "filter/barcode10_filtered.fq", "filter/barcode11_filtered.fq", "filter/barcode12_filtered.fq", "filter/barcode13_filtered.fq", "filter/barcode15_filtered.fq", "filter/barcode16_filtered.fq", "iso_analysis/align/flair_all_corrected.bed", "references/genomic.fa", "references/standardized_genomic.gtf"; ns1:hasOutput "iso_analysis/collapse/flair.isoforms.bed", "iso_analysis/collapse/flair.isoforms.fa"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-flair_correct ns1:hasInput "iso_analysis/align/flair.bed", "references/genomic.fa", "references/standardized_genomic.gtf"; ns1:hasOutput "iso_analysis/align/flair_all_corrected.bed"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-flair_diffexp ns1:hasInput "iso_analysis/quantify/flair.counts.tsv"; ns1:hasOutput "iso_analysis/diffexp/genes_deseq2_QCplots_{condition_value1}_v_{condition_value2}.pdf", "iso_analysis/diffexp/genes_deseq2_{condition_value1}_v_{condition_value2}.tsv", "iso_analysis/diffexp/isoforms_deseq2_QCplots_{condition_value1}_v_{condition_value2}.pdf", "iso_analysis/diffexp/isoforms_deseq2_{condition_value1}_v_{condition_value2}.tsv", "iso_analysis/diffexp/isoforms_drimseq_{condition_value1}_v_{condition_value2}.tsv"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-flair_plot_isoforms ns1:hasInput "iso_analysis/collapse/flair.isoforms.bed", "iso_analysis/quantify/flair.counts.tsv"; ns1:hasOutput "iso_analysis/plots"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-flair_quantify ns1:hasInput "iso_analysis/collapse/flair.isoforms.bed", "iso_analysis/collapse/flair.isoforms.fa", "iso_analysis/reads_manifest.tsv"; ns1:hasOutput "iso_analysis/quantify/flair.counts.tsv"; ns1:hasSoftwarePackage "flair=2.0.0" . sub:rule-generate_gene_query ns1:hasInput "de_analysis/{factor}_{prop_a}_vs_{prop_b}_l2fc.tsv", "transcriptome/corrected_transcriptome.fa"; ns1:hasOutput "protein_annotation/{factor}_{prop_a}_vs_{prop_b}_de_genes.fa"; ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" . sub:rule-genome_to_transcriptome ns1:hasInput "references/genomic.fa", "references/standardized_genomic.gff"; ns1:hasOutput "references/genomic.fa.fai", "transcriptome/transcriptome.fa"; ns1:hasSoftwarePackage "gffread>=0.12.7" . sub:rule-get_annotation ns1:hasOutput "references/genomic.gff"; ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" . sub:rule-get_genome ns1:hasOutput "references/genomic.fa"; ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" . sub:rule-get_indexed_protein_db ns1:hasOutput "protein_annotation/index/UniRef.lba.gz"; ns1:hasSoftwarePackage "wget>=1.21.4" . sub:rule-get_protein_names ns1:hasInput "protein_annotation/blast_results_{factor}_{prop_a}_vs_{prop_b}.m8"; ns1:hasOutput "protein_annotation/proteins_{factor}_{prop_a}_vs_{prop_b}.csv"; ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" . sub:rule-gff_to_gtf ns1:hasInput "references/standardized_genomic.gff"; ns1:hasOutput "references/standardized_genomic.gtf"; ns1:hasSoftwarePackage "gffread>=0.12.7" . sub:rule-iso_analysis_report ns1:hasInput "iso_analysis/plots"; ns1:hasOutput "iso_analysis/report/isoforms", "iso_analysis/report/usage"; ns1:hasSoftwarePackage "python>=3.12.4" . sub:rule-lambda_gene_annotation ns1:hasInput "protein_annotation/index/UniRef.lba.gz", "protein_annotation/{factor}_{prop_a}_vs_{prop_b}_de_genes.fa"; ns1:hasOutput "protein_annotation/blast_results_{factor}_{prop_a}_vs_{prop_b}.m8"; ns1:hasSoftwarePackage "lambda>=3.1.0" . sub:rule-map_reads ns1:hasInput "filter/{sample}_filtered.fq", "index/transcriptome_index.mmi"; ns1:hasOutput "alignments/{sample}.sam"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-merge_read_counts ns1:hasInput "counts/barcode10_salmon/quant.sf", "counts/barcode11_salmon/quant.sf", "counts/barcode12_salmon/quant.sf", "counts/barcode13_salmon/quant.sf", "counts/barcode15_salmon/quant.sf", "counts/barcode16_salmon/quant.sf"; ns1:hasOutput "merged/all_counts.tsv"; ns1:hasSoftwarePackage "pandas>=2.2.2", "python>=3.12.4" . sub:rule-pca ns1:hasInput "de_analysis/all.rds"; ns1:hasOutput "de_analysis/pca_{variable}.svg"; ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap", "r-rcolorbrewer", "r-stringr =1.5.1" . sub:rule-reads_manifest ns1:hasOutput "iso_analysis/reads_manifest.tsv"; ns1:hasSoftwarePackage "pandas>=2.2.2", "python>=3.12.4" . sub:rule-sam_to_bam ns1:hasInput "alignments/{sample}.sam"; ns1:hasOutput "alignments/{sample}.bam"; ns1:hasSoftwarePackage "v7.6.0" . sub:rule-sample_qa_plot ns1:hasOutput "NanoPlot/{sample}/NanoPlot-report.html"; ns1:hasSoftwarePackage "nanoplot" . sub:rule-standardize_gff ns1:hasInput "references/genomic.gff"; ns1:hasOutput "references/standardized_genomic.gff"; ns1:hasSoftwarePackage "agat>=1.4.0" . sub:rule-total_sample_qa_plot ns1:hasInput "/lustre/project/nhr-zdvhpc/dtest/raw/barcode10.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode11.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode12.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode13.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode15.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode16.fastq.gz"; ns1:hasOutput "NanoPlot/NanoPlot-report.html"; ns1:hasSoftwarePackage "nanoplot" . sub:rule-transcriptid_to_gene ns1:hasInput "merged/all_counts.tsv", "references/standardized_genomic.gff"; ns1:hasOutput "merged/all_counts_gene.tsv", "merged/transcriptid_to_gene_plot.svg"; ns1:hasSoftwarePackage "anndata=0.10.8", "bioinfokit", "pydeseq2=0.4.10", "seaborn>=0.13.2" . sub:workflow-configuration rdfs:label "from workflow configuration"; ns1:hasConfigurationFile sub:config-1 . sub:workflow-rules rdfs:label "workflow rules"; ns1:hasWorkflowRule sub:rule-alignment_qa, sub:rule-alignment_qa_report, sub:rule-bam_index, sub:rule-bam_sort, sub:rule-bam_stats, sub:rule-bam_to_bed, sub:rule-build_flair_genome_index, sub:rule-build_minimap_index, sub:rule-concatenate_beds, sub:rule-correct_transcriptome, sub:rule-count_reads, sub:rule-deseq2, sub:rule-deseq2_init, sub:rule-download_ensembl_annotation, sub:rule-download_ensembl_genome, sub:rule-download_ncbi_annotation, sub:rule-download_ncbi_genome, sub:rule-filter_reads, sub:rule-flair_align, sub:rule-flair_collapse, sub:rule-flair_correct, sub:rule-flair_diffexp, sub:rule-flair_plot_isoforms, sub:rule-flair_quantify, sub:rule-generate_gene_query, sub:rule-genome_to_transcriptome, sub:rule-get_annotation, sub:rule-get_genome, sub:rule-get_indexed_protein_db, sub:rule-get_protein_names, sub:rule-gff_to_gtf, sub:rule-iso_analysis_report, sub:rule-lambda_gene_annotation, sub:rule-map_reads, sub:rule-merge_read_counts, sub:rule-pca, sub:rule-reads_manifest, sub:rule-sam_to_bam, sub:rule-sample_qa_plot, sub:rule-standardize_gff, sub:rule-total_sample_qa_plot, sub:rule-transcriptid_to_gene . } sub:provenance { sub:assertion prov:generatedAtTime "2026-05-05T19:41:59.930591+02:00"^^xsd:dateTime; prov:wasAttributedTo orcid:0000-0003-2408-7588 . } sub:pubinfo { this: dcterms:created "2026-05-05T17:41:32.587957+00:00"^^xsd:dateTime; dcterms:creator orcid:0000-0003-2408-7588; npx:hasNanopubType schema:Dataset; npx:signedBy orcid:0000-0003-2408-7588; rdfs:label "Snakemake workflow metadata: RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE"; prov:generatedAtTime "2026-05-05T19:41:59.930591+02:00"^^xsd:dateTime; prov:wasAttributedTo orcid:0000-0003-2408-7588 . sub:sig npx:hasAlgorithm "RSA"; npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAkOFUnnRCp/k9/0ugvx8zQJ+Qc675W1Ug6F839+xvJ2QsSBu4iOJ1O2kJCmb3tALp3gJOt8sffRot3VrfY1hbgXxL7BVtDsfHHmXVff4YCeg5Ycdn5cDpLawDpAdYwMhK0LwIkZ3fwH9/o9JniYKXLV/jpF9bMKyiw/6tqlCHaMW1r8gzZzoxVIAakwvlABoY0iNoToLTlBRXEI4mLUNjDMnMwQgfh1KXMxMruNjW3wJyeDEIfa2ooAt0E4CRM9pkrEb37NzD9Jz8aSUFFY6BvIxF4ixK7rm6IUDvQ76LqXkEmgSeRv1kw7gnCe9wV/wHd0ZeW4heoBXmLHX3MvHfjwIDAQAB"; npx:hasSignature "VkxV4Bhfjw6x/ltvfJl+a58Pn8/q66Za1tQKj49BEvafAS5ulgDmz+THGqI6dVKXA9OhIxH59qkur6/Qd+ow2B3JoKV3Aw7CFnIdjJGySXxpfxtWglk0YRApCKlwdvEUttOxnSTNWy0lD3EQja6eVc95nlDaF4987H6UYQO9D4Pc5r7VoJG7EYVZuJPtB3KJf/7XyIU4a9nne5nou+6wNLKWyL8xmJbZlyVkkt3FjKxp4xmxDnfcWRdVq/zFkC2FUdmssCsoFBLXCaOy1ERG1uldpeF285DoYV/cXHAkQIh69i2HtCBJBHfgvjBmbtTMDkGQAPw4pz7fbMZANZkHHA=="; npx:hasSignatureTarget this:; npx:signedBy orcid:0000-0003-2408-7588 . }