Nanopublication

< Home

ID

https://w3id.org/np/RAgcfEq7mB99AkMaYYGTXKoqGdTaofCrSJeT2jAp-eGnI

Formats

.trig | .trig.txt | .jelly | .jelly.txt | .jsonld | .jsonld.txt | .nq | .nq.txt | .xml | .xml.txt

Content

@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix npx: <http://purl.org/nanopub/x/> .
@prefix ns1: <https://w3id.org/np/snakemake/> .
@prefix orcid: <https://orcid.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sub: <https://w3id.org/np/RAgcfEq7mB99AkMaYYGTXKoqGdTaofCrSJeT2jAp-eGnI/> .
@prefix this: <https://w3id.org/np/RAgcfEq7mB99AkMaYYGTXKoqGdTaofCrSJeT2jAp-eGnI> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sub:Head {
  this: a np:Nanopublication;
    np:hasAssertion sub:assertion;
    np:hasProvenance sub:provenance;
    np:hasPublicationInfo sub:pubinfo .
}

sub:assertion {
  sub:config-1 dcterms:identifier "config.yml";
    schema:text """samples: samples.csv

ref:
    species: \"Drosophila melanogaster\"
    genome: \"\"
    annotation: \"\"
    accession: \"GCF_000001215.4\"
    ensembl_species: \"\"  # e.g., \"homo_sapiens\"
    build: \"\"  # e.g., \"GRCh38\"
    release: \"\"  # e.g., \"105\"

read_filter:
    min_length: 200

minimap2:
    index_opts: \"\"
    opts: \"\"
    maximum_secondary: 100
    secondary_score_ratio: 1.0

samtools:
    samtobam_opts: \"-b\"
    bamsort_opts: \"\"
    bamindex_opts: \"\"
    bamstats_opts: \"\"

quant:
    salmon_libtype: \"U\"

deseq2:
    fit_type: \"\"
    design_factors:
        - \"condition\"
    lfc_null: 1.0
    alt_hypothesis: \"greaterAbs\"
    point_width: 20
    mincount: 10
    alpha: 0.05
    threshold_plot: 10
    colormap: \"Blues\"
    figtype: \"png\"
    batch_effect:
      - \"\"

isoform_analysis:
    FLAIR: true
    qscore: 1
    exp_thresh: 10
    col_opts: \"--annotation_reliant generate --generate_map --stringent\"

protein_annotation:
    lambda: false
    uniref: \"https://ftp.imp.fu-berlin.de/pub/lambda/index/lambda3/gen_0/uniref50_20230713.lba.gz\"
    num_matches: 3""" .
  
  sub:dataset a schema:Dataset;
    ns1:describesWorkflow "RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE";
    ns1:description """This workflow performs differential expression analysis of RNA-seq data obtained from Oxford Nanopore long-read sequencing technology.
First a transcriptome FASTA is constructed using gffread [https://github.com/gpertea/gffread]. Reads are then mapped to the transcriptome with the long-read optimized alignment tool minimap2 [https://github.com/lh3/minimap2].
Next quantification is performed using salmon [https://github.com/COMBINE-lab/salmon] before normalization and differential expression analysis are conducted by PyDESeq2 [https://github.com/owkin/PyDESeq2].
The workflow can optionally analyze splice-isoforms through integrating the FLAIR [https://github.com/BrooksLabUCSC/flair] workflow.
Additionally, NanoPlot [https://github.com/wdecoster/NanoPlot] is employed to analyze initial sequencing data and QualiMap [https://github.com/EagleGenomics-cookbooks/QualiMap] is used to evaluate mapping results.""";
    ns1:generatedAt "2026-05-05T17:34:55.389913+00:00"^^xsd:dateTime;
    ns1:hasConfigurationSection sub:workflow-configuration;
    ns1:hasRuleSection sub:workflow-rules .
  
  sub:rule-alignment_qa ns1:hasInput "sorted_alignments/{sample}_sorted.bam";
    ns1:hasOutput "QC/qualimap/{sample}";
    ns1:hasSoftwarePackage "v4.4.0" .
  
  sub:rule-alignment_qa_report ns1:hasInput "QC/qualimap/{sample}";
    ns1:hasOutput "qualimap/{sample}/qualimapReport.html";
    ns1:hasSoftwarePackage "python>=3.12.4" .
  
  sub:rule-bam_index ns1:hasInput "sorted_alignments/{sample}_sorted.bam";
    ns1:hasOutput "sorted_alignments/{sample}_sorted.bam.bai";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-bam_sort ns1:hasInput "alignments/{sample}.bam";
    ns1:hasOutput "sorted_alignments/{sample}_sorted.bam";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-bam_stats ns1:hasInput "alignments/{sample}.bam";
    ns1:hasOutput "QC/bamstats/{sample}.txt";
    ns1:hasSoftwarePackage "v3.13.4" .
  
  sub:rule-bam_to_bed ns1:hasInput "sorted_alignments/{sample}_sorted.bam", "sorted_alignments/{sample}_sorted.bam.bai";
    ns1:hasOutput "iso_analysis/beds/{sample}.bed";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-build_flair_genome_index ns1:hasInput "references/genomic.fa";
    ns1:hasOutput "index/flair_genome_index.mmi";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-build_minimap_index ns1:hasInput "transcriptome/corrected_transcriptome.fa";
    ns1:hasOutput "index/transcriptome_index.mmi";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-concatenate_beds ns1:hasInput "iso_analysis/beds/barcode10.bed", "iso_analysis/beds/barcode11.bed",
      "iso_analysis/beds/barcode12.bed", "iso_analysis/beds/barcode13.bed", "iso_analysis/beds/barcode15.bed",
      "iso_analysis/beds/barcode16.bed";
    ns1:hasOutput "iso_analysis/beds/all_samples.bed";
    ns1:hasSoftwarePackage "python>=3.12.4" .
  
  sub:rule-correct_transcriptome ns1:hasInput "transcriptome/transcriptome.fa";
    ns1:hasOutput "transcriptome/corrected_transcriptome.fa";
    ns1:hasSoftwarePackage "gffread>=0.12.7" .
  
  sub:rule-count_reads ns1:hasInput "alignments/{sample}.bam", "transcriptome/corrected_transcriptome.fa";
    ns1:hasOutput "counts/{sample}_salmon/quant.sf";
    ns1:hasSoftwarePackage "salmon>=1.10.3" .
  
  sub:rule-deseq2 ns1:hasInput "de_analysis/all.rds";
    ns1:hasOutput "de_analysis/{factor}_{prop_a}_vs_{prop_b}_MA_plot.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_count_heatmap.svg",
      "de_analysis/{factor}_{prop_a}_vs_{prop_b}_dispersion_plot.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_l2fc.tsv",
      "de_analysis/{factor}_{prop_a}_vs_{prop_b}_sample_heatmap.svg", "de_analysis/{factor}_{prop_a}_vs_{prop_b}_top_count_heatmap.svg";
    ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap",
      "r-rcolorbrewer", "r-stringr =1.5.1" .
  
  sub:rule-deseq2_init ns1:hasInput "/fshpc/meesters/projects/snakemake-workflows/rna-longseq-de-isoform/config/demo/samples.csv",
      "merged/all_counts_gene.tsv";
    ns1:hasOutput "de_analysis/all.rds", "de_analysis/normcounts.tsv";
    ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap",
      "r-rcolorbrewer", "r-stringr =1.5.1" .
  
  sub:rule-download_ensembl_annotation ns1:hasOutput "references/ensembl_annotation.gff3";
    ns1:hasSoftwarePackage "v7.5.0" .
  
  sub:rule-download_ensembl_genome ns1:hasOutput "references/ensembl_genome.fa";
    ns1:hasSoftwarePackage "v7.5.0" .
  
  sub:rule-download_ncbi_annotation ns1:hasOutput "references/ncbi_dataset_annotation.zip";
    ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" .
  
  sub:rule-download_ncbi_genome ns1:hasOutput "references/ncbi_dataset_genome.zip";
    ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" .
  
  sub:rule-filter_reads ns1:hasOutput "filter/{sample}_filtered.fq";
    ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" .
  
  sub:rule-flair_align ns1:hasInput "filter/barcode10_filtered.fq", "filter/barcode11_filtered.fq",
      "filter/barcode12_filtered.fq", "filter/barcode13_filtered.fq", "filter/barcode15_filtered.fq",
      "filter/barcode16_filtered.fq", "index/flair_genome_index.mmi", "references/genomic.fa";
    ns1:hasOutput "iso_analysis/align/flair.bam", "iso_analysis/align/flair.bam.bai",
      "iso_analysis/align/flair.bed";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-flair_collapse ns1:hasInput "filter/barcode10_filtered.fq", "filter/barcode11_filtered.fq",
      "filter/barcode12_filtered.fq", "filter/barcode13_filtered.fq", "filter/barcode15_filtered.fq",
      "filter/barcode16_filtered.fq", "iso_analysis/align/flair_all_corrected.bed", "references/genomic.fa",
      "references/standardized_genomic.gtf";
    ns1:hasOutput "iso_analysis/collapse/flair.isoforms.bed", "iso_analysis/collapse/flair.isoforms.fa";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-flair_correct ns1:hasInput "iso_analysis/align/flair.bed", "references/genomic.fa",
      "references/standardized_genomic.gtf";
    ns1:hasOutput "iso_analysis/align/flair_all_corrected.bed";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-flair_diffexp ns1:hasInput "iso_analysis/quantify/flair.counts.tsv";
    ns1:hasOutput "iso_analysis/diffexp/genes_deseq2_QCplots_{condition_value1}_v_{condition_value2}.pdf",
      "iso_analysis/diffexp/genes_deseq2_{condition_value1}_v_{condition_value2}.tsv", "iso_analysis/diffexp/isoforms_deseq2_QCplots_{condition_value1}_v_{condition_value2}.pdf",
      "iso_analysis/diffexp/isoforms_deseq2_{condition_value1}_v_{condition_value2}.tsv",
      "iso_analysis/diffexp/isoforms_drimseq_{condition_value1}_v_{condition_value2}.tsv";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-flair_plot_isoforms ns1:hasInput "iso_analysis/collapse/flair.isoforms.bed",
      "iso_analysis/quantify/flair.counts.tsv";
    ns1:hasOutput "iso_analysis/plots";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-flair_quantify ns1:hasInput "iso_analysis/collapse/flair.isoforms.bed", "iso_analysis/collapse/flair.isoforms.fa",
      "iso_analysis/reads_manifest.tsv";
    ns1:hasOutput "iso_analysis/quantify/flair.counts.tsv";
    ns1:hasSoftwarePackage "flair=2.0.0" .
  
  sub:rule-generate_gene_query ns1:hasInput "de_analysis/{factor}_{prop_a}_vs_{prop_b}_l2fc.tsv",
      "transcriptome/corrected_transcriptome.fa";
    ns1:hasOutput "protein_annotation/{factor}_{prop_a}_vs_{prop_b}_de_genes.fa";
    ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" .
  
  sub:rule-genome_to_transcriptome ns1:hasInput "references/genomic.fa", "references/standardized_genomic.gff";
    ns1:hasOutput "references/genomic.fa.fai", "transcriptome/transcriptome.fa";
    ns1:hasSoftwarePackage "gffread>=0.12.7" .
  
  sub:rule-get_annotation ns1:hasOutput "references/genomic.gff";
    ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" .
  
  sub:rule-get_genome ns1:hasOutput "references/genomic.fa";
    ns1:hasSoftwarePackage "ncbi-datasets-cli>=18.14.0", "unzip>=6.0.0" .
  
  sub:rule-get_indexed_protein_db ns1:hasOutput "protein_annotation/index/UniRef.lba.gz";
    ns1:hasSoftwarePackage "wget>=1.21.4" .
  
  sub:rule-get_protein_names ns1:hasInput "protein_annotation/blast_results_{factor}_{prop_a}_vs_{prop_b}.m8";
    ns1:hasOutput "protein_annotation/proteins_{factor}_{prop_a}_vs_{prop_b}.csv";
    ns1:hasSoftwarePackage "biopython >=1.84", "pandas>=2.2.2", "python>=3.12.4" .
  
  sub:rule-gff_to_gtf ns1:hasInput "references/standardized_genomic.gff";
    ns1:hasOutput "references/standardized_genomic.gtf";
    ns1:hasSoftwarePackage "gffread>=0.12.7" .
  
  sub:rule-iso_analysis_report ns1:hasInput "iso_analysis/plots";
    ns1:hasOutput "iso_analysis/report/isoforms", "iso_analysis/report/usage";
    ns1:hasSoftwarePackage "python>=3.12.4" .
  
  sub:rule-lambda_gene_annotation ns1:hasInput "protein_annotation/index/UniRef.lba.gz",
      "protein_annotation/{factor}_{prop_a}_vs_{prop_b}_de_genes.fa";
    ns1:hasOutput "protein_annotation/blast_results_{factor}_{prop_a}_vs_{prop_b}.m8";
    ns1:hasSoftwarePackage "lambda>=3.1.0" .
  
  sub:rule-map_reads ns1:hasInput "filter/{sample}_filtered.fq", "index/transcriptome_index.mmi";
    ns1:hasOutput "alignments/{sample}.sam";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-merge_read_counts ns1:hasInput "counts/barcode10_salmon/quant.sf", "counts/barcode11_salmon/quant.sf",
      "counts/barcode12_salmon/quant.sf", "counts/barcode13_salmon/quant.sf", "counts/barcode15_salmon/quant.sf",
      "counts/barcode16_salmon/quant.sf";
    ns1:hasOutput "merged/all_counts.tsv";
    ns1:hasSoftwarePackage "pandas>=2.2.2", "python>=3.12.4" .
  
  sub:rule-pca ns1:hasInput "de_analysis/all.rds";
    ns1:hasOutput "de_analysis/pca_{variable}.svg";
    ns1:hasSoftwarePackage "bioconductor-deseq2 =1.46.0", "r-ashr =2.2_63", "r-pheatmap",
      "r-rcolorbrewer", "r-stringr =1.5.1" .
  
  sub:rule-reads_manifest ns1:hasOutput "iso_analysis/reads_manifest.tsv";
    ns1:hasSoftwarePackage "pandas>=2.2.2", "python>=3.12.4" .
  
  sub:rule-sam_to_bam ns1:hasInput "alignments/{sample}.sam";
    ns1:hasOutput "alignments/{sample}.bam";
    ns1:hasSoftwarePackage "v7.6.0" .
  
  sub:rule-sample_qa_plot ns1:hasOutput "NanoPlot/{sample}/NanoPlot-report.html";
    ns1:hasSoftwarePackage "nanoplot" .
  
  sub:rule-standardize_gff ns1:hasInput "references/genomic.gff";
    ns1:hasOutput "references/standardized_genomic.gff";
    ns1:hasSoftwarePackage "agat>=1.4.0" .
  
  sub:rule-total_sample_qa_plot ns1:hasInput "/lustre/project/nhr-zdvhpc/dtest/raw/barcode10.fastq.gz",
      "/lustre/project/nhr-zdvhpc/dtest/raw/barcode11.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode12.fastq.gz",
      "/lustre/project/nhr-zdvhpc/dtest/raw/barcode13.fastq.gz", "/lustre/project/nhr-zdvhpc/dtest/raw/barcode15.fastq.gz",
      "/lustre/project/nhr-zdvhpc/dtest/raw/barcode16.fastq.gz";
    ns1:hasOutput "NanoPlot/NanoPlot-report.html";
    ns1:hasSoftwarePackage "nanoplot" .
  
  sub:rule-transcriptid_to_gene ns1:hasInput "merged/all_counts.tsv", "references/standardized_genomic.gff";
    ns1:hasOutput "merged/all_counts_gene.tsv", "merged/transcriptid_to_gene_plot.svg";
    ns1:hasSoftwarePackage "anndata=0.10.8", "bioinfokit", "pydeseq2=0.4.10", "seaborn>=0.13.2" .
  
  sub:workflow-configuration rdfs:label "from workflow configuration";
    ns1:hasConfigurationFile sub:config-1 .
  
  sub:workflow-rules rdfs:label "workflow rules";
    ns1:hasWorkflowRule sub:rule-alignment_qa, sub:rule-alignment_qa_report, sub:rule-bam_index,
      sub:rule-bam_sort, sub:rule-bam_stats, sub:rule-bam_to_bed, sub:rule-build_flair_genome_index,
      sub:rule-build_minimap_index, sub:rule-concatenate_beds, sub:rule-correct_transcriptome,
      sub:rule-count_reads, sub:rule-deseq2, sub:rule-deseq2_init, sub:rule-download_ensembl_annotation,
      sub:rule-download_ensembl_genome, sub:rule-download_ncbi_annotation, sub:rule-download_ncbi_genome,
      sub:rule-filter_reads, sub:rule-flair_align, sub:rule-flair_collapse, sub:rule-flair_correct,
      sub:rule-flair_diffexp, sub:rule-flair_plot_isoforms, sub:rule-flair_quantify, sub:rule-generate_gene_query,
      sub:rule-genome_to_transcriptome, sub:rule-get_annotation, sub:rule-get_genome, sub:rule-get_indexed_protein_db,
      sub:rule-get_protein_names, sub:rule-gff_to_gtf, sub:rule-iso_analysis_report, sub:rule-lambda_gene_annotation,
      sub:rule-map_reads, sub:rule-merge_read_counts, sub:rule-pca, sub:rule-reads_manifest,
      sub:rule-sam_to_bam, sub:rule-sample_qa_plot, sub:rule-standardize_gff, sub:rule-total_sample_qa_plot,
      sub:rule-transcriptid_to_gene .
}

sub:provenance {
  sub:assertion prov:generatedAtTime "2026-05-05T19:34:56.982776+02:00"^^xsd:dateTime;
    prov:wasAttributedTo orcid:0000-0003-2408-7588 .
}

sub:pubinfo {
  this: dcterms:created "2026-05-05T17:34:55.389913+00:00"^^xsd:dateTime;
    dcterms:creator orcid:0000-0003-2408-7588;
    npx:hasNanopubType schema:Dataset;
    npx:signedBy orcid:0000-0003-2408-7588;
    rdfs:label "Snakemake workflow metadata: RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE";
    prov:generatedAtTime "2026-05-05T19:34:56.982776+02:00"^^xsd:dateTime;
    prov:wasAttributedTo orcid:0000-0003-2408-7588 .
  
  sub:sig npx:hasAlgorithm "RSA";
    npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAkOFUnnRCp/k9/0ugvx8zQJ+Qc675W1Ug6F839+xvJ2QsSBu4iOJ1O2kJCmb3tALp3gJOt8sffRot3VrfY1hbgXxL7BVtDsfHHmXVff4YCeg5Ycdn5cDpLawDpAdYwMhK0LwIkZ3fwH9/o9JniYKXLV/jpF9bMKyiw/6tqlCHaMW1r8gzZzoxVIAakwvlABoY0iNoToLTlBRXEI4mLUNjDMnMwQgfh1KXMxMruNjW3wJyeDEIfa2ooAt0E4CRM9pkrEb37NzD9Jz8aSUFFY6BvIxF4ixK7rm6IUDvQ76LqXkEmgSeRv1kw7gnCe9wV/wHd0ZeW4heoBXmLHX3MvHfjwIDAQAB";
    npx:hasSignature "W5Jh4ZHvf3gSYE8F3RWMubixYDtbt0a7XLQWOdgYNqiWQ+PtbkT1KzE9Rj6VgWDlPKYSGpeU5WFN2yLJhk47TgyVpKrkBVZKaSbTfT0+BruxdCyhNHPUCzauo2uAAuuKyoGktL4dnyO2zlXtiUXCIJZR1e6kIVbbGBsiD/6t5C46/e5WRtC4k88fYeIlInEQL6IdjqgFWCpgtY06qutPWv3G22lvkDN86Uieb0DxPe3G+shTMW7iNFzkzXqguc9B6yOHcZOKiZCE1iC7QxUoxzq5DaKS0MMmOXwv1UuD36vvFNC5P0OlXiI2fx5Qe/UNvhFdxunbzxFhHh9BMz4S4A==";
    npx:hasSignatureTarget this:;
    npx:signedBy orcid:0000-0003-2408-7588 .
}