Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • get-nextflow-ngl-bi/wf-illumina-nf
1 result
Show changes
Commits on Source (7)
......@@ -55,7 +55,7 @@ THREADS 8
## have contaminated your sample during the library preparation step.
##
Genome of E. coli
DATABASE E.coli /home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
DATABASE Bacterium /home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
Sequence of PhiX
DATABASE PhiX /home/sbsuser/plage/references/indexed/ng6_conta_ref/PhiX/BWA/phi.fa
......
......@@ -6,7 +6,7 @@ intro_text: "This MultiQC report summarises Quality Control analysis results."
report_comment: >
You can find detailed guidance on how to interpret the following graphs in the <a href="https://bios4biol.pages.mia.inra.fr/Help4MultiQC/" target="_blank"><strong>Help4MultiQC GitBook</strong></a>.
Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may help deepen your understanding of the presented data.
Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may enhance your understanding of the presented data.
show_analysis_paths: False
show_analysis_time: False
......@@ -59,8 +59,9 @@ remove_sections:
- flash-histogram
section_comments:
fastqc_sequence_counts: "Tips : Use this graph to visualize the amount of each samples. Shouldn't be use to determine the proportion of duplicated reads (see 'General Statistics')."
fastqc_sequence_counts: "Tip: Use this graph to visualize the number of sequences in each sample. It should not be used to determine the proportion of duplicated reads (see 'General Statistics')."
sortmerna: "Total rRNA percentage is available in the 'General Statistics'. Non-rRNA sequences are NOT USED for this graph."
fastq_screen: "This analysis relies on permissive alignments using BWA, which produce a certain number of false-positive hits. Thus, don't worry if a low percentage of contamination is present.<br><i>NB: This analysis is of limited interest for transcriptomic reads.</i>"
module_order:
- fastqc:
......@@ -76,7 +77,6 @@ module_order:
- fastq_screen:
name: "ContaminationSearch"
#info: "This section shows the module with different files"
info: "This analysis is performed by permissive alignments using BWA, which display a certain amount of false positive hits. Thus, don't worry if a low percentage of contamination is present. This analysis has limited interest on transcriptomic reads.<br>"
target: "FastQ-Screen"
- sortmerna:
name: "ContaminationSearch - rRNA"
......
......@@ -232,6 +232,12 @@ process {
]
}
// Resource settings for the FQ_HEADER_RENAME process.
// The selector keyword must be spelled exactly `withName`; any other
// spelling is not treated as a process selector, so these settings
// would not be applied to the process.
withName: FQ_HEADER_RENAME {
    // Time and memory are scaled by the attempt number, then passed to checkMax.
    time = { checkMax( 2.h * task.attempt, 'time' ) }
    memory = { checkMax( 5.GB * task.attempt * params.resource_factor, 'memory' ) }
    // The process passes task.cpus to `pigz -p` when the input is gzipped.
    cpus = 8
}
// ----- WithLabel
withLabel: littleJob {
executor = 'local'
......
......@@ -29,5 +29,5 @@ manifest {
description = "Workflow for Illumina data quality control"
mainScript = 'main.nf'
nextflowVersion = '>=0.32.0'
version = '1.27.0'
version = '1.27.6'
}
\ No newline at end of file
......@@ -26,3 +26,31 @@ process DEMUX_STATS {
$threshold
"""
}
// Rewrites FASTQ read headers by dropping their second colon-separated field
// (e.g. "@A:B:C:..." becomes "@A:C:...") and writes the result uncompressed.
//
// Input : tuple (meta, fastx) - fastx may be plain or gzip-compressed.
// Output: tuple (meta, "<fileName>.<extension>"), emitted as `file`.
//
// FASTA inputs (recognised by file name) pass through unchanged because their
// lines do not start with '@'.
process FQ_HEADER_RENAME {
    tag "$meta.name"

    input:
    tuple val(meta), path(fastx)

    output:
    tuple val(meta), path("${fileName}.${extension}"), emit: file

    script:
    def args = task.ext.args ?: ''
    // `extension` and `fileName` are intentionally NOT declared with `def`:
    // the output block above references them.
    extension = "fastq"
    // Dots before "gz" are escaped so they only match a literal '.'.
    if ("$fastx" ==~ /.+\.(fasta|fa|fas|fna)(\.gz)?/) {
        extension = "fasta"
    }
    fileName = fastx.toString() - ".${extension}" - '.gz' // remove also .gz if exists
    if (fastx.toString().endsWith('.gz')) {
        // Only the first line of each 4-line record is rewritten: quality lines
        // may legitimately start with '@' and contain ':' and must stay intact.
        // NOTE(review): assumes 4-line FASTQ records (no wrapped sequences) - confirm.
        """
        pigz $args -dc -p ${task.cpus} ${fastx} | \
        awk -F':' 'BEGIN{OFS=":"} {if (NR % 4 == 1 && \$0 ~ /^@/) {\$2=""; sub("::", ":", \$0)} print}' - > ${fileName}.${extension}
        """
    } else {
        // For an uncompressed "<name>.<extension>" input the output name equals
        // the input name. Redirecting straight onto it would truncate the input
        // before awk reads it, so write to a temporary file and then move it
        // into place.
        """
        awk -F':' 'BEGIN{OFS=":"} {if (NR % 4 == 1 && \$0 ~ /^@/) {\$2=""; sub("::", ":", \$0)} print}' ${fastx} > ${fileName}.${extension}.tmp
        mv -f ${fileName}.${extension}.tmp ${fileName}.${extension}
        """
    }
}
\ No newline at end of file
......@@ -13,11 +13,13 @@
include { BEGIN_NGLBI as BEGIN } from "${params.shared_modules}/workflows/begin_nglbi.nf"
include { COPY_TO_SAVE } from "${params.shared_modules}/workflows/copy_to_save.nf"
include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC} from "${params.shared_modules}/ngl_bi.nf"
include { GZIP } from "${params.shared_modules}/gzip.nf"
include { FQ_HEADER_RENAME } from "$baseDir/modules/local/module_core_element.nf"
include { FILE_RENAME as RENAME_FASTQ } from "$baseDir/modules/local/module_NGL-Bi.nf"
include { FILE_RENAME as RENAME_INDEX } from "$baseDir/modules/local/module_NGL-Bi.nf"
include { NGSRG_ILLUMINA } from "$baseDir/modules/local/module_NGL-Bi.nf"
include { NGSRG_ELEMBIO; } from "$baseDir/modules/local/module_NGL-Bi.nf"
include { NGSRG_ELEMBIO } from "$baseDir/modules/local/module_NGL-Bi.nf"
// -------------------------------------------------
// WORKFLOW
// -------------------------------------------------
......@@ -66,6 +68,12 @@ workflow NGL {
RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, sq_xp_code, 'fastq_read')
fastq_files = fastq_files.mix(RENAME_FASTQ.out.fastq.ifEmpty([]))
if (sequencer_name =~ "AVITI") {
FQ_HEADER_RENAME(fastq_files.flatten().map{it -> [[name: it.simpleName], it]})
GZIP(FQ_HEADER_RENAME.out.file)
fastq_files = GZIP.out.archive.map{it -> it[1]}
}
fq = fastq_files
.flatMap()
.map { it -> [[type: 'fastq', barcode:'all'], it]}
......