Compare revisions

Jules Sabban · Jules Sabban · Jules Sabban · Jules Sabban · Jules Sabban · Jules Sabban
--- a/assets/fastq_screen.conf_example
+++ b/assets/fastq_screen.conf_example
@@ -55,7 +55,7 @@ THREADS		8
 ## have contaminated your sample during the library preparation step.
 ##
 Genome of E. coli
-DATABASE	E.coli	/home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
+DATABASE	Bacterium	/home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069

 Sequence of PhiX
 DATABASE	PhiX	/home/sbsuser/plage/references/indexed/ng6_conta_ref/PhiX/BWA/phi.fa

--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -6,7 +6,7 @@ intro_text: "This MultiQC report summarise Quality Control analysis results."

 report_comment: >
  You can find detailed guidance on how to interpret the following graphs in the <a href="https://bios4biol.pages.mia.inra.fr/Help4MultiQC/" target="_blank"><strong>Help4MultiQC GitBook</strong></a>.  
-  Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may help deepen your understanding of the presented data.
+  Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may enhance your understanding of the presented data.

 show_analysis_paths: False
 show_analysis_time: False
@@ -59,8 +59,9 @@ remove_sections:
  - flash-histogram

 section_comments:
-  fastqc_sequence_counts: "Tips : Use this graph to visualize the amount of each samples. Shouldn't be use to determine the proportion of duplicated reads (see 'General Statistics')."
+  fastqc_sequence_counts: "Tip: Use this graph to visualize the number of sequences in each sample. It should not be used to determine the proportion of duplicated reads (see 'General Statistics')."
  sortmerna: "Total rRNA percentage is available in the 'General Statistics'. Non-rRNA sequences are NOT USED for this graph."
+  fastq_screen: "This analysis relies on permissive alignments using BWA, which produce a certain number of false-positive hits. A low percentage of contamination is therefore not a cause for concern.<br><i>NB: This analysis is of limited interest for transcriptomic reads.</i>"

 module_order:
  - fastqc:
@@ -76,7 +77,6 @@ module_order:
  - fastq_screen:
        name: "ContaminationSearch"
        #info: "This section shows the module with different files"
-        info: "This analysis is performed by permissive alignments using BWA, which display a certain amount of false positive hits. Thus, don't worry if a low percentage of contamination is present. This analysis has limited interest on transcriptomic reads.<br>"
        target: "FastQ-Screen"
  - sortmerna:
        name: "ContaminationSearch - rRNA"

--- a/conf/base.config
+++ b/conf/base.config
@@ -232,6 +232,12 @@ process {
 		]
 	}

+	// Resources for the FQ_HEADER_RENAME process, selected by process name.
+	// Time and memory grow with the retry attempt; memory is additionally
+	// multiplied by params.resource_factor. Both are capped through checkMax.
+	withName: FQ_HEADER_RENAME {
+		time = { checkMax( 2.h * task.attempt, 'time' ) }
+		memory = { checkMax( 5.GB * task.attempt * params.resource_factor, 'memory' ) }
+		cpus = 8	// forwarded to pigz through ${task.cpus} in the process script
+	}
+
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'

--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
 	description = "Workflow for Illumina data quality control"
 	mainScript = 'main.nf'
 	nextflowVersion = '>=0.32.0'
-	version = '1.27.0'
+	version = '1.27.6'
 }
\ No newline at end of file
--- a/modules/local/module_core_element.nf
+++ b/modules/local/module_core_element.nf
@@ -26,3 +26,31 @@ process DEMUX_STATS {
        $threshold
 	"""
 }
+
+// Removes the second colon-separated field from every FASTQ record header of
+// `fastx` and writes the result, uncompressed, as "<fileName>.<extension>".
+//   input : tuple val(meta), path(fastx)  - plain or gzip-compressed sequence file
+//   output: tuple val(meta), path(<rewritten file>), emitted as `file`
+// task.ext.args (if set) is passed to pigz when the input is gzip-compressed.
+process FQ_HEADER_RENAME {
+	tag "$meta.name"
+
+	input:
+		tuple val(meta), path(fastx)
+
+	output:
+		tuple val(meta), path("${fileName}.${extension}"), emit: file
+
+	script:
+	def args = task.ext.args ?: ''
+	def inputName = fastx.toString()
+	// extension and fileName are intentionally NOT declared with `def`:
+	// the output block above reads them.
+	extension = (inputName ==~ /.+\.(fasta|fa|fas|fna)(\.gz)?/) ? "fasta" : "fastq"
+	fileName = inputName - ".${extension}" - '.gz'	// remove also .gz if exists
+	def target = "${fileName}.${extension}"
+	// Rewrite only record headers (1st line of each 4-line record). Quality lines
+	// may legitimately start with '@' and must be left untouched.
+	// Assumes 4-line FASTQ records (no wrapped sequences) - TODO confirm for all inputs.
+	// Single-quoted Groovy string: the awk `$` fields are not interpolated here.
+	def awkProgram = 'BEGIN{OFS=":"} NR % 4 == 1 && /^@/ {$2=""; sub("::", ":", $0)} {print}'
+	if (inputName.endsWith('.gz')) {
+	"""
+		pigz $args -dc -p ${task.cpus} ${fastx} | \
+		awk -F':' '${awkProgram}' - > ${target}
+	"""
+	} else {
+	// The target can have the same name as the input (e.g. sample.fastq);
+	// redirecting straight onto it would truncate the input before awk reads it,
+	// so write to a temporary file and move it into place afterwards.
+	"""
+		awk -F':' '${awkProgram}' ${fastx} > ${target}.tmp
+		mv -f ${target}.tmp ${target}
+	"""
+	}
+}
\ No newline at end of file
--- a/sub-workflows/local/ngl.nf
+++ b/sub-workflows/local/ngl.nf
@@ -13,11 +13,13 @@
 include {	BEGIN_NGLBI as BEGIN							} from "${params.shared_modules}/workflows/begin_nglbi.nf"
 include {	COPY_TO_SAVE									} from "${params.shared_modules}/workflows/copy_to_save.nf"
 include {	UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC} from "${params.shared_modules}/ngl_bi.nf"
+include {	GZIP											} from "${params.shared_modules}/gzip.nf"

+include {	FQ_HEADER_RENAME								} from "$baseDir/modules/local/module_core_element.nf"
 include {	FILE_RENAME as RENAME_FASTQ						} from "$baseDir/modules/local/module_NGL-Bi.nf"
 include {	FILE_RENAME as RENAME_INDEX						} from "$baseDir/modules/local/module_NGL-Bi.nf"
 include {	NGSRG_ILLUMINA									} from "$baseDir/modules/local/module_NGL-Bi.nf"
-include {	NGSRG_ELEMBIO;					 				} from "$baseDir/modules/local/module_NGL-Bi.nf"
+include {	NGSRG_ELEMBIO					 				} from "$baseDir/modules/local/module_NGL-Bi.nf"
 // -------------------------------------------------
 // 					WORKFLOW
 // -------------------------------------------------
@@ -66,6 +68,12 @@ workflow NGL {
 		RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, sq_xp_code, 'fastq_read')
 		fastq_files = fastq_files.mix(RENAME_FASTQ.out.fastq.ifEmpty([]))
 		
+		if (sequencer_name =~ "AVITI") {
+			FQ_HEADER_RENAME(fastq_files.flatten().map{it -> [[name: it.simpleName], it]})
+			GZIP(FQ_HEADER_RENAME.out.file)
+			fastq_files = GZIP.out.archive.map{it -> it[1]}
+		}
+		
 		fq = fastq_files
 			.flatMap()
 			.map { it -> [[type: 'fastq', barcode:'all'], it]}
No results found