From 0508f196e6219cef39a41f6f7f342aa5f3342199 Mon Sep 17 00:00:00 2001 From: Allen Yu Date: Fri, 24 Apr 2026 10:09:46 +0800 Subject: [PATCH 1/2] Escape quality string characters in JSON output --- src/common.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++- src/common.h | 3 +++ src/jsonreporter.cpp | 26 ++++++++++++------------- src/match.cpp | 4 ++-- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index 6ec9cdb..0914764 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1 +1,45 @@ -#include "common.h" \ No newline at end of file +#include "common.h" + +std::string escapeJsonString(const std::string& input) { + std::string output; + output.reserve(input.size()); + + for(size_t i = 0; i < input.size(); i++) { + const unsigned char c = static_cast(input[i]); + switch(c) { + case '"': + output += "\\\""; + break; + case '\\': + output += "\\\\"; + break; + case '\b': + output += "\\b"; + break; + case '\f': + output += "\\f"; + break; + case '\n': + output += "\\n"; + break; + case '\r': + output += "\\r"; + break; + case '\t': + output += "\\t"; + break; + default: + if(c < 0x20) { + const char* hex = "0123456789abcdef"; + output += "\\u00"; + output += hex[(c >> 4) & 0x0F]; + output += hex[c & 0x0F]; + } else { + output += static_cast(c); + } + break; + } + } + + return output; +} \ No newline at end of file diff --git a/src/common.h b/src/common.h index 911888b..2563f56 100644 --- a/src/common.h +++ b/src/common.h @@ -1,6 +1,8 @@ #ifndef COMMON_H #define COMMON_H +#include + #define FUSIONSCAN_VER "0.8.0" #define _DEBUG true @@ -42,5 +44,6 @@ static const int PACK_IN_MEM_LIMIT = 100; static const int DUPE_NORMAL_LEVEL = -1; static const int DUPE_HIGH_LEVEL = -2; +std::string escapeJsonString(const std::string& input); #endif /* COMMON_H */ diff --git a/src/jsonreporter.cpp b/src/jsonreporter.cpp index 35b7ad3..f5a126a 100644 --- a/src/jsonreporter.cpp +++ b/src/jsonreporter.cpp @@ -19,9 +19,9 @@ extern string command; void JsonReporter::run() { mFile << "{" << endl; - mFile << "\t\"command\":\"" << command << "\"," << endl; + mFile << "\t\"command\":\"" << escapeJsonString(command) << "\"," << endl; mFile << "\t\"version\":\"" << FUSIONSCAN_VER << "\"," << endl; - mFile << "\t\"time\":\"" << getCurrentSystemTime() << "\"," << endl; + mFile << "\t\"time\":\"" << escapeJsonString(getCurrentSystemTime()) << "\"," << endl; mFile << "\t\"fusions\":{"; bool isFirstMut = true; @@ -42,25 +42,25 @@ void JsonReporter::run() { else mFile << "," << endl; - mFile << "\t\t\"" << fusion.mTitle << "\":{" << endl; + mFile << "\t\t\"" << escapeJsonString(fusion.mTitle) << "\":{" << endl; mFile << "\t\t\t\"" << "left" << "\":{" << endl; - mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << fusion.mLeftGene.mName << "\"," << endl; - mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << fusion.mLeftGene.mChr << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << escapeJsonString(fusion.mLeftGene.mName) << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << escapeJsonString(fusion.mLeftGene.mChr) << "\"," << endl; mFile << "\t\t\t\t\"" << "position" << "\":" << fusion.mLeftGene.genePos2ChrPos(fusion.mLeftGP.position) << "," << endl; - mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << fusion.mLeftRef << "\"," << endl; - mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << fusion.mLeftRefExt << "\"," << endl; - mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << fusion.mLeftPos << "\"," << endl; + mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << escapeJsonString(fusion.mLeftRef) << "\"," << endl; + mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << escapeJsonString(fusion.mLeftRefExt) << "\"," << endl; + mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << escapeJsonString(fusion.mLeftPos) << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron" << "\":" << "\"" << (fusion.mLeftIsExon?"exon":"intron") << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron_id" << "\":" << fusion.mLeftExonOrIntronID << "," << endl; mFile << "\t\t\t\t\"" << "strand" << "\":" << "\"" << (fusion.isLeftProteinForward()?"forward":"reversed") << "\"" << endl; mFile << "\t\t\t}, " << endl; mFile << "\t\t\t\"" << "right" << "\":{" << endl; - mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << fusion.mRightGene.mName << "\"," << endl; - mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << fusion.mRightGene.mChr << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << escapeJsonString(fusion.mRightGene.mName) << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << escapeJsonString(fusion.mRightGene.mChr) << "\"," << endl; mFile << "\t\t\t\t\"" << "position" << "\":" << fusion.mRightGene.genePos2ChrPos(fusion.mRightGP.position) << "," << endl; - mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << fusion.mRightRef << "\"," << endl; - mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << fusion.mRightRefExt << "\"," << endl; - mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << fusion.mRightPos << "\"," << endl; + mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << escapeJsonString(fusion.mRightRef) << "\"," << endl; + mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << escapeJsonString(fusion.mRightRefExt) << "\"," << endl; + mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << escapeJsonString(fusion.mRightPos) << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron" << "\":" << "\"" << (fusion.mRightIsExon?"exon":"intron") << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron_id" << "\":" << fusion.mRightExonOrIntronID << "," << endl; mFile << "\t\t\t\t\"" << "strand" << "\":" << "\"" << (fusion.isRightProteinForward()?"forward":"reversed") << "\"" << endl; diff --git a/src/match.cpp b/src/match.cpp index 1f81a6d..10c8fab 100644 --- a/src/match.cpp +++ b/src/match.cpp @@ -89,6 +89,6 @@ int Match::countUnique(vector& matches) { } void Match::printReadToJson(ofstream& file, string pad) { - file << pad << "\"seq\":" << "\"" << mRead->mSeq.mStr << "\"," << endl; - file << pad << "\"qual\":" << "\"" << mRead->mQuality << "\"" << endl; + file << pad << "\"seq\":" << "\"" << escapeJsonString(mRead->mSeq.mStr) << "\"," << endl; + file << pad << "\"qual\":" << "\"" << escapeJsonString(mRead->mQuality) << "\"" << endl; } From df7dc0f3a728197b1d498e0ed18f9b06d603366c Mon Sep 17 00:00:00 2001 From: Allen Yu Date: Fri, 24 Apr 2026 10:28:16 +0800 Subject: [PATCH 2/2] Fix JSON escaping and stream escaped output --- src/common.cpp | 29 ++++++++++-------------- src/common.h | 3 ++- src/jsonreporter.cpp | 52 +++++++++++++++++++++++++++++++++----------- src/match.cpp | 8 +++++-- 4 files changed, 59 insertions(+), 33 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index 0914764..54a83ff 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1,45 +1,40 @@ #include "common.h" -std::string escapeJsonString(const std::string& input) { - std::string output; - output.reserve(input.size()); - +void writeEscapedJsonString(std::ostream& output, const std::string& input) { for(size_t i = 0; i < input.size(); i++) { const unsigned char c = static_cast(input[i]); switch(c) { case '"': - output += "\\\""; + output << "\\\""; break; case '\\': - output += "\\\\"; + output << "\\\\"; break; case '\b': - output += "\\b"; + output << "\\b"; break; case '\f': - output += "\\f"; + output << "\\f"; break; case '\n': - output += "\\n"; + output << "\\n"; break; case '\r': - output += "\\r"; + output << "\\r"; break; case '\t': - output += "\\t"; + output << "\\t"; break; default: if(c < 0x20) { const char* hex = "0123456789abcdef"; - output += "\\u00"; - output += hex[(c >> 4) & 0x0F]; - output += hex[c & 0x0F]; + output << "\\u00"; + output << hex[(c >> 4) & 0x0F]; + output << hex[c & 0x0F]; } else { - output += static_cast(c); + output << static_cast(c); } break; } } - - return output; } \ No newline at end of file diff --git a/src/common.h b/src/common.h index 2563f56..ed9c3bf 100644 --- a/src/common.h +++ b/src/common.h @@ -1,6 +1,7 @@ #ifndef COMMON_H #define COMMON_H +#include #include #define FUSIONSCAN_VER "0.8.0" @@ -44,6 +45,6 @@ static const int PACK_IN_MEM_LIMIT = 100; static const int DUPE_NORMAL_LEVEL = -1; static const int DUPE_HIGH_LEVEL = -2; -std::string escapeJsonString(const std::string& input); +void writeEscapedJsonString(std::ostream& output, const std::string& input); #endif /* COMMON_H */ diff --git a/src/jsonreporter.cpp b/src/jsonreporter.cpp index f5a126a..9d2ead0 100644 --- a/src/jsonreporter.cpp +++ b/src/jsonreporter.cpp @@ -19,9 +19,13 @@ extern string command; void JsonReporter::run() { mFile << "{" << endl; - mFile << "\t\"command\":\"" << escapeJsonString(command) << "\"," << endl; + mFile << "\t\"command\":\""; + writeEscapedJsonString(mFile, command); + mFile << "\"," << endl; mFile << "\t\"version\":\"" << FUSIONSCAN_VER << "\"," << endl; - mFile << "\t\"time\":\"" << escapeJsonString(getCurrentSystemTime()) << "\"," << endl; + mFile << "\t\"time\":\""; + writeEscapedJsonString(mFile, getCurrentSystemTime()); + mFile << "\"," << endl; mFile << "\t\"fusions\":{"; bool isFirstMut = true; @@ -42,25 +46,47 @@ void JsonReporter::run() { else mFile << "," << endl; - mFile << "\t\t\"" << escapeJsonString(fusion.mTitle) << "\":{" << endl; + mFile << "\t\t\""; + writeEscapedJsonString(mFile, fusion.mTitle); + mFile << "\":{" << endl; mFile << "\t\t\t\"" << "left" << "\":{" << endl; - mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << escapeJsonString(fusion.mLeftGene.mName) << "\"," << endl; - mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << escapeJsonString(fusion.mLeftGene.mChr) << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_name" << "\":\""; + writeEscapedJsonString(mFile, fusion.mLeftGene.mName); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_chr" << "\":\""; + writeEscapedJsonString(mFile, fusion.mLeftGene.mChr); + mFile << "\"," << endl; mFile << "\t\t\t\t\"" << "position" << "\":" << fusion.mLeftGene.genePos2ChrPos(fusion.mLeftGP.position) << "," << endl; - mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << escapeJsonString(fusion.mLeftRef) << "\"," << endl; - mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << escapeJsonString(fusion.mLeftRefExt) << "\"," << endl; - mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << escapeJsonString(fusion.mLeftPos) << "\"," << endl; + mFile << "\t\t\t\t\"" << "reference" << "\":\""; + writeEscapedJsonString(mFile, fusion.mLeftRef); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "ref_ext" << "\":\""; + writeEscapedJsonString(mFile, fusion.mLeftRefExt); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "pos_str" << "\":\""; + writeEscapedJsonString(mFile, fusion.mLeftPos); + mFile << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron" << "\":" << "\"" << (fusion.mLeftIsExon?"exon":"intron") << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron_id" << "\":" << fusion.mLeftExonOrIntronID << "," << endl; mFile << "\t\t\t\t\"" << "strand" << "\":" << "\"" << (fusion.isLeftProteinForward()?"forward":"reversed") << "\"" << endl; mFile << "\t\t\t}, " << endl; mFile << "\t\t\t\"" << "right" << "\":{" << endl; - mFile << "\t\t\t\t\"" << "gene_name" << "\":" << "\"" << escapeJsonString(fusion.mRightGene.mName) << "\"," << endl; - mFile << "\t\t\t\t\"" << "gene_chr" << "\":" << "\"" << escapeJsonString(fusion.mRightGene.mChr) << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_name" << "\":\""; + writeEscapedJsonString(mFile, fusion.mRightGene.mName); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "gene_chr" << "\":\""; + writeEscapedJsonString(mFile, fusion.mRightGene.mChr); + mFile << "\"," << endl; mFile << "\t\t\t\t\"" << "position" << "\":" << fusion.mRightGene.genePos2ChrPos(fusion.mRightGP.position) << "," << endl; - mFile << "\t\t\t\t\"" << "reference" << "\":" << "\"" << escapeJsonString(fusion.mRightRef) << "\"," << endl; - mFile << "\t\t\t\t\"" << "ref_ext" << "\":" << "\"" << escapeJsonString(fusion.mRightRefExt) << "\"," << endl; - mFile << "\t\t\t\t\"" << "pos_str" << "\":" << "\"" << escapeJsonString(fusion.mRightPos) << "\"," << endl; + mFile << "\t\t\t\t\"" << "reference" << "\":\""; + writeEscapedJsonString(mFile, fusion.mRightRef); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "ref_ext" << "\":\""; + writeEscapedJsonString(mFile, fusion.mRightRefExt); + mFile << "\"," << endl; + mFile << "\t\t\t\t\"" << "pos_str" << "\":\""; + writeEscapedJsonString(mFile, fusion.mRightPos); + mFile << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron" << "\":" << "\"" << (fusion.mRightIsExon?"exon":"intron") << "\"," << endl; mFile << "\t\t\t\t\"" << "exon_or_intron_id" << "\":" << fusion.mRightExonOrIntronID << "," << endl; mFile << "\t\t\t\t\"" << "strand" << "\":" << "\"" << (fusion.isRightProteinForward()?"forward":"reversed") << "\"" << endl; diff --git a/src/match.cpp b/src/match.cpp index 10c8fab..0d40757 100644 --- a/src/match.cpp +++ b/src/match.cpp @@ -89,6 +89,10 @@ int Match::countUnique(vector& matches) { } void Match::printReadToJson(ofstream& file, string pad) { - file << pad << "\"seq\":" << "\"" << escapeJsonString(mRead->mSeq.mStr) << "\"," << endl; - file << pad << "\"qual\":" << "\"" << escapeJsonString(mRead->mQuality) << "\"" << endl; + file << pad << "\"seq\":\""; + writeEscapedJsonString(file, mRead->mSeq.mStr); + file << "\"," << endl; + file << pad << "\"qual\":\""; + writeEscapedJsonString(file, mRead->mQuality); + file << "\"" << endl; }