Honor allowComments==false mode.

Much of the comment handling in the parsers is bespoke, and does not honor this flag. By unifying it under a common API, the parser is simplified and strict mode is now more correctly strict. Note that allowComments mode does not allow for comments in arbitrary locations; they are allowed only in certain positions. Rectifying this is a bigger effort, since collectComments mode requires storing the comments somewhere, and it's not immediately clear where in the DOM all such comments should live.
2023-07-19 14:59:14 -04:00 · 2023-07-19 14:59:14 -04:00 · 006b5dfe57
commit 006b5dfe57
parent eb21dc0697
5 changed files with 35 additions and 48 deletions
--- a/include/json/reader.h
+++ b/include/json/reader.h
@ -190,6 +190,7 @@ private:
  using Errors = std::deque<ErrorInfo>;

  bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
  void skipSpaces();
  bool match(const Char* pattern, int patternLength);
  bool readComment();
@ -221,7 +222,6 @@ private:
                                int& column) const;
  String getLocationLineAndColumn(Location location) const;
  void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);

  static bool containsNewLine(Location begin, Location end);
  static String normalizeEOL(Location begin, Location end);
--- a/src/lib_json/json_reader.cpp
+++ b/src/lib_json/json_reader.cpp
@ -129,7 +129,7 @@ bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,

  bool successful = readValue();
  Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
  if (collectComments_ && !commentsBefore_.empty())
    root.setComment(commentsBefore_, commentAfter);
  if (features_.strictRoot_) {
@ -157,7 +157,7 @@ bool Reader::readValue() {
    throwRuntimeError("Exceeded stackLimit in readValue().");

  Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
  bool successful = true;

  if (collectComments_ && !commentsBefore_.empty()) {
@ -225,14 +225,14 @@ bool Reader::readValue() {
  return successful;
 }

-void Reader::skipCommentTokens(Token& token) {
+bool Reader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
  if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
  }
+  return success;
 }

 bool Reader::readToken(Token& token) {
@ -446,12 +446,7 @@ bool Reader::readObject(Token& token) {
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
@ -480,11 +475,7 @@ bool Reader::readObject(Token& token) {
      return recoverFromError(tokenObjectEnd);

    Token comma;
-    bool finalizeTokenOk = readToken(comma);
-    while (comma.type_ == tokenComment && finalizeTokenOk) {
-      finalizeTokenOk = readToken(comma);
-    }
-    if (!finalizeTokenOk ||
+    if (!readTokenSkippingComments(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
@ -518,10 +509,7 @@ bool Reader::readArray(Token& token) {

    Token currentToken;
    // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
    bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                         currentToken.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
@ -943,6 +931,7 @@ private:
  using Errors = std::deque<ErrorInfo>;

  bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
  void skipSpaces();
  void skipBom(bool skipBom);
  bool match(const Char* pattern, int patternLength);
@ -976,7 +965,6 @@ private:
                                int& column) const;
  String getLocationLineAndColumn(Location location) const;
  void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);

  static String normalizeEOL(Location begin, Location end);
  static bool containsNewLine(Location begin, Location end);
@ -1030,7 +1018,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
  bool successful = readValue();
  nodes_.pop();
  Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
    addError("Extra non-whitespace after JSON value.", token);
    return false;
@ -1058,7 +1046,7 @@ bool OurReader::readValue() {
  if (nodes_.size() > features_.stackLimit_)
    throwRuntimeError("Exceeded stackLimit in readValue().");
  Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
  bool successful = true;

  if (collectComments_ && !commentsBefore_.empty()) {
@ -1145,14 +1133,14 @@ bool OurReader::readValue() {
  return successful;
 }

-void OurReader::skipCommentTokens(Token& token) {
+bool OurReader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
  if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
  }
+  return success;
 }

 bool OurReader::readToken(Token& token) {
@ -1449,12 +1437,7 @@ bool OurReader::readObject(Token& token) {
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
    if (tokenName.type_ == tokenObjectEnd &&
        (name.empty() ||
         features_.allowTrailingCommas_)) // empty object or trailing comma
@ -1491,11 +1474,7 @@ bool OurReader::readObject(Token& token) {
      return recoverFromError(tokenObjectEnd);

    Token comma;
-    bool finalizeTokenOk = readToken(comma);
-    while (comma.type_ == tokenComment && finalizeTokenOk) {
-      finalizeTokenOk = readToken(comma);
-    }
-    if (!finalizeTokenOk ||
+    if (!readTokenSkippingComments(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
@ -1533,10 +1512,7 @@ bool OurReader::readArray(Token& token) {

    Token currentToken;
    // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
    bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                         currentToken.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
--- a/test/data/fail_strict_comment_01.json
+++ b/test/data/fail_strict_comment_01.json
@ -0,0 +1,4 @@
+{
+  "a": "aaa",
+  "b": "bbb" // comments not allowed in strict mode
+}
--- a/test/data/fail_strict_comment_02.json
+++ b/test/data/fail_strict_comment_02.json
@ -0,0 +1,4 @@
+{
+  "a": "aaa", // comments not allowed in strict mode
+  "b": "bbb"
+}
--- a/test/data/fail_strict_comment_03.json
+++ b/test/data/fail_strict_comment_03.json
@ -0,0 +1,3 @@
+{
+  "array" : [1, 2, 3 /* comments not allowed in strict mode */]
+}