diff --git a/lib/ts_utils/utils.py b/lib/ts_utils/utils.py index bf87def015bf..d7dd6ac0fcba 100644 --- a/lib/ts_utils/utils.py +++ b/lib/ts_utils/utils.py @@ -28,6 +28,24 @@ def colored(text: str, color: str | None = None, **kwargs: Any) -> str: # type: return text +_REMOVE_COMMENT_RE = re.compile( + r""" + (\"(?:\\.|[^\\\"])*?\") # matches literal strings + | + (\/\*.*?\*\/ | \/\/[^\r\n]*?(?:[\r\n])) # matches single- and multi-line comments + """, + re.DOTALL | re.VERBOSE, +) +_REMOVE_TRAILING_COMMA_RE = re.compile( + r""" + (\"(?:\\.|[^\\\"])*?\") # matches literal strings + | + ,\s*([\]}]) # matches commas before '}' or ']' + """, + re.DOTALL | re.VERBOSE, +) + + PYTHON_VERSION: Final = f"{sys.version_info.major}.{sys.version_info.minor}" @@ -35,13 +53,15 @@ def strip_comments(text: str) -> str: return text.split("#")[0].strip() -def json5_to_json(text: str) -> str: - """Incomplete conversion from JSON5-like input to valid JSON.""" - # Remove full-line // comments only - # (Can not remove inline comments) - text = re.sub(r"(?m)^\s*//.*\n?", "", text) +def jsonc_to_json(text: str) -> str: + """Conversion from JSONC format input to valid JSON.""" + # Remove comments + if not text.endswith("\n"): + text += "\n" + text = _REMOVE_COMMENT_RE.sub(lambda m: m.group(1) or "", text) + # Remove trailing commas before } or ] - text = re.sub(r",\s*([}\]])", r"\1", text) + text = _REMOVE_TRAILING_COMMA_RE.sub(lambda m: m.group(1) or m.group(2), text) return text diff --git a/tests/check_typeshed_structure.py b/tests/check_typeshed_structure.py index dd986b3f3e8b..8c3892ff04a6 100755 --- a/tests/check_typeshed_structure.py +++ b/tests/check_typeshed_structure.py @@ -17,7 +17,7 @@ from ts_utils.utils import ( get_all_testcase_directories, get_gitignore_spec, - json5_to_json, + jsonc_to_json, parse_requirements, parse_stdlib_versions_file, spec_matches_path, @@ -178,7 +178,7 @@ def check_requirement_pins() -> None: def check_pyright_exclude_order() -> None: """Check that 'exclude' entries in pyrightconfig.stricter.json are sorted alphabetically.""" text = PYRIGHT_CONFIG.read_text(encoding="utf-8") - text = json5_to_json(text) + text = jsonc_to_json(text) data = json.loads(text) exclude: list[str] = data.get("exclude", [])