commit 610939e0b14ba1d0c5727058de651ce1afa0e10d from: Murilo Ijanc date: Fri Apr 17 20:02:31 2026 UTC initial import of jackson, a single-file JSON library commit - /dev/null commit + 610939e0b14ba1d0c5727058de651ce1afa0e10d blob - /dev/null blob + 80b31097717545e53b8f84e85553f12bbde9c274 (mode 644) --- /dev/null +++ .gitignore @@ -0,0 +1,3 @@ +build +public +vendor blob - /dev/null blob + df99c69198f5813df5fc3eaa007a2af0e60a7bbd (mode 644) --- /dev/null +++ .rustfmt.toml @@ -0,0 +1 @@ +max_width = 80 blob - /dev/null blob + 32f3292e31bbd5acf75b7671b679e9017828078c (mode 644) --- /dev/null +++ LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2026 Murilo Ijanc' + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. blob - /dev/null blob + 2fa3c5de8030442c09958f14243f440d9e39684a (mode 644) --- /dev/null +++ Makefile @@ -0,0 +1,142 @@ +# +# Copyright (c) 2026 Murilo Ijanc' +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# + +RUSTC ?= $(shell rustup which rustc 2>/dev/null || which rustc) +RUSTFLAGS ?= -C opt-level=2 +VERSION = 0.1.0 +CURL ?= curl + +BUILD = build +VENDOR = vendor +SRC = jackson.rs +LIB = $(BUILD)/libjackson.rlib +TEST = $(BUILD)/jackson-test +SUITE_SRC = tests/suite.rs +SUITE = $(BUILD)/suite + +BENCH_SRC = bench/bench.rs +BENCH_JACKSON = $(BUILD)/bench-jackson +BENCH_TINYJSON = $(BUILD)/bench-tinyjson +BENCH_ITERS ?= 100 +FIXTURE = $(VENDOR)/fixture.json +FIXTURE_URL = https://github.com/miloyip/nativejson-benchmark/raw/master/data/twitter.json + +TINYJSON_SRC ?= ../tinyjson/src/lib.rs +TINYJSON_LIB = $(BUILD)/libtinyjson.rlib + +JSON_CHECKER_URL = https://www.json.org/JSON_checker/test.zip +JSON_TEST_SUITE_URL = https://github.com/nst/JSONTestSuite +JSON_CHECKER_DIR = $(VENDOR)/JSON_checker +JSON_TEST_SUITE_DIR = $(VENDOR)/JSONTestSuite + +CLIPPY ?= $(shell rustup which clippy-driver 2>/dev/null) +RUSTFMT ?= $(shell rustup which rustfmt 2>/dev/null) +RUSTDOC ?= $(shell rustup which rustdoc 2>/dev/null || which rustdoc) + +DOC = $(BUILD)/doc/jackson/index.html + +.PHONY: all clean test fmt-check clippy ci suite suite-fetch doc \ + bench bench-fetch + +all: $(LIB) + +$(LIB): $(SRC) + mkdir -p $(BUILD) + JACKSON_VERSION=$(VERSION) $(RUSTC) --edition 2024 \ + --crate-type rlib --crate-name jackson $(RUSTFLAGS) \ + -o $@ $< + +$(TEST): $(SRC) + mkdir -p $(BUILD) + JACKSON_VERSION=$(VERSION) $(RUSTC) --edition 2024 \ + --test --crate-name jackson -o $@ $< + +test: $(TEST) + $(TEST) + +fmt-check: + $(RUSTFMT) --edition 2024 --check $(SRC) + +clippy: + mkdir -p $(BUILD) + JACKSON_VERSION=$(VERSION) $(CLIPPY) --edition 2024 \ + --crate-type rlib 
--crate-name jackson \ + -W clippy::all -o $(BUILD)/jackson.clippy $(SRC) + @rm -f $(BUILD)/jackson.clippy + +ci: fmt-check clippy $(LIB) test + +$(DOC): $(SRC) + mkdir -p $(BUILD) + $(RUSTDOC) --edition 2024 --crate-name jackson \ + -o $(BUILD)/doc $(SRC) + +doc: $(DOC) + +suite-fetch: $(JSON_CHECKER_DIR) $(JSON_TEST_SUITE_DIR) + +$(JSON_CHECKER_DIR): + mkdir -p $(VENDOR) + $(CURL) -sL $(JSON_CHECKER_URL) -o $(VENDOR)/test.zip + rm -rf $(VENDOR)/_jc_tmp + unzip -q $(VENDOR)/test.zip -d $(VENDOR)/_jc_tmp + mv $(VENDOR)/_jc_tmp/test $(JSON_CHECKER_DIR) + rm -rf $(VENDOR)/_jc_tmp $(VENDOR)/test.zip + +$(JSON_TEST_SUITE_DIR): + mkdir -p $(VENDOR) + git clone --depth 1 $(JSON_TEST_SUITE_URL) $(JSON_TEST_SUITE_DIR) + +$(SUITE): $(SUITE_SRC) $(LIB) + mkdir -p $(BUILD) + $(RUSTC) --edition 2024 --test --crate-name suite \ + --extern jackson=$(LIB) -L $(BUILD) -o $@ $(SUITE_SRC) + +suite: $(SUITE) + $(SUITE) + +bench-fetch: $(FIXTURE) + +$(FIXTURE): + mkdir -p $(VENDOR) + $(CURL) -sL $(FIXTURE_URL) -o $(FIXTURE) + +$(TINYJSON_LIB): $(TINYJSON_SRC) + mkdir -p $(BUILD) + $(RUSTC) --edition 2018 --crate-type rlib --crate-name tinyjson \ + $(RUSTFLAGS) -o $@ $< + +$(BENCH_JACKSON): $(BENCH_SRC) $(LIB) + mkdir -p $(BUILD) + $(RUSTC) --edition 2024 --crate-name bench \ + --cfg 'lib="jackson"' --extern jackson=$(LIB) -L $(BUILD) \ + $(RUSTFLAGS) -o $@ $< + +$(BENCH_TINYJSON): $(BENCH_SRC) $(TINYJSON_LIB) + mkdir -p $(BUILD) + $(RUSTC) --edition 2024 --crate-name bench \ + --cfg 'lib="tinyjson"' --extern tinyjson=$(TINYJSON_LIB) \ + -L $(BUILD) $(RUSTFLAGS) -o $@ $< + +bench: $(BENCH_JACKSON) $(BENCH_TINYJSON) $(FIXTURE) + @printf "%-10s %s bytes\n" jackson "$$(wc -c < $(BENCH_JACKSON))" + @printf "%-10s %s bytes\n" tinyjson "$$(wc -c < $(BENCH_TINYJSON))" + @echo + $(BENCH_JACKSON) $(FIXTURE) $(BENCH_ITERS) + $(BENCH_TINYJSON) $(FIXTURE) $(BENCH_ITERS) + +clean: + rm -rf $(BUILD) blob - /dev/null blob + eaf8ded6fa91a0360653f1f522c6156fda2db73a (mode 644) --- /dev/null +++ 
README.md @@ -0,0 +1,52 @@ +jackson - parse and generate JSON documents +============================================ +jackson is a minimal, zero-dependency, single-file JSON parser and +generator written in Rust. It is meant to be dropped into another +project's source tree, or linked as a plain rlib. + + +Requirements +------------ +In order to build jackson you need rustc (edition 2024). + + +Installation +------------ +There are two ways to use jackson in another Rust project. + +Drop-in source. Copy jackson.rs into your project and declare it as +a module: + + mod jackson; + use jackson::Value; + +Linked rlib. Build the library with make(1) and pass it to rustc: + + $ make + $ rustc --extern jackson=build/libjackson.rlib -L build main.rs + + +Example +------- +Parse a JSON document: + + use jackson::Value; + + let v: Value = r#"{"name":"jackson"}"#.parse()?; + +Generate a JSON document: + + println!("{}", v.stringify()?); + + +Download +-------- + git clone ssh://anon@ijanc.org/jackson + git clone https://git.ijanc.org/jackson.git + git clone https://git.sr.ht/~ijanc/jackson + git clone https://github.com/jackson.git + + +License +------- +ISC - see LICENSE. blob - /dev/null blob + 431e09884520e79461d49b26f8d64137dcc5d228 (mode 644) --- /dev/null +++ bench/bench.rs @@ -0,0 +1,67 @@ +// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et : +// +// Copyright (c) 2026 Murilo Ijanc' +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// Throughput benchmark. Parses a fixture N times, then stringifies +// the parsed value N times, and reports MB/s. Built once per library +// via --cfg lib="".
+ +use std::env; +use std::fs; +use std::hint::black_box; +use std::time::Instant; + +#[cfg(lib = "jackson")] +use jackson::Value; + +#[cfg(lib = "tinyjson")] +use tinyjson::JsonValue as Value; + +#[cfg(lib = "jackson")] +const LIB: &str = "jackson"; + +#[cfg(lib = "tinyjson")] +const LIB: &str = "tinyjson"; + +fn main() { + let args: Vec = env::args().collect(); + let path = args + .get(1) + .map(String::as_str) + .unwrap_or("vendor/fixture.json"); + let iters: usize = + args.get(2).and_then(|s| s.parse().ok()).unwrap_or(50); + + let input = fs::read_to_string(path).expect("read fixture"); + let bytes = input.len(); + + // Warm up and keep one value for the stringify loop. + let v: Value = input.parse().expect("parse fixture"); + let _ = v.stringify().expect("stringify fixture"); + + let t0 = Instant::now(); + for _ in 0..iters { + let _ = black_box(input.parse::().unwrap()); + } + let parse_dur = t0.elapsed(); + + let t0 = Instant::now(); + for _ in 0..iters { + let _ = black_box(v.stringify().unwrap()); + } + let stringify_dur = t0.elapsed(); + + let total = (bytes * iters) as f64; + let parse_mb = total / parse_dur.as_secs_f64() / 1_000_000.0; + let str_mb = total / stringify_dur.as_secs_f64() / 1_000_000.0; + + println!( + "{:10} parse {:6.1} MB/s stringify {:6.1} MB/s \ + ({} iters, {} bytes)", + LIB, parse_mb, str_mb, iters, bytes + ); +} blob - /dev/null blob + c57bddd73ec75cfe62db167973aa379b968ae268 (mode 644) --- /dev/null +++ jackson.rs @@ -0,0 +1,822 @@ +// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et : +// +// Copyright (c) 2026 Murilo Ijanc' +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +//! Parse and generate JSON documents. + +use std::fmt; +use std::fmt::Write as _; +use std::str::FromStr; + +/// A JSON value. +pub enum Value { + Null, + Bool(bool), + Number(f64), + String(String), + Array(Vec), + Object(Vec<(String, Value)>), +} + +/// A parse or serialise error. +pub struct Error { + msg: &'static str, + pos: usize, +} + +impl Error { + const fn new(msg: &'static str, pos: usize) -> Self { + Self { msg, pos } + } + + /// Static error message. + pub const fn message(&self) -> &'static str { + self.msg + } + + /// Byte offset into the input where the error was detected. + pub const fn position(&self) -> usize { + self.pos + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} at byte {}", self.msg, self.pos) + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl std::error::Error for Error {} + +impl FromStr for Value { + type Err = Error; + + fn from_str(s: &str) -> Result { + Parser::new(s).parse_root() + } +} + +impl Value { + /// Serialise this value as a JSON document. 
+ pub fn stringify(&self) -> Result { + let mut out = String::new(); + write_value(self, &mut out)?; + Ok(out) + } + + pub fn as_bool(&self) -> Option { + match self { + Value::Bool(b) => Some(*b), + _ => None, + } + } + + pub fn as_number(&self) -> Option { + match self { + Value::Number(n) => Some(*n), + _ => None, + } + } + + pub fn as_str(&self) -> Option<&str> { + match self { + Value::String(s) => Some(s.as_str()), + _ => None, + } + } + + pub fn as_array(&self) -> Option<&[Value]> { + match self { + Value::Array(a) => Some(a.as_slice()), + _ => None, + } + } + + pub fn as_object(&self) -> Option<&[(String, Value)]> { + match self { + Value::Object(o) => Some(o.as_slice()), + _ => None, + } + } +} + +impl From for Value { + fn from(b: bool) -> Self { + Value::Bool(b) + } +} + +impl From for Value { + fn from(n: f64) -> Self { + Value::Number(n) + } +} + +impl From<&str> for Value { + fn from(s: &str) -> Self { + Value::String(s.to_string()) + } +} + +impl From for Value { + fn from(s: String) -> Self { + Value::String(s) + } +} + +fn write_value(v: &Value, out: &mut String) -> Result<(), Error> { + match v { + Value::Null => out.push_str("null"), + Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }), + Value::Number(n) => { + if !n.is_finite() { + return Err(Error::new("non-finite number", 0)); + } + if *n != 0.0 && n.fract() == 0.0 && n.abs() < (1_i64 << 53) as f64 { + write!(out, "{}", *n as i64).unwrap(); + } else { + write!(out, "{n}").unwrap(); + } + } + Value::String(s) => write_string(s, out), + Value::Array(a) => { + out.push('['); + for (i, item) in a.iter().enumerate() { + if i > 0 { + out.push(','); + } + write_value(item, out)?; + } + out.push(']'); + } + Value::Object(o) => { + out.push('{'); + for (i, (k, item)) in o.iter().enumerate() { + if i > 0 { + out.push(','); + } + write_string(k, out); + out.push(':'); + write_value(item, out)?; + } + out.push('}'); + } + } + Ok(()) +} + +fn write_string(s: &str, out: &mut String) { + 
out.push('"'); + let bytes = s.as_bytes(); + let mut run_start = 0; + for (i, &b) in bytes.iter().enumerate() { + let esc: &str = match b { + b'"' => "\\\"", + b'\\' => "\\\\", + b'\n' => "\\n", + b'\r' => "\\r", + b'\t' => "\\t", + 0x08 => "\\b", + 0x0C => "\\f", + 0..=0x1F => { + out.push_str(&s[run_start..i]); + write!(out, "\\u{:04x}", b).unwrap(); + run_start = i + 1; + continue; + } + _ => continue, + }; + out.push_str(&s[run_start..i]); + out.push_str(esc); + run_start = i + 1; + } + out.push_str(&s[run_start..]); + out.push('"'); +} + +const MAX_DEPTH: usize = 128; + +struct Parser<'a> { + src: &'a [u8], + pos: usize, + depth: usize, +} + +impl<'a> Parser<'a> { + fn new(s: &'a str) -> Self { + Self { + src: s.as_bytes(), + pos: 0, + depth: 0, + } + } + + fn err(&self, msg: &'static str) -> Error { + Error::new(msg, self.pos) + } + + fn peek(&self) -> Option { + self.src.get(self.pos).copied() + } + + fn bump(&mut self) -> Option { + let b = self.peek()?; + self.pos += 1; + Some(b) + } + + fn skip_ws(&mut self) { + while let Some(b) = self.peek() { + if matches!(b, b' ' | b'\t' | b'\n' | b'\r') { + self.pos += 1; + } else { + break; + } + } + } + + fn expect(&mut self, b: u8, msg: &'static str) -> Result<(), Error> { + if self.peek() == Some(b) { + self.pos += 1; + Ok(()) + } else { + Err(self.err(msg)) + } + } + + fn expect_keyword(&mut self, kw: &[u8]) -> Result<(), Error> { + let end = self.pos + kw.len(); + if end > self.src.len() || &self.src[self.pos..end] != kw { + return Err(self.err("expected keyword")); + } + self.pos = end; + Ok(()) + } + + fn enter(&mut self) -> Result<(), Error> { + if self.depth >= MAX_DEPTH { + return Err(self.err("max nesting depth exceeded")); + } + self.depth += 1; + Ok(()) + } + + fn parse_root(&mut self) -> Result { + let v = self.parse_value()?; + self.skip_ws(); + if self.pos < self.src.len() { + return Err(self.err("trailing garbage")); + } + Ok(v) + } + + fn parse_value(&mut self) -> Result { + self.skip_ws(); + let b 
= self + .peek() + .ok_or_else(|| self.err("unexpected end of input"))?; + match b { + b'n' => { + self.expect_keyword(b"null")?; + Ok(Value::Null) + } + b't' => { + self.expect_keyword(b"true")?; + Ok(Value::Bool(true)) + } + b'f' => { + self.expect_keyword(b"false")?; + Ok(Value::Bool(false)) + } + b'"' => Ok(Value::String(self.parse_string()?)), + b'[' => self.parse_array(), + b'{' => self.parse_object(), + b'-' | b'0'..=b'9' => Ok(Value::Number(self.parse_number()?)), + _ => Err(self.err("unexpected character")), + } + } + + fn parse_string(&mut self) -> Result { + self.pos += 1; + let mut out = String::new(); + loop { + let run_start = self.pos; + while let Some(&b) = self.src.get(self.pos) { + if matches!(b, b'"' | b'\\') || b < 0x20 { + break; + } + self.pos += 1; + } + let run = &self.src[run_start..self.pos]; + // SAFETY: src is the byte view of a &str input; the scan + // only breaks on ASCII bytes ("", \\, < 0x20), so + // run_start..self.pos is always a valid UTF-8 substring. 
+ let run_str = unsafe { std::str::from_utf8_unchecked(run) }; + out.push_str(run_str); + match self.peek() { + None => return Err(self.err("unterminated string")), + Some(b'"') => { + self.pos += 1; + return Ok(out); + } + Some(b'\\') => { + self.pos += 1; + let esc = + self.bump().ok_or_else(|| self.err("bad escape"))?; + match esc { + b'"' => out.push('"'), + b'\\' => out.push('\\'), + b'/' => out.push('/'), + b'b' => out.push('\u{08}'), + b'f' => out.push('\u{0C}'), + b'n' => out.push('\n'), + b'r' => out.push('\r'), + b't' => out.push('\t'), + b'u' => out.push(self.parse_u_escape()?), + _ => return Err(self.err("invalid escape")), + } + } + Some(_) => { + return Err(self.err("control character in string")); + } + } + } + } + + fn parse_u_escape(&mut self) -> Result { + let hi = self.parse_hex4()?; + if (0xD800..=0xDBFF).contains(&hi) { + if self.bump() != Some(b'\\') || self.bump() != Some(b'u') { + return Err(self.err("expected low surrogate")); + } + let lo = self.parse_hex4()?; + if !(0xDC00..=0xDFFF).contains(&lo) { + return Err(self.err("invalid low surrogate")); + } + let code = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00); + char::from_u32(code).ok_or_else(|| self.err("invalid codepoint")) + } else { + char::from_u32(hi).ok_or_else(|| self.err("invalid codepoint")) + } + } + + fn parse_hex4(&mut self) -> Result { + let mut v: u32 = 0; + for _ in 0..4 { + let b = + self.bump().ok_or_else(|| self.err("bad unicode escape"))?; + let d = match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => b - b'a' + 10, + b'A'..=b'F' => b - b'A' + 10, + _ => return Err(self.err("bad hex digit")), + }; + v = v * 16 + d as u32; + } + Ok(v) + } + + fn parse_number(&mut self) -> Result { + let start = self.pos; + if self.peek() == Some(b'-') { + self.pos += 1; + } + match self.peek() { + Some(b'0') => self.pos += 1, + Some(b'1'..=b'9') => { + self.pos += 1; + while matches!(self.peek(), Some(b'0'..=b'9')) { + self.pos += 1; + } + } + _ => return Err(self.err("expected 
digit")), + } + if self.peek() == Some(b'.') { + self.pos += 1; + if !matches!(self.peek(), Some(b'0'..=b'9')) { + return Err(self.err("expected digit after decimal point")); + } + while matches!(self.peek(), Some(b'0'..=b'9')) { + self.pos += 1; + } + } + if matches!(self.peek(), Some(b'e' | b'E')) { + self.pos += 1; + if matches!(self.peek(), Some(b'+' | b'-')) { + self.pos += 1; + } + if !matches!(self.peek(), Some(b'0'..=b'9')) { + return Err(self.err("expected digit in exponent")); + } + while matches!(self.peek(), Some(b'0'..=b'9')) { + self.pos += 1; + } + } + let slice = &self.src[start..self.pos]; + let s = std::str::from_utf8(slice).unwrap(); + s.parse::() + .map_err(|_| Error::new("invalid number", start)) + } + + fn parse_array(&mut self) -> Result { + self.pos += 1; + self.enter()?; + let mut items = Vec::new(); + self.skip_ws(); + if self.peek() == Some(b']') { + self.pos += 1; + self.depth -= 1; + return Ok(Value::Array(items)); + } + loop { + items.push(self.parse_value()?); + self.skip_ws(); + match self.peek() { + Some(b',') => self.pos += 1, + Some(b']') => { + self.pos += 1; + self.depth -= 1; + return Ok(Value::Array(items)); + } + _ => return Err(self.err("expected ',' or ']'")), + } + } + } + + fn parse_object(&mut self) -> Result { + self.pos += 1; + self.enter()?; + let mut items = Vec::new(); + self.skip_ws(); + if self.peek() == Some(b'}') { + self.pos += 1; + self.depth -= 1; + return Ok(Value::Object(items)); + } + loop { + if self.peek() != Some(b'"') { + return Err(self.err("expected string key")); + } + let key = self.parse_string()?; + self.skip_ws(); + self.expect(b':', "expected ':'")?; + let v = self.parse_value()?; + items.push((key, v)); + self.skip_ws(); + match self.peek() { + Some(b',') => { + self.pos += 1; + self.skip_ws(); + } + Some(b'}') => { + self.pos += 1; + self.depth -= 1; + return Ok(Value::Object(items)); + } + _ => return Err(self.err("expected ',' or '}'")), + } + } + } +} + +#[cfg(test)] +mod tests { + use 
super::*; + + #[test] + fn value_accessors() { + assert_eq!(Value::Bool(true).as_bool(), Some(true)); + assert_eq!(Value::Null.as_bool(), None); + assert_eq!(Value::Number(1.5).as_number(), Some(1.5)); + assert_eq!(Value::Null.as_number(), None); + assert_eq!(Value::String("hi".into()).as_str(), Some("hi")); + assert_eq!(Value::Null.as_str(), None); + assert!(Value::Array(Vec::new()).as_array().is_some()); + assert!(Value::Null.as_array().is_none()); + assert!(Value::Object(Vec::new()).as_object().is_some()); + assert!(Value::Null.as_object().is_none()); + } + + #[test] + fn value_from_impls() { + assert!(matches!(Value::from(true), Value::Bool(true))); + assert!(matches!(Value::from(1.5), Value::Number(n) if n == 1.5)); + match Value::from("hello") { + Value::String(s) => assert_eq!(s, "hello"), + _ => panic!(), + } + match Value::from(String::from("world")) { + Value::String(s) => assert_eq!(s, "world"), + _ => panic!(), + } + } + + #[test] + fn error_accessors() { + let e = Error::new("boom", 42); + assert_eq!(e.message(), "boom"); + assert_eq!(e.position(), 42); + } + + #[test] + fn value_variants_construct() { + let _ = Value::Null; + let _ = Value::Bool(true); + let _ = Value::Number(0.0); + let _ = Value::String(String::new()); + let _ = Value::Array(Vec::new()); + let _ = Value::Object(Vec::new()); + } + + fn parse(s: &str) -> Value { + s.parse::().unwrap() + } + + fn parse_err(s: &str) { + assert!(s.parse::().is_err(), "expected error for {s:?}"); + } + + #[test] + fn parse_primitives() { + assert!(matches!(parse("null"), Value::Null)); + assert!(matches!(parse("true"), Value::Bool(true))); + assert!(matches!(parse("false"), Value::Bool(false))); + assert!(matches!(parse(" null "), Value::Null)); + } + + #[test] + fn parse_numbers() { + let cases: &[(&str, f64)] = &[ + ("0", 0.0), + ("-0", 0.0), + ("42", 42.0), + ("-42", -42.0), + ("3.14", 3.14), + ("-2.5", -2.5), + ("1e10", 1e10), + ("1E10", 1e10), + ("1.5e2", 150.0), + ("1.5e+2", 150.0), + ("2.5e-1", 
0.25), + ]; + for (s, v) in cases { + match parse(s) { + Value::Number(n) => assert_eq!(n, *v, "{s}"), + _ => panic!("not a number: {s}"), + } + } + } + + #[test] + fn parse_number_errors() { + parse_err("01"); + parse_err("+1"); + parse_err(".5"); + parse_err("1."); + parse_err("1e"); + parse_err("1e+"); + parse_err("-"); + } + + #[test] + fn parse_strings() { + match parse(r#""hello""#) { + Value::String(s) => assert_eq!(s, "hello"), + _ => panic!(), + } + match parse(r#""a\"b\\c\/d""#) { + Value::String(s) => assert_eq!(s, "a\"b\\c/d"), + _ => panic!(), + } + match parse(r#""\n\r\t\b\f""#) { + Value::String(s) => assert_eq!(s, "\n\r\t\u{08}\u{0C}"), + _ => panic!(), + } + match parse(r#""\u0041""#) { + Value::String(s) => assert_eq!(s, "A"), + _ => panic!(), + } + match parse(r#""\uD834\uDD1E""#) { + Value::String(s) => assert_eq!(s, "\u{1D11E}"), + _ => panic!(), + } + match parse("\"é\"") { + Value::String(s) => assert_eq!(s, "é"), + _ => panic!(), + } + } + + #[test] + fn parse_string_errors() { + parse_err(r#""unterminated"#); + parse_err("\"embedded\nnewline\""); + parse_err(r#""\x""#); + parse_err(r#""\uD800""#); + parse_err(r#""\uDC00""#); + parse_err(r#""\uD800\u0041""#); + } + + #[test] + fn parse_arrays() { + assert!(matches!(parse("[]"), Value::Array(a) if a.is_empty())); + match parse("[1, 2, 3]") { + Value::Array(a) => { + assert_eq!(a.len(), 3); + assert!(matches!(a[0], Value::Number(n) if n == 1.0)); + } + _ => panic!(), + } + match parse("[[1], [2, 3]]") { + Value::Array(a) => assert_eq!(a.len(), 2), + _ => panic!(), + } + } + + #[test] + fn parse_array_errors() { + parse_err("["); + parse_err("[1,"); + parse_err("[1 2]"); + parse_err("[,]"); + } + + #[test] + fn parse_objects() { + assert!(matches!(parse("{}"), Value::Object(m) if m.is_empty())); + match parse(r#"{"a": 1, "b": true}"#) { + Value::Object(m) => { + assert_eq!(m.len(), 2); + assert_eq!(m[0].0, "a"); + assert!(matches!(m[0].1, Value::Number(n) if n == 1.0)); + assert_eq!(m[1].0, 
"b"); + assert!(matches!(m[1].1, Value::Bool(true))); + } + _ => panic!(), + } + } + + #[test] + fn parse_object_errors() { + parse_err("{"); + parse_err(r#"{"a""#); + parse_err(r#"{"a":}"#); + parse_err(r#"{"a":1"#); + parse_err(r#"{a:1}"#); + parse_err(r#"{"a":1,}"#); + } + + #[test] + fn parse_duplicate_keys_kept() { + match parse(r#"{"a":1,"a":2}"#) { + Value::Object(m) => { + assert_eq!(m.len(), 2); + assert_eq!(m[0].0, "a"); + assert!(matches!(m[0].1, Value::Number(n) if n == 1.0)); + assert_eq!(m[1].0, "a"); + assert!(matches!(m[1].1, Value::Number(n) if n == 2.0)); + } + _ => panic!(), + } + } + + #[test] + fn parse_trailing_garbage_fails() { + parse_err("null null"); + parse_err("1 2"); + parse_err("[] x"); + } + + #[test] + fn parse_empty_fails() { + parse_err(""); + parse_err(" "); + } + + #[test] + fn round_trip() { + let cases = &[ + "null", + "true", + "false", + "0", + "-1.5", + r#""hello""#, + "[]", + "[1,2,3]", + "{}", + r#"{"a":1,"b":[true,null]}"#, + ]; + for s in cases { + let v: Value = s.parse().unwrap(); + assert_eq!(&v.stringify().unwrap(), s, "round trip {s}"); + } + } + + #[test] + fn parse_rejects_deep_nesting() { + let s: String = "[".repeat(200); + parse_err(&s); + } + + #[test] + fn stringify_primitives() { + assert_eq!(Value::Null.stringify().unwrap(), "null"); + assert_eq!(Value::Bool(true).stringify().unwrap(), "true"); + assert_eq!(Value::Bool(false).stringify().unwrap(), "false"); + assert_eq!(Value::Number(0.0).stringify().unwrap(), "0"); + assert_eq!(Value::Number(-42.5).stringify().unwrap(), "-42.5"); + } + + #[test] + fn stringify_integer_values() { + assert_eq!(Value::Number(42.0).stringify().unwrap(), "42"); + assert_eq!(Value::Number(-1000.0).stringify().unwrap(), "-1000"); + assert_eq!(Value::Number(1.5).stringify().unwrap(), "1.5"); + // Beyond 2^53 falls back to the f64 formatter. 
+ let big = (1_i64 << 54) as f64; + let s = Value::Number(big).stringify().unwrap(); + assert_eq!(s.parse::().unwrap(), big); + } + + #[test] + fn stringify_non_finite_errors() { + assert!(Value::Number(f64::NAN).stringify().is_err()); + assert!(Value::Number(f64::INFINITY).stringify().is_err()); + assert!(Value::Number(f64::NEG_INFINITY).stringify().is_err()); + } + + #[test] + fn stringify_string_escapes() { + assert_eq!(Value::String("hi".into()).stringify().unwrap(), "\"hi\""); + assert_eq!( + Value::String("a\"b\\c".into()).stringify().unwrap(), + "\"a\\\"b\\\\c\"" + ); + assert_eq!( + Value::String("\n\r\t\u{08}\u{0C}".into()) + .stringify() + .unwrap(), + "\"\\n\\r\\t\\b\\f\"" + ); + assert_eq!( + Value::String("\x01\x1f".into()).stringify().unwrap(), + "\"\\u0001\\u001f\"" + ); + assert_eq!(Value::String("é".into()).stringify().unwrap(), "\"é\""); + } + + #[test] + fn stringify_array() { + assert_eq!(Value::Array(Vec::new()).stringify().unwrap(), "[]"); + let a = Value::Array(vec![ + Value::Number(1.0), + Value::Null, + Value::Bool(true), + ]); + assert_eq!(a.stringify().unwrap(), "[1,null,true]"); + } + + #[test] + fn stringify_object_preserves_insertion_order() { + let o = vec![ + ("b".into(), Value::Number(2.0)), + ("a".into(), Value::Number(1.0)), + ]; + assert_eq!(Value::Object(o).stringify().unwrap(), r#"{"b":2,"a":1}"#); + } + + #[test] + fn stringify_nested() { + let inner = vec![("x".into(), Value::Array(vec![Value::Number(3.0)]))]; + let outer = vec![("obj".into(), Value::Object(inner))]; + assert_eq!( + Value::Object(outer).stringify().unwrap(), + r#"{"obj":{"x":[3]}}"# + ); + } + + #[test] + fn stringify_array_propagates_error() { + let a = Value::Array(vec![Value::Number(f64::NAN)]); + assert!(a.stringify().is_err()); + } +} blob - /dev/null blob + d858ce8ce60f256d6c52e59467feb71678cdfc44 (mode 644) --- /dev/null +++ tests/suite.rs @@ -0,0 +1,105 @@ +// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et : +// +// Copyright (c) 2026 Murilo Ijanc' +// +// 
Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// Conformance runner for the external JSON test suites. + +use std::fs; +use std::path::Path; + +fn try_parse(data: &[u8]) -> bool { + match std::str::from_utf8(data) { + Ok(s) => s.parse::().is_ok(), + Err(_) => false, + } +} + +// Some(true) = parser must accept, Some(false) = parser must reject, +// None = implementation-defined / skip. +fn expectation(name: &str) -> Option { + if name.starts_with("y_") { + Some(true) + } else if name.starts_with("n_") { + Some(false) + } else if name.starts_with("i_") { + None + } else if name.starts_with("pass") { + Some(true) + } else if name.starts_with("fail") { + // fail1.json (root-level string) and fail18.json (deep nesting) + // were written against an older RFC and are valid per RFC 8259. + if matches!(name, "fail1.json" | "fail18.json") { + Some(true) + } else { + Some(false) + } + } else { + None + } +} + +fn run_suite(dir: &str) -> (usize, usize, Vec) { + let mut total = 0; + let mut ok = 0; + let mut failures = Vec::new(); + let Ok(entries) = fs::read_dir(Path::new(dir)) else { + return (0, 0, Vec::new()); + }; + let mut paths: Vec<_> = + entries.filter_map(|e| e.ok().map(|e| e.path())).collect(); + paths.sort(); + for path in paths { + if path.extension().and_then(|x| x.to_str()) != Some("json") { + continue; + } + let name = path.file_name().unwrap().to_string_lossy().into_owned(); + let Some(expect) = expectation(&name) else { continue }; + total += 1; + let data = fs::read(&path).unwrap_or_default(); + let accepted = try_parse(&data); + if accepted == expect { + ok += 1; + } else { + failures.push(format!( + "{}: expected {}, got {}", + name, + if expect { "accept" } else { "reject" }, + if accepted { "accept" } else { "reject" }, + )); + } + } + (total, ok, failures) +} + +#[test] +fn 
json_checker() { + let (total, ok, failures) = run_suite("vendor/JSON_checker"); + if total == 0 { + eprintln!("json_checker: no files (run `make suite-fetch`)"); + return; + } + eprintln!("json_checker: {ok}/{total} passed"); + for f in &failures { + eprintln!(" {f}"); + } + assert_eq!(ok, total); +} + +#[test] +fn json_test_suite() { + let (total, ok, failures) = + run_suite("vendor/JSONTestSuite/test_parsing"); + if total == 0 { + eprintln!("json_test_suite: no files (run `make suite-fetch`)"); + return; + } + eprintln!("json_test_suite: {ok}/{total} passed"); + for f in &failures { + eprintln!(" {f}"); + } + assert_eq!(ok, total); +}