std/data/toml

Standard Library source code

TOML encoding and decoding for ZuzuScript.

Module

Name
std/data/toml
Area
Standard Library
Source
modules/std/data/toml.zzm
=encoding utf8

=head1 NAME

std/data/toml - TOML encoding and decoding for ZuzuScript.

=head1 SYNOPSIS

  from std/data/toml import TOML;

  let codec := new TOML( pretty: true, canonical: true );
  let text := codec.encode({ answer: 42, name: "Zuzu" });
  let data := codec.decode(text);

=head1 IMPLEMENTATION SUPPORT

This module is supported by zuzu.pl, zuzu-rust, and zuzu-js on Node and
Electron. It is partially supported by zuzu-js in the browser: in-memory
TOML encode/decode coverage passes, but file-backed load/dump coverage is
unsupported because browser filesystem capability is unavailable.

=head1 DESCRIPTION

This module provides a pure-Zuzu implementation of TOML parsing and
serialization, with a user-facing API modelled on C<std/data/json>.

=head1 EXPORTS

=head2 Classes

=over

=item C<< TOML({ utf8?: Bool, pretty?: Bool, canonical?: Bool }) >>

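Constructs a TOML codec. Every option may be omitted: C<utf8> defaults
to C<true>, while C<pretty> and C<canonical> default to C<false>.
Returns: C<TOML>.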

=item C<< codec.encode(value) >>

Parameters: C<value> is a C<Dict> or compatible mapping. Returns:
C<String>. Encodes C<value> as TOML text.

=item C<< codec.encode_binarystring(value) >>

Parameters: C<value> is a C<Dict> or compatible mapping. Returns:
C<BinaryString>. Encodes C<value> as UTF-8 TOML bytes.

=item C<< codec.decode(String text) >>

Parameters: C<text> is TOML text. Returns: C<Dict>. Decodes TOML text
into a dictionary.

=item C<< codec.decode_binarystring(BinaryString bytes) >>

Parameters: C<bytes> is UTF-8 TOML bytes. Returns: C<Dict>. Decodes TOML
bytes into a dictionary.

=item C<< codec.load(Path path) >>

Parameters: C<path> is a C<std/io> C<Path>. Returns: C<Dict>. Reads TOML
text from C<path> and decodes it.

=item C<< codec.dump(Path path, value) >>

Parameters: C<path> is a C<std/io> C<Path> and C<value> is a C<Dict> or
compatible mapping. Returns: C<Path>. Encodes C<value>, writes the TOML
text to C<path>, and returns C<path>.

=back

=head1 COPYRIGHT AND LICENCE

B<< std/data/toml >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from std/string import substr, index;


=begin comment

_is_space(ch) reports whether the string ch is exactly one of the four
whitespace characters this module recognises: space, tab, carriage
return, or line feed. Any other input, including the empty string,
yields false.

=end comment

=cut

function _is_space ( String ch ) {
	if ( ch ≡ " " or ch ≡ "\t" ) {
		return true;
	}

	return ch ≡ "\r" or ch ≡ "\n";
}

=begin comment

_trim(text) returns text with leading and trailing whitespace removed,
where whitespace is whatever _is_space accepts. Whitespace in the middle
of the text is kept. A string made only of whitespace becomes the empty
string.

=end comment

=cut

function _trim ( String text ) {
	let left_edge := 0;
	let right_edge := length text;

	while ( left_edge < right_edge and _is_space( substr( text, left_edge, 1 ) ) ) {
		left_edge++;
	}

	while ( right_edge > left_edge and _is_space( substr( text, right_edge - 1, 1 ) ) ) {
		right_edge--;
	}

	let kept := right_edge - left_edge;
	return substr( text, left_edge, kept );
}

=begin comment

_strip_comment(line) returns the part of line that precedes the first
"#" found outside a double-quoted string. If there is no such "#", the
line is returned unchanged.

Inside a double-quoted string a backslash protects the next character,
so an escaped quote does not end the string. Only double quotes are
tracked; single quotes have no special meaning here.

=end comment

=cut

function _strip_comment ( String line ) {
	let total := length line;
	let inside_quotes := false;
	let after_backslash := false;
	let at := 0;

	while ( at < total ) {
		let c := substr( line, at, 1 );

		if ( not inside_quotes ) {
			if ( c ≡ "#" ) {
				return substr( line, 0, at );
			}
			if ( c ≡ "\"" ) {
				inside_quotes := true;
			}
		}
		else if (after_backslash) {
			after_backslash := false;
		}
		else if ( c ≡ "\\" ) {
			after_backslash := true;
		}
		else if ( c ≡ "\"" ) {
			inside_quotes := false;
		}

		at++;
	}

	return line;
}

=begin comment

_split_dotted_key(raw_key) splits a key such as a.b."c.d" into its
components and returns them as an Array of strings.

A "." outside double quotes ends the current component. Double quotes
are dropped from the output, and everything between them, including
dots, is kept as part of the component. Inside quotes a backslash is
dropped and the character after it is kept literally.

Each component is passed through _trim. A component that is empty after
trimming raises "Invalid empty key path component".

=end comment

=cut

function _split_dotted_key ( String raw_key ) {
	let components := [];
	let total := length raw_key;
	let piece := "";
	let quoted := false;
	let after_backslash := false;
	let at := 0;

	while ( at < total ) {
		let c := substr( raw_key, at, 1 );
		at++;

		if (quoted) {
			if (after_backslash) {
				piece _= c;
				after_backslash := false;
			}
			else if ( c ≡ "\\" ) {
				after_backslash := true;
			}
			else if ( c ≡ "\"" ) {
				quoted := false;
			}
			else {
				piece _= c;
			}
			continue;
		}

		if ( c ≡ "\"" ) {
			quoted := true;
			continue;
		}

		if ( c ≢ "." ) {
			piece _= c;
			continue;
		}

		let component := _trim(piece);
		die "Invalid empty key path component" if component ≡ "";
		components.push(component);
		piece := "";
	}

	let final_component := _trim(piece);
	die "Invalid empty key path component" if final_component ≡ "";
	components.push(final_component);

	return components;
}

=begin comment

_parse_string_value(text, pos) decodes a double-quoted string whose
opening quote is at index pos of text.

Returns a two-element Array: the decoded string, and the index just
after the closing quote.

The recognised escapes are \n, \r, \t, \" and \\. Any other escape
raises "Unsupported escape: X". A backslash at the very end of text
raises "Unterminated escape sequence", and a missing closing quote
raises "Unterminated string value".

=end comment

=cut

function _parse_string_value ( String text, Number pos ) {
	let total := length text;
	let at := pos + 1;
	let decoded := "";

	while ( at < total ) {
		let c := substr( text, at, 1 );

		if ( c ≡ "\"" ) {
			let next_pos := at + 1;
			return [ decoded, next_pos ];
		}

		if ( c ≢ "\\" ) {
			decoded _= c;
			at++;
			continue;
		}

		at++;
		die "Unterminated escape sequence" if at >= total;
		let esc := substr( text, at, 1 );

		if ( esc ≡ "n" ) {
			decoded _= "\n";
		}
		else if ( esc ≡ "r" ) {
			decoded _= "\r";
		}
		else if ( esc ≡ "t" ) {
			decoded _= "\t";
		}
		else if ( esc ≡ "\"" ) {
			decoded _= "\"";
		}
		else if ( esc ≡ "\\" ) {
			decoded _= "\\";
		}
		else {
			die `Unsupported escape: ${esc}`;
		}

		at++;
	}

	die "Unterminated string value";
}

=begin comment

_skip_inline_ws(text, pos) returns the index of the first character at
or after pos that _is_space does not accept. If only whitespace remains,
it returns the length of text. If pos is already at or past the end of
text, pos is returned unchanged.

=end comment

=cut

function _skip_inline_ws ( String text, Number pos ) {
	let total := length text;
	let cursor := pos;

	while ( cursor < total ) {
		if ( not _is_space( substr( text, cursor, 1 ) ) ) {
			return cursor;
		}
		cursor++;
	}

	return cursor;
}

=begin comment

_parse_value(text, pos) parses one TOML value from text, starting at
pos after skipping whitespace with _skip_inline_ws.

Returns a two-element Array: the parsed value, and the index of the
first character after it. Raises "Missing value" when nothing but
whitespace remains.

The first non-space character selects the form:

A double quote: the value is a string and is handed to
_parse_string_value.

"[": the value is an Array. Elements are parsed by recursive calls and
separated by ",". After a comma the loop restarts and checks for "]", so
a trailing comma before "]" is accepted. Raises "Unterminated array"
when the text ends first, or "Expected ',' or ']' in array" when an
element is followed by anything else.

"{": the value is an inline table, returned as a Dict. The key is the
text up to the first "=" outside double quotes, trimmed. If the whole
key is wrapped in double quotes, the outer quotes are removed by
substring, with no escape processing. Keys are not split on ".". The
value is parsed by a recursive call. Raises "Invalid inline table
item" when no "=" is found, "Unterminated inline table" when the text
ends first, or "Expected ',' or '}' in inline table" on an unexpected
separator.

Anything else: a bare token is read up to the next whitespace, ",", "]"
or "}". "true" and "false" become booleans. A token of optional sign
plus digits, or optional sign plus digits "." digits, is converted with
0 + token. Any other token raises "Unsupported TOML token '...'".

Whitespace skipping relies on _skip_inline_ws, so whatever _is_space
accepts is skipped between array and inline-table items.

=end comment

=cut

function _parse_value ( String text, Number pos ) {
	let i := _skip_inline_ws( text, pos );
	let n := length text;
	die "Missing value" if i >= n;
	let ch := substr( text, i, 1 );
	if ( ch ≡ "\"" ) {
		return _parse_string_value( text, i );
	}
	if ( ch ≡ "[" ) {
		let arr := [];
		i++;

		while (true) {
			i := _skip_inline_ws( text, i );
			die "Unterminated array" if i >= n;
			if ( substr( text, i, 1 ) ≡ "]" ) {
				let next_pos := i + 1;
				return [ arr, next_pos ];
			}
			let parsed := _parse_value( text, i );

		arr.push( parsed[0] );
		i := _skip_inline_ws( text, parsed[1] );
		die "Unterminated array" if i >= n;
		let sep := substr( text, i, 1 );
		if ( sep ≡"," ) {
			i++;
			}
			else if ( sep ≡ "]" ) {
				let next_pos := i + 1;
				return [ arr, next_pos ];
			} else {
				die "Expected ',' or ']' in array";
			}

		}
	}
	if ( ch ≡ "{" ) {
		let d := {};
		i++;

		while (true) {
			i := _skip_inline_ws( text, i );
			die "Unterminated inline table" if i >= n;
			if ( substr( text, i, 1 ) ≡ "}" ) {
				let next_pos := i + 1;
				return [ d, next_pos ];
			}
			let key_start := i;

		let in_q := false;
		let found_eq := false;

		while ( i < n and not found_eq ) {
			let kc := substr( text, i, 1 );
			if ( kc ≡ "\"" ) {
				in_q := not in_q;
			}
			if ( not in_q and kc ≡ "=" ) {
				found_eq := true;
			}
			else {
				i++;
			}
		}

		die "Invalid inline table item" if i >= n;
		let key_raw := _trim( substr( text, key_start, i - key_start ) );
		let key := key_raw;
		if ( key ~ /^".*"$/ ) {
			key := substr( key, 1, length key - 2 );
		}
		i++;
		let pv := _parse_value( text, i );
		d.set( key, pv[0] );
		i := _skip_inline_ws( text, pv[1] );
		die "Unterminated inline table" if i >= n;
		let sep := substr( text, i, 1 );
		if ( sep ≡"," ) {
			i++;
			}
			else if ( sep ≡ "}" ) {
				let next_pos := i + 1;
				return [ d, next_pos ];
			} else {
				die "Expected ',' or '}' in inline table";
			}
		}
	}
	let start := i;
	let done := false;
	while ( i < n and not done ) {
		let c := substr( text, i, 1 );
		if ( _is_space(c) or c ≡"," or c ≡ "]" or c ≡ "}" ) {
			done := true;
		}
		else {
			i++;
		}
	}
	let token := substr( text, start, i - start );
	if ( token ≡ "true" ) {
		return [ true, i ];
	}
	if ( token ≡ "false" ) {
		return [ false, i ];
	}
	if ( token ~ /^[+-]?[0-9]+$/ ) {
		let num := 0 + token;
		return [ num, i ];
	}
	if ( token ~ /^[+-]?[0-9]+\.[0-9]+$/ ) {
		let num := 0 + token;
		return [ num, i ];
	}
	die `Unsupported TOML token '${token}'`;
}

=begin comment

_ensure_table_path(root, path) walks root along the keys in path and
stores an empty Dict for every key that does not exist yet. It returns
the Dict reached at the end of the path, or root itself when path is
empty.

Raises "TOML path 'KEY' conflicts with non-table value" when a key on
the path already holds something that is not a Dict.

=end comment

=cut

function _ensure_table_path ( Dict root, Array path ) {
	let table := root;
	let depth := path.length();
	let at := 0;

	while ( at < depth ) {
		let key := path[at];

		if ( table.exists(key) ) {
			if ( not( table.get(key) instanceof Dict ) ) {
				die `TOML path '${key}' conflicts with non-table value`;
			}
		}
		else {
			table.set( key, {} );
		}

		table := table.get(key);
		at++;
	}

	return table;
}

=begin comment

_set_deep_dict_value(table, path, at, value) stores value in table under
the nested keys path[at], path[at + 1], and so on. It returns table.

When path[at] is the last key, value is set directly. Otherwise the
existing child Dict (or a new empty Dict) is filled by a recursive call
and written back under path[at].

Raises "TOML path 'KEY' conflicts with non-table value" when an
intermediate key already holds something that is not a Dict.

=end comment

=cut

function _set_deep_dict_value ( Dict table, Array path, Number at, value ) {
	let key := path[at];
	let is_leaf := at + 1 >= path.length();

	if (is_leaf) {
		table.set( key, value );
		return table;
	}

	let child := {};
	if ( table.exists(key) ) {
		child := table.get(key);
	}
	if ( not( child instanceof Dict ) ) {
		die `TOML path '${key}' conflicts with non-table value`;
	}

	let filled_child := _set_deep_dict_value( child, path, at + 1, value );
	table.set( key, filled_child );
	return table;
}

=begin comment

_assign_path(root, key_path, value, current_path) stores value in root
at the location formed by current_path followed by key_path, and
returns the result of _set_deep_dict_value for that combined path.

current_path is the path of the enclosing table and key_path is the
(possibly dotted) key within that table. Neither input Array is
modified; the combined path is built in a fresh Array.

=end comment

=cut

function _assign_path ( Dict root, Array key_path, value, Array current_path ) {
	let combined := [];

	let table_depth := current_path.length();
	let t := 0;
	while ( t < table_depth ) {
		combined.push( current_path[t] );
		t++;
	}

	let leaf_at := key_path.length() - 1;
	let k := 0;
	while ( k < leaf_at ) {
		combined.push( key_path[k] );
		k++;
	}
	combined.push( key_path[leaf_at] );

	return _set_deep_dict_value( root, combined, 0, value );
}

=begin comment

_join_lines(lines) concatenates the strings in lines with a single "\n"
between neighbouring entries. No newline is added after the last entry,
and an empty Array yields the empty string.

=end comment

=cut

function _join_lines ( Array lines ) {
	let joined := "";
	let count := lines.length();
	let at := 0;

	while ( at < count ) {
		if ( at > 0 ) {
			joined _= "\n";
		}
		joined _= lines[at];
		at++;
	}

	return joined;
}

=begin comment

_normalize_for_encoding(value) returns a copy of value rebuilt from
plain Arrays and Dicts so that the encoder only has to deal with those
container types.

An Array becomes a new Array of normalized elements. A Set or Bag is
replaced by the normalized result of its sortstr() method. A PairList
becomes a Dict in which the first occurrence of each key wins. A Dict is
rebuilt by visiting its keys in sorted_keys() order. Every other value
is returned as is.

The type checks run in the order Array, Set or Bag, PairList, Dict.

=end comment

=cut

function _normalize_for_encoding ( value ) {
	if ( value instanceof Array ) {
		let items := [];
		let count := value.length();
		let at := 0;
		while ( at < count ) {
			let item := _normalize_for_encoding( value[at] );
			items.push(item);
			at++;
		}
		return items;
	}

	if ( value instanceof Set or value instanceof Bag ) {
		let ordered := value.sortstr();
		return _normalize_for_encoding(ordered);
	}

	if ( value instanceof PairList ) {
		let table := {};
		let entries := value.to_Array();
		let count := entries.length();
		let at := 0;
		while ( at < count ) {
			let entry := entries[at]{pair};
			let entry_key := entry[0];
			if ( not table.exists(entry_key) ) {
				table.set( entry_key, _normalize_for_encoding( entry[1] ) );
			}
			at++;
		}
		return table;
	}

	if ( value instanceof Dict ) {
		let table := {};
		let names := value.sorted_keys();
		let count := names.length();
		let at := 0;
		while ( at < count ) {
			let entry_key := names[at];
			let entry_value := _normalize_for_encoding( value.get(entry_key) );
			table.set( entry_key, entry_value );
			at++;
		}
		return table;
	}

	return value;
}

=begin comment

_escape_string(text) returns text prepared for use inside a
double-quoted TOML string. Backslash, double quote, line feed, carriage
return and tab are replaced by \\, \", \n, \r and \t respectively. All
other characters are copied unchanged. The surrounding quotes are not
added here.

=end comment

=cut

function _escape_string ( String text ) {
	let escaped := "";
	let total := length text;
	let at := 0;

	while ( at < total ) {
		let c := substr( text, at, 1 );

		if ( c ≡ "\\" ) {
			escaped _= "\\\\";
		}
		else if ( c ≡ "\"" ) {
			escaped _= "\\\"";
		}
		else if ( c ≡ "\n" ) {
			escaped _= "\\n";
		}
		else if ( c ≡ "\r" ) {
			escaped _= "\\r";
		}
		else if ( c ≡ "\t" ) {
			escaped _= "\\t";
		}
		else {
			escaped _= c;
		}

		at++;
	}

	return escaped;
}

=begin comment

_encode_value(value, pretty, canonical, indent_level) returns the TOML
text for a single value as it appears on the right-hand side of a
key/value pair or inside an array or inline table.

A String is wrapped in double quotes after _escape_string. A Number is
stringified by concatenation with the empty string. A Boolean becomes
"true" or "false". An Array becomes [a,b] (or [a, b] when pretty is
true). A Dict becomes an inline table {k = v,...}, with keys taken from
sorted_keys() when canonical is true and from keys() otherwise. Any
other type raises "Unsupported TOML type for encoding: TYPE".

Inline-table keys made only of ASCII letters, digits, "_" and "-" are
written bare. Every other key, including the empty key, is written as a
double-quoted, escaped string. A key containing a space, ".", "=", "#"
or a quote would otherwise produce invalid or ambiguous TOML.

indent_level is passed down through recursive calls, increased by one
per nesting level, but is not otherwise used in this function.

=end comment

=cut

function _encode_value ( value, pretty, canonical, Number indent_level ) {
	if ( value instanceof String ) {
		return `"${_escape_string(value)}"`;
	}
	if ( value instanceof Number ) {
		return "" _ value;
	}
	if ( value instanceof Boolean ) {
		return value ? "true": "false";
	}

	if ( value instanceof Array ) {
		let parts := [];
		let i := 0;
		while ( i < value.length() ) {
			parts.push( _encode_value( value[i], pretty, canonical, indent_level + 1 ) );
			i++;
		}
		let sep := pretty ? ", ":",";
		let out := "[";
		i := 0;

		while ( i < parts.length() ) {
			if ( i > 0 ) {
				out _= sep;
			}
			out _= parts[i];
			i++;
		}

		out _= "]";
		return out;
	}

	if ( value instanceof Dict ) {
		let keys := canonical ? value.sorted_keys(): value.keys();
		let out := "{" ;
		let i := 0;

		while ( i < keys.length() ) {
			let k := keys[i];

			if ( i > 0 ) {
				if (pretty) {
					out _= ", ";
				}
				else {
					out _=",";
				}
			}

			let encoded_child := _encode_value( value.get(k), pretty, canonical, indent_level + 1 );
			if ( k ~ /^[A-Za-z0-9_-]+$/ ) {
				out _= k;
			}
			else {
				out _= `"${_escape_string(k)}"`;
			}
			out _= " = ";
			out _= encoded_child;
			i++;
		}

		out _= "}";
		return out;
	}

	die `Unsupported TOML type for encoding: ${typeof value}`;
}

=begin comment

_encode_table(table, path, pretty, canonical, out) appends the TOML
lines for table to the Array out. It does not return a value; the
caller reads the result from out.

When path is non-empty a [a.b.c] header is written first, preceded by a
blank line unless out is empty or already ends with a blank line. The
non-Dict entries of table are written next as key = value lines, using
_encode_value. Each Dict entry is then written as its own sub-table by a
recursive call with the child key appended to a copy of path.

Keys are visited in sorted_keys() order when canonical is true and in
keys() order otherwise.

Keys and header components made only of ASCII letters, digits, "_" and
"-" are written bare. Every other key is written as a double-quoted,
escaped string, so that a key containing ".", "#", "=", a space or a
quote cannot change the structure of the document.

=end comment

=cut

function _encode_table ( Dict table, Array path, pretty, canonical, Array out ) {

	if ( path.length() > 0 ) {
		if ( out.length() > 0 and out[ out.length() - 1 ] ≢ "" ) {
			out.push("");
		}
		let title := "";
		let i := 0;

		while ( i < path.length() ) {
			if ( i > 0 ) {
				title _= ".";
			}
			let part := path[i];
			if ( part ~ /^[A-Za-z0-9_-]+$/ ) {
				title _= part;
			}
			else {
				title _= `"${_escape_string(part)}"`;
			}
			i++;
		}

		out.push( `[${title}]` );
	}

	let keys := canonical ? table.sorted_keys(): table.keys();
	let children := [];
	let i := 0;

	while ( i < keys.length() ) {
		let key := keys[i];
		let value := table.get(key);
		if ( value instanceof Dict ) {
			children.push(key);
		}
		else {
			let encoded_value := _encode_value( value, pretty, canonical, 0 );
			let key_text := "";
			if ( key ~ /^[A-Za-z0-9_-]+$/ ) {
				key_text := key;
			}
			else {
				key_text := `"${_escape_string(key)}"`;
			}
			out.push( key_text _ " = " _ encoded_value );
		}
		i++;
	}

	i := 0;

	while ( i < children.length() ) {
		let child_key := children[i];
		let child_path := [];
		let j := 0;
		while ( j < path.length() ) {
			child_path.push( path[j] );
			j++;
		}
		child_path.push(child_key);
		_encode_table( table.get(child_key), child_path, pretty, canonical, out );
		i++;
	}

}

=begin comment

_parse_document(text) parses a whole TOML document and returns it as a
Dict.

The text is processed one line at a time, split on "\n". Each line has
its comment removed with _strip_comment and is then trimmed; blank
lines are skipped.

A line starting with "[" is a table header and must end with "]",
otherwise "Invalid table header" is raised. The header is split with
_split_dotted_key and the full path becomes the current table, so
[a.b] selects root.a.b. The table is created at once with
_ensure_table_path, so a header with no keys still yields an empty
Dict. A header naming an existing table selects that table again
rather than replacing it.

Any other line must be key = value, otherwise "Expected key = value"
is raised. The line is split at its first "=". The key may be dotted,
and the value is stored with _assign_path under the current table path
followed by the key path. Text left over after the value raises
"Unexpected trailing characters in value".

A path that runs through an existing non-table value raises "TOML path
'KEY' conflicts with non-table value".

NOTE(review): each value must fit on one line, and array-of-tables
headers written with double brackets get no special handling.

=end comment

=cut

function _parse_document ( String text ) {
	let root := {};
	let current_path := [];
	let pos := 0;
	let n := length text;
	let done := false;

	while ( pos <= n and not done ) {
		let nl := index( text, "\n", pos );
		let end;
		if ( nl < 0 ) {
			end := n;
		}
		else {
			end := nl;
		}
		let raw_line := substr( text, pos, end - pos );
		let line := _trim( _strip_comment(raw_line) );

		if ( line ≢ "" ) {
			if ( substr( line, 0, 1 ) ≡ "[" ) {
				die "Invalid table header" if substr( line, ( length line ) - 1, 1 ) ≢ "]";
				let inside := _trim( substr( line, 1, ( length line ) - 2 ) );
				current_path := _split_dotted_key(inside);
				_ensure_table_path( root, current_path );
			}

			else {
				let eq_pos := index( line, "=" );
				die "Expected key = value" if eq_pos < 0;
				let key_part := _trim( substr( line, 0, eq_pos ) );
				let val_part := _trim( substr( line, eq_pos + 1 ) );
				let key_path := _split_dotted_key(key_part);
				let parsed := _parse_value( val_part, 0 );
				let trailing := _trim( substr( val_part, parsed[1] ) );
				die "Unexpected trailing characters in value" if trailing ≢ "";
				root := _assign_path( root, key_path, parsed[0], current_path );
			}

		}

		if ( nl < 0 ) {
			done := true;
		}
		else {
			pos := nl + 1;
		}
	}

	return root;
}

=begin comment

TOML is the codec class exported by this module.

Fields: utf8 (default true), pretty (default false) and canonical
(default false). pretty and canonical are passed to the encoder; utf8
is declared but not read by any method in this class.

encode(value) normalizes value with _normalize_for_encoding and returns
its TOML text. Raises "TOML encoder expects a Dict at top level" when
the normalized value is not a Dict.

encode_binarystring(value) returns to_binary applied to encode(value).

decode(text) returns the Dict produced by _parse_document, treating a
null text as the empty document.

decode_binarystring(raw) applies to_string to raw and decodes the
result.

load(path) decodes the result of path.slurp(). dump(path, value) passes
the encoded bytes to path.spew() and returns path. Both raise "... is
denied by runtime policy" when __system__{deny_fs} is set, and "...
expects a std/io Path object" when path is not a Path.

=end comment

=cut

class TOML {
	let Boolean utf8 := true;
	let Boolean pretty := false;
	let Boolean canonical := false;

	method encode (value) {
		let normalized := _normalize_for_encoding(value);
		die "TOML encoder expects a Dict at top level" if not( normalized instanceof Dict );

		let lines := [];
		_encode_table( normalized, [], pretty, canonical, lines );
		return _join_lines(lines);
	}

	method encode_binarystring (value) {
		let encoded := self.encode(value);
		return to_binary(encoded);
	}

	method decode ( String text ) {
		if ( text ≡ null ) {
			return _parse_document("");
		}

		return _parse_document(text);
	}

	method decode_binarystring ( BinaryString raw ) {
		let decoded_text := to_string(raw);
		return self.decode(decoded_text);
	}

	method load (path) {
		from std/io import Path;
		if ( __system__{deny_fs} ) {
			die "TOML.load is denied by runtime policy";
		}
		if ( not( path instanceof Path ) ) {
			die "TOML.load expects a std/io Path object";
		}
		let raw := path.slurp();
		return self.decode_binarystring(raw);
	}

	method dump ( path, value ) {
		from std/io import Path;
		if ( __system__{deny_fs} ) {
			die "TOML.dump is denied by runtime policy";
		}
		if ( not( path instanceof Path ) ) {
			die "TOML.dump expects a std/io Path object";
		}
		let encoded := self.encode_binarystring(value);
		path.spew(encoded);
		return path;
	}

}