modules/pod/parser.zzm

pod-parser-0.0.1 source code

Package

Name
pod-parser
Version
0.0.1
Uploaded
2026-05-28 11:45:33
Dependencies
Metadata
zuzu-distribution.json
Archive
Download .tar.gz
=encoding utf8

=head1 NAME

pod/parser - Parse POD into DOM-like node objects.

=head1 SYNOPSIS

  from pod/parser import parse_pod, load_pod, PodParser;
  from std/io import Path;

  let doc := parse_pod("=head1 NAME\n\nExample\n\n=cut\n");
  say( doc.first_by_type("heading").text() );

  let path := new Path("lib/example.zzm");
  let same := load_pod(path);
  let parser := new PodParser();
  let again := parser.parse(path);

=head1 DESCRIPTION

This pure-Zuzu module parses Plain Old Documentation blocks from either a
string or a C<std/io> C<Path>. It ignores non-POD source text outside POD
regions and assembles the documentation into mutable DOM-like objects.

The returned object is a C<PodDocument>. Its children are C<PodNode>
objects. Nodes expose methods for DOM traversal, inspection, and
manipulation, including C<children>, C<child>, C<append_child>,
C<remove_child>, C<descendants>, C<find_by_type>, and C<text_content>.

Paragraph nodes are unwrapped while parsing: a blank line starts a new
paragraph, but a single newline inside paragraph text is treated as a
soft wrap and becomes one space. Verbatim/code blocks preserve their
line breaks.

=head1 EXPORTED FUNCTIONS

=over

=item * C<< parse_pod(String|Path source) >>

Parse POD from a string, or slurp and parse UTF-8 POD from a C<Path>.
Returns: C<PodDocument>.

=item * C<< parse_pod_string(String text) >>

Parse POD from a string. Returns: C<PodDocument>.

=item * C<< load_pod(Path path) >>

Slurp UTF-8 POD from a C<Path> and parse it. Returns: C<PodDocument>.

=back

=head1 EXPORTED CLASSES

=over

=item C<PodDocument>

The root document node.

=item C<PodNode>

A mutable DOM node. Constructors are public so callers can build and
manipulate additional nodes when needed.

=item C<PodParser>

Small OO wrapper with C<parse>, C<parse_string>, and C<load> methods.

=back

=head1 COPYRIGHT AND LICENCE

B<< pod/parser >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from std/io import Path;
from std/string import join, replace, split, trim;

=begin comment

PodNode is the mutable tree node used for every parsed POD element.

Fields: kind (node type name, default "node"), text, command, target,
indent, level (default 0), source, encoding, parent, and children (an
Array of child nodes).

Accessors text/command/target/indent/level/source/encoding take an
optional value; a non-null value is stored first, and the current value
is always returned. Passing null therefore never clears a field.

Child lookup: child(index, fallback), first_child(fallback) and
last_child(fallback) return the fallback when the position does not
exist.

Tree mutation: append_child and insert_child first detach the node from
whatever parent it currently reports, so a node is never listed under
two parents (or twice under one). For insert_child the index is applied
to the child list as it stands after that detachment; an index <= 0
prepends and an index past the end appends. remove_child removes only
the first child equal to the argument, clears its parent, and returns it
(null when nothing matched). remove_at does the same for the child at a
position and returns null for an out-of-range index. clear_children and
replace_children reset the parent of every child they drop.

Traversal: descendants() returns every node below this one in
depth-first pre-order; find_by_type and first_by_type filter that list
by kind; walk(callback) calls the callback on this node and then on each
subtree in the same order; text_content() joins the non-empty text of
this node and its descendants with newlines. to_Iterator() delegates to
the children array.

=end comment

=cut

class PodNode {
	let String kind := "node";
	let text := "";
	let command := null;
	let target := null;
	let indent := null;
	let Number level := 0;
	let source := null;
	let encoding := null;
	let parent := null;
	let Array children := [];

	method __build__ () {
		children := [] if children == null;
	}

	method type () { return kind; }
	method kind () { return kind; }
	method is_type ( String wanted ) { return kind eq wanted; }

	method _set_type ( String value ) {
		kind := value;
		return self;
	}

	method text ( value? ) {
		text := value if value != null;
		return text;
	}

	method command ( value? ) {
		command := value if value != null;
		return command;
	}

	method target ( value? ) {
		target := value if value != null;
		return target;
	}

	method indent ( value? ) {
		indent := value if value != null;
		return indent;
	}

	method level ( value? ) {
		level := value if value != null;
		return level;
	}

	method source ( value? ) {
		source := value if value != null;
		return source;
	}

	method encoding ( value? ) {
		encoding := value if value != null;
		return encoding;
	}

	method parent () {
		return parent;
	}

	method _set_parent ( value ) {
		parent := value;
		return self;
	}

	method root () {
		let node := self;
		while ( node.parent() != null ) {
			node := node.parent();
		}
		return node;
	}

	method children () {
		return children;
	}

	method nodes () {
		return children;
	}

	method length () {
		return children.length();
	}

	method has_children () {
		return children.length() > 0;
	}

	method child ( Number index, fallback? ) {
		return fallback if index < 0 or index >= children.length();
		return children[index];
	}

	method first_child ( fallback? ) {
		return self.child( 0, fallback );
	}

	method last_child ( fallback? ) {
		return fallback if children.length() == 0;
		return children[ children.length() - 1 ];
	}

	method append_child ( PodNode node ) {
		let previous := node.parent();
		previous.remove_child(node) if previous != null;
		node._set_parent(self);
		children.push(node);
		return node;
	}

	method prepend_child ( PodNode node ) {
		return self.insert_child( 0, node );
	}

	method insert_child ( Number index, PodNode node ) {
		let previous := node.parent();
		previous.remove_child(node) if previous != null;

		let kept := [];
		let inserted := false;
		let i := 0;
		if ( index <= 0 ) {
			kept.push(node);
			inserted := true;
		}
		while ( i < children.length() ) {
			if ( not inserted and i >= index ) {
				kept.push(node);
				inserted := true;
			}
			kept.push(children[i]);
			i++;
		}
		if ( not inserted ) {
			kept.push(node);
		}
		node._set_parent(self);
		children := kept;
		return node;
	}

	method remove_child ( PodNode wanted ) {
		let kept := [];
		let removed := null;
		for ( let child in children ) {
			if ( child == wanted and removed == null ) {
				removed := child;
				child._set_parent(null);
			}
			else {
				kept.push(child);
			}
		}
		children := kept;
		return removed;
	}

	method remove_at ( Number index ) {
		return null if index < 0 or index >= children.length();
		let wanted := children[index];
		return self.remove_child(wanted);
	}

	method clear_children () {
		for ( let child in children ) {
			child._set_parent(null);
		}
		children := [];
		return self;
	}

	method replace_children ( Array nodes ) {
		self.clear_children();
		for ( let node in nodes ) {
			self.append_child(node);
		}
		return self;
	}

	method descendants () {
		let out := [];
		self._collect_descendants(out);
		return out;
	}

	method _collect_descendants ( Array out ) {
		for ( let child in children ) {
			out.push(child);
			child._collect_descendants(out);
		}
	}

	method find_by_type ( String wanted ) {
		let out := [];
		for ( let node in self.descendants() ) {
			out.push(node) if node.is_type(wanted);
		}
		return out;
	}

	method first_by_type ( String wanted, fallback? ) {
		for ( let node in self.descendants() ) {
			return node if node.is_type(wanted);
		}
		return fallback;
	}

	method walk ( Function callback ) {
		callback(self);
		for ( let child in children ) {
			child.walk(callback);
		}
		return self;
	}

	method text_content () {
		let parts := [];
		self._collect_text(parts);
		return join( "\n", parts );
	}

	method _collect_text ( Array parts ) {
		parts.push(text) if text != null and text ne "";
		for ( let child in children ) {
			child._collect_text(parts);
		}
	}

	method to_Iterator () {
		return children.to_Iterator();
	}
}

=begin comment

PodDocument is the root node type of a parsed POD tree. It inherits all
behaviour from PodNode; its build hook only tags the node with the kind
"document" through _set_type.

NOTE(review): PodNode also defines __build__ (it replaces a null
children field with an empty array). Whether both hooks run for a
PodDocument instance is not visible in this file - confirm against the
language's object-construction rules.

=end comment

=cut

class PodDocument extends PodNode {
	method __build__ () {
		self._set_type("document");
	}
}

=begin comment

_new_document(source) creates an empty PodDocument whose source field is
set to the given value (null for string input, a path string for files)
and returns it.

=end comment

=cut

function _new_document ( source ) {
	let document := new PodDocument( source: source );
	return document;
}

=begin comment

_normalize_newlines(text) returns the text with every CRLF pair, and
then every remaining lone CR, replaced by a single LF. CRLF is handled
first so that a pair does not turn into two line breaks.

=end comment

=cut

function _normalize_newlines ( String text ) {
	let without_crlf := replace( text, "\r\n", "\n", "g" );
	return replace( without_crlf, "\r", "\n", "g" );
}

=begin comment

_command_parts(line) recognises a POD command line: "=" at the start of
the line, a name made of a letter followed by letters or digits, and
optionally whitespace plus trailing text.

Returns null when the line is not a command. Otherwise returns a dict
with "command" (the name, lower-cased) and "text" (the trimmed trailing
text, or "" when there is none).

=end comment

=cut

function _command_parts ( String line ) {
	let found := line ~ /^=([A-Za-z][A-Za-z0-9]*)(?:\s+(.*))?$/;
	return null if not found;

	let rest := "";
	if ( found[2] != null ) {
		rest := trim(found[2]);
	}

	return {
		command: lc(found[1]),
		text: rest,
	};
}

=begin comment

_top(stack) returns the last element of the stack array, which the
parser uses as the innermost open container. The caller is expected to
pass a non-empty array.

=end comment

=cut

function _top ( Array stack ) {
	let last_index := stack.length() - 1;
	return stack[last_index];
}

=begin comment

_add_child(stack, node) appends the node to the container currently on
top of the stack and returns the node. The stack itself is not changed.

=end comment

=cut

function _add_child ( Array stack, PodNode node ) {
	let container := _top(stack);
	container.append_child(node);
	return node;
}

=begin comment

_close_open_item(stack) pops the top of the stack when it is an "item"
node. The bottom entry (the document root) is never popped.

=end comment

=cut

function _close_open_item ( Array stack ) {
	if ( stack.length() > 1 ) {
		if ( _top(stack).type() eq "item" ) {
			stack.pop();
		}
	}
}

=begin comment

_close_list(stack) ends the current list: it first closes an open item,
then pops the top of the stack when that is a "list" node. The bottom
entry (the document root) is never popped.

=end comment

=cut

function _close_list ( Array stack ) {
	_close_open_item(stack);
	if ( stack.length() > 1 ) {
		if ( _top(stack).type() eq "list" ) {
			stack.pop();
		}
	}
}

=begin comment

_paragraph_text(lines) unwraps an ordinary paragraph: each line is
trimmed, lines that are empty after trimming are dropped, and the rest
are joined with a single space. Returns the resulting string.

=end comment

=cut

function _paragraph_text ( Array lines ) {
	let words := [];
	for ( let raw in lines ) {
		let trimmed := trim(raw);
		next if trimmed eq "";
		words.push(trimmed);
	}
	return join( " ", words );
}

=begin comment

_verbatim_text(lines) joins the lines of a verbatim block with newline
characters, leaving each line's own content (including its indentation)
untouched.

=end comment

=cut

function _verbatim_text ( Array lines ) {
	let block := join( "\n", lines );
	return block;
}

=begin comment

_clear_array(values) empties the given array in place by popping once
for every element it held on entry, so that callers holding the same
array see it emptied.

=end comment

=cut

function _clear_array ( Array values ) {
	let remaining := values.length();
	while ( remaining > 0 ) {
		values.pop();
		remaining := remaining - 1;
	}
}

=begin comment

_flush_pending(stack, para, verb) turns buffered lines into nodes.

When para holds lines, a "paragraph" node with the unwrapped text is
appended to the container on top of the stack and para is emptied in
place. When verb holds lines, a "verbatim" node with the newline-joined
text is appended the same way and verb is emptied in place. The
paragraph buffer is handled before the verbatim buffer.

=end comment

=cut

function _flush_pending ( Array stack, Array para, Array verb ) {
	if ( para.length() > 0 ) {
		let paragraph := new PodNode(
			kind: "paragraph",
			text: _paragraph_text(para),
		);
		_add_child( stack, paragraph );
		_clear_array(para);
	}

	if ( verb.length() > 0 ) {
		let verbatim := new PodNode(
			kind: "verbatim",
			text: _verbatim_text(verb),
		);
		_add_child( stack, verbatim );
		_clear_array(verb);
	}
}

=begin comment

_heading_node(command, text) builds a "heading" node for a command named
head1 to head6. The digit in the command name becomes the node's level
and the given text becomes its text. The caller must already have
checked that the command has that form.

=end comment

=cut

function _heading_node ( String command, String text ) {
	let digits := command ~ /^head([1-6])$/;
	let depth := int(digits[1]);
	return new PodNode(
		kind: "heading",
		level: depth,
		text: text,
	);
}

=begin comment

_command_node(command, text) builds the generic "command" node used for
commands without dedicated handling; it records the command name and its
text.

=end comment

=cut

function _command_node ( String command, String text ) {
	let generic := new PodNode(
		kind: "command",
		command: command,
		text: text,
	);
	return generic;
}

=begin comment

_process_command(root, stack, command, text) applies one POD command to
the tree being built. It returns false for "cut" and true for every
other command.

  cut       pops the stack back down to the root.
  encoding  stores the text on the root and adds an "encoding" node.
  head1-6   adds a "heading" node.
  over      adds a "list" node (indent = text) and pushes it.
  item      closes an open item, opens an implicit "list" when the top
            of the stack is not a list, then adds and pushes an "item".
  back      closes the open item and the enclosing list.
  pod       adds a "pod" node.
  for       adds a "for" node; the first word of the text is the target
            and the remainder is the text.
  begin     adds a "block" node (target = text) and pushes it.
  end       pops the stack when its top is a "block"; otherwise adds an
            "end" node carrying the text as its target.
  other     adds a generic "command" node.

=end comment

=cut

function _process_command ( PodDocument root, Array stack, command, text ) {
	if ( command eq "cut" ) {
		let depth := stack.length();
		while ( depth > 1 ) {
			stack.pop();
			depth := depth - 1;
		}
		return false;
	}

	if ( command eq "encoding" ) {
		root.encoding(text);
		let marker := new PodNode(
			kind: "encoding",
			text: text,
		);
		_add_child( stack, marker );
		return true;
	}

	if ( command ~ /^head[1-6]$/ ) {
		let heading := _heading_node( command, text );
		_add_child( stack, heading );
		return true;
	}

	if ( command eq "over" ) {
		let opened := new PodNode(
			kind: "list",
			indent: text,
		);
		_add_child( stack, opened );
		stack.push(opened);
		return true;
	}

	if ( command eq "item" ) {
		_close_open_item(stack);
		if ( _top(stack).type() ne "list" ) {
			let implicit := new PodNode(kind: "list");
			_add_child( stack, implicit );
			stack.push(implicit);
		}

		let entry := new PodNode(
			kind: "item",
			text: text,
		);
		_add_child( stack, entry );
		stack.push(entry);
		return true;
	}

	if ( command eq "back" ) {
		_close_list(stack);
		return true;
	}

	if ( command eq "pod" ) {
		let opener := new PodNode(
			kind: "pod",
			text: text,
		);
		_add_child( stack, opener );
		return true;
	}

	if ( command eq "for" ) {
		let parsed := text ~ /^(\S+)(?:\s+(.*))?$/;
		let formatted := new PodNode(
			kind: "for",
			target: parsed ? parsed[1] : "",
			text: parsed and parsed[2] != null ? parsed[2] : "",
		);
		_add_child( stack, formatted );
		return true;
	}

	if ( command eq "begin" ) {
		let region := new PodNode(
			kind: "block",
			target: text,
		);
		_add_child( stack, region );
		stack.push(region);
		return true;
	}

	if ( command eq "end" ) {
		let closes_block := false;
		if ( stack.length() > 1 ) {
			if ( _top(stack).type() eq "block" ) {
				closes_block := true;
			}
		}

		if ( closes_block ) {
			stack.pop();
		}
		else {
			let stray := new PodNode(
				kind: "end",
				target: text,
			);
			_add_child( stack, stray );
		}
		return true;
	}

	let generic := _command_node( command, text );
	_add_child( stack, generic );
	return true;
}

=begin comment

_parse_text(text, source) parses POD out of the given text and returns
the resulting PodDocument, whose source field is set to the source
argument.

Line endings are normalised to LF and the text is processed line by
line. A command line flushes any buffered paragraph and is then applied
to the tree. "=cut" is handled inline: it switches POD mode off and
leaves the stack untouched, so a list or block still open before "=cut"
remains open when POD resumes. Lines outside POD mode are ignored.

Inside POD mode a blank (or whitespace-only) line ends the current
paragraph. The kind of a paragraph is decided by its first line only: a
first line starting with whitespace opens a verbatim paragraph, any
other first line opens an ordinary paragraph. Later lines join the
paragraph that is already open regardless of their own indentation, so
an indented continuation line no longer splits an ordinary paragraph and
an unindented line no longer splits a verbatim one.

=end comment

=cut

function _parse_text ( String text, source ) {
	let root := _new_document(source);
	let stack := [ root ];
	let para := [];
	let verb := [];
	let in_pod := false;
	let lines := split( _normalize_newlines(text), "\n" );

	for ( let line in lines ) {
		let command := _command_parts(line);
		if ( command ) {
			_flush_pending( stack, para, verb );
			in_pod := command{command} eq "cut"
				? false
				: _process_command( root, stack, command{command}, command{text} );
			next;
		}

		next if not in_pod;

		if ( trim(line) eq "" ) {
			_flush_pending( stack, para, verb );
			next;
		}

		if ( para.length() > 0 ) {
			para.push(line);
			next;
		}

		if ( verb.length() > 0 ) {
			verb.push(line);
			next;
		}

		if ( line ~ /^\s/ ) {
			verb.push(line);
		}
		else {
			para.push(line);
		}
	}

	_flush_pending( stack, para, verb );
	return root;
}

=begin comment

_canonical_path(path) picks the string recorded as a document's source.
It tries path.realpath(), then path.absolute() when that yields null,
and falls back to the path's own to_String when both are null. A result
whose typeof is "Path" is converted through to_String; anything else is
stringified by concatenating it onto "".

NOTE(review): to_String is referenced without call parentheses here,
unlike every other method call in this file - confirm that this form
invokes the method rather than yielding a method reference.

=end comment

=cut

function _canonical_path ( Path path ) {
	let resolved := path.realpath();
	resolved := path.absolute() if resolved == null;

	if ( resolved == null ) {
		return path.to_String;
	}
	if ( typeof resolved eq "Path" ) {
		return resolved.to_String;
	}
	return "" _ resolved;
}

=begin comment

parse_pod_string(text) parses POD held in a string and returns the
PodDocument. The document's source is left as null because the text did
not come from a file.

=end comment

=cut

function parse_pod_string ( String text ) {
	let document := _parse_text( text, null );
	return document;
}

=begin comment

load_pod(path) reads a file as UTF-8 through path.slurp_utf8() and
parses its POD. The returned PodDocument records the canonical path
string as its source. Dies when the argument is not a std/io Path.

=end comment

=cut

function load_pod ( path ) {
	if ( not( path instanceof Path ) ) {
		die "pod/parser: load_pod expects a std/io Path object";
	}

	let contents := path.slurp_utf8();
	let origin := _canonical_path(path);
	return _parse_text( contents, origin );
}

=begin comment

parse_pod(source) is the general entry point. A String is parsed
directly; a std/io Path is loaded and parsed. Any other argument makes
the function die. Returns a PodDocument.

=end comment

=cut

function parse_pod ( source ) {
	if ( typeof source eq "String" ) {
		return parse_pod_string(source);
	}
	else {
		if ( source instanceof Path ) {
			return load_pod(source);
		}
	}

	die "pod/parser: parse_pod expects String or std/io Path";
}

=begin comment

PodParser is a stateless object wrapper around the module's functions.

  parse(source)       same as parse_pod: accepts a String or a Path.
  parse_string(text)  same as parse_pod_string.
  load(path)          same as load_pod.

Each method returns the PodDocument produced by the wrapped function.

=end comment

=cut

class PodParser {
	method parse ( source ) {
		let document := parse_pod(source);
		return document;
	}

	method parse_string ( String text ) {
		let document := parse_pod_string(text);
		return document;
	}

	method load ( path ) {
		let document := load_pod(path);
		return document;
	}
}