std/string/quoted_printable

Standard Library source code

Quoted-printable encoders and decoders.

Module

Name
std/string/quoted_printable
Area
Standard Library
Source
modules/std/string/quoted_printable.zzm
=encoding utf8

=head1 NAME

std/string/quoted_printable - Quoted-printable encoders and decoders.

=head1 SYNOPSIS

  from std/string/quoted_printable import encode, decode;

  let raw := to_binary( "Hello, world!\r\n" );

  let text := encode(raw);
  let bytes := decode(text);

  let binary_text := encode(raw, binary: true);
  let short_lines := encode(raw, line_length: 40, newline: "\n");

=head1 IMPLEMENTATION SUPPORT

This module is supported by all implementations of ZuzuScript.

=head1 DESCRIPTION

This module provides quoted-printable encoding and decoding helpers for
RFC 2045-style byte transport. Encoding returns ASCII C<String> text.
Decoding returns a C<BinaryString>, because quoted-printable is a byte
transfer encoding rather than a Unicode text format.

The C<binary> option controls how input line break bytes are encoded.
In the default non-binary mode, CRLF, CR, and LF bytes are normalized to
the configured C<newline> string. In binary mode, CR and LF bytes are
encoded as C<=0D> and C<=0A>.

=head1 EXPORTS

=head2 Functions

=over

=item * C<encode(BinaryString bytes, ... PairList options)>

Parameters: C<bytes> is binary input data and C<options> controls
encoding. Returns: C<String>. Encodes C<bytes> as quoted-printable ASCII
text.

=item * C<decode(String text, ... PairList options)>

Parameters: C<text> is quoted-printable text and C<options> controls
strictness. Returns: C<BinaryString>. Decodes quoted-printable text into
bytes. Throws if C<text> contains any non-ASCII character.

=back

=head1 OPTIONS

=over

=item * C<line_length>

Maximum encoded line length. Defaults to C<76> and must be at least
C<4>.

=item * C<newline>

Output newline for hard line breaks and encoded soft breaks. Defaults
to CRLF.

=item * C<binary>

When true, encode CR and LF bytes as C<=0D> and C<=0A>. Defaults to
false.

=item * C<strict>

When true, malformed quoted-printable escape sequences throw during
decoding. Non-strict decoding preserves malformed escape text
literally. Defaults to false.

=back

=head1 COPYRIGHT AND LICENCE

B<< std/string/quoted_printable >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut


let decode := null;
let encode := null;

{
	from std/string import chr, index, ord, substr;
	from std/string/base64 import
		encode as _base64_encode,
		decode as _base64_decode;

	let _B64_ALPHABET := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
		_ "0123456789+/";
	let _HEX := "0123456789" _ "ABCDEF";

	function _div_floor ( Number n, Number d ) {
		let quotient := n / d;
		return floor(quotient);
	}

	function _mod ( Number n, Number d ) {
		let whole := _div_floor( n, d );
		return n - whole * d;
	}

	function _bytes_to_binary ( Array bytes ) {
		let total := bytes.length();
		let encoded := "";
		let pos := 0;

		while ( pos < total ) {
			let first := bytes[pos];
			let second := ( pos + 1 < total ) ? bytes[pos + 1] : null;
			let third := ( pos + 2 < total ) ? bytes[pos + 2] : null;

			let s0 := _div_floor( first, 4 );
			let s1 := _mod( first, 4 ) * 16;
			let s2 := 64;
			let s3 := 64;

			if ( not( second == null ) ) {
				s1 += _div_floor( second, 16 );
				s2 := _mod( second, 16 ) * 4;
				if ( not( third == null ) ) {
					s2 += _div_floor( third, 64 );
					s3 := _mod( third, 64 );
				}
			}

			let quad := substr( _B64_ALPHABET, s0, 1 )
				_ substr( _B64_ALPHABET, s1, 1 );
			quad _= s2 == 64 ? "=" : substr( _B64_ALPHABET, s2, 1 );
			quad _= s3 == 64 ? "=" : substr( _B64_ALPHABET, s3, 1 );

			encoded _= quad;
			pos += 3;
		}

		return _base64_decode(encoded);
	}

	function _binary_to_bytes ( BinaryString raw ) {
		let encoded := _base64_encode(raw);
		let total := length encoded;
		let result := [];
		let pos := 0;

		while ( pos < total ) {
			let sym2 := substr( encoded, pos + 2, 1 );
			let sym3 := substr( encoded, pos + 3, 1 );

			let v0 := index( _B64_ALPHABET, substr( encoded, pos, 1 ) );
			let v1 := index( _B64_ALPHABET, substr( encoded, pos + 1, 1 ) );
			let v2 := ( sym2 ne "=" ) ? index( _B64_ALPHABET, sym2 ) : -1;
			let v3 := ( sym3 ne "=" ) ? index( _B64_ALPHABET, sym3 ) : -1;

			result.push( v0 * 4 + _div_floor( v1, 16 ) );
			if ( v2 >= 0 ) {
				result.push( _mod( v1, 16 ) * 16 + _div_floor( v2, 4 ) );
			}
			if ( v3 >= 0 ) {
				result.push( _mod( v2, 4 ) * 64 + v3 );
			}

			pos += 4;
		}

		return result;
	}

	function _parse_options ( PairList options ) {
		let strict := false;
		let binary := false;
		let newline := "\r\n";
		let line_length := 76;

		for ( let pair in options.enumerate() ) {
			let key := pair.key;
			let given := pair.value;

			if ( key eq "strict" ) {
				strict := given;
			}
			else if ( key eq "binary" ) {
				binary := given;
			}
			else if ( key eq "newline" ) {
				newline := given;
			}
			else if ( key eq "line_length" ) {
				line_length := given;
			}
			else {
				die `quoted_printable option '${key}' is not supported`;
			}
		}

		die "quoted_printable line_length option expects Number"
			if not( line_length instanceof Number );
		die "quoted_printable line_length option must be at least 4"
			if line_length < 4;
		die "quoted_printable newline option expects String"
			if not( newline instanceof String );
		die "quoted_printable binary option expects Boolean"
			if not( binary instanceof Boolean );
		die "quoted_printable strict option expects Boolean"
			if not( strict instanceof Boolean );

		let whole_length := int(line_length);

		return {
			line_length: whole_length,
			newline: newline,
			binary: binary,
			strict: strict,
		};
	}

	function _is_safe_literal ( Number b ) {
		if ( b >= 33 and b <= 60 ) {
			return true;
		}
		return ( b >= 62 and b <= 126 );
	}

	function _byte_to_hex_token ( Number b ) {
		let high := substr( _HEX, _div_floor( b, 16 ), 1 );
		let low := substr( _HEX, _mod( b, 16 ), 1 );
		return "=" _ high _ low;
	}

	function _simple_token_length ( Number b, Boolean final_byte ) {
		let is_blank := ( b == 9 or b == 32 );

		if ( is_blank and not final_byte ) {
			return 1;
		}

		return _is_safe_literal(b) ? 1 : 3;
	}

	function _space_tab_needs_escape (
		Array bytes,
		Number i,
		Number column,
		Number line_length,
	) {
		let total := bytes.length();
		let scan := i + 1;

		while ( scan < total and ( bytes[scan] == 9 or bytes[scan] == 32 ) ) {
			scan++;
		}
		if ( scan >= total ) {
			return true;
		}

		let follower_is_last := i + 2 >= total;
		let limit := line_length;
		if ( not follower_is_last ) {
			limit := line_length - 1;
		}

		let follower_width := _simple_token_length(
			bytes[i + 1],
			follower_is_last,
		);
		return column + 1 + follower_width > limit;
	}

	function _token_for_byte (
		Array bytes,
		Number i,
		Number column,
		Number line_length,
	) {
		let b := bytes[i];
		let is_blank := ( b == 9 or b == 32 );

		if ( is_blank ) {
			let must_escape := _space_tab_needs_escape(
				bytes,
				i,
				column,
				line_length,
			);
			return must_escape ? _byte_to_hex_token(b) : chr(b);
		}

		return _is_safe_literal(b) ? chr(b) : _byte_to_hex_token(b);
	}

	function _emit_token (
		String out,
		Number column,
		String token,
		Boolean more_after,
		Number line_length,
		String newline,
	) {
		let width := length token;
		let limit := line_length;
		if ( more_after ) {
			limit := line_length - 1;
		}

		if ( column > 0 and column + width > limit ) {
			let wrapped := out _ "=" _ newline _ token;
			return [ wrapped, width ];
		}

		let extended := out _ token;
		return [ extended, column + width ];
	}

	function _encode_segment (
		Array bytes,
		Number line_length,
		String newline,
	) {
		let total := bytes.length();
		let encoded := "";
		let col := 0;
		let pos := 0;

		while ( pos < total ) {
			let has_more := pos + 1 < total;
			let piece := _token_for_byte( bytes, pos, col, line_length );
			let state := _emit_token(
				encoded,
				col,
				piece,
				has_more,
				line_length,
				newline,
			);

			encoded := state[0];
			col := state[1];
			pos++;
		}

		return encoded;
	}

	function _encode_text_bytes (
		Array bytes,
		Number line_length,
		String newline,
	) {
		let total := bytes.length();
		let encoded := "";
		let pending := [];
		let pos := 0;

		while ( pos < total ) {
			let b := bytes[pos];
			pos++;

			if ( not( b == 13 or b == 10 ) ) {
				pending.push(b);
				next;
			}

			encoded _= _encode_segment( pending, line_length, newline );
			encoded _= newline;
			pending := [];

			if ( b == 13 and pos < total and bytes[pos] == 10 ) {
				pos++;
			}
		}

		encoded _= _encode_segment( pending, line_length, newline );
		return encoded;
	}

	function _hex_value ( String ch ) {
		let code := ord(ch);
		let offset := -1;

		if ( code >= 48 and code <= 57 ) {
			offset := 48;
		}
		else if ( code >= 65 and code <= 70 ) {
			offset := 55;
		}
		else if ( code >= 97 and code <= 102 ) {
			offset := 87;
		}

		if ( offset < 0 ) {
			return -1;
		}
		return code - offset;
	}

	function _decode_text_bytes ( String text, Boolean strict ) {
		let out := [];
		let i := 0;
		let n := length text;

		while ( i < n ) {
			let ch := substr( text, i, 1 );
			let cp := ord( text, i );

			if ( cp > 127 ) {
				die "quoted_printable.decode rejects non-ASCII input";
			}

			if ( ch eq "=" ) {
				let j := i + 1;
				while ( j < n and ( ord( text, j ) == 32 or ord( text, j ) == 9 ) ) {
					j++;
				}

				if ( j < n ) {
					let brk := substr( text, j, 1 );
					if ( brk eq "\r" ) {
						if ( j + 1 < n and substr( text, j + 1, 1 ) eq "\n" ) {
							i := j + 2;
						}
						else {
							i := j + 1;
						}
						next;
					}
					if ( brk eq "\n" ) {
						i := j + 1;
						next;
					}
				}

				if ( i + 2 < n ) {
					let hi := _hex_value( substr( text, i + 1, 1 ) );
					let lo := _hex_value( substr( text, i + 2, 1 ) );
					if ( hi >= 0 and lo >= 0 ) {
						out.push( hi * 16 + lo );
						i += 3;
						next;
					}
				}

				die "malformed quoted-printable escape" if strict;
				out.push(61);
				i++;
				next;
			}

			out.push(cp);
			i++;
		}

		return out;
	}

	encode := function ( BinaryString bytes, ... PairList options ) {
		let settings := _parse_options(options);
		let octets := _binary_to_bytes(bytes);
		let width := settings{line_length};
		let eol := settings{newline};

		if ( not( settings{binary} ) ) {
			return _encode_text_bytes( octets, width, eol );
		}

		return _encode_segment( octets, width, eol );
	};

	decode := function ( String text, ... PairList options ) {
		let settings := _parse_options(options);
		let octets := _decode_text_bytes( text, settings{strict} );
		return _bytes_to_binary(octets);
	};
}