dev

See Global Lua Modules/XML

Subpages



--- XML parser for valid XML streams in Lua.
--  This module is a fork of the [[github:manoelcampos/xml2lua|xml2lua]]
--  library by [[github:manoelcampos|@manoelcampos]]. It is available
--  under the MIT license, as with the original library.
--  
--  The parser provides a partially object-oriented API with its
--  functionality split into tokeniser and handler components.
--  
--  The handler instance from @{xml.handlers} is passed to the tokeniser
--  via @{xml.parser} and receives callbacks for each XML element
--  processed (if a suitable handler function is defined). The API is
--  conceptually similar to the SAX API but implemented differently.
--  
--  XML data is passed to the parser instance through the
--  @{XMLParser:parse} method. Note that the parser only accepts a
--  single string currently.
--  
--  The default XML handler is @{xml.handlers.DOM}, due to its ability
--  to nondestructively parse any XML (representing comments, text nodes
--  and mixed content appropriately). The module provides a serialiser
--  supporting XML DOM root tables at @{xml.serialise}, which has a
--  compatibility layer for XML tree root tables.
--  
--  If your application involves bidirectional parsing of data, such as
--  the contents of templates using Wikia's [[w:Help:Infobox|infobox
--  component]], the @{xml.handlers.DOM} handler is recommended. When
--  creating XML configuration files for use in Lua modules, it is
--  recommended to use the @{xml.handlers.Tree} handler which allows for
--  easier node traversal and data extraction.
--  
--  ## Features ##
--    * Tokenises well-formed XML (relatively robustly)
--    * Flexible handler-based event API (see @{xml.handlers}
--   documentation).
--    * Parses all XML infoset elements:
--      ** Tags
--      ** Text
--      ** Comments
--      ** CDATA
--      ** XML declarations
--      ** Processing instructions
--      ** DOCTYPE declarations
--    * Provides limited well-formedness checking
--    (checks for basic syntax & balanced tags only)
--    * Flexible whitespace handling (optional)
--    * Entity handling (optional)
--  
--  ## Limitations ##
--    * Shallow well-formedness checking only (fails
--      to detect most semantic errors)
--    * Non-validating
--    * No charset handling
--    * No namespace support
--  
--  @script             xml
--  @alias              p
--  @license            MIT
--  @release            beta
--  @require            Module:I18n
--  @require            Module:Yesno
--  @version            1.3.5
--  @author             Paul Chakravarti (passtheaardvark.com)
--  @author             [[github:manoelcampos|Manoel Campos da Silva Filho]]
--  @author             [[User:8nml|8nml]]
local p = {}

--  Module dependencies.
local i18n = require('Dev:I18n').loadMessages('XML')
local yesno = require('Dev:Yesno')

--  Module variables.
--  Node type tags. The DOM branch of `p.serialise` compares a node's
--  `_type` field against several of these values to decide how the node
--  is written out.
local XML_DOM_TYPE_COMMENT = 'COMMENT'
local XML_DOM_TYPE_DECL = 'DECL'
local XML_DOM_TYPE_DTD = 'DTD'
local XML_DOM_TYPE_ELEMENT = 'ELEMENT'
local XML_DOM_TYPE_TEXT = 'TEXT'
local XML_DOM_TYPE_PI = 'PI'

--  Parser utilities.

--- Expands a decimal character reference.
--  @function           decimalToHtmlChar
--  @param              {number|string} code Decimal character code (the
--                      digits captured from `&#code;`).
--  @return             {string} The single byte produced by `string.char`
--                      when the code is in the range 0-255. Otherwise the
--                      reference is rebuilt unchanged as `&#code;`.
--  @local
local function decimalToHtmlChar(code)
    local value = tonumber(code)
    local fitsInByte = value >= 0 and value < 256

    if not fitsInByte then
        -- Out of single-byte range: hand the reference back untouched.
        return '&#' .. code .. ';'
    end

    return string.char(value)
end

--- Expands a hexadecimal character reference.
--  @function           hexadecimalToHtmlChar
--  @param              {string} code Hexadecimal character code (the
--                      digits captured from `&#xcode;`).
--  @return             {string} The single byte produced by `string.char`
--                      when the code is in the range 0x00-0xFF. Otherwise
--                      the reference is rebuilt unchanged as `&#xcode;`.
--  @local
local function hexadecimalToHtmlChar(code)
    local value = tonumber(code, 16)
    local fitsInByte = value >= 0 and value < 256

    if not fitsInByte then
        -- Out of single-byte range: hand the reference back untouched.
        return '&#x' .. code .. ';'
    end

    return string.char(value)
end

--- Tests whether a field is reachable from a table or its metatables.
--  The table is indexed first; if the field is nil, the table's
--  metatable is indexed, then that metatable's metatable, and so on
--  until a non-nil value is found or the chain ends.
--  @function           fexists
--  @param              {table} tbl Table to start the lookup from. `nil`
--                      is accepted and yields `false`.
--  @param              {string} element Field or function name.
--  @return             {boolean} `true` if some table in the chain holds
--                      a non-nil value for `element`.
--  @local
local function fexists(tbl, element)
    local current = tbl

    while current ~= nil do
        if current[element] ~= nil then
            return true
        end
        current = getmetatable(current)
    end

    return false
end

--- Reports a parsing error through the parser's error handler.
--  Does nothing when `self.options.errorHandler` is unset.
--  @function           err
--  @param              {table} self XML parser instance.
--  @param[opt]         {string} err Localised error message string.
--  @param[opt]         {number} pos String character position.
--  @local
local function err(self, err, pos)
    local handler = self.options.errorHandler
    if not handler then
        return
    end

    handler(err, pos)
end

--- Trims a string when whitespace stripping is enabled.
--  @function            stripWS
--  @param               {table} self XML parser instance.
--  @param               {string} s XML text with whitespace.
--  @return              {string} `s` passed through `mw.text.trim` when
--                       `self.options.stripWS` is truthy; otherwise `s`
--                       unchanged.
--  @local
local function stripWS(self, s)
    if not self.options.stripWS then
        return s
    end

    -- Parentheses keep the result to a single value.
    return (mw.text.trim(s))
end

--- Expands XML entity references in a string.
--  Each key of `self._ENTITIES` is applied as a Lua pattern through
--  `string.gsub`, with the matching value as the replacement.
--
--  The `&amp;` entry is always applied last. `pairs` gives no ordering
--  guarantee, and expanding `&amp;` before the other entities would
--  decode text twice (`&amp;lt;` would become `<` instead of `&lt;`).
--  Ordering among the remaining entries is still unspecified.
--  @function           parseEntities
--  @param              {table} self XML parser instance.
--  @param              {string} str String containing entity references.
--  @return             {string} `str` with entities expanded, or `str`
--                      unchanged when `self.options.expandEntities` is
--                      falsy.
--  @local
local function parseEntities(self, str)
    if not self.options.expandEntities then
        return str
    end

    local AMP = '&amp;'
    local entities = self._ENTITIES

    for pattern, replacement in pairs(entities) do
        if pattern ~= AMP then
            str = string.gsub(str, pattern, replacement)
        end
    end

    -- Ampersands last, so the `&` they produce is never re-read as the
    -- start of another entity.
    local ampersand = entities[AMP]
    if ampersand ~= nil then
        str = string.gsub(str, AMP, ampersand)
    end

    return str
end

--- Parses the text of an opening XML tag into its name and attributes.
--  Attributes are collected with both `self._ATTR1` (double-quoted
--  values) and `self._ATTR2` (single-quoted values); each value is
--  passed through `parseEntities`.
--  @function           parseTag
--  @param              {table} self XML parser instance.
--  @param              {string} s Opening tag text.
--  @return             {table} A table describing the tag:
--                       * `name` The name of the tag. (string)
--                       * `attrs` Attribute name/value map, or `nil` when
--                      the tag has no attributes. (table|nil)
--  @local
local function parseTag(self, s)
    local attrs = {}
    -- Tracked in a local flag rather than a marker key inside `attrs`,
    -- so an attribute that is literally named `_` is preserved.
    local hasAttrs = false

    local function collect(key, value)
        attrs[key] = parseEntities(self, value)
        hasAttrs = true
    end

    string.gsub(s, self._ATTR1, collect)
    string.gsub(s, self._ATTR2, collect)

    return {
        -- Parentheses drop gsub's second result (the match count).
        name = (string.gsub(s, self._TAG, '%1')),
        attrs = hasAttrs and attrs or nil,
    }
end

--- Parses a string representing a XML declaration tag.
--  Reports an error when the declaration cannot be matched, when it is
--  not at the very start of the document, or when it lacks a `version`
--  attribute. Calls `handler:decl` if the handler defines it.
--  @function           parseXmlDeclaration
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables; `match`,
--                      `endMatch` and `text` are overwritten.
--  @return             {table} A table representation of the XML
--                      declaration.
--  @todo               Check if attributes are valid.
--  @local
local function parseXmlDeclaration(self, str, f)
    -- XML declaration.
    f.match, f.endMatch, f.text = string.find(str, self._PI, f.pos)
    if not f.match then
        err(self, i18n:msg('error-parsing-decl'), f.pos)
    end

    if f.match ~= 1 then
        -- Must be at start of doc if present.
        err(self, i18n:msg('error-parsing-declStart'), f.pos)
    end

    local tag = parseTag(self, f.text)
    -- `version` is mandatory. `parseTag` returns `attrs = nil` for a
    -- declaration with no attributes, so that case must be reported too.
    if not tag.attrs or tag.attrs.version == nil then
        err(self, i18n:msg('error-parsing-declAttr'), f.pos)
    end

    if fexists(self.handler, 'decl') then
        self.handler:decl(tag, f.match, f.endMatch)
    end

    return tag
end

--- Parses a XML processing instruction (PI).
--  The PI is only decoded when the handler defines a `pi` callback. In
--  that case the text following the PI target name is stored in
--  `tag.attrs._text` (if non-empty) before the callback is invoked.
--  @function           parseXmlProcessingInstruction
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables; `match`,
--                      `endMatch` and `text` are overwritten.
--  @return             {table} The parsed PI tag, or an empty table when
--                      the handler has no `pi` callback.
--  @local
local function parseXmlProcessingInstruction(self, str, f)
    -- XML Processing Instruction (PI)
    f.match, f.endMatch, f.text = string.find(str, self._PI, f.pos)
    if not f.match then
        err(self, i18n:msg('error-parsing-pi'), f.pos)
    end

    if not fexists(self.handler, 'pi') then
        return {}
    end

    -- Parse PI attributes & text
    local tag = parseTag(self, f.text)
    local remainder = string.sub(f.text, string.len(tag.name) + 1)
    if remainder ~= '' then
        tag.attrs = tag.attrs or {}
        tag.attrs._text = remainder
    end

    self.handler:pi(tag, f.match, f.endMatch)
    return tag
end

--- Parses a string representing an XML comment.
--  Calls `handler:comment(text, nil, s, e)` if the handler defines it.
--  The comment text is whitespace-stripped and entity-expanded according
--  to the parser options.
--  @function           parseComment
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables; `match`,
--                      `endMatch` and `text` are overwritten.
--  @local
local function parseComment(self, str, f)
    f.match, f.endMatch, f.text = string.find(str, self._COMMENT, f.pos)
    if not f.match then
        err(self, i18n:msg('error-parsing-comment'), f.pos)
    end

    if fexists(self.handler, 'comment') then
        f.text = parseEntities(self, stripWS(self, f.text))
        -- Comments carry no attributes: pass `nil` in the second slot,
        -- as the CDATA and text callbacks do.
        self.handler:comment(f.text, nil, f.match, f.endMatch)
    end
end

--- Utility to parse a string representing XML DTD declarations.
--  The patterns `self._DTD1` to `self._DTD5` are tried in that order and
--  the first one that matches wins.
--  @function           _parseDtd
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {number} pos Character position index.
--  @return             {number|nil} Start index of match, or `nil` when
--                      no pattern matches.
--  @return             {number} End index of match.
--  @return             {table} A table representation of the DOCTYPE
--                      with fields `_root`, `_type`, `_name`, `_uri` and
--                      `_internal` (each `nil` when not captured).
--  @local
local function _parseDtd(self, str, pos)
    local patterns = { self._DTD1, self._DTD2, self._DTD3, self._DTD4, self._DTD5 }

    -- Numeric loop: the order of the patterns is significant, and a
    -- missing pattern is simply skipped.
    for index = 1, 5 do
        local pattern = patterns[index]
        if pattern then
            local first, last, root, kind, name, uri, internal = string.find(str, pattern, pos)
            if first then
                if kind == 'SYSTEM' then
                    -- SYSTEM patterns capture (root, type, uri[, internal])
                    -- with no public name, so shift the captures into the
                    -- right fields.
                    name, uri, internal = nil, name, uri
                end
                return first, last, {
                    _root = root,
                    _type = kind,
                    _name = name,
                    _uri = uri,
                    _internal = internal,
                }
            end
        end
    end

    return nil
end

--- Parses a string representing XML DTD declarations.
--  Calls `handler:dtd` if the handler defines it, with a tag whose
--  `value` is the declaration text between the leading `<!DOCTYPE ` and
--  the closing `>`.
--  @function           parseDtd
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables; `match` and
--                      `endMatch` are overwritten.
--  @local
local function parseDtd(self, str, f)
    -- Only the match bounds are needed here. The third result (the parsed
    -- DOCTYPE fields) is deliberately dropped instead of being written to
    -- a global variable.
    f.match, f.endMatch = _parseDtd(self, str, f.pos)
    if not f.match then
        err(self, i18n:msg('error-parsing-dtd'), f.pos)
    end

    if fexists(self.handler, 'dtd') then
        -- `<!DOCTYPE ` is 10 characters long; `- 1` drops the final `>`.
        local tag = { name = 'DOCTYPE', value = string.sub(str, f.match + 10, f.endMatch - 1) }
        self.handler:dtd(tag, f.match, f.endMatch)
    end
end

--- Parses a XML CDATA section.
--  Calls `handler:cdata(text, nil, s, e)` if the handler defines it. The
--  CDATA text is passed on as matched, without whitespace stripping or
--  entity expansion.
--  @function           parseCdata
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables; `match`,
--                      `endMatch` and `text` are overwritten.
--  @local
local function parseCdata(self, str, f)
    local first, last, body = string.find(str, self._CDATA, f.pos)
    f.match, f.endMatch, f.text = first, last, body

    if f.match == nil then
        err(self, i18n:msg('error-parsing-cdata'), f.pos)
    end

    local handler = self.handler
    if fexists(handler, 'cdata') then
        handler:cdata(f.text, nil, f.match, f.endMatch)
    end
end

--- Parses a normal XML tag (start, end or self-closing).
--  Start tags are pushed onto `self._stack` and end tags popped from it,
--  so unbalanced tags are reported whether or not the handler defines
--  `starttag`/`endtag` callbacks.
--  @function           parseNormalTag
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables. Reads
--                      `tagstr`, `endt1`, `endt2`, `match`, `endMatch`
--                      and `pos`; may extend `tagstr` and `endMatch`.
--  @return             {table} A table representation of the XML
--                      tag.
--  @todo               Support `>` tag in attributes.
--  @local
local function parseNormalTag(self, str, f)
    -- Check for errors.
    while true do
        -- If there isn't an attribute without closing quotes (single
        -- or double quotes), then break to follow the normal
        -- processing of the tag.
        -- Otherwise, try to find where the quotes close.
        f.errStart, f.errEnd = string.find(f.tagstr, self._ATTRERR1)

        if f.errEnd == nil then
            f.errStart, f.errEnd = string.find(f.tagstr, self._ATTRERR2)
            if f.errEnd == nil then
                break
            end
        end

        f.extStart, f.extEnd, f.endt2 = string.find(str, self._TAGEXT, f.endMatch + 1)
        if not f.extStart then
            -- No further `>` to extend the tag with. Report the error
            -- before touching `f.extEnd`, and leave the loop in case the
            -- error handler returns instead of throwing.
            err(self, i18n:msg('error-parsing-xml'), f.pos)
            break
        end
        f.tagstr = f.tagstr .. string.sub(str, f.endMatch, f.extEnd - 1)
        f.endMatch = f.extEnd
    end

    -- Extract tag name and attrs.
    local tag = parseTag(self, f.tagstr)

    if f.endt1 == '/' then
        if tag.attrs then
            -- Shouldn't have any attributes in endtag
            err(self, i18n:msg('error-parsing-endtag', tag.name), f.pos)
        end
        -- Always pop: start tags are always pushed, so the stack must be
        -- unwound even when the handler has no `endtag` callback.
        if table.remove(self._stack) ~= tag.name then
            err(self, i18n:msg('error-parsing-unmatched', tag.name), f.pos)
        end
        if fexists(self.handler, 'endtag') then
            self.handler:endtag(tag, f.match, f.endMatch)
        end
    else
        table.insert(self._stack, tag.name)

        if fexists(self.handler, 'starttag') then
            self.handler:starttag(tag, f.match, f.endMatch)
        end

        -- Self-closing tag
        if f.endt2 == '/' then
            table.remove(self._stack)
            if fexists(self.handler, 'endtag') then
                self.handler:endtag(tag, f.match, f.endMatch)
            end
        end
    end

    return tag
end

--- Type-agnostic XML tag parser.
--  Inspects the first characters of `f.tagstr` to pick the matching
--  sub-parser (XML declaration, processing instruction, comment,
--  DOCTYPE, CDATA or normal tag) and runs it.
--  @function           parseTagType
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables.
--  @local
local function parseTagType(self, str, f)
    local tagstr = f.tagstr
    local subparser

    -- Test for tag type
    if string.find(string.sub(tagstr, 1, 5), '?xml%s') then
        subparser = parseXmlDeclaration
    elseif string.sub(tagstr, 1, 1) == '?' then
        subparser = parseXmlProcessingInstruction
    elseif string.sub(tagstr, 1, 3) == '!--' then
        subparser = parseComment
    elseif string.sub(tagstr, 1, 8) == '!DOCTYPE' then
        subparser = parseDtd
    elseif string.sub(tagstr, 1, 8) == '![CDATA[' then
        subparser = parseCdata
    else
        subparser = parseNormalTag
    end

    subparser(self, str, f)
end

--- Tag parsing iterator check (first pass).
--  Matches `self._XML` at `f.pos` and stores the results in `f`
--  (`match`, `endMatch`, `text`, `endt1`, `tagstr`, `endt2`). When no tag
--  is found, the remaining text must be whitespace only and the tag
--  stack empty; otherwise an error is reported.
--  @function           getNextTag
--  @param              {table} self XML parser instance.
--  @param              {string} str Full XML text being parsed.
--  @param              {table} f Auxiliary parser variables.
--  @return             {boolean} Boolean for whether there is a next
--                      tag.
--  @todo               Fix exceptions below (multiple passes).
--  @local
local function getNextTag(self, str, f)
    f.match, f.endMatch, f.text, f.endt1, f.tagstr, f.endt2 =
        string.find(str, self._XML, f.pos)

    if f.match == nil then
        local onlyWhitespace = string.find(str, self._WS, f.pos)
        if not onlyWhitespace then
            -- Unparsable text
            err(self, i18n:msg('error-parsing-xml'), f.pos)
        elseif #self._stack == 0 then
            -- No more text and every tag was closed: document complete.
            return false
        else
            err(self, i18n:msg('error-parsing-incomplete'), f.pos)
        end
    end

    -- Only reached without a match if the error handler returned.
    if f.text == nil then
        f.text = ''
    end
    if f.tagstr == nil then
        f.tagstr = ''
    end
    if f.match == nil then
        f.match = 0
    end

    return f.endMatch ~= nil
end

--- Default error handler for invalid XML.
--  Raises a Lua error whose message is the `error-message-format`
--  message filled in with the error text and the position.
--  @function           defaultErrorHandler
--  @param[opt]         {string} msg Error message specifying XML item
--                      type. Defaults to the `error-parsing` message.
--  @param[opt]         {number} pos String character position.
--                      Defaults to 0.
--  @local
local function defaultErrorHandler(msg, pos)
    if not msg then
        msg = i18n:msg('error-parsing')
    end

    local where = tostring(pos or 0)
    error(i18n:msg('error-message-format', msg, where))
end

--  Serialiser utilities.

--- Generates an XML attribute string from an `_attr` table.
--  Values are escaped so the output stays well-formed: `"` and `<` are
--  always replaced by `&quot;` and `&lt;`, and a bare `&` by `&amp;`. An
--  `&` that already starts an entity or character reference (such as
--  `&amp;` or `&#65;`) is left alone, so values that were never
--  entity-expanded are not escaped twice.
--  @function           serialiseAttr
--  @param              {table|nil} tbl Attribute table field `_attr`
--                      from an XML table representation.
--  @return             {string} XML string representation of the
--                      attributes, each preceded by one space. Empty
--                      string when `tbl` is nil or empty. Attribute
--                      order follows `pairs` and is unspecified.
local function serialiseAttr(tbl)
    local parts = {}

    for key, value in pairs(tbl or {}) do
        local text = tostring(value)
        text = string.gsub(text, '&', '&amp;')
        -- Undo the escape where the ampersand introduced a reference.
        text = string.gsub(text, '&amp;(#?%w+;)', '&%1')
        text = string.gsub(text, '<', '&lt;')
        text = string.gsub(text, '"', '&quot;')
        parts[#parts + 1] = ' ' .. key .. '="' .. text .. '"'
    end

    return table.concat(parts)
end

--  Handler utilities.

--- DOM handler constructor.
--  Builds the initial state of a DOM handler: normalised node options,
--  an empty `ROOT` node as the current node, and an empty stack.
--  The options `commentNode`, `piNode`, `dtdNode` and `declNode` each
--  default to `true` when absent and are otherwise passed through
--  `yesno(value, false)`.
--  @function           initDOMHandler
--  @param              {table|nil} options DOM handler options.
--  @return             {table} New DOM handler state.
--  @constructor
--  @local
local function initDOMHandler(options)
    options = options or {}

    -- Missing options are enabled; anything else is decided by yesno.
    local function flag(value)
        if value == nil then
            return true
        end
        return yesno(value, false)
    end

    local dom = {
        options = {
            commentNode = flag(options.commentNode),
            piNode = flag(options.piNode),
            dtdNode = flag(options.dtdNode),
            declNode = flag(options.declNode),
        },
        current = {
            _children = { n = 0 },
            _type = 'ROOT',
        },
        _stack = {},
    }

    return dom
end

--- Tree handler constructor.
--  Builds the initial state of a tree handler: an empty `root` table, a
--  stack holding that root, and the `noreduce` option.
--  @function           initTreeHandler
--  @param              {table|nil} options Tree handler options. Only
--                      `options.noreduce` is read; it is used as given
--                      when it is a table and replaced by an empty table
--                      otherwise.
--  @return             {table} New tree handler state.
--  @constructor
--  @local
local function initTreeHandler(options)
    local root = {}

    local noreduce
    if options and type(options.noreduce) == 'table' then
        noreduce = options.noreduce
    else
        noreduce = {}
    end

    return {
        root = root,
        options = { noreduce = noreduce },
        _stack = { root, n = 1 },
    }
end

--- Print handler constructor.
--  Builds the initial state of a print (logging) handler, which consists
--  only of its normalised options. The options `commentNode`, `piNode`,
--  `dtdNode` and `declNode` each default to `true` when absent and are
--  otherwise passed through `yesno(value, false)`.
--  @function           initPrintHandler
--  @param              {table|nil} options Print handler options.
--  @return             {table} New print handler state.
--  @constructor
--  @local
local function initPrintHandler(options)
    options = options or {}

    local resolved = {}
    for _, key in ipairs({ 'commentNode', 'piNode', 'dtdNode', 'declNode' }) do
        local value = options[key]
        if value == nil then
            resolved[key] = true
        else
            resolved[key] = yesno(value, false)
        end
    end

    return { options = resolved }
end

--- Gets the first key that `pairs` yields for a table.
--  @function           getFirstKey
--  @param              {table} tbl Table to get the first key from.
--  @return             {string|number} The first key visited by `pairs`,
--                      `nil` if the table is empty, or `tbl` itself if
--                      it isn't a table.
--  @local
local function getFirstKey(tbl)
    if type(tbl) ~= 'table' then
        return tbl
    end

    for key in pairs(tbl) do
        return key
    end

    return nil
end
 
--- Class providing the actual XML parser.
--  Instances are created by `XmlParser.new` and look up the patterns
--  below through the `__index` metafield, so a pattern can be overridden
--  on a single instance.
--  @type               XMLParser
local XmlParser = {}
XmlParser.__index = XmlParser

--  Private attributes with XML patterns.
--  All values are Lua patterns (not regular expressions).

--  Next tag, anchored at the search position. Captures: leading text,
--  optional `/` (end tag), tag body up to the first `>`, and optional
--  trailing `/` (self-closing tag). Used by `getNextTag`.
XmlParser._XML = '^([^<]*)<(%/?)([^>]-)(%/?)>'
--  Attribute with a double-quoted value: captures name and value.
XmlParser._ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"'
--  Attribute with a single-quoted value: captures name and value.
XmlParser._ATTR2 = "([%w-:_]+)%s*=%s*'(.-)'"
--  CDATA section: captures the section content.
XmlParser._CDATA = '<%!%[CDATA%[(.-)%]%]>'
--  Processing instruction or XML declaration: captures the text between
--  `<?` and `?>`.
XmlParser._PI = '<%?(.-)%?>'
--  Comment: captures the text between `<!--` and `-->`.
XmlParser._COMMENT = '<!%-%-(.-)%-%->'
--  Tag name: captures everything before the first whitespace character.
--  Used with `string.gsub` in `parseTag`, so a tag string containing no
--  whitespace is returned whole.
XmlParser._TAG = '^(.-)%s.*'
--  Leading and trailing whitespace runs.
XmlParser._LEADINGWS = '^%s+'
XmlParser._TRAILINGWS = '%s+$'
--  Whitespace-only remainder of the document. Used by `getNextTag`.
XmlParser._WS = '^%s*$'
--  DOCTYPE declarations, tried in numeric order by `_parseDtd`:
--    1. SYSTEM identifier with internal subset.
--    2. PUBLIC identifier with internal subset.
--    3. Any DOCTYPE, up to the first `>` (no captures).
--    4. SYSTEM identifier without internal subset.
--    5. PUBLIC identifier without internal subset.
--  NOTE(review): pattern 3 matches every declaration that patterns 4 and
--  5 match and is tried before them, so 4 and 5 appear to be unreachable
--  - confirm whether the order is intended.
XmlParser._DTD1 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*(%b[])%s*>'
XmlParser._DTD2 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*(%b[])%s*>'
XmlParser._DTD3 = '<!DOCTYPE%s.->'
XmlParser._DTD4 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*>'
XmlParser._DTD5 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*>'
--  Attribute pattern with non-closing double quotes, anchored at the end
--  of the tag string. Used by `parseNormalTag`.
--  NOTE(review): Lua patterns have no lazy `+?` quantifier. Here `=+` is
--  greedy and the `?` that follows appears to be matched as a literal
--  question mark, so this likely only matches text containing `=?` -
--  confirm against the Lua reference manual before relying on it.
XmlParser._ATTRERR1 = '=+?%s*"[^"]*$'
--  Attribute pattern with non-closing single quotes; the note on
--  `_ATTRERR1` applies here too.
XmlParser._ATTRERR2 = '=+?%s*\'[^\']*$'
--  End of a tag: an optional `/` (captured) followed by `>`, i.e. either
--  `>` or `/>`.
XmlParser._TAGEXT = '(%/?)>'
--  Entity table used by `parseEntities`. Each key is a Lua pattern and
--  each value is the `string.gsub` replacement: a string, or a function
--  that receives the captured character code.
XmlParser._ENTITIES = {
    ['&lt;'] = '<',
    ['&gt;'] = '>',
    ['&amp;'] = '&',
    ['&quot;'] = '"',
    ['&apos;'] = "'",
    ['&#(%d+);'] = decimalToHtmlChar,
    ['&#x(%x+);'] = hexadecimalToHtmlChar
}

--- Instantiates a XmlParser object.
--  The new instance starts with an empty tag stack and uses `XmlParser`
--  as its metatable.
--  @param              {table} _handler Handler object to be used to
--                      convert the XML string to another format. See
--                      the available handlers at @{xml.handlers}.
--  @param              {table} _options Options for this XmlParser
--                      instance, defined in @{xml.parser}. Stored as
--                      given, without defaults being applied.
--  @return             {XMLParser} The new parser instance.
function XmlParser.new(_handler, _options)
    local instance = setmetatable({}, XmlParser)

    instance.handler = _handler
    instance.options = _options
    instance._stack = {}
    instance.__index = XmlParser

    return instance
end

--- Main function which starts the XML parsing process.
--  Repeatedly finds the next tag, reports any text in front of it to
--  `handler:text(text, nil, s, e)`, and then dispatches the tag to the
--  matching sub-parser.
--  @param              {string} str the XML string to parse
--  @param[opt]         {boolean} parseAttributes indicates if tag
--                      attributes should be parsed or not. The value is
--                      stored on the handler as `parseAttributes`.
--                      Default: `true`.
function XmlParser:parse(str, parseAttributes)
    if type(self) ~= 'table' or getmetatable(self) ~= XmlParser then
        error(i18n:msg('error-parser-method'))
    end

    if parseAttributes == nil then
        parseAttributes = true
    end

    self.handler.parseAttributes = parseAttributes

    -- Stores auxiliary parser variables such as string.find results.
    local f = {
        -- string.find return
        match = 0,
        endMatch = 0,
        -- text, end1, tagstr, end2,
        -- string.find parameters and auxiliar variables
        pos = 1
        -- startText, endText,
        -- errStart, errEnd, extStart, extEnd,
    }

    while f.match do
        if not getNextTag(self, str, f) then
            break
        end

        -- Handle leading text
        local length = string.len(f.text)
        f.startText = f.match
        f.endText = f.match + length - 1
        -- From here on `f.match` is the start of the tag itself.
        f.match = f.match + length
        f.text = parseEntities(self, stripWS(self, f.text))
        if f.text ~= '' and fexists(self.handler, 'text') then
            -- Report the span of the text, not the start of the tag that
            -- follows it.
            self.handler:text(f.text, nil, f.startText, f.endText)
        end

        parseTagType(self, str, f)
        f.pos = f.endMatch + 1
    end
end

--- Parses an XML string into an abstract syntax tree or event trace.
--  This function includes logic to attach a handler to the XML parser,
--  making it much more convenient than @{xml.parser}.
--  @function           p.parse
--  @param              {string} str XML string to be parsed.
--  @param              {string|table} handler Handler to use. Default:
--                      `"DOM"`. Accepts the following values:
--                       * @{xml.handlers.DOM|"DOM"} - DOM handler (typed).
--                       * @{xml.handlers.Tree|"Tree"} - tree handler.
--                       * @{xml.handlers.Print|"Print"} - parser logging.
--                       * Custom handler in the form of a Lua table,
--                      which is used as given.
--  @param[opt]         {table} parser_opts Parser configuration options.
--                      Defaults are listed in @{xml.parser} options.
--  @param[opt]         {table} handler_opts Handler configuration options,
--                      passed to the named handler's `new` constructor.
--                      Defaults are listed in @{xml.handler} options.
--  @error[688]         'XML handler "$handler" not found'
--  @return             {table} The `root` field of the handler after
--                      parsing.
function p.parse(str, handler, parser_opts, handler_opts)
    handler = handler or 'DOM'
    parser_opts = type(parser_opts) == 'table' and parser_opts or {}
    handler_opts = type(handler_opts) == 'table' and handler_opts or {}

    local handler_obj
    if type(handler) == 'table' then
        handler_obj = handler
    else
        local prototype = p.handlers[handler]
        if not prototype then
            error(i18n:msg('error-handler-fetch', handler))
        end

        if type(prototype.new) == 'function' then
            -- Forward the handler options, which were previously
            -- normalised but never used.
            handler_obj = prototype:new(handler_opts)
        else
            -- Handlers without a constructor are used directly.
            handler_obj = prototype
        end
    end

    local parser = p.parser(handler_obj, parser_opts)

    parser:parse(str)
    return handler_obj.root
end

--- Converts a Lua XML DOM tree to a XML string representation.
--  A table with both `_name` and `_type` fields is serialised as a DOM
--  node; any other table is serialised as a tree table.
--  @function           p.serialise
--  @param              {table} tbl DOM or tree root for XML conversion.
--                      This parameter is the root table generated by a
--                      @{xml.handlers.DOM} or @{xml.handlers.Tree}
--                      parser instance.
--  @param[opt]         {number} level Only used internally, when the
--                      function is called recursively to print
--                      indentation.
--  @param[opt]         {string} name Tag name used for the tree table
--                      and for its array items.
--  @error[739]         'cannot serialise this value. Are you using a
--                      handler other than "xml.handlers.DOM" and
--                      "xml.handlers.Tree"?'
--  @return             {string} XML string representation for table.
--  @note               The tree serialiser clears the `_attr` field of
--                      `tbl` and of its nested tables while it runs, so
--                      the input is modified.
function p.serialise(tbl, level, name)
    if type(tbl) ~= 'table' then
        error(i18n:msg('error-serialise'))

    -- DOM table serialiser. Very stable and supports text nodes.
    elseif tbl._name and tbl._type then
        -- No module-level constant exists for CDATA nodes.
        -- NOTE(review): assumes a handler would tag them 'CDATA', in line
        -- with the other type tags - confirm against the DOM handler.
        local XML_DOM_TYPE_CDATA = 'CDATA'
        -- A node without a `_children` table is treated as empty.
        local children = tbl._children or {}
        local count = #children
        level = level or 1
        local indent = string.rep(' ', level * 4)
        local ret = { '<' .. tbl._name .. serialiseAttr(tbl._attr) .. (count ~= 0 and '>' or ' />') }

        -- Numeric loop: child order is significant in XML, and this also
        -- skips the `n` bookkeeping field.
        for index = 1, count do
            local v = children[index]
            if v._type == XML_DOM_TYPE_ELEMENT then
                local inner = v._children or {}
                if #inner == 1 and inner[1]._type == XML_DOM_TYPE_TEXT then
                    -- Element holding a single text node: keep on one line.
                    table.insert(ret, indent .. '<' .. v._name .. serialiseAttr(v._attr) .. '>' .. inner[1]._text .. '</' .. v._name .. '>')
                else
                    table.insert(ret, indent .. p.serialise(v, level + 1))
                end
            elseif v._type == XML_DOM_TYPE_COMMENT then
                table.insert(ret, indent .. '<!-- ' .. v._text .. ' -->')
            elseif v._type == XML_DOM_TYPE_TEXT or v._type == XML_DOM_TYPE_CDATA then
                table.insert(ret, indent .. v._text)
            elseif v._type == XML_DOM_TYPE_PI then
                table.insert(ret, indent .. '<?' .. v._name .. serialiseAttr(v._attr) .. '?>')
            end
        end

        if count ~= 0 then
            table.insert(ret, string.rep(' ', (level - 1) * 4) .. '</' .. tbl._name .. '>')
        end

        return table.concat(ret, '\n')

    -- Tree table serialiser. More versatile but rather unstable.
    else
        level = level or -2
        local first_level = level
        local indent = string.rep(' ', level * 4)
        local ret = level == -2 and name and  { '<' .. name .. serialiseAttr(tbl._attr) .. '>' } or {}
        -- Cleared so the loop below does not emit `_attr` as a child tag.
        tbl._attr = nil

        for k, v in pairs(tbl) do
            if type(v) == 'table' then
                -- If the keys of the table are a number, it represents an array.
                if type(k) == 'number' then
                    local attrs = serialiseAttr(v._attr)
                    v._attr = nil
                    table.insert(ret, indent .. '<' .. name .. attrs .. '>\n' .. p.serialise(v, level + 1, name) .. '\n' .. indent .. '</' .. name .. '>')

                -- If not, the children tags are all single nodes of different types.
                else
                    level = level + 1
                    if type(getFirstKey(v)) == 'number' then
                       table.insert(ret, indent .. p.serialise(v, level, k))
                    else
                       local attrs = serialiseAttr(v._attr)
                       v._attr = nil
                       table.insert(ret, indent .. '<' .. k .. attrs .. '>\n' .. p.serialise(v, level + 1, k) .. '\n' .. indent .. '</' .. k .. '>')
                    end
                end
            else
                table.insert(ret, indent .. '<' .. k .. '>' .. tostring(v) .. '</' .. k .. '>')
            end
        end

        if name and first_level == -2 then
            table.insert(ret, '</' .. name .. '>\n')
        end

        return table.concat(ret, '\n')
    end
end

--- Loads an XML file from a specified path.
--  If the file is in the Module namespace (828) or the path starts with
--  `Dev:`, the loader requires it as a Lua module and expects a string
--  back. Otherwise, the loader fetches the page's raw text and removes
--  one leading shebang line, `//` comment line and `/* */` comment.
--  @function           p.load
--  @param              {string} filepath XML file target path (including
--                      namespace).
--  @error[784]         'file "$filepath" does not contain XML'
--                       * `filepath` is not a string or not a valid
--                      title.
--                       * The page `filepath` does not exist.
--                       * The module `filepath` does not exist or does
--                      not export a string.
--  @return             {string} The trimmed contents of the XML file.
function p.load(filepath)
    local content

    if type(filepath) == 'string' then
        -- `mw.title.new` yields nil for an invalid title.
        local title = mw.title.new(filepath)

        if (title and title.namespace == 828) or filepath:find('^Dev:') then
            local ok, result = pcall(require, filepath)
            -- On failure `result` is the error message, not file content.
            content = ok and result or nil
        elseif title then
            -- nil when the page does not exist.
            content = title:getContent()
            if type(content) == 'string' then
                content = content
                    :gsub('^%s*#![^\n]*\n', '')  -- shebang
                    :gsub('^%s*//[^\n]*\n', '')  -- inline non-HTML comment
                    :gsub('^%s*/%*[^/]*/\n', '') -- multiline non-HTML comment
            end
        end
    end

    if type(content) == 'string' then
        return mw.text.trim(content)
    end
    error(i18n:msg('error-file-load', filepath or ''))
end

--- Instantiates a @{XmlParser} object to parse a XML string.
--  @function           p.parser
--  @param              {table} handler Handler object to be used to
--                      convert the XML string to another format,
--                      usually from @{xml.handlers}.
--  @param[opt]         {table} options Options for parsing XML.
--  @param[opt]         {table} options.stripWS
--                      Strip non-significant whitespace (leading or
--                      trailing) and do not generate events for empty
--                      text elements. Default: `true`.
--  @param[opt]         {table} options.stripWS
--  @param[opt]         {table} options.expandEntities 
--                      Expand entities (standard entities and single
--                      character  numeric entities only currently -
--                      could be extended at runtime if a suitable DTD
--                      parser added elements to the table (see
--                      `XMLParser._ENTITIES`). May also be possible to
--                      expand multibyre entities for UTF-8 only.
--                      Default: `true`.
--  @param[opt]         {table} options.errorHandler
--                      Custom error handler function.
--  @return             An XML parser instance used to parse the XML.
function p.parser(handler, options)
    -- NOTE(review): `xml` is not defined in this part of the file;
    -- presumably it refers to the module table so that a `:` method call
    -- is rejected. Confirm against the top of the module.
    if handler == xml then
        error(i18n:msg('error-parser-call'))
    end

    local opts = options or {}

    -- Both flags become `true` when absent; a supplied value is passed
    -- through `yesno` (with `false` as its second argument).
    if opts.stripWS == nil then
        opts.stripWS = true
    else
        opts.stripWS = yesno(opts.stripWS, false)
    end

    if opts.expandEntities == nil then
        opts.expandEntities = true
    else
        opts.expandEntities = yesno(opts.expandEntities, false)
    end

    -- Anything that is not a function is replaced by the default handler.
    if type(opts.errorHandler) ~= 'function' then
        opts.errorHandler = defaultErrorHandler
    end

    return XmlParser.new(handler, opts)
end

--- Handler object, used to generate parser output.
--  @type               Handler

--- Instantiates a new handler object.
--  Each instance can handle a single XML string.
--  By using such a constructor, you can parse multiple XML files in
--  the same application.
--  @function           Handler:new
--  @param[opt]         {table} options Handler configuration options.
--  @return             {Handler} Handler object instance.
--  @note               This method is not available in
--                      @{xml.handlers.Print}.

--- Parses a start tag.
--  @function           Handler:starttag
--  @param              {table}  tag A table describing the opening tag
--                      and its attribute nodes.
--  @param              {string} tag.name The name of the tag.
--  @param              {table}  tag.attrs The attribute nodes of the tag.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses an end tag.
--  @function           Handler:endtag
--  @param              {table}  tag A table describing the closing tag
--                      and its attribute nodes.
--  @param              {string} tag.name The name of the tag.
--  @param              {table}  tag.attrs The attribute nodes of the tag.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses the text content of a tag.
--  @function           Handler:text
--  @param              {string} text Text content to process.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses a comment tag.
--  @function           Handler:comment
--  @param              {string} text Comment text to process.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses an XML processing instruction (PI) tag.
--  @function           Handler:pi
--  @param              {table}  tag A table describing the opening tag
--                      and its attribute nodes.
--  @param              {string} tag.name The name of the tag.
--  @param              {table}  tag.attrs The attribute nodes of the tag.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parse the XML declaration line (indicating the XML version).
--  @function           Handler:decl
--  @param              {table}  tag A table describing the opening tag
--                      and its attribute nodes.
--  @param              {string} tag.name The name of the tag.
--  @param              {table}  tag.attrs The attribute nodes of the tag.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses a DTD tag.
--  @function           Handler:dtd
--  @param              {table}  tag A table describing the opening tag
--                      and its attribute nodes.
--  @param              {string} tag.name The name of the tag.
--  @param              {table}  tag.attrs The attribute nodes of the tag.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- Parses a CDATA section.
--  @function           Handler:cdata
--  @param              {string} text Text content to process.
--  @param[opt]         {number} s Start index of match.
--  @param[opt]         {number} e End index of match.

--- XML handlers for conversion logic in the @{xml.parser|XML parser}.
--  @table              p.handlers
p.handlers = {} -- filled in below with the DOM, Tree and Print handlers

--- @{Handler} to generate a DOM-like node tree structure.
--  The tree structure has a single ROOT node parent, and is capable of
--  representing any valid XML document.
--  Each node is a table comprising the fields below:
--   * `_name` - element name (string)
--   * `_type` - any of `'ROOT'`, `'ELEMENT'`, `'TEXT'`,
--  `'COMMENT'`, `'PI'`, `'DECL'`, `'DTD'` (string)
--     ** `PI` - XML Processing Instruction tag.
--     ** `DECL` - XML declaration tag
--   * `_attr` - node attributes - see callback API (table)
--   * `_parent` - parent node (table)
--   * `_children` - child nodes (table)
--  @table              p.handlers.DOM
p.handlers.DOM = initDOMHandler()
-- `DOM:new` installs its receiver as the metatable of each new instance,
-- so `__index` must point back at this table for method lookups to work.
p.handlers.DOM.__index = p.handlers.DOM

-- Start-tag event: creates an ELEMENT node for `tag`, attaches it to the
-- node currently being filled and makes it the new insertion point.
function p.handlers.DOM:starttag(tag)
    local element = {
        _type = XML_DOM_TYPE_ELEMENT,
        _name = tag.name,
        _attr = tag.attrs,
        _children = { n = 0 },
    }

    -- The first element seen becomes the root when none is set yet.
    if self.root == nil then
        self.root = element
    end

    local stack = self._stack
    table.insert(stack, element)

    -- Without an active node, fall back to the top of the stack, which at
    -- this point is the element that was just pushed.
    local parent = self.current or stack[#stack]
    self.current = parent
    table.insert(parent._children, element)
    self.current = element
end

-- End-tag event: checks that `tag` closes the node on top of the stack,
-- pops that node and moves the insertion point to the new stack top.
--   tag  table describing the closing tag; `tag.name` is compared with
--        the `_name` of the open node.
--   s    optional start index of the match, used only in the error text.
-- Raises the `error-parsing-unmatched` message when the names differ or
-- when no open node is left on the stack.
function p.handlers.DOM:endtag(tag, s)
    local stack = self._stack
    -- Container tag node for current tag.
    local open = stack[#stack]

    if open == nil or tag.name ~= open._name then
        -- `s` is optional: only prefix the position when it is known, as
        -- concatenating nil would hide the real parsing error.
        local name = tostring(tag.name)
        local where = s ~= nil and (s .. ':' .. name) or name
        error(i18n:msg('error-parsing-unmatched', where))
    end

    table.remove(stack)
    self.current = stack[#stack]
end

-- Text event: appends a TEXT node holding `text` to the current node.
function p.handlers.DOM:text(text)
    local leaf = {
        _type = XML_DOM_TYPE_TEXT,
        _text = text,
    }
    table.insert(self.current._children, leaf)
end

-- Comment event: appends a COMMENT node to the current node, but only
-- when the handler's `commentNode` option is truthy.
function p.handlers.DOM:comment(text)
    if self.options.commentNode then
        local remark = {
            _type = XML_DOM_TYPE_COMMENT,
            _text = text,
        }
        table.insert(self.current._children, remark)
    end
end

-- Processing-instruction event: appends a PI node (name and attributes
-- taken from `tag`) when the handler's `piNode` option is truthy.
function p.handlers.DOM:pi(tag)
    if self.options.piNode then
        local instruction = {
            _type = XML_DOM_TYPE_PI,
            _name = tag.name,
            _attr = tag.attrs,
        }
        table.insert(self.current._children, instruction)
    end
end

-- XML-declaration event: appends a DECL node (name and attributes taken
-- from `tag`) when the handler's `declNode` option is truthy.
function p.handlers.DOM:decl(tag)
    if self.options.declNode then
        local declaration = {
            _type = XML_DOM_TYPE_DECL,
            _name = tag.name,
            _attr = tag.attrs,
        }
        table.insert(self.current._children, declaration)
    end
end

-- DTD event: appends a DTD node (name and attributes taken from `tag`)
-- when the handler's `dtdNode` option is truthy.
function p.handlers.DOM:dtd(tag)
    if self.options.dtdNode then
        local doctype = {
            _type = XML_DOM_TYPE_DTD,
            _name = tag.name,
            _attr = tag.attrs,
        }
        table.insert(self.current._children, doctype)
    end
end

-- CDATA event: stores the section as a TEXT node, re-wrapped in its
-- `<![CDATA[ ... ]]>` markers.
function p.handlers.DOM:cdata(section)
    local wrapped = '<![CDATA[' .. section .. ']]>'
    local leaf = {
        _type = XML_DOM_TYPE_TEXT,
        _text = wrapped,
    }
    table.insert(self.current._children, leaf)
end

--- Instantiates a new DOM handler.
--  @function           p.handlers.DOM:new
--  @param              {table} options Handler options for parsing.
--  @param[opt]         {boolean} options.commentNode
--                      Whether to include comment nodes. Default: `true`.
--  @param[opt]         {boolean} options.piNode
--                      Whether to include processing instruction nodes.
--                      Default: `true`.
--  @param[opt]         {boolean} options.dtdNode
--                      Whether to include DTD declaration nodes. Default:
--                      `true`.
--  @param[opt]         {boolean} options.declNode
--                      Whether to include XML declaration nodes. Default:
--                      `true`.
--  @constructor
function p.handlers.DOM:new(options)
    -- Fresh handler state; methods are resolved through the receiver,
    -- which is installed as the instance's metatable.
    local instance = initDOMHandler(options)
    instance.__index = self
    return setmetatable(instance, self)
end

--- @{Handler} to generate a natural table-based tree.
--  
--  This handler supports many XML formats. The XML structure tree is
--  mapped into a recursive map of node names to child elements (as a
--  string representing text, or a table of values).
--  
--  Where there is only a single child element this is inserted as a
--  named key. If there are multiple elements, these are inserted as
--  an array element (in some cases it may be preferable to always
--  insert elements as an array element which can be specified on a
--  per element basis in the options). Attributes are inserted as a
--  child element with a key of `'_attr'`.
--  
--  In general, this format is relatively useful, despite the following
--  limitations:
--   * Tag/text & CDATA elements are processed - all others are
--  ignored.
--   * `Mixed-Content` XML behaves unpredictably.
--   * If a leaf element has both a text element and attributes, the
--  text must be accessed through an array element (to provide a
--  container for the attribute).
--  @table              p.handlers.Tree
p.handlers.Tree = initTreeHandler()
-- `Tree:new` installs its receiver as the metatable of each new instance,
-- so `__index` must point back at this table for method lookups to work.
p.handlers.Tree.__index = p.handlers.Tree

-- Recursively collapses single-item tables of the tree in place.
-- Child tables are processed first. A table holding exactly one array
-- item is then replaced in `parent[key]` by that item, unless `key` is
-- listed in `options.noreduce` or the table carries attributes (`_attr`).
-- Tables that are kept lose the numeric `n` counter that `starttag`
-- stores on child lists.
--   node    table to reduce
--   key     key under which `node` is stored in `parent` (nil for root)
--   parent  table containing `node` (nil for root)
function p.handlers.Tree:reduce(node, key, parent)
    for k, v in pairs(node) do
        -- Attribute maps are data, not tree structure: descending into
        -- them would strip an attribute that happens to be called `n`.
        if k ~= '_attr' and type(v) == 'table' then
            self:reduce(v, k, node)
        end
    end

    if parent ~= nil
        and #node == 1
        and not self.options.noreduce[key]
        and node._attr == nil
    then
        parent[key] = node[1]
    elseif type(node.n) == 'number' then
        -- Only the numeric counter is dropped; a child element named `n`
        -- is a table or string and must be preserved.
        node.n = nil
    end
end

-- Start-tag event: creates a table for the new element, files it under
-- its tag name in the element on top of the stack, then pushes it.
function p.handlers.Tree:starttag(tag)
    local stack = self._stack
    local element = {}

    -- Attributes are kept only when `parseAttributes` is exactly `true`.
    if self.parseAttributes == true then
        element._attr = tag.attrs
    end

    -- Table in the stack representing the tag being processed.
    local parent = stack[#stack]
    local siblings = parent[tag.name]

    if siblings then
        table.insert(siblings, element)
    else
        -- First child with this name: start a new list for it.
        parent[tag.name] = { element; n = 1 }
    end

    table.insert(stack, element)
end

-- End-tag event: verifies that the container of the element being closed
-- has an entry for `tag.name`, reduces the whole tree once the container
-- is the root, and pops the closed element from the stack.
--   tag  table describing the closing tag (`tag.name` is used)
--   s    optional start index of the match, used only in the error text
-- Raises the `error-parsing-unmatched` message when there is no container
-- or the container has no entry for this tag name.
function p.handlers.Tree:endtag(tag, s)
    local stack = self._stack
    -- Container for processed tag node in stack (one below the top).
    local container = stack[#stack - 1]

    if not container or not container[tag.name] then
        -- `s` is optional: only prefix the position when it is known, as
        -- concatenating nil would hide the real parsing error.
        local name = tostring(tag.name)
        local where = s ~= nil and (s .. ':' .. name) or name
        error(i18n:msg('error-parsing-unmatched', where))
    end

    if container == self.root then
        -- Once parsing is complete, recursively reduce tree.
        self:reduce(container, nil, nil)
    end

    table.remove(stack)
end

-- Text event: appends `text` to the element on top of the stack.
function p.handlers.Tree:text(text)
    local stack = self._stack
    table.insert(stack[#stack], text)
end

-- CDATA event: appends the section, re-wrapped in its `<![CDATA[ ... ]]>`
-- markers, to the element on top of the stack.
function p.handlers.Tree:cdata(section)
    local stack = self._stack
    local wrapped = '<![CDATA[' .. section .. ']]>'
    table.insert(stack[#stack], wrapped)
end

--- Instantiates a new tree handler.
--  @function           p.handlers.Tree:new
--  @param              {table} options Handler options for parsing.
--  @param[opt]         {table} options.noreduce Boolean map of tag
--                      names that node children elements will not be
--                      reduced for even if there is only one child.
--  @return             {Handler} Tree handler instance.
--  @constructor
function p.handlers.Tree:new(options)
    -- Fresh handler state; methods are resolved through the receiver,
    -- which is installed as the instance's metatable.
    local instance = initTreeHandler(options)
    instance.__index = self
    return setmetatable(instance, self)
end

--- @{Handler} to generate simple event tracing during parsing.
--  Outputs messages to the Scribunto console during the parse
--  process, usually for debugging purposes.
--  @table              p.handlers.Print
p.handlers.Print = initPrintHandler()
-- `Print:new` installs its receiver as the metatable of each new instance,
-- so `__index` must point back at this table for method lookups to work.
p.handlers.Print.__index = p.handlers.Print

-- Writes `message` to the Scribunto console via `mw.log` and accumulates
-- it, followed by a newline, in `self.root`.
function p.handlers.Print:log(message)
    local previous = self.root
    if previous ~= nil then
        self.root = previous .. message
    else
        self.root = message
    end

    mw.log(message)

    -- Terminate the accumulated entry so the next one starts on a new line.
    self.root = self.root .. '\n'
end

-- Start-tag event: logs the tag name followed by one line per attribute.
--   tag  table describing the tag; `tag.name` is logged and `tag.attrs`,
--        when present, is iterated as a map of attribute name to value.
--   s,e  optional match indices (unused here).
function p.handlers.Print:starttag(tag, s, e)
    local message = 'Start    : ' .. tag.name
    if tag.attrs then
        for k, v in pairs(tag.attrs) do
            message = message .. '\n' .. string.format(' + %s="%s"', k, v)
        end
    end
    self:log(message)
end

-- End-tag event: logs the name of the closing tag.
function p.handlers.Print:endtag(tag, s, e)
    local line = 'End      : ' .. tag.name
    self:log(line)
end

-- Text event: logs the text content.
function p.handlers.Print:text(text, s, e)
    local line = 'Text     : ' .. text
    self:log(line)
end

-- CDATA event: logs the content of the section.
function p.handlers.Print:cdata(text, s, e)
    local line = 'CDATA    : ' .. text
    self:log(line)
end

-- Comment event: logs the comment text.
function p.handlers.Print:comment(text, s, e)
    local line = 'Comment  : ' .. text
    self:log(line)
end

-- DTD event: logs the tag name followed by one line per attribute.
function p.handlers.Print:dtd(tag, s, e)
    local lines = { 'DTD      : ' .. tag.name }
    if tag.attrs then
        for attr, value in pairs(tag.attrs) do
            lines[#lines + 1] = string.format(' + %s="%s"', attr, value)
        end
    end
    self:log(table.concat(lines, '\n'))
end

-- Processing-instruction event: logs the tag name followed by one line
-- per attribute.
function p.handlers.Print:pi(tag, s, e)
    local lines = { 'PI       : ' .. tag.name }
    if tag.attrs then
        for attr, value in pairs(tag.attrs) do
            lines[#lines + 1] = string.format(' + %s="%s"', attr, value)
        end
    end
    self:log(table.concat(lines, '\n'))
end

-- XML-declaration event: logs the tag name followed by one line per
-- attribute.
function p.handlers.Print:decl(tag, s, e)
    local lines = { 'XML Decl : ' .. tag.name }
    if tag.attrs then
        for attr, value in pairs(tag.attrs) do
            lines[#lines + 1] = string.format(' + %s="%s"', attr, value)
        end
    end
    self:log(table.concat(lines, '\n'))
end

--- Instantiates a new Print handler.
--  @function           p.handlers.Print:new
--  @param              {table} options Handler options for parsing.
--  @param[opt]         {boolean} options.commentNode
--                      Whether to include comment nodes. Default: `true`.
--  @param[opt]         {boolean} options.piNode
--                      Whether to include processing instruction nodes.
--                      Default: `true`.
--  @param[opt]         {boolean} options.dtdNode
--                      Whether to include DTD declaration nodes. Default:
--                      `true`.
--  @param[opt]         {boolean} options.declNode
--                      Whether to include XML declaration nodes. Default:
--                      `true`.
--  @constructor
function p.handlers.Print:new(options)
    -- Fresh handler state; methods are resolved through the receiver,
    -- which is installed as the instance's metatable.
    local instance = initPrintHandler(options)
    instance.__index = self
    return setmetatable(instance, self)
end

-- Export the module table.
return p