xmlutils

package module

v0.0.0-...-00276e6 Latest Latest Go to latest Published: Feb 2, 2025 License: MIT Imports: 13 Imported by: 9

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/fbaube/xmlutils

README ¶

`package github.com/fbaube/xmlutils`

Low-level XML data structures and code for processing XML mixed content.

Documentation ¶

Overview ¶

Package xmlutils mostly provides content analysis for XML files, both with and without a DOCTYPE.

Index ¶

Variables
func DoParseRaw_xml(s string) (xtokens []CT.CToken, err error)
func DoParse_xml(s string) (xtokens []CT.CToken, err error)
func DoParse_xml_locationAware(s string) (xtokens []CT.LAToken, err error)
func NewConfiguredDecoder(r io.Reader) *xml.Decoder
type CommonCPR
- func NewCommonCPR() *CommonCPR
- func (p *CommonCPR) AsString(i int) string
type ContentityBasics
- func (p *ContentityBasics) CheckTopTags() (bool, string)
- func (p *ContentityBasics) HasNone() bool
- func (p *ContentityBasics) SetToNonXml(L int)
type ContypingInfo
- func (p ContypingInfo) MultilineString() (s string)
- func (pC *ContypingInfo) ParseDoctype(sRaw CT.Raw) (*ParsedDoctype, error)
- func (p ContypingInfo) String() (s string)
type DitaContype
type DitaFlavor
type DoctypeMType
type KeyElmTriplet
- func GetKeyElmTriplet(localName string) *KeyElmTriplet
type MType
type NS
type NSsnapshot
type PIDFPIfields
- func (p PIDFPIfields) String() string
type PIDSIDcatalogFileRecord
- func NewPIDSIDcatalogFileRecord(pid string, sid string) (*PIDSIDcatalogFileRecord, error)
- func NewSIDPIDcatalogRecordfromStartTag(ct CT.CToken) (pID *PIDSIDcatalogFileRecord, err error)
- func (p PIDSIDcatalogFileRecord) DString() string
- func (p PIDSIDcatalogFileRecord) Echo() string
- func (p *PIDSIDcatalogFileRecord) HasPID() bool
- func (p *PIDSIDcatalogFileRecord) HasSID() bool
- func (p PIDSIDcatalogFileRecord) String() string
type ParsedDoctype
type ParsedPreamble
- func ParsePreamble(sRaw CT.Raw) (*ParsedPreamble, error)
type ParserResults_xml
- func GenerateParserResults_xml(s string) (*ParserResults_xml, error)
- func (p *ParserResults_xml) NodeCount() int
- func (p *ParserResults_xml) NodeDebug(i int) string
- func (p *ParserResults_xml) NodeEcho(i int) string
- func (p *ParserResults_xml) NodeInfo(i int) string
type SliceBounds
type XmlCatalogFile
- func NewXmlCatalogFile(fpath string) (pXC *XmlCatalogFile, err error)
- func (p *XmlCatalogFile) GetByPublicID(s string) *PIDSIDcatalogFileRecord
- func (p *XmlCatalogFile) Validate() (retval bool)
type XmlContype
type XmlDoctype
type XmlPeek
- func Peek_xml(content string) (*XmlPeek, error)
type XmlPublicID
type XmlSystemID

Constants ¶

This section is empty.

Variables ¶

View Source

var DITArootElms = []string{
	"topic", "concept", "reference", "task", "bookmap",
	"map", "glossentry", "glossgroup"}

DITArootElms are all the XML root elements that can be classified as DITA-type. Note that LwDITA uses only "topic".

View Source

var DITAtypeFileExtensions = []string{".dita", ".ditamap", ".ditaval"}

DITAtypeFileExtensions are all the file extensions that are automatically classified as being DITA-type.

View Source

var DTDtypeFileExtensions = []string{".dtd", ".mod", ".ent"}

DTDtypeFileExtensions are all the file extensions that are automatically classified as being DTD-type.

View Source

var DTMTmap = []DoctypeMType{

	{"html", "html/cnt/html5", "html", false, true},

	{"//DTD LIGHTWEIGHT DITA Topic//", "xml/cnt/topic", "topic", true, true},
	{"//DTD LW DITA Topic//", "xml/cnt/topic", "topic", true, true},
	{"//DTD XDITA Topic//", "html/cnt/topic", "topic", true, true},

	{"//DTD LIGHTWEIGHT DITA Map//", "xml/map/---", "map", true, true},
	{"//DTD LW DITA Map//", "xml/map/---", "map", true, true},
	{"//DTD XDITA Map//", "html/map/---", "map", true, true},

	{"//DTD DITA Concept//", "xml/cnt/concept", "concept", false, false},
	{"//DTD DITA Topic//", "xml/cnt/topic", "topic", false, false},
	{"//DTD DITA Task//", "xml/cnt/task", "task", false, false},

	{"//DTD HTML 4.", "html/cnt/html4", "html", false, false},
	{"//DTD XHTML 1.0 ", "html/cnt/xhtml1.0", "html", false, false},
	{"//DTD XHTML 1.1//", "html/cnt/xhtml1.1", "html", false, false},
	{"//DTD MathML 2.0//", "html/cnt/mathml", "", false, false},
	{"//DTD SVG 1.0//", "xml/img/svg1.0", "svg", false, false},
	{"//DTD SVG 1.1", "xml/img/svg", "svg", false, false},
	{"//DTD XHTML Basic 1.1//", "html/cnt/topic", "html", false, false},
	{"//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//", "html/cnt/blarg",
		"html", false, false},
}

DTMTmap maps DOCTYPEs to MTypes (and: Is it LwDITA ?). This list should suffice for all ordinary XML files (except of course Docbook).

View Source

var DitaContypes = []DitaContype{"Map", "Bookmap", "Topic", "Task", "Concept",
	"Reference", "Dita", "Glossary", "Conrefs", "LwMap", "LwTopic"}

DitaContypes - see DitaContype.

View Source

var DitaFlavors = []DitaFlavor{"1.2", "1.3", "XDITA", "HDITA", "MDATA"}

DitaFlavors - see DitaFlavor.

View Source

var HtmlKeyContentElms = []string{"main", "content"}

HtmlKeyContentElms are elements that often surround the actual page content.

View Source

var HtmlSectioningContentElms = []string{"article", "aside", "nav", "section"}

HtmlSectioningContentElms have internal sections and subsections.

View Source

var HtmlSectioningRootElms = []string{
	"blockquote", "body", "details", "dialog", "fieldset", "figure", "td"}

HtmlSectioningRootElms have their OWN outlines, separate from the outlines of their ancestors, i.e. self-contained hierarchies.

View Source

var HtmlSelfClosingTags = []string{
	"area", "base", "br", "col", "command", "embed", "hr", "img", "input",
	"keygen", "link", "meta", "param", "source", "track", "wbr",
}

HtmlSelfClosingTags are tags that do not need closing tags.

View Source

var KeyElmTriplets = []*KeyElmTriplet{

	{"html", "head", "body"},
	{"topic", "prolog", "body"},
	{"map", "topicmeta", ""},
	{"reference", "", ""},
	{"task", "", ""},
	{"bookmap", "", ""},
	{"glossentry", "", ""},
	{"glossgroup", "", ""},
	{"meta", "", ""},
}

KeyElmTriplets are for HTML5, LwDITA, and DITA.

View Source

var MarkdownFileExtensions = []string{".md", ".mdown", ".markdown", ".mkdn"}

MarkdownFileExtensions are all the file extensions that are automatically classified as being Markdown-type, even though we generally use a regex instead.

View Source

var MiscFileExtensions = []string{".sqlar"}

MiscFileExtensions are all the other file extensions that we want to process.

View Source

var NS_OASIS_XML_CATALOG = "urn:oasis:names:tc:entity:xmlns:xml:catalog:"

NS_OASIS_XML_CATALOG is the OASIS namespace (as URN) for XML catalogs.

View Source

var NS_XML = "http://www.w3.org/XML/1998/namespace"

NS_XML is the XML namespace.

View Source

var STD_PREAMBLE CT.Raw = xml.Header

STD_PREAMBLE is "<?xml version="1.0" encoding="UTF-8"?>" + "\n"

View Source

var XML_NS_Recognized = []string{
	"lang", "space", "base", "id", "Father"}

View Source

var XmlContypes = []XmlContype{"Unknown", "DTD", "DTDmod", "DTDent",
	"RootTagData", "RootTagMixedContent", "MultipleRootTags", "INVALID"}

XmlContypes categorise an XML file by structure and content. NOTE: Maybe DTDmod should be DTDelms.

Functions ¶

func DoParse_xml ¶

func DoParse_xml(s string) (xtokens []CT.CToken, err error)

DoParse_xml takes a string, so we can assume that we can discard it after use because the caller has another copy of it. To be safe, it copies every token using `xml.CopyToken(T)`.

func DoParse_xml_locationAware ¶

func DoParse_xml_locationAware(s string) (xtokens []CT.LAToken, err error)

DoParse_xml_locationAware is TBS.

func NewConfiguredDecoder ¶

func NewConfiguredDecoder(r io.Reader) *xml.Decoder

NewConfiguredDecoder returns a new xml.Decoder that has been configured with non-strict namespace parsing, HTML auto-closing tags, and HTML entities.

Types ¶

type CommonCPR ¶

type CommonCPR struct {
	NodeDepths []int
	FilePosns  []*CT.FilePosition
	CPR_raw    string
	// Writer is usually the GTokens Writer
	io.Writer
}

CommonCPR is Concrete Parse Results common to all formats processed.

func (*CommonCPR) AsString ¶

func (p *CommonCPR) AsString(i int) string

AsString TODO should probably be renamed to String.

type ContentityBasics ¶

type ContentityBasics struct {
	// XmlRoot is not meaningful for non-XML
	XmlRoot CT.Span
	Text    CT.Span
	Meta    CT.Span
	// MetaFormat is "YAML" or "XML"
	MetaFormat string
	// MetaProps uses dot separators if hierarchy is needed
	MetaProps SU.PropSet
}

ContentityBasics describes the top-level structure of a Contentity (content entity). It has XmlRoot, Text, Meta, MetaProps, and is embedded in struct XmlPeek.

func (*ContentityBasics) CheckTopTags ¶

func (p *ContentityBasics) CheckTopTags() (bool, string)

CheckTopTags returns true if a root element was found, plus a message about any missing top-level constructs; the message can also carry warnings.

func (*ContentityBasics) HasNone ¶

func (p *ContentityBasics) HasNone() bool

HasNone returns false if no expected top-level structure is found.

func (*ContentityBasics) SetToNonXml ¶

func (p *ContentityBasics) SetToNonXml(L int)

SetToNonXml just needs the length of the content, and sets no useful information about deeper structure.

type ContypingInfo ¶

type ContypingInfo struct {
	FileExt         string
	MimeType        string
	MimeTypeAsSnift string
	MType           string
}

ContypingInfo has simple fields related to typing content (i.e. determining its type).

func (ContypingInfo) MultilineString ¶

func (p ContypingInfo) MultilineString() (s string)

MultilineString should probably be renamed to Debug, in implementing [Stringser].

func (*ContypingInfo) ParseDoctype ¶

func (pC *ContypingInfo) ParseDoctype(sRaw CT.Raw) (*ParsedDoctype, error)

ParseDoctype should probably NOT be a method on ContypingInfo.

It expects to receive (a file extension) plus (a content type as determined by the HTTP stdlib). However, a DOCTYPE is always considered authoritative, so this func can ignore things like the file extension, and overwrite or set any field it wants to.

It works by first trying to match the DOCTYPE against a list. If that fails, stronger measures are called for.

Note two things about this function:

Firstly, it can handle PID, SID, or both:
<!DOCTYPE topic PUBLIC "-//OASIS//DTD LWDITA Topic//EN"> <!DOCTYPE topic PUBLIC "-//OASIS//DTD LWDITA Topic//EN" "./foo.dtd"> <!DOCTYPE topic SYSTEM "./foo.dtd">
Secondly, it can handle a less-than-complete declaration:
DOCTYPE topic PUBLIC "-//OASIS//DTD LWDITA Topic//EN" (and variations) topic PUBLIC "-//OASIS//DTD LWDITA Topic//EN" (and variations) PUBLIC "-//OASIS//DTD LWDITA Topic//EN" (and variations)

The last one is quite important because it is the format that appears in XML catalog files.

func (ContypingInfo) String ¶

func (p ContypingInfo) String() (s string)

type DitaContype ¶

type DitaContype string

DitaContype is a [Lw]DITA Topic, Map, etc. See DitaContypes.

type DitaFlavor ¶

type DitaFlavor string

DitaFlavor is a [Lw]DITA flavor. See DitaFlavors.

type DoctypeMType ¶

type DoctypeMType struct {
	ToMatch       string
	DoctypesMType string
	RootElm       string
	IsLwDITA      bool
	// LwDITA, HTML5, and not much more (if any)
	IsInScope bool
}

DoctypeMType maps a DOCTYPE string to an MType string and a bool (is it LwDITA?).

type KeyElmTriplet ¶

type KeyElmTriplet struct {
	Name string
	Meta string
	Text string
}

KeyElmTriplet is a set of tags that appear together in a well-known content format.

func GetKeyElmTriplet ¶

func GetKeyElmTriplet(localName string) *KeyElmTriplet

GetKeyElmTriplet uses an XML localName to retrieve the other elements that are expected.

type MType ¶

type MType string

An MType is specific to this app and/but is modeled after the prior concept of Mime-type. An MType has three fields.

Its value is generally based on two to four inputs:

The Mime-type guess returned by Go stdlib func net/http.DetectContentType(data []byte) string (which is based on https://mimesniff.spec.whatwg.org/ ) (The no-op default return value is "application/octet-stream")
Our own shallow analysis of file contents
The file extension (it is normally present)
The DOCTYPE (iff XML, incl. HTML)

Note that

a plain text file MAY be presumed to be Markdown, although it is not clear (yet) which (TXT or MKDN) should take precedence.
a Markdown file CAN and WILL be presumed to be LwDITA MDITA; this may cause conflicts/problems for other dialects.
mappings can appear bogus, for example HTTP stdlib "text/html" might become MType "xml/html".

String possibilities (but in LOWER CASE!) in each field:

[0] XML, HTML, BIN, TXT, MKDN, (new!) DIRLIKE (i.e. non-contentful)
We might (or not) keep XML and HTML distinct for a number of reasons, but partly because in the Go stdlib, they are processed quite differently, and we take advantage of it to keep HTML processing free of nasty surprises and unhelpful strictness.
We might (or might not) keep MKDN distinct from TXT
[1] CNT (Content), MAP (ToC), IMG, SCH(ema) [and maybe others TBD?]
[2] Depends on [0]: XML: per-DTD [and/or Pub-ID/filext]; HTML: per-DTD [and/or Pub-ID/filext]; BIN: format/filext; SCH: format/filext [DTD,MOD,XSD,wotevs]; TXT: TBD MKDN: flavor of Markdown (?) (note also AsciiDoc, RST, ...) DIRLIKE: dir, symlink, pipe, socket, ...?

Possible FIXME: Let [2] (3rd) be version info (html5, lwdita, dita13) and then keep root tag info separately.

Possible FIXME: Append version info, probably after a semicolon.

type NS ¶

type NS struct {
	// Prefix is the shorthand version.
	Prefix string
	// URI is the full version.
	URI string
}

NS is e.g. { "xml", "http://www.w3.org/XML/1998/namespace" }

type NSsnapshot ¶

type NSsnapshot struct {
	Default NS
	Others  []NS
}

One of these has to be filled in for the NS declarations at the top of a content file. Also this can describe the NS state at any point in parsing or traversing a content tree.

type PIDFPIfields ¶

type PIDFPIfields struct {
	// Registration is "+" or "-"
	Registration string
	// IsOasis but if not, then could be any of many others
	IsOasis bool
	// Organization is "OASIS" or maybe something else
	Organization string
	// PublicTextClass is typically "DTD" (filename.dtd)
	// or "ELEMENTS" (filename.mod)
	PublicTextClass string
	// PublicTextDesc is the distinguishing string,
	// e.g. PUBLIC "-//OASIS//DTD (_PublicTextDesc_)//EN".
	// It can end with the root tag of the document
	// (e.g. "Topic"). It can have an optional
	// embedded version number, such as "DITA 1.3".
	PublicTextDesc string
}

PIDFPIfields holds the parsed results of a PID (PublicID) a.k.a. Formal Public Identifier, for example "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN"

func (PIDFPIfields) String ¶

func (p PIDFPIfields) String() string

type PIDSIDcatalogFileRecord ¶

type PIDSIDcatalogFileRecord struct {
	// XMLName probably does not ever need to be printed.
	XMLName xml.Name `xml:"public"`
	// XmlPublicID (PID) (FPI) is the DOCTYPE string
	XmlPublicID  `xml:"publicId,attr"`
	PIDFPIfields // PublicID
	// XmlSystemID is the path to the file. Tipicly a relative filepath.
	XmlSystemID `xml:"uri,attr"`
	// The filepath long form, as resolved.
	// Note that we must use a string in order to avoid an import cycle.
	AbsFilePath string // FU.AbsFilePath
	HttpPath    string
	Err         error // in case an entry barfs

}

PIDSIDcatalogFileRecord represents a line item from a parsed XML catalog file. One with a simple structure, such as the catalog file for LwDITA. This same struct is also used to record the PID and/or SID of a DOCTYPE declaration.

func NewPIDSIDcatalogFileRecord ¶

func NewPIDSIDcatalogFileRecord(pid string, sid string) (*PIDSIDcatalogFileRecord, error)

NewPIDSIDcatalogFileRecord is pretty self-explanatory.

func NewSIDPIDcatalogRecordfromStartTag ¶

func NewSIDPIDcatalogRecordfromStartTag(ct CT.CToken) (pID *PIDSIDcatalogFileRecord, err error)

NewSIDPIDcatalogRecordfromStartTag is TBS.

func (PIDSIDcatalogFileRecord) DString ¶

func (p PIDSIDcatalogFileRecord) DString() string

DString returns a comprehensive dump.

func (PIDSIDcatalogFileRecord) Echo ¶

func (p PIDSIDcatalogFileRecord) Echo() string

Echo returns the public ID _unquoted_. <!DOCTYPE topic "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN">

func (*PIDSIDcatalogFileRecord) HasPID ¶

func (p *PIDSIDcatalogFileRecord) HasPID() bool

func (*PIDSIDcatalogFileRecord) HasSID ¶

func (p *PIDSIDcatalogFileRecord) HasSID() bool

func (PIDSIDcatalogFileRecord) String ¶

func (p PIDSIDcatalogFileRecord) String() string

String returns the juicy part. For example, <!DOCTYPE topic "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN"> maps to "DTD LIGHTWEIGHT DITA Topic".

type ParsedDoctype ¶

type ParsedDoctype struct {
	// Raw is the raw Doctype string
	CT.Raw
	// PIDSIDcatalogFileRecord is the PID + SID.
	PIDSIDcatalogFileRecord
	// DTrootElm is the tag declared in the DOCTYPE, which
	// should match the root tag in the text of the file.
	DTrootElm string
	// contains filtered or unexported fields
}

ParsedDoctype is a parse of a complete DOCTYPE declaration. For [Lw]DITA, what interests us is something like

PUBLIC "-//OASIS//DTD (PublicTextDesc)//EN" or sometimes
PUBLIC "-//OASIS//ELEMENTS (PublicTextDesc)//EN" and
maybe followed by SYSTEM...

The structure of a DOCTYPE is like so:

PUBLIC | SYSTEM = Availability
- = Registration = Organization & DTD are not registered with ISO.
OASIS = Organization
DTD = Public Text Class (CAPACITY | CHARSET | DOCUMENT | DTD | ELEMENTS | ENTITIES | LPD | NONSGML | NOTATION | SHORTREF | SUBDOC | SYNTAX | TEXT )
(*) = Public Text Description, incl. any version number
EN = Public Text Language
URL = optional, explicit

We don't include the raw DOCTYPE here because this structure can be optional but we still need to have the Doctype string in the DB as a separate column, even if it is empty (i.e. "").

type ParsedPreamble ¶

type ParsedPreamble struct {
	// Preamble_raw does not include a trailing newline
	Preamble_raw string
	// MinorVersion "0" means XML 1.0
	MinorVersion string
	// Encoding has Valid values and forms TBS
	Encoding string
	// IsStandalone has value "yes" or "no"
	IsStandalone bool
}

ParsedPreamble is a parse of an optional PI (processing instruction) at the start of an XML file. The most typical form is defined in the stdlib:

"<?xml version="1.0" encoding="UTF-8"?>" + "\n"

Here the major version MUST be 1. XML has a version 1.1 but nobody uses it, so also the minor version MUST be 0, because that's what the Go stdlib XML parser understands, and anything else is gonna cause crazy breakage. Fields:

<?xml version="version_number"         <= required, "1.0"
     encoding="encoding_declaration"   <= optional, assume "UTF-8"
   standalone="standalone_status" ?>   <= opt'l, can be "yes", dflt "no"

Probably any errors returned by this function should be panicked on, because any such error is pretty fundamental and also ridiculous. Note also that strictly speaking, an XML preamble is NOT a PI.

var STD_PreambleParsed ParsedPreamble

STD_PreambleParsed is our parse of variable "STD_PREAMBLE".

func ParsePreamble ¶

func ParsePreamble(sRaw CT.Raw) (*ParsedPreamble, error)

ParsePreamble parses an XML preamble, which (BTW) MUST be the first line in a file. XML version MUST be "1.0". Encoding handling is incomplete.

Example: <?xml version="1.0" encoding='UTF-8' standalone="yes"?>
Also OK: xml version="1.0" encoding='UTF-8' standalone="yes"
Also OK: version="1.0" encoding='UTF-8' standalone="yes"
Also OK: fields as documented for struct "ParsedPreamble".

type ParserResults_xml ¶

type ParserResults_xml struct {
	// NodeSlice is driven by the stdlib XML parser in [encoding/xml].
	NodeSlice []CT.CToken // []xml.Token
	CommonCPR
}

ParserResults_xml is TBS.

func GenerateParserResults_xml ¶

func GenerateParserResults_xml(s string) (*ParserResults_xml, error)

GenerateParserResults_xml is TBS.

func (*ParserResults_xml) NodeCount ¶

func (p *ParserResults_xml) NodeCount() int

func (*ParserResults_xml) NodeDebug ¶

func (p *ParserResults_xml) NodeDebug(i int) string

func (*ParserResults_xml) NodeEcho ¶

func (p *ParserResults_xml) NodeEcho(i int) string

func (*ParserResults_xml) NodeInfo ¶

func (p *ParserResults_xml) NodeInfo(i int) string

type SliceBounds ¶

type SliceBounds struct {
	BegIdx, EndIdx int
}

SliceBounds specifies a subslice.

type XmlCatalogFile ¶

type XmlCatalogFile struct {
	XMLName xml.Name `xml:"catalog"`
	// Prefer is "public" or "system"
	Prefer                string                    `xml:"prefer,attr"`
	XmlPublicIDsubrecords []PIDSIDcatalogFileRecord `xml:"public"`
	// AbsFilePath is so we can peel off the directory path
	AbsFilePath string
}

XmlCatalogFile represents a parsed XML catalog file, at the top level.

func NewXmlCatalogFile ¶

func NewXmlCatalogFile(fpath string) (pXC *XmlCatalogFile, err error)

NewXmlCatalogFile is a convenience function that reads in the file and then processes the file contents. It is not clear what the constraints on the path are (but a relative path should work okay).

func (*XmlCatalogFile) GetByPublicID ¶

func (p *XmlCatalogFile) GetByPublicID(s string) *PIDSIDcatalogFileRecord

GetByPublicID retrieves from [XmlPublicIDsubrecords].

func (*XmlCatalogFile) Validate ¶

func (p *XmlCatalogFile) Validate() (retval bool)

Validate validates an XML catalog. It checks that the listed files exist and that the IDs (as strings that are not parsed yet) are well-formed. It assumes that the catalog has already been loaded from an XML catalog file on-disk. The return value is false if _any_ entry fails to load, but also each entry has its own error field.

type XmlContype ¶

type XmlContype string

XmlContype categorizes the XML file. See variable [XmlContypes].

type XmlDoctype ¶

type XmlDoctype string

XmlDoctype is just a DOCTYPE string, for example: <!DOCTYPE html>

type XmlPeek ¶

type XmlPeek struct {
	PreambleRaw CT.Raw // string
	DoctypeRaw  CT.Raw // string
	HasDTDstuff bool
	ContentityBasics
}

XmlPeek is used by [fileutils.AnalyseFile] when preparing a [fileutils.AnalysisRecord]. Note that ContentityBasics has chunks of Raw but not the full "Raw" string.

func Peek_xml ¶

func Peek_xml(content string) (*XmlPeek, error)

Peek_xml takes a string and does the minimum to find XML preamble, DOCTYPE, root element, whether DTD stuff was encountered, and the locations of outer elements containing metadata and body text.

It uses the Go stdlib parser, so success in finding a root element in this function all but guarantees that the string is valid XML.

It is called by [fileutils.AnalyzeFile].

type XmlPublicID ¶

type XmlPublicID string

XmlPublicID = PID = Public ID = FPI = Formal Public Identifier

type XmlSystemID ¶

type XmlSystemID string

XmlSystemID = SID = System ID = URI (Uniform Resource Identifier) (can be a filepath or an HTTP address)

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

README ¶

package github.com/fbaube/xmlutils

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func DoParseRaw_xml ¶

func DoParse_xml ¶

func DoParse_xml_locationAware ¶

func NewConfiguredDecoder ¶

Types ¶

type CommonCPR ¶

func NewCommonCPR ¶

func (*CommonCPR) AsString ¶

type ContentityBasics ¶

func (*ContentityBasics) CheckTopTags ¶

func (*ContentityBasics) HasNone ¶

func (*ContentityBasics) SetToNonXml ¶

type ContypingInfo ¶

func (ContypingInfo) MultilineString ¶

func (*ContypingInfo) ParseDoctype ¶

func (ContypingInfo) String ¶

type DitaContype ¶

type DitaFlavor ¶

type DoctypeMType ¶

type KeyElmTriplet ¶

func GetKeyElmTriplet ¶

type MType ¶

type NS ¶

type NSsnapshot ¶

type PIDFPIfields ¶

func (PIDFPIfields) String ¶

type PIDSIDcatalogFileRecord ¶

func NewPIDSIDcatalogFileRecord ¶

func NewSIDPIDcatalogRecordfromStartTag ¶

func (PIDSIDcatalogFileRecord) DString ¶

func (PIDSIDcatalogFileRecord) Echo ¶

func (*PIDSIDcatalogFileRecord) HasPID ¶

func (*PIDSIDcatalogFileRecord) HasSID ¶

func (PIDSIDcatalogFileRecord) String ¶

type ParsedDoctype ¶

type ParsedPreamble ¶

func ParsePreamble ¶

type ParserResults_xml ¶

func GenerateParserResults_xml ¶

func (*ParserResults_xml) NodeCount ¶

func (*ParserResults_xml) NodeDebug ¶

func (*ParserResults_xml) NodeEcho ¶

func (*ParserResults_xml) NodeInfo ¶

type SliceBounds ¶

type XmlCatalogFile ¶

func NewXmlCatalogFile ¶

func (*XmlCatalogFile) GetByPublicID ¶

func (*XmlCatalogFile) Validate ¶

type XmlContype ¶

type XmlDoctype ¶

type XmlPeek ¶

func Peek_xml ¶

type XmlPublicID ¶

type XmlSystemID ¶

Source Files ¶

`package github.com/fbaube/xmlutils`