<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE spec SYSTEM "spec.dtd" [
<!-- -->
<!ENTITY TR-or-Rec "">
<!-- <!ENTITY TR-or-Rec "(TR)"> -->
<!ENTITY eTR-or-Rec "specification">
<!-- <!ENTITY eTR-or-Rec "technical report(TR)"> -->
<!ENTITY application "">
<!ENTITY error "">
<!ENTITY fatal-error "">
<!ENTITY parsed-data "">
<!ENTITY unparsed-data "">
<!ENTITY parsed-entity "">
<!ENTITY parser "">
<!ENTITY unparsed-entity "">
<!ENTITY well-formed "">
<!ENTITY valid "">
<!ENTITY validity "">
<!ENTITY escape "">
<!ENTITY prolog "">
<!ENTITY surrogate-blocks "">
<!ENTITY letter "">
<!ENTITY ideographic "">
<!ENTITY markup "">
<!ENTITY left-angle-bracket "()">
<!ENTITY right-angle-bracket "()">
<!ENTITY string "">
<!ENTITY char-string ""><!-- string of characters,
character strings, strings,
characters -->
<!ENTITY replacement-text "">
<!ENTITY single-quote "">
<!ENTITY double-quote "">
<!ENTITY adaptations-annex "">
<!ENTITY root "">
<!ENTITY base-character "">
<!ENTITY diacritical-mark ""><!--()-->
<!ENTITY composed-form "">
<!ENTITY standalone "">
<!ENTITY double-hyphen "">
<!--<!ENTITY case-fold "">-->
<!-- <!ENTITY case-fold "">-->
<!ENTITY parameter "">
<!ENTITY stream "">
<!ENTITY validating "">
<!ENTITY non-validating "">
<!ENTITY user "">
<!--<!ENTITY at-user-option "">-->
<!ENTITY at-user-option "">
<!ENTITY content-particle "">
<!ENTITY processor "">
<!ENTITY default "">
<!ENTITY default-value "">
<!ENTITY header "">
<!ENTITY target "">
<!ENTITY mixed-content "">
<!ENTITY country-code "">
<!ENTITY language-code "">
<!ENTITY version ""> <!-- version 1.01.0 -->
<!ENTITY match "">
<!ENTITY character-value "">
<!ENTITY byte-order-mark "">
<!ENTITY bypass "">
<!ENTITY identifier "">
<!-- <!ENTITY identify ""> -->
<!ENTITY identify "">
<!-- <!ENTITY identified ""> -->
<!ENTITY identified "">
<!ENTITY combining-character "">
<!ENTITY subset "">
<!ENTITY token "">
<!ENTITY literal "">
<!ENTITY parenthesis "">
<!ENTITY left-parenthesis "">
<!ENTITY right-parenthesis "">
<!-- JIS X0221 -->
<!ENTITY extender "">
<!ENTITY property "">
<!ENTITY property-list "">
<!ENTITY property-file "">
<!ENTITY font-decomposition "">
<!ENTITY compatibility-decomposition "">
<!ENTITY compatibility-area "">
<!ENTITY language-identification "">
<!ENTITY space-character "">
<!ENTITY space "">
<!ENTITY code-value "">
<!ENTITY normative "">
<!ENTITY hueristics "">
<!ENTITY informative "">
<!ENTITY WebSGML 'ISO 8879WebSGML&adaptations-annex;'>
<!ENTITY XML.version "1.0">
<!ENTITY doc.date "1997128">
<!ENTITY iso6.doc.date "971208">
<!ENTITY w3c.doc.date "97123">
<!ENTITY draft.day '8'>
<!ENTITY draft.month '12'>
<!ENTITY draft.year '1997'>
<!-- -->
<!-- LAST TOUCHED BY: Tim Bray, 3 Dec 1997 -->
<!-- The words 'FINAL EDIT' in comments mark places where changes
need to be made after approval of the document by the ERB, before
publication. -->
<!ENTITY XML.version "1.0">
<!ENTITY doc.date "8 December 1997">
<!ENTITY iso6.doc.date "971208">
<!ENTITY w3c.doc.date "03-Dec-97">
<!ENTITY draft.day '8'>
<!ENTITY draft.month 'December'>
<!ENTITY draft.year '1997'>
<!ENTITY WebSGML
'WebSGML Adaptations Annex to ISO 8879'>
<!ENTITY newline " ">
<!-- old: <!ENTITY newline "&#8232;"> -->
<!ENTITY lt "<">
<!ENTITY gt ">">
<!--<!ENTITY amp "&"> -->
<!ENTITY xmlpio "'&lt;?xml'">
<!ENTITY pic "'?>'">
<!ENTITY br "\n">
<!ENTITY cellback '#c0d9c0'>
<!ENTITY mdash "--"> <!-- was: <!ENTITY mdash "&#38;#151;"> -->
<!ENTITY com "--">
<!ENTITY como "--">
<!ENTITY comc "--">
<!ENTITY hcro "&amp;#x">
<!-- <!ENTITY nbsp ""> -->
<!ENTITY nbsp "&#160;">
<!ENTITY magicents "<code>amp</code>,
<code>lt</code>,
<code>gt</code>,
<code>apos</code>,
<code>quot</code>">
<!--: -->
<!-- audience and distribution status: for use at publication time -->
<!-- -->
<!ENTITY doc.audience "">
<!ENTITY doc.distribution "
">
]>
<!-- for Panorama *-->
<?VERBATIM "eg" ?>
<spec>
<header>
<title>&markup; (XML)</title>
<version>1.0&version;</version>
<w3c-designation>PR-xml-&iso6.doc.date;</w3c-designation>
<w3c-doctype>World Wide Web Consortium</w3c-doctype>
<pubdate><day>&draft.day;</day><month>&draft.month;</month><year>&draft.year;</year></pubdate>
<notice><p>XML WG
<!-- FINAL EDIT: FIX --></p></notice>
<publoc>
<loc href="http://www.w3.org/TR/PR-xml-&iso6.doc.date;">
http://www.w3.org/TR/PR-xml-&iso6.doc.date;</loc></publoc>
<prevlocs>
<loc href='http://www.w3.org/TR/WD-xml-961114'>
http://www.w3.org/TR/WD-xml-961114</loc>
<loc href='http://www.w3.org/TR/WD-xml-lang-970331'>
http://www.w3.org/TR/WD-xml-lang-970331</loc>
<loc href='http://www.w3.org/TR/WD-xml-lang-970630'>
http://www.w3.org/TR/WD-xml-lang-970630</loc>
<loc href='http://www.w3.org/TR/WD-xml-970807'>
http://www.w3.org/TR/WD-xml-970807</loc>
<loc href='http://www.w3.org/TR/WD-xml-971117'>
http://www.w3.org/TR/WD-xml-971117</loc>
</prevlocs>
<authlist>
<author><name>Tim Bray</name>
<affiliation>Textuality and Netscape</affiliation>
<email
href="mailto:tbray@textuality.com">tbray@textuality.com</email></author>
<author><name>Jean Paoli</name>
<affiliation>Microsoft</affiliation>
<email href="mailto:jeanpa@microsoft.com">jeanpa@microsoft.com</email></author>
<author><name>C. M. Sperberg-McQueen</name>
<affiliation>University of Illinois at Chicago</affiliation>
<email href="mailto:cmsmcq@uic.edu">cmsmcq@uic.edu</email></author>
</authlist>
<status>
<p>&TR-or-Rec;, 199712World Wide Web Consortium
Extensible Markup Language version1.0,
&TR-or-Rec;This &eTR-or-Rec;
is a translation of the XML proposed recommendation 1.0
published by the World Wide Web Consortium in December 1997. It is
intended that &eTR-or-Rec; is technically identical to the original.</p>
<p>The
original copyright notice is shown below:</p>
<p>XML
This version of the XML specification is for
public review and discussion. It may be distributed freely,
as long as all text and legal notices remain intact.</p>
<p>&TR-or-Rec;XML19982World
Wide Web ConsortiumXML
XML
The XML Proposed Recommendation is superseded
by the XML Recommendation which was published by the World
Wide Web Consortium in February 1998. It is intended that
this &eTR-or-Rec; be revised accordingly in the near future.</p>
<p>&TR-or-Rec;<loc
href='http://www.w3.org/XML'>XML</loc>
(&markup;Standard Generalized Markup Language, ISO
8879:1986)WWW&subset;
&TR-or-Rec;ISO 8879
&subset;<loc
href='http://www.w3.org/XML/#WG-decisions'></loc>XML
XML<loc
href='http://www.w3.org/XML/#software'></loc>
XML<loc
href='http://www.w3.org/XML/#discussion'></loc>It is a
stable document derived from a series of working drafts produced over
the last year as deliverables of the <loc
href='http://www.w3.org/XML'>XML activity</loc>. It specifies a
language created by subsetting an existing, widely used international
text processing standard (Standard Generalized Markup Language, ISO
8879:1986 as amended and corrected) for use on the World Wide Web.
Details of the decisions regarding which features of ISO 8879 to
retain in the subset <loc
href='http://www.w3.org/XML/#WG-decisions'>are available
separately</loc>. XML is already supported by some commercial
products, and there are a growing number of <loc
href='http://www.w3.org/XML/#software'>free implementations</loc>.
Public discussions of XML <loc
href='http://www.w3.org/XML/#discussion'>are accessible
online</loc>.</p>
<p>&TR-or-Rec;<bibref ref="Berners-Lee"/>
URI(Uniform Resource Identifier)URI
<bibref ref="RFC1738"/><bibref ref="RFC1808"/>
RFCURI
URL(Uniform Resource Locator)This
specification uses the term URI, which is defined by <bibref
ref="Berners-Lee"/>, a work in progress expected to update <bibref
ref="RFC1738"/> and <bibref ref="RFC1808"/>. Should the work not be
accepted as an RFC, the references to uniform resource identifiers
(URIs) in this specification will become references to uniform
resource locators (URLs).</p>
<p>XMLW3C
The normative version of the specification is
the English version found at the W3C site.</p>
<p>
Although this technical report is
intended to be technically identical to the original, it may
contain errors from the translation.</p>
<p>:
&TR-or-Rec;
&TR-or-Rec;WebHTML
</p>
</status>
<!-- out of date
<statusp>This is a W3C Working Draft for review by W3C members and other
interested parties. It is a draft document and may be updated,
replaced, or obsoleted by other documents at any time. It is
inappropriate to use W3C Working Drafts as reference material or to
cite them as other than "work in progress". A list of current W3C
working drafts can be found at
<loc href="http://www.w3.org/TR">http://www.w3.org/TR</loc>.</statusp>
<statusp><emph>Note:</emph> Since working drafts are subject to frequent
change, you are advised to reference the above URL, rather than the
URLs for working drafts themselves.</statusp>
<statusp>This work is part of the W3C SGML Activity (for current
status, see <loc href="http://www.w3.org/MarkUp/SGML/Activity"
>http://www.w3.org/MarkUp/SGML/Activity</loc>).</statusp>
<p>The current
draft of this specification
presupposes the successful completion of the current
work on the &WebSGML;, being prepared by ISO/IEC JTC1
at the time this draft specification was drafted.
If it is not
adopted in the expected form, some clauses of this specification
may change, and some
recommendations now labeled "<termref def="dt-interop">for
interoperability</termref>" will become requirements labeled
"<termref def="dt-compat">for compatibility</termref>".
</p>
<p>The current draft of this specification uses the term
URI, which is defined by
<bibref ref="Berners-Lee"/>,
which is work in progress expected to update
<bibref ref="RFC1738"/> and <bibref ref="RFC1808"/>.
Should the work in this draft not be accepted as an RFC, the
references to uniform resource identifiers (URIs) in this
specification will become references to uniform resource
locators (URLs).</p>
</status> -->
<abstract>
<p>&markup;(XML)SGML&TR-or-Rec;XMLHTMLSGMLXMLSGMLHTML</p>
</abstract>
<pubstmt>
<p>Chicago, Vancouver, Mountain View, et al.:
World-Wide Web Consortium, XML, 1996, 1997.</p>
</pubstmt>
<sourcedesc>
<p>Created in electronic form.</p>
</sourcedesc>
<langusage>
<language id='EN'>English</language>
<language id='ebnf'>Extended Backus-Naur Form (formal grammar)</language>
</langusage>
<revisiondesc>
<slist>
<sitem>1997-12-03 : CMSMcQ : yet further changes</sitem>
<sitem>1997-12-02 : TB : further changes (see TB to XML WG,
2 December 1997)</sitem>
<sitem>1997-12-02 : CMSMcQ : deal with as many corrections and
comments from the proofreaders as possible:
entify hard-coded document date in pubdate element,
change expansion of entity WebSGML,
update status description as per Dan Connolly (am not sure
about reference to Berners-Lee et al.),
add 'The' to abstract as per WG decision,
move Relationship to Existing Standards to back matter and
combine with References,
re-order back matter so normative appendices come first,
re-tag back matter so informative appendices are tagged informdiv1,
remove XXX XXX from list of 'normative' specs in prose,
move some references from Other References to Normative References,
add RFC 1738, 1808, and 2141 to Other References (they are not
normative since we do not require the processor to enforce any
rules based on them),
add reference to 'Fielding draft' (Berners-Lee et al.),
move notation section to end of body,
drop URIchar non-terminal and use SkipLit instead,
lose stray reference to defunct nonterminal 'markupdecls',
move reference to Aho et al. into appendix (Tim's right),
add prose note saying that hash marks and fragment identifiers are
NOT part of the URI formally speaking, and are NOT legal in
system identifiers (processor 'may' signal an error).
Work through:
Tim Bray reacting to James Clark,
Tim Bray on his own,
Eve Maler,
NOT DONE YET:
change binary / text to unparsed / parsed.
handle James's suggestion about &lt; in attribute values
uppercase hex characters,
namechar list,
</sitem>
<sitem>1997-12-01 : JB : add some column-width parameters</sitem>
<sitem>1997-12-01 : CMSMcQ : begin round of changes to incorporate
recent WG decisions and other corrections:
binding sources of character encoding info (27 Aug / 3 Sept),
correct wording of Faust quotation (restore dropped line),
drop SDD from EncodingDecl,
change text at version number 1.0,
drop misleading (wrong!) sentence about ignorables and extenders,
modify definition of PCData to make bar on msc grammatical,
change grammar's handling of internal subset (drop non-terminal markupdecls),
change definition of includeSect to allow conditional sections,
add integral-declaration constraint on internal subset,
drop misleading / dangerous sentence about relationship of
entities with system storage objects,
change table body tag to htbody as per EM change to DTD,
add rule about space normalization in public identifiers,
add description of how to generate our name-space rules from
Unicode character database (needs further work!).
</sitem>
<sitem>1997-10-08 : TB : Removed %-constructs again, new rules
for PE appearance.</sitem>
<sitem>1997-10-01 : TB : Case-sensitive markup; cleaned up
element-type defs, lotsa little edits for style</sitem>
<sitem>1997-09-25 : TB : Change to elm's new DTD, with
substantial detail cleanup as a side-effect</sitem>
<sitem>1997-07-24 : CMSMcQ : correct error (lost *) in definition
of ignoreSectContents (thanks to Makoto Murata)</sitem>
<sitem>Allow all empty elements to have end-tags, consistent with
SGML TC (as per JJC).</sitem>
<sitem>1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections:
introduce the term 'empty-element tag', note that all empty elements
may use it, and elements declared EMPTY must use it.
Add WFC requiring encoding decl to come first in an entity.
Redefine notations to point to PIs as well as binary entities.
Change autodetection table by removing bytes 3 and 4 from
examples with Byte Order Mark.
Add content model as a term and clarify that it applies to both
mixed and element content.
</sitem>
<sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes,
changes to productions for choice, seq, Mixed, NotationType,
Enumeration. Follow James Clark's suggestion and prohibit
conditional sections in internal subset. TO DO: simplify
production for ignored sections as a result, since we don't
need to worry about parsers which don't expand PErefs finding
a conditional section.</sitem>
<sitem>1997-06-29 : TB : various edits</sitem>
<sitem>1997-06-29 : CMSMcQ : further changes:
Suppress old FINAL EDIT comments and some dead material.
Revise occurrences of % in grammar to exploit Henry Thompson's pun,
especially markupdecl and attdef.
Remove RMD requirement relating to element content (?).
</sitem>
<sitem>1997-06-28 : CMSMcQ : Various changes for 1 July draft:
Add text for draconian error handling (introduce
the term Fatal Error).
RE deleta est (changing wording from
original announcement to restrict the requirement to validating
parsers).
Tag definition of validating processor and link to it.
Add colon as name character.
Change def of %operator.
Change standard definitions of lt, gt, amp.
Strip leading zeros from #x00nn forms.</sitem>
<sitem>1997-04-02 : CMSMcQ : final corrections of editorial errors
found in last night's proofreading. Reverse course once more on
well-formed: Webster's Second hyphenates it, and that's enough
for me.</sitem>
<sitem>1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self</sitem>
<sitem>1997-03-31 : Tim Bray : many changes</sitem>
<sitem>1997-03-29 : CMSMcQ : some Henry Thompson (on entity handling),
some Charles Goldfarb, some ERB decisions (PE handling in miscellaneous
declarations). Changed Ident element to accept def attribute.
Allow normalization of Unicode characters. move def of systemliteral
into section on literals.</sitem>
<sitem>1997-03-28 : CMSMcQ : make as many corrections as possible, from
Terry Allen, Norbert Mikula, James Clark, Jon Bosak, Henry Thompson,
Paul Grosso, and self. Among other things: give in on "well formed"
(Terry is right), tentatively rename QuotedCData as AttValue
and Literal as EntityValue to be more informative, since attribute
values are the <emph>only</emph> place QuotedCData was used, and
vice versa for entity text and Literal. (I'd call it Entity Text,
but 8879 uses that name for both internal and external entities.)</sitem>
<sitem>1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply
my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not'
except in the one case where it meant 'may or may not'.</sitem>
<sitem>1997-03-21 : TB : massive changes on plane flight from Chicago
to Vancouver</sitem>
<sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible.
</sitem>
<sitem>1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec.</sitem>
<sitem>1997-03-20 : CMSMcQ : cosmetic changes preparatory to revision for
WWW conference April 1997: restore some of the internal entity
references (e.g. to docdate, etc.), change character xA0 to &amp;nbsp;
and define nbsp as &amp;#160;, and refill a lot of paragraphs for
legibility.</sitem>
<sitem>1996-11-12 : CMSMcQ : revise using Tim's edits:
Add list type of NUMBERED and change most lists either to
BULLETS or to NUMBERED.
Suppress QuotedNames, Names (not used).
Correct trivial-grammar doc type decl.
Rename 'marked section' as 'CDATA section' passim.
Also edits from James Clark:
Define the set of characters from which [^abc] subtracts.
Charref should use just [0-9] not Digit.
Location info needs cleaner treatment: remove? (ERB
question).
One example of a PI has wrong pic.
Clarify discussion of encoding names.
Encoding failure should lead to unspecified results; don't
prescribe error recovery.
Don't require exposure of entity boundaries.
Ignore white space in element content.
Reserve entity names of the form u-NNNN.
Clarify relative URLs.
And some of my own:
Correct productions for content model: model cannot
consist of a name, so "elements ::= cp" is no good.
</sitem>
<sitem>1996-11-11 : CMSMcQ : revise for style.
Add new rhs to entity declaration, for parameter entities.</sitem>
<sitem>1996-11-10 : CMSMcQ : revise for style.
Fix / complete section on names, characters.
Add sections on parameter entities, conditional sections.
Still to do: Add compatibility note on deterministic content models.
Finish stylistic revision.</sitem>
<sitem>1996-10-31 : TB : Add Entity Handling section</sitem>
<sitem>1996-10-30 : TB : Clean up term &amp; termdef. Slip in
ERB decision re EMPTY.</sitem>
<sitem>1996-10-28 : TB : Change DTD. Implement some of Michael's
suggestions. Change comments back to //. Introduce language for
XML namespace reservation. Add section on white-space handling.
Lots more cleanup.</sitem>
<sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB
decisions. Characters are not integers. Comments are /* */ not //.
Add bibliographic refs to 10646, HyTime, Unicode.
Rename old Cdata as MsData since it's <emph>only</emph> seen
in marked sections. Call them attribute-value pairs not
name-value pairs, except once. Internal subset is optional, needs
'?'. Implied attributes should be signaled to the app, not
have values supplied by processor.</sitem>
<sitem>1996-10-16 : TB : track down &amp; excise all DSD references;
introduce some EBNF for entity declarations.</sitem>
<sitem>1996-10-?? : TB : consistency check, fix up scraps so
they all parse, get formatter working, correct a few productions.</sitem>
<sitem>1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and
organizational changes:
Replace a few literals with xmlpio and
pic entities, to make them consistent and ensure we can change pic
reliably when the ERB votes.
Drop paragraph on recognizers from notation section.
Add match, exact match to terminology.
Move old 2.2 XML Processors and Apps into intro.
Mention comments, PIs, and marked sections in discussion of
delimiter escaping.
Streamline discussion of doctype decl syntax.
Drop old section of 'PI syntax' for doctype decl, and add
section on partial-DTD summary PIs to end of Logical Structures
section.
Revise DSD syntax section to use Tim's subset-in-a-PI
mechanism.</sitem>
<sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem>
<sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3
(Characters)</sitem>
<sitem>1996-10-09 : CMSMcQ : re-unite everything for convenience,
at least temporarily, and revise quickly</sitem>
<sitem>1996-10-08 : TB : first major homogenization pass</sitem>
<sitem>1996-10-08 : TB : turn "current" attribute on div type into
CDATA</sitem>
<sitem>1996-10-02 : TB : remould into skeleton + entities</sitem>
<sitem>1996-09-30 : CMSMcQ : add a few more sections prior to exchange
with Tim.</sitem>
<sitem>1996-09-20 : CMSMcQ : finish transcribing notes.</sitem>
<sitem>1996-09-19 : CMSMcQ : begin transcribing notes for draft.</sitem>
<sitem>1996-09-13 : CMSMcQ : made outline from notes of 09-06,
do some housekeeping</sitem>
</slist>
</revisiondesc>
</header>
<body>
<div1 id='sec-intro'>
<head></head>
<!-- <div2 id='sec-scope'>
<head></head> -->
<p>&markup;XML(eXtensible Markup Language)<termref def="dt-xml-doc">XML</termref>XMLXMLSGML(&markup;Standard Generalized Markup Language)<bibref ref='ISO8879'/>&subset;XMLSGML</p>
<p>XML<termref def="dt-entity"></termref>&parsed-data;&unparsed-data;&parsed-data;<termref def="dt-character"></termref><termref def="dt-chardata"></termref><termref def="dt-markup">&markup;</termref>&markup;XML</p>
<p><termdef id="dt-xml-proc" term="XML&processor;"><term>XML&processor;</term>XML </termdef> <termdef id="dt-app" term="&application;">XML&processor;<term>&application;</term></termdef>&TR-or-Rec;XML&processor;XML&application;</p>
<!-- </div2> -->
<div2 id='sec-origin-goals'>
<head></head>
<p>1996World Wide Web Consortium(W3C)XML( SGML)XMLSun MicrosystemsJon BosakW3CSGMLXML SIG(Special Interest Group)XML
<!--JIS? XML-->Dan ConnollyW3C</p>
<p>XML<ulist>
<item><p>a) XMLInternet</p></item>
<item><p>b) XML&application;</p></item>
<item><p>c) XMLSGML</p></item>
<item><p>d) XML</p></item>
<item><p>e) XML</p></item>
<item><p>f) XML</p></item>
<item><p>g) XML</p></item>
<item><p>h) XML</p></item>
<item><p>i) XML</p></item>
<item><p>j) XML&markup;</p></item></ulist>
</p>
<p>XML&XML.version;&version;&TR-or-Rec;(UnicodeISO/IEC 10646<!--* XXX for Uniform Resource Identifiers, *-->&language-identification; RFC 1766&language-code;ISO 639&country-code;ISO 3166)</p>
<p>&version;XML<!-- (&doc.date;) --></p>
</div2>
<div2 id='sec-terminology'>
<head></head>
<p>XML&TR-or-Rec;XML&processor;
<glist>
<gitem>
<label>1.2.1 (may)</label>
<def><p><termdef id="dt-may" term="">XML&processor;</termdef></p></def>
</gitem>
<gitem>
<label>1.2.2 (must)</label>
<def><p>XML&processor;&error;<!-- do NOT change this! this is what defines a violation of a 'must' clause as 'an error'. -MSM -->
</p></def>
</gitem>
<gitem>
<label>1.2.3 &error;(error)</label>
<def><p><termdef id="dt-error" term="&error;">&TR-or-Rec;&error;&error;</termdef></p></def>
</gitem>
<gitem>
<label>1.2.4 &fatal-error;(fatal error)</label>
<def><p><termdef id="dt-fatal" term="&fatal-error;"><termref def="dt-xml-proc">XML&processor;</termref>&application;&error;&fatal-error;&processor;&error;&error;&error;&application;&error;&processor;(&markup;)&application;&fatal-error;&processor;&processor;&application;</termdef></p></def>
</gitem>
<gitem>
<label>1.2.5 &at-user-option;(at user option)</label>
<def><p>(may)(must)()&user;</p></def>
</gitem>
<gitem>
<label>1.2.6 &validity;(validity constraint)</label>
<def><p><termref def="dt-valid">&valid;</termref>XML&validity;&error;&at-user-option;<termref def="dt-validating">XML&processor;</termref>&error;</p></def>
</gitem>
<gitem>
<label>1.2.7 &well-formed;(well-formedness constraint)</label>
<def><p><termref def="dt-wellformed">&well-formed;</termref>XML&well-formed;<termref def="dt-fatal">&fatal-error;</termref></p></def>
</gitem>
<gitem>
<label>1.2.8 &match;(match)</label>
<def><p>a) <termdef id="dt-match" term="&match;">&string;&match;&string;ISO/IEC 10646&composed-form;+&diacritical-mark;()&string;&match;&at-user-option;&processor;<!-- Note that no processing of characters with respect to case is part of the matching process. -->&lt;BR>b) &string;&match;&string;&string;&match;&lt;BR>c) &match;<titleref href='elementvalid'>&validity;</titleref>&match;</termdef></p></def>
</gitem>
<gitem>
<label>1.2.9 (for compatibility)</label>
<def><p><termdef id="dt-compat" term="">XMLXMLSGML</termdef></p></def>
</gitem>
<gitem>
<label>1.2.10 (for interoperability)</label>
<def><p><termdef id="dt-interop" term="">&WebSGML;SGML&processor;XML</termdef></p></def>
</gitem>
</glist>
</p>
</div2>
</div1>
<!-- &Docs; -->
<div1 id='sec-documents'>
<head></head>
<p><termdef id="dt-xml-doc" term="XML">
<!-- A textual object -->
&TR-or-Rec;<termref def="dt-wellformed">&well-formed;</termref><term>XML</term>&well-formed;XML<termref def="dt-valid">&valid;</termref>XML
</termdef></p>
<!-- why this div? -TB
<div2 id='sec-log-phys'>
<head>Logical and Physical Structure</head> -->
<p>XML<termref def="dt-entity"></termref><termref def="dt-entref"></termref><termref def="dt-docent"></termref>&markup;<titleref href="wf-entities"></titleref></p>
<!--
</div2> -->
<div2 id='sec-well-formed'>
<head>&well-formed;XML</head>
<p><termdef id="dt-wellformed" term="&well-formed;">&well-formed;XML</termdef>
<ulist>
<item><p>a) <nt def='NT-document'>document</nt>&match;</p></item>
<item><p>b) &TR-or-Rec;&well-formed;</p>
</item>
<item><p>c) <termref def='dt-parsedent'>&parsed-entity;</termref><titleref href='wf-entities'>&well-formed;</titleref></p></item>
</ulist></p>
<p>
<scrap lang='ebnf' id='document'>
<head></head>
<prod id='NT-document'><lhs>document</lhs>
<rhs><nt def='NT-prolog'>prolog</nt>
<nt def='NT-element'>element</nt>
<nt def='NT-Misc'>Misc</nt>*</rhs></prod>
</scrap>
</p>
<p><nt def="NT-document">document</nt>&match;
<ulist>
<item><p>a) <termref def="dt-element"></termref></p>
</item>
<!--* N.B. some readers (notably JC) find the following
paragraph awkward and redundant. I agree it's logically redundant:
it *says* it is summarizing the logical implications of
matching the grammar, and that means by definition it's
logically redundant. I don't think it's rhetorically
redundant or unnecessary, though, so I'm keeping it. It
could however use some recasting when the editors are feeling
stronger. -MSM *-->
<item><p>b) <termdef id="dt-root" term=""><term></term><termref def="dt-content"></termref></termdef>
</p></item>
</ulist>
</p>
<p><termdef id="dt-parentchild" term="/"><code>C</code><code>P</code><code>C</code><code>P</code><code>P</code><code>P</code><code>C</code><code></code><code>C</code><code>P</code><code></code></termdef></p>
</div2>
<div2 id="charsets">
<head></head>
<p>
<!--The data stored in an XML <termref def="dt-entity">entity</termref> is
either <termref def="dt-text">parsed</termref> or <termref
def="dt-unparsed">unparsed</termref>. -->
<termdef id="dt-text" term="">&parsed-entity;<term></term>(<termref def="dt-character"></termref>&markup;)</termdef><termdef id="dt-character" term=""><term></term>ISO/IEC 10646<bibref ref="ISO10646"/><!--Users may extend the ISO/IEC 10646 character repertoire by exploiting the private use areas. -->UnicodeISO/IEC 10646</termdef>
<scrap lang="ebnf" id="char32">
<head></head>
<prodgroup pcw2="4" pcw4="17.5" pcw5="11">
<prod id="NT-Char"><lhs>Char</lhs>
<rhs>#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
| [#x10000-#x10FFFF]</rhs>
<com>Unicode&surrogate-blocks;FFFEFFFF</com>
</prod>
</prodgroup>
</scrap>
</p>
<p>&character-value;XML&processor;ISO/IEC 10646UTF-8UTF-16<titleref href='charencoding'></titleref></p>
<p>ISO/IEC 10646UCS-4&code-value;<!-- bit string. -->1016</p>
</div2>
<div2 id='sec-common-syn'>
<head></head>
<p>2.3</p>
<p><nt def="NT-S">S</nt> ()&space-character;(#x20)
<scrap lang="ebnf" id='white'>
<head></head>
<prodgroup pcw2="4" pcw4="17.5" pcw5="11">
<prod id='NT-S'><lhs>S</lhs>
<rhs>(#x20 | #x9 | #xD | #xA)+</rhs>
</prod>
</prodgroup>
</scrap></p>
<p>&letter;&letter;(&combining-character;)&ideographic;
<!--
Certain layout and format-control characters defined by ISO/IEC 10646
should be ignored when recognizing identifiers; these are defined by the
classes <nt def='NT-Ignorable'>Ignorable</nt> and <nt def='NT-
Extender'>Extender</nt>.
-->
<titleref href='CharClasses'></titleref></p>
<p><termdef id="dt-name" term="Name"><term>Name</term>&letter;&letter;()</termdef>&string;"<code>xml</code>"<code>(('X'|'x') ('M'|'m') ('L'|'l'))</code>&match;&string;&TR-or-Rec;
</p>
<note>
<p>XMLXMLXMLXML&processor;
</p>
</note>
<p>
<nt def='NT-Nmtoken'>Nmtoken</nt> (&token;)
<scrap lang='ebnf'>
<head>&token;</head>
<!--
<prod id='NT-MiscName'><lhs>MiscName</lhs>
<rhs></rhs>
</prod>-->
<prod id='NT-NameChar'><lhs>NameChar</lhs>
<rhs><nt def="NT-Letter">Letter</nt>
| <nt def='NT-Digit'>Digit</nt>
<!--| <nt def='NT-MiscName'>MiscName</nt>-->
| '.' | '-' | '_' | ':'
| <nt def='NT-CombiningChar'>CombiningChar</nt>
<!-- | <nt def='NT-Ignorable'>Ignorable</nt> -->
| <nt def='NT-Extender'>Extender</nt></rhs>
</prod>
<prod id='NT-Name'><lhs>Name</lhs>
<rhs>(<nt def='NT-Letter'>Letter</nt> | '_' | ':')
(<nt def='NT-NameChar'>NameChar</nt>)*</rhs></prod>
<prod id='NT-Names'><lhs>Names</lhs>
<rhs><nt def='NT-Name'>Name</nt>
(<nt def='NT-S'>S</nt> <nt def='NT-Name'>Name</nt>)*</rhs></prod>
<prod id='NT-Nmtoken'><lhs>Nmtoken</lhs>
<rhs>(<nt def='NT-NameChar'>NameChar</nt>)+</rhs></prod>
<prod id='NT-Nmtokens'><lhs>Nmtokens</lhs>
<rhs><nt def='NT-Nmtoken'>Nmtoken</nt> (<nt def='NT-S'>S</nt> <nt
def='NT-Nmtoken'>Nmtoken</nt>)*</rhs></prod>
</scrap>
</p>
<p>&literal;&string;&literal;(<nt def='NT-EntityValue'>EntityValue</nt>)(<nt def='NT-AttValue'>AttValue</nt>)&identifier;(<nt def="NT-SystemLiteral">SystemLiteral</nt>)&literal;&markup;(<nt def='NT-SkipLit'>SkipLit</nt>)
<scrap lang='ebnf'>
<head>&literal;</head>
<!-- is marked section end legal in entity values etc.?
James says yes. Handbook page 392, sec. 10.4 seems to me to say no.
If James is right, leave as is. Otherwise, uncomment
the next comment and ...
-->
<!--
<prod id='NT-EntityValue'><lhs>EntityValue</lhs>
<rhs>' " '
(([^%&amp;"]
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-Reference'>Reference</nt>)*
- (<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-char'>Char</nt>*))
' " '
</rhs>
<rhs>|&nbsp;
" ' "
(([^%&amp;']
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-Reference'>Reference</nt>)*
- (<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-char'>Char</nt>*))
" ' "</rhs>
</prod>
<prod id='NT-AttValue'><lhs>AttValue</lhs>
<rhs>'"'
(([^&lt;&amp;"]
| <nt def='NT-Reference'>Reference</nt>)*
- (<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-char'>Char</nt>*))
' " '
</rhs>
<rhs>|&nbsp;
" ' "
(([^&lt;&amp;']
| <nt def='NT-Reference'>Reference</nt>)*
- (<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-char'>Char</nt>*))
" ' "</rhs>
<wfc def="CleanAttrVals"/>
</prod>
-->
<!-- ... and comment out the following, down to ... -->
<prod id='NT-EntityValue'><lhs>EntityValue</lhs>
<rhs>' " '
([^%&amp;"]
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-Reference'>Reference</nt>)*
' " '
</rhs>
<rhs>|&nbsp;
" ' "
([^%&amp;']
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-Reference'>Reference</nt>)*
" ' "</rhs>
</prod>
<prod id='NT-AttValue'><lhs>AttValue</lhs>
<rhs>' " '
([^&lt;&amp;"]
| <nt def='NT-Reference'>Reference</nt>)*
' " '
</rhs>
<rhs>|&nbsp;
" ' "
([^&lt;&amp;']
| <nt def='NT-Reference'>Reference</nt>)*
" ' "</rhs>
<!--<wfc def="WF-Attvaldelim"/>-->
</prod>
<!-- ... down to here. -->
<prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs>
<rhs><nt def='NT-SkipLit'>SkipLit</nt></rhs>
</prod>
<!-- <prod id="NT-URIchar"><lhs>URIchar</lhs>
<rhs><com>See <loc href="http://www.w3.org/XXX">XXX</loc></com>
</rhs></prod>
-->
<prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs>
<rhs>' " ' <nt def='NT-PubidChar'>PubidChar</nt>*
' " '
| " ' " (<nt def='NT-PubidChar'>PubidChar</nt> - " ' ")* " ' "</rhs>
</prod>
<prod id="NT-PubidChar"><lhs>PubidChar</lhs>
<rhs>#x20 | #xD | #xA
|&nbsp;[a-zA-Z0-9]
|&nbsp;[-'()+,./:=?]</rhs>
</prod>
<prod id="NT-SkipLit"><lhs>SkipLit</lhs>
<rhs>(' " ' [^"]* ' " ')
|&nbsp;(" ' " [^']* " ' ")</rhs>
</prod>
<!-- alternate form, making ms end illegal: -->
<!--
<prod id="NT-SkipLit"><lhs>SkipLit</lhs>
<rhs>(' " ' ([^"]* - ([^"]* ']]&gt;' [^"]*)) ' " ')
|&nbsp;(" ' " ([^']* - ([^']* ']]&gt;' [^']*)) " ' ")</rhs>
</prod>
-->
</scrap>
</p>
<!--
<wfcnote id="WF-Attvaldelim">
<head>Delimiters in Attribute Values</head>
<p>After the expansion of character and entity references,
an attribute value must not contain a "<code>&lt;</code>"
or "<code>&amp;</code>" character unless that character was
introduced by the expansion of a character reference
or one of the entities &magicents;.</p>
</wfcnote>-->
<!--
This is not quite right: &lt; should be legal, should it not?
Suppress this WFC until we get it right.
-->
<!-- Henry Thompson suggests (in substance, not form: the wording needs
to be clarified):
"Cooked Attribute values must not contain &lt; &amp; or the
quote which closed their uncooked literal,
unless arising from the expansion of a character reference or
magic reference
directly contained in their uncooked literal."
I'm not sure I agree with this rule, but it's at least coherent,
which is more than I can say for my attempt.
-->
</div2>
<div2 id='syntax'>
<head>&markup;</head>
<p><termref def='dt-text'></termref><termref def="dt-chardata"></termref>&markup;<termdef id="dt-markup" term="Markup"><term>&markup;</term><termref def="dt-stag"></termref><termref def="dt-etag"></termref><termref def="dt-empty"></termref><termref def="dt-entref"></termref><termref def="dt-charref"></termref><termref def="dt-comment"></termref><termref def="dt-cdsection">CDATA</termref> <termref def="dt-doctype"></termref><termref def="dt-pi"></termref>
</termdef>
</p>
<p><termdef id="dt-chardata" term="Character Data">&markup;<term></term></termdef></p>
<p> (&amp;)&left-angle-bracket; (&lt;)&markup;<termref def="dt-comment"></termref><termref def="dt-pi"></termref><termref def="dt-cdsection">CDATA</termref><emph></emph><termref def='dt-litentval'>&literal;</termref> <titleref href='wf-entities'>&well-formed;</titleref><!-- FINAL EDIT: restore internal entity decl or leave it out. -->&string;"<code>&amp;amp;</code>"&string;"<code>&amp;lt;</code>"<termref def="dt-escape">&escape;</termref>&right-angle-bracket; (>) &string;"<code>&amp;gt;</code>""<code>]]&gt;</code>"<termref def="dt-cdsection">CDATA</termref>&markup;<termref def='dt-compat'></termref>"<code>&amp;gt;</code>"&escape;</p>
<p>&markup;&char-string;CDATACDATA"<code>]]&gt;</code>"&char-string;
</p>
<p>
&single-quote;&double-quote;&single-quote;(') "<code>&amp;apos;</code>"&double-quote;(")"<code>&amp;quot;</code>"
<scrap lang="ebnf">
<head></head>
<prod id='NT-CharData'>
<lhs>CharData</lhs>
<rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)</rhs>
</prod>
</scrap>
</p>
</div2>
<div2 id='sec-comments'>
<head></head>
<p><termdef id="dt-comment" term="Comment"><term></term><termref def='dt-markup'>&markup;</termref>
<!-- TB
except in a <termref def="dt-cdsection">CDATA section</termref>, i.e. within
<termref def="dt-elemcontent">element content</termref>, in
<termref def="dt-mixed">mixed content</termref>, or in the prolog. They must
not occur within declarations or tags. -->
<termref def="dt-chardata"></termref>XML&processor;&application;
<termref def="dt-compat"></termref>&string;"<code>--</code>" &double-hyphen;
<scrap lang="ebnf">
<head></head>
<prod id='NT-Comment'><lhs>Comment</lhs>
<rhs>'&lt;!--'
((<nt def='NT-Char'>Char</nt> - '-')
| ('-' (<nt def='NT-Char'>Char</nt> - '-')))*
'-->'</rhs>
<!--
<rhs>'&lt;!&como;'
(<nt def='NT-Char'>Char</nt>* -
(<nt def='NT-Char'>Char</nt>* '&comc;' <nt def='NT-Char'>Char</nt>*))
'&comc;&gt;'</rhs> -->
</prod>
</scrap>
</termdef></p>
<p>
<eg>&lt;!&como; declarations for &lt;head> &amp; &lt;body> &comc;&gt;</eg>
</p>
</div2>
<div2 id='sec-pi'>
<head></head>
<p><termdef id="dt-pi" term="Processing instruction"><term></term>(PI)&application;
<scrap lang="ebnf">
<head></head>
<prod id='NT-PI'><lhs>PI</lhs>
<rhs>'&lt;?' <nt def='NT-PITarget'>PITarget</nt>
(<nt def='NT-S'>S</nt>
(<nt def='NT-Char'>Char</nt>* -
(<nt def='NT-Char'>Char</nt>* &pic; <nt def='NT-Char'>Char</nt>*)))?
&pic;</rhs></prod>
<prod id='NT-PITarget'><lhs>PITarget</lhs>
<rhs><nt def='NT-Name'>Name</nt> -
(('X' | 'x') ('M' | 'm') ('L' | 'l'))</rhs>
</prod>
</scrap></termdef>
PI<termref def="dt-chardata"></termref>&application;PI&application;&identify;&target; (<nt def='NT-PITarget'>PITarget</nt>) &target; "<code>XML</code>""<code>xml</code>"&TR-or-Rec;XML<termref def='dt-notation'></termref>PI&target;
</p>
</div2>
<div2 id='sec-cdata-sect'>
<head>CDATA</head>
<p><termdef id="dt-cdsection" term="CDATA Section"><term>CDATA</term>&markup;&escape;CDATA&string;"<code>&lt;![CDATA[</code>"&string;
"<code>]]&gt;</code>"
<scrap lang="ebnf">
<head>CDATA</head>
<prod id='NT-CDSect'><lhs>CDSect</lhs>
<rhs><nt def='NT-CDStart'>CDStart</nt>
<nt def='NT-CData'>CData</nt>
<nt def='NT-CDEnd'>CDEnd</nt></rhs></prod>
<prod id='NT-CDStart'><lhs>CDStart</lhs>
<rhs>'&lt;![CDATA['</rhs>
</prod>
<prod id='NT-CData'><lhs>CData</lhs>
<rhs>(<nt def='NT-Char'>Char</nt>* -
(<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-Char'>Char</nt>*))
</rhs>
</prod>
<prod id='NT-CDEnd'><lhs>CDEnd</lhs>
<rhs>']]&gt;'</rhs>
</prod>
</scrap>
CDATA<nt def='NT-CDEnd'>CDEnd</nt>&markup;&left-angle-bracket;&literal;"<code>&amp;lt;</code>""<code>&amp;amp;</code>"&escape;CDATA
</termdef>
</p>
<p>"<code>&lt;greeting></code>""<code>&lt;/greeting></code>"<termref def='dt-markup'>&markup;</termref><termref def='dt-chardata'></termref>CDATA
<eg>&lt;![CDATA[&lt;greeting>Hello, world!&lt;/greeting>]]&gt;</eg>
</p>
</div2>
<div2 id='sec-prolog-dtd'>
<head>&prolog;</head>
<p><termdef id='dt-xmldecl' term='XML Declaration'>XMLXML&version;<term>XML</term>
</termdef>
</p>
<p>&TR-or-Rec;&version;&version; "<code>1.0</code>" &TR-or-Rec;&version;"<code>1.0</code>"&error;&TR-or-Rec;&version;"<code>1.0</code>"XMLXML&version;&version;&version;&processor;&version;&error;
</p>
<p>XML&markup;XML<termref def="dt-doctype"></termref><!-- old
The function of the markup in an XML document is to describe its
storage and logical structures, and associate attribute-value pairs with the
logical structure.
XML provides a
mechanism, the <termref def="dt-doctype">document type declaration</termref>,
to
define constraints on that logical structure and to support the use of
predefined storage units. --><termdef id="dt-valid" term="Validity">XML<term>&valid;</term>
</termdef></p>
<p><termref def="dt-element"></termref>
<scrap lang="ebnf" id='xmldoc'>
<head>&prolog;</head>
<prodgroup pcw2="6" pcw4="17.5" pcw5="9">
<prod id='NT-prolog'><lhs>prolog</lhs>
<rhs><nt def='NT-XMLDecl'>XMLDecl</nt>?
<nt def='NT-Misc'>Misc</nt>*
(<nt def='NT-doctypedecl'>doctypedecl</nt>
<nt def='NT-Misc'>Misc</nt>*)?</rhs></prod>
<prod id='NT-XMLDecl'><lhs>XMLDecl</lhs>
<rhs>&xmlpio;
<nt def='NT-VersionInfo'>VersionInfo</nt>
<nt def='NT-EncodingDecl'>EncodingDecl</nt>?
<nt def='NT-SDDecl'>SDDecl</nt>?
<nt def="NT-S">S</nt>?
&pic;</rhs>
</prod>
<prod id='NT-VersionInfo'><lhs>VersionInfo</lhs>
<rhs><nt def="NT-S">S</nt> 'version' <nt def='NT-Eq'>Eq</nt>
('"' <nt def="NT-VersionNum">VersionNum</nt> '"'
| "'" <nt def="NT-VersionNum">VersionNum</nt> "'")</rhs>
</prod>
<prod id='NT-Eq'><lhs>Eq</lhs>
<rhs><nt def='NT-S'>S</nt>? '=' <nt def='NT-S'>S</nt>?</rhs></prod>
<prod id="NT-VersionNum">
<lhs>VersionNum</lhs>
<rhs>([a-zA-Z0-9_.:] | '-')+</rhs>
</prod>
<prod id='NT-Misc'><lhs>Misc</lhs>
<rhs><nt def='NT-Comment'>Comment</nt> | <nt def='NT-PI'>PI</nt> |
<nt def='NT-S'>S</nt></rhs></prod>
</prodgroup>
</scrap></p>
<p>
XML<termref def="dt-wellformed">&well-formed;</termref><termref def="dt-valid">&valid;</termref>
<eg><![CDATA[<?xml version="1.0"?>
<greeting>Hello, world!</greeting>
]]></eg>
<eg><![CDATA[<greeting>Hello, world!</greeting>
]]></eg>
</p>
<p><termdef id="dt-doctype" term="Document Type Declaration">
XML<term></term><termref def='dt-markupdecl'>&markup;</termref><term>DTD</term>&markup;&subset;(<termref def='dt-extent'></termref>)&subset;&markup;DTD&subset;</termdef>
</p>
<p><termdef id="dt-markupdecl" term="markup declaration">
<term>&markup;</term><termref def="dt-eldecl"></termref> <termref def="dt-attdecl"></termref><termref def="dt-entdecl"></termref><termref def="dt-notdecl"></termref></termdef>&well-formed;&validity;<termref def='dt-PE'>&parameter;</termref><titleref xml-link="simple" href="sec-physical-struct"></titleref></p>
<scrap lang="ebnf" id='dtd'>
<head></head>
<prodgroup pcw2="6" pcw4="17.5" pcw5="9">
<prod id='NT-doctypedecl'><lhs>doctypedecl</lhs>
<rhs>'&lt;!DOCTYPE' <nt def='NT-S'>S</nt>
<nt def='NT-Name'>Name</nt> (<nt def='NT-S'>S</nt>
<nt def='NT-ExternalID'>ExternalID</nt>)?
<nt def='NT-S'>S</nt>? ('['
(<nt def='NT-markupdecl'>markupdecl</nt>
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-S'>S</nt>)*
']'
<nt def='NT-S'>S</nt>?)? '>'</rhs>
<vc def="vc-roottype"/>
<!--<vc def="vc-nonnullDTD"/>-->
</prod>
<!--
<prod id='NT-markupdecls'><lhs>markupdecls</lhs>
<rhs>
(<nt def='NT-S'>S</nt>?
<nt def='NT-markupdecl'>markupdecl</nt>
<nt def='NT-S'>S</nt>?)*
</rhs></prod>
-->
<prod id='NT-markupdecl'><lhs>markupdecl</lhs>
<rhs><nt def='NT-elementdecl'>elementdecl</nt>
| <nt def='NT-AttlistDecl'>AttlistDecl</nt>
| <nt def='NT-EntityDecl'>EntityDecl</nt>
| <nt def='NT-NotationDecl'>NotationDecl</nt>
| <nt def='NT-PI'>PI</nt>
| <nt def='NT-Comment'>Comment</nt>
<!--| <nt def='NT-InternalPERef'>InternalPERef</nt> --></rhs>
<vc def='vc-PEinMarkupDecl'/>
<wfc def="wfc-PEinInternalSubset"/>
</prod>
<!--
<prod id="NT-InternalPERef"><lhs>InternalPERef</lhs>
<rhs><nt def="NT-PEReference">PEReference</nt></rhs>
<wfc def="wfc-integraldec"/>
</prod>
-->
</prodgroup>
</scrap>
<vcnote id="vc-roottype">
<head>&root;</head>
<p>
<nt def='NT-Name'>Name</nt>&root;&match;
</p>
</vcnote>
<!--
<vcnote id="vc-nonnullDTD">
<head>Non-null DTD</head>
<p>
The internal and external subsets of the DTD must not both
be empty.
</p>
</vcnote>
-->
<vcnote id='vc-PEinMarkupDecl'>
<head>&parameter;</head>
<p>&parameter;<termref def='dt-repltext'>&replacement-text;</termref>&markup;&markup;(<nt def='NT-markupdecl'>markupdecl</nt>)<termref def='dt-PERef'>&parameter;</termref>&replacement-text;&replacement-text;</p>
</vcnote>
<wfcnote id="wfc-PEinInternalSubset">
<head>&subset;&parameter;</head>
<p>DTD&subset;<termref def='dt-PERef'>&parameter;</termref>&markup;&markup;(&parameter;&subset;)
</p>
</wfcnote>
<p>
&subset;&subset;DTD&parameter;<nt def="NT-markupdecl">markupdecl</nt>&markup;&markup;<termref def="dt-PERef">&parameter;</termref>&subset;&parameter;<termref def="dt-cond-section"></termref>
<!--In the external subset, however, parameter-entity references can
be used to replace constructs prefixed by "<code>%</code>" in a production of
the grammar, and <termref def="dt-cond-section">conditional sections</termref>
may occur.
In the internal subset, by contrast, conditional sections may not
occur and the only parameter-entity references
allowed are those which match the non-terminal
<nt def="NT-InternalPERef">InternalPERef</nt>
within the rule for <nt def="NT-markupdecl">markupdecl</nt>.
-->
<scrap lang="ebnf" id="ext-Subset">
<head>&subset;</head>
<prodgroup pcw2="6" pcw4="17.5" pcw5="9">
<prod id='NT-extSubset'><lhs>extSubset</lhs>
<rhs>(
<nt def='NT-markupdecl'>markupdecl</nt>
| <nt def='NT-conditionalSect'>conditionalSect</nt>
| <nt def='NT-PEReference'>PEReference</nt>
| <nt def='NT-S'>S</nt>
)*</rhs>
</prod>
</prodgroup>
</scrap></p>
<p>&subset;&parameter;&parameter;&markup;<emph></emph>&markup;<emph></emph>&subset;
</p>
<p>XML
<eg><![CDATA[<?xml version="1.0"?>
<!DOCTYPE greeting SYSTEM "hello.dtd">
<greeting>Hello, world!</greeting>
]]></eg>
<termref def="dt-sysid">&identifier;</termref> "<code>hello.dtd</code>"DTDURI</p>
<p>
<eg><![CDATA[<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE greeting [
<!ELEMENT greeting (#PCDATA)>
]>
<greeting>Hello, world!</greeting>
]]></eg>
&subset;&subset;&subset;&subset;<!--* 'is considered to'? boo. whazzat mean? -->&subset;&subset;
</p>
</div2>
<div2 id='sec-rmd'>
<head>&standalone;</head>
<p><termref def="dt-xml-proc">XML&processor;</termref>&application;&markup;&default-value;XML&standalone;&markup;&markup;
<scrap lang="ebnf" id='fulldtd'>
<head>&standalone;</head>
<prodgroup pcw2="4" pcw4="19.5" pcw5="9">
<prod id='NT-SDDecl'><lhs>SDDecl</lhs>
<rhs>
<nt def="NT-S">S</nt>
'standalone' <nt def='NT-Eq'>Eq</nt> "'" ('yes' | 'no') "'"
</rhs>
<rhs>
| <nt def="NT-S">S</nt>
'standalone' <nt def='NT-Eq'>Eq</nt> '"' ('yes' | 'no') '"'
</rhs><vc def='vc-check-rmd'/></prod>
</prodgroup>
</scrap></p>
<p>&standalone;, "<code>yes</code>"<termref def='dt-docent'></termref>DTD&subset;&subset;XML&processor;&application;&markup;"<code>no</code>"&markup;&standalone;<emph></emph>&standalone;</p>
<p>&markup;&standalone;&markup;&standalone;<code>"no"</code> </p>
<p>XML <code>standalone="no"</code> &standalone;&application;</p>
<vcnote id='vc-check-rmd'>
<head>&standalone;</head>
<p>&standalone;&markup; "<code>no</code>"
<ulist>
<item><p>a) <termref def="dt-default">&default;</termref></p></item>
<item><p>b) &magicents;<termref def="dt-entref"></termref></p>
</item>
<item><p>c) <titleref href='AVNormalize'></titleref></p></item>
<item>
<p>d) <termref def="dt-elemcontent"></termref>
</p></item>
</ulist>
</p>
</vcnote>
<p>&standalone;XML
<eg>&lt;?xml version="&XML.version;" standalone='yes'?></eg></p>
</div2>
<div2 id='sec-white-space'>
<head></head>
<p>XML&markup;(&space;&TR-or-Rec;<nt def='NT-S'>S</nt>)&version;&version;</p>
<p><termref def='dt-xml-proc'>XML&processor;</termref>&markup;&application;<termref def='dt-validating'>&validating;XML&processor;</termref><termref def="dt-elemcontent"></termref>&markup;&application;</p>
<p>
"<code>xml:space</code>"<termref def='dt-attr'></termref></p>
<p>&valid;<termref def="dt-attdecl"></termref>"<code>default</code>" "<code>preserve</code>"<termref def='dt-enumerated'></termref>
</p>
<p>"<code>default</code>"&application;&default;"<code>preserve</code>"&application;"<code>xml:space</code>" </p>
<p><termref def='dt-root'>&root;</termref>&default-value;&application;</p>
<p>
<eg><![CDATA[ <!ATTLIST poem xml:space (default|preserve) 'preserve'>]]></eg>
</p>
</div2>
<div2 id='sec-line-ends'>
<head></head>
<p>XML<termref def='dt-parsedent'>&parsed-entity;</termref><code>CR</code> (#xD) <code>LF</code> (#xA)</p>
<p><termref def='dt-app'>&application;</termref>&parsed-entity;&parsed-entity;&literal;"<code>#xD#xA</code>" &literal;<code>#xD</code>&literal;<termref def='dt-xml-proc'>XML&processor;</termref>&application;<code>#xA</code>()</p>
</div2>
<div2 id='sec-lang-tag'>
<head>&language-identification;</head>
<p><!--"Espa&#x00F1;ol" "EBNF"--><!-- x00F1: spanish's small ntilde--></p>
<!--; -->
<p>XML<!---->"<code>xml:lang</code>" <termref def="dt-attr"></termref>
<!--; XMLXML&application;-->
<bibref ref="RFC1766"/>RFC1766&language-identification;&language-identification;
<scrap lang='ebnf'>
<head>&language-identification;</head>
<prod id='NT-LanguageID'><lhs>LanguageID</lhs>
<rhs><nt def='NT-Langcode'>Langcode</nt>
('-' <nt def='NT-Subcode'>Subcode</nt>)*</rhs></prod>
<prod id='NT-Langcode'><lhs>Langcode</lhs>
<rhs><nt def='NT-ISO639Code'>ISO639Code</nt> |
<nt def='NT-IanaCode'>IanaCode</nt> |
<nt def='NT-UserCode'>UserCode</nt></rhs>
</prod>
<prod id='NT-ISO639Code'><lhs>ISO639Code</lhs>
<rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod>
<prod id='NT-IanaCode'><lhs>IanaCode</lhs>
<rhs>('i' | 'I') '-' ([a-z] | [A-Z])+</rhs></prod>
<prod id='NT-UserCode'><lhs>UserCode</lhs>
<rhs>('x' | 'X') '-' ([a-z] | [A-Z])+</rhs></prod>
<prod id='NT-Subcode'><lhs>Subcode</lhs>
<rhs>([a-z] | [A-Z])+</rhs></prod>
</scrap>
<nt def='NT-Langcode'>Langcode</nt>
<ulist>
<item><p>a) <bibref ref="ISO639"/>2&language-code;</p></item>
<item><p>b) Internet Assigned Numbers Authority (IANA)&language-code; "<code>i-</code>" ("<code>I-</code>")</p></item>
<item><p>c) &user;&language-code;IANA"<code>x-</code>" "<code>X-</code>" </p></item>
</ulist></p>
<p><nt def='NT-Subcode'>Subcode</nt><bibref ref="ISO3166"/>ISO3166()3<nt def='NT-Langcode'>Langcode</nt>"<code>x-</code>" "<code>X-</code>"IANA</p>
<p>&language-code;&country-code;()XML</p>
<p>
<eg><![CDATA[<p xml:lang="en">The quick brown fox jumps over the lazy dog.</p>
<p xml:lang="en-GB">What colour is it?</p>
<p xml:lang="en-US">What color is it?</p>
<sp who="Faust" desc='leise' xml:lang="de">
<l>Habe nun, ach! Philosophie,</l>
<l>Juristerei, und Medizin</l>
<l>und leider auch Theologie</l>
<l>]]><!-- x00DF german's es-zet; x00FC german's u-umlaut -->durchaus studiert mit hei&#223;em Bem&#252;h'n.<![CDATA[</l>
</sp>]]></eg></p>
<!--<p>xml:lang (&default-value;)(CDATA)-->
<p><code>xml:lang</code><code>xml:lang</code></p>
<!-- xml:lang DTD&default-value;xml:lang <term>xml:lang
<p xml:lang="en">Here the keywords are
<term xml:lang="en">shift</term> and
<term>reduce</term>. ...</p>
XML&processor;&application;
-->
<p>
&valid;&TR-or-Rec;
<eg>xml:lang NMTOKEN #IMPLIED</eg>
&default-value;xml:lang
<eg><![CDATA[ <!ATTLIST poem xml:lang NMTOKEN 'fr'>
<!ATTLIST gloss xml:lang NMTOKEN 'en'>
<!ATTLIST note xml:lang NMTOKEN 'en'>]]></eg>
</p>
<!--
DTD()()()&application;&TR-or-Rec;-->
</div2>
</div1>
<!-- &Elements; -->
<div1 id='sec-logical-struct'>
<head></head>
<p><termdef id="dt-element" term="Element"><termref def="dt-xml-doc">XML</termref><term></term>, <termref def="dt-stag"></termref><termref def="dt-etag"></termref><termref def="dt-empty"></termref><termref def="dt-eetag"></termref>(&identifier;(generic identifier)GI)&identified;</termdef><termref def="dt-attrname"></termref><termref def="dt-attrval"></termref></p>
<scrap lang='ebnf'><head></head>
<prod id='NT-element'><lhs>element</lhs>
<rhs><nt def='NT-EmptyElemTag'>EmptyElemTag</nt></rhs>
<rhs>| <nt def='NT-STag'>STag</nt> <nt def='NT-content'>content</nt>
<nt def='NT-ETag'>ETag</nt></rhs><wfc def='GIMatch'/></prod>
</scrap>
<p>&TR-or-Rec;()<code>(('X'|'x')('M'|'m')('L'|'l'))</code>&match;&TR-or-Rec;</p>
<wfcnote id='GIMatch'><head>&match;</head>
<p><nt def='NT-Name'></nt>&match;</p>
</wfcnote>
<div2 id='sec-starttags'>
<head></head>
<p><termdef id="dt-stag" term="Start-Tag">XML<term></term>&markup;
<scrap lang='ebnf'><head></head>
<prodgroup pcw2="6" pcw4="15" pcw5="11.5">
<prod id='NT-STag'><lhs>STag</lhs><rhs>'&lt;' <nt def='NT-Name'>Name</nt> (<nt def='NT-S'>S</nt> <nt def='NT-Attribute'>Attribute</nt>)* <nt def='NT-S'>S</nt>? '>'</rhs><wfc def="uniqattspec"/></prod>
<prod id='NT-Attribute'><lhs>Attribute</lhs><rhs><nt def='NT-Name'>Name</nt> <nt def='NT-Eq'>Eq</nt> <nt def='NT-AttValue'>AttValue</nt></rhs><vc def='ValueType'/><wfc def='NoExternalRefs'/><wfc def='CleanAttrVals'/></prod>
</prodgroup>
</scrap>
<nt def='NT-Name'>Name</nt><term></term></termdef><termdef id="dt-attr" term="Attribute"><nt def='NT-Name'>Name</nt><nt def='NT-AttValue'>AttValue</nt><term></term></termdef><termdef id="dt-attrname" term="Attribute Name"><nt def='NT-Name'>Name</nt><term></term></termdef><termdef id="dt-attrval" term="Attribute Value"><nt def='NT-AttValue'>AttValue</nt>(<code>'</code><code>"</code>&string;)<term></term></termdef></p>
<wfcnote id='uniqattspec'><head></head>
<p></p></wfcnote>
<vcnote id='ValueType'><head></head>
<p>(<titleref href='attdecls'></titleref>)</p></vcnote>
<wfcnote id='NoExternalRefs'><head></head>
<p></p></wfcnote>
<wfcnote id='CleanAttrVals'><head><code>&lt;</code></head>
<p>(<code>&amp;lt;</code>)<termref def='dt-repltext'>&replacement-text;</termref><code>&lt;</code></p></wfcnote>
<p>
<eg>&lt;termdef id="dt-dog" term="dog"></eg></p>
<p><termdef id="dt-etag" term="End Tag"><term></term>&markup;
<scrap lang='ebnf'><head></head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id='NT-ETag'><lhs>ETag</lhs><rhs>'&lt;/' <nt def='NT-Name'>Name</nt> <nt def='NT-S'>S</nt>? '>'</rhs></prod></prodgroup></scrap></termdef></p>
<p>
<eg>&lt;/termdef></eg></p>
<p><termdef id="dt-content" term="Content"><termref def='dt-text'></termref><term></term>
<scrap lang='ebnf'><head></head>
<prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id='NT-content'><lhs>content</lhs><rhs>(<nt def='NT-element'>element</nt> | <nt def='NT-CharData'>CharData</nt> | <nt def='NT-Reference'>Reference</nt> | <nt def='NT-CDSect'>CDSect</nt> | <nt def='NT-PI'>PI</nt> | <nt def='NT-Comment'>Comment</nt>)*</rhs></prod></prodgroup></scrap></termdef></p>
<p><termdef id="dt-empty" term="Empty"><term></term></termdef><termdef id="dt-eetag" term="empty-element tag"><term></term>
<scrap lang='ebnf'><head></head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id='NT-EmptyElemTag'><lhs>EmptyElemTag</lhs><rhs>'&lt;' <nt def='NT-Name'>Name</nt> (<nt def='NT-S'>S</nt> <nt def='NT-Attribute'>Attribute</nt>)* <nt def='NT-S'>S</nt>? '/&gt;'</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap></termdef></p>
<!-- (tags for empty elements)(empty-element tag) -->
<p><kw>EMPTY</kw></p>
<p>
<eg>&lt;IMG align="left" src="http://www.w3.org/Icons/WWW/w3c_home" />
&lt;br>&lt;/br>
&lt;br/></eg></p>
</div2>
<div2 id='elemdecls'><head></head>
<p><termref def="dt-valid">&validity;</termref><termref def="dt-xml-doc">XML</termref><termref def="dt-element"></termref></p>
<p><termref def="dt-content"></termref></p>
<p><termref def="dt-parentchild"></termref>&at-user-option;XML&processor;&error;</p>
<p><termdef id="dt-eldecl" term="Element Type declaration"><term></term>
<scrap lang='ebnf'><head></head><prodgroup pcw2="5.5" pcw4="18" pcw5="9">
<prod id='NT-elementdecl'><lhs>elementdecl</lhs>
<rhs>'&lt;!ELEMENT' <nt def='NT-S'>S</nt>
<nt def='NT-Name'>Name</nt>
<nt def='NT-S'>S</nt>
<nt def='NT-contentspec'>contentspec</nt>
<nt def='NT-S'>S</nt>? '>'</rhs>
<vc def='EDUnique'/></prod>
<prod id='NT-contentspec'><lhs>contentspec</lhs>
<rhs>'EMPTY'
| 'ANY'
| <nt def='NT-Mixed'>Mixed</nt>
| <nt def='NT-children'>children</nt>
</rhs>
<vc def='elementvalid'/>
</prod>
</prodgroup>
</scrap>
<nt def='NT-Name'>Name</nt></termdef></p>
<vcnote id='EDUnique'><head></head>
<p></p></vcnote>
<vcnote id='elementvalid'><head>&validity;</head>
<p><!-- said to be -->&valid;<nt def='NT-elementdecl'>elementdecl</nt>&match;<nt def='NT-Name'>Name</nt>&match;<ulist>
<item><p>a) <kw>EMPTY</kw>&match;<termref def='dt-content'></termref></p></item>
<item><p>b) <nt def='NT-children'>children</nt>&match;<termref def="dt-parentchild"></termref></p></item>
<item><p>c) <nt def='NT-Mixed'>Mixed</nt>&match;<termref def='dt-chardata'></termref><termref def='dt-parentchild'></termref>&match;</p></item>
<item><p>d) <kw>ANY</kw>&match;<termref def='dt-parentchild'></termref></p></item></ulist>
</p></vcnote>
<!-- with the new VC, I don't think the next few paras add anything -TWB
<p><termdef id="dt-model" term="content model">An element can be declared using a <term>content model</term>, in which case its content can be categorized as <termref def="dt-elemcontent">element content</termref> or <termref def='dt-mixed'>mixed content</termref>, as explained below.</termdef></p>
<p>An element whose type is declared using the keyword <kw>EMPTY</kw> must be <termref def="dt-empty">empty</termref> and may be tagged using an <termref def="dt-eetag">empty-element tag</termref> when it appears in the document.</p>
<p>If an element type is declared using the keyword <kw>ANY</kw>, then there are no validity constraints on its content: it may contain <termref def='dt-parentchild'>child elements</termref> of any type and number, interspersed with character data.</p> -->
<p>
<eg>
&lt;!ELEMENT br EMPTY>
&lt;!ELEMENT p (#PCDATA|emph)* >
&lt;!ELEMENT %name.para; %content.para; >
&lt;!ELEMENT container ANY>
</eg></p>
<div3 id='sec-element-content'><head></head>
<p><termdef id='dt-elemcontent' term='Element content'><termref def='dt-parentchild'></termref>()<termref def="dt-stag"></termref><term></term></termdef>&content-particle;(<nt def='NT-cp'>cp</nt>s)&content-particle;&content-particle;&content-particle;
<scrap lang='ebnf'><head></head><prodgroup pcw2="5.5" pcw4="16" pcw5="11">
<prod id='NT-children'><lhs>children</lhs><rhs>(<nt def='NT-choice'>choice</nt> | <nt def='NT-seq'>seq</nt>) ('?' | '*' | '+')?</rhs></prod><prod id='NT-cp'><lhs>cp</lhs><rhs>(<nt def='NT-Name'>Name</nt> | <nt def='NT-choice'>choice</nt> | <nt def='NT-seq'>seq</nt>) ('?' | '*' | '+')?</rhs></prod>
<prod id='NT-choice'><lhs>choice</lhs><rhs>'(' <nt def='NT-S'>S</nt>? <nt def='NT-cp'>cp</nt> ( <nt def='NT-S'>S</nt>? '|' <nt def='NT-S'>S</nt>? <nt def='NT-cp'>cp</nt> )*<nt def='NT-S'>S</nt>? ')'</rhs><vc def='vc-PEinGroup'/></prod>
<prod id='NT-seq'><lhs>seq</lhs><rhs>'(' <nt def='NT-S'>S</nt>? <nt def='NT-cp'>cp</nt> ( <nt def='NT-S'>S</nt>? ',' <nt def='NT-S'>S</nt>? <nt def='NT-cp'>cp</nt> )*<nt def='NT-S'>S</nt>? ')'</rhs><vc def='vc-PEinGroup'/></prod>
<!-- <prod id='NT-cps'><lhs>cps</lhs><rhs><nt def='NT-S'>S</nt>? <nt def='NT-cp'>cp</nt> <nt def='NT-S'>S</nt>?</rhs></prod>
<prod id='NT-choice'><lhs>choice</lhs><rhs>'(' <nt def='NT-S'>S</nt>? <nt def='NT-ctokplus'>ctokplus</nt> (<nt def='NT-S'>S</nt>? '|' <nt def='NT-S'>S</nt>? <nt def='NT-ctoks'>ctoks</nt>)* <nt def='NT-S'>S</nt>? ')'</rhs></prod>
<prod id="NT-ctokplus"><lhs>ctokplus</lhs><rhs><nt def="NT-cps">cps</nt>('|' <nt def="NT-cps">cps</nt>)+</rhs></prod>
<prod id="NT-ctoks"><lhs>ctoks</lhs><rhs><nt def="NT-cps">cps</nt>('|' <nt def="NT-cps">cps</nt>)*</rhs></prod>
<prod id='NT-seq'><lhs>seq</lhs><rhs>'(' <nt def='NT-S'>S</nt>?<nt def='NT-stoks'>stoks</nt> (<nt def='NT-S'>S</nt>? ',' <nt def='NT-S'>S</nt>? <nt def='NT-stoks'>stoks</nt>)*<nt def='NT-S'>S</nt>? ')'</rhs></prod>
<prod id="NT-stoks"><lhs>stoks</lhs><rhs><nt def="NT-cps">cps</nt>(',' <nt def="NT-cps">cps</nt>)*</rhs></prod>
-->
</prodgroup></scrap>
<nt def='NT-Name'>Name</nt><termref def="dt-parentchild"></termref>&content-particle;<termref def="dt-elemcontent"></termref>&content-particle;<termref def="dt-elemcontent"></termref><!-- character -->&content-particle;1(<code>+</code>)0(<code>*</code>)01(<code>?</code>)&TR-or-Rec;</p>
<!-- namelistlistname -->
<p>&match;&match;<termref def='dt-compat'></termref>&match;&error;<titleref xml-link="simple" href="determinism"></titleref>
<!-- appendix <specref ref="determinism"/>. -->
<!-- appendix on deterministic content models. -->
</p>
<vcnote id='vc-PEinGroup'>
<head></head>
<p><termref def='dt-repltext'>&replacement-text;</termref>&parenthesis;<nt def='NT-choice'></nt><nt def='NT-seq'></nt><nt def='NT-Mixed'></nt>&left-parenthesis;&right-parenthesis;<termref def='dt-PERef'></termref>&replacement-text;&replacement-text;</p>
<p><termref def='dt-interop'></termref><nt def='NT-choice'></nt><nt def='NT-seq'></nt><nt def='NT-Mixed'></nt>&replacement-text;&replacement-text;(<code>|</code><code>,</code>)
</p>
</vcnote>
<p>
<eg>&lt;!ELEMENT spec (front, body, back?)>
&lt;!ELEMENT div1 (head, (p | list | note)*, div2*)>
&lt;!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*></eg></p>
</div3>
<div3 id='sec-mixed-content'>
<head>&mixed-content;</head>
<p><termdef id='dt-mixed' term='Mixed Content'><termref def="dt-parentchild"></termref><termref def='dt-stag'></termref><term>&mixed-content;</term></termdef><!-- may be constrained -->
<scrap lang='ebnf'>
<head>&mixed-content;</head>
<prodgroup pcw2="5.5" pcw4="16" pcw5="11">
<prod id='NT-Mixed'><lhs>Mixed</lhs>
<rhs>'(' <nt def='NT-S'>S</nt>?
'#PCDATA'
(<nt def='NT-S'>S</nt>?
'|'
<nt def='NT-S'>S</nt>?
<nt def='NT-Name'>Name</nt>)*
<nt def='NT-S'>S</nt>?
')*' </rhs>
<rhs>| '(' <nt def='NT-S'>S</nt>? '#PCDATA' <nt def='NT-S'>S</nt>? ')'
</rhs><vc def='vc-PEinGroup'/>
<vc def='vc-MixedChildrenUnique'/>
</prod>
<!--
<prod id="NT-Mtoks"><lhs>Mtoks</lhs>
<rhs><nt def="NT-Name">Name</nt>
(<nt def='NT-S'>S</nt>?
'|'
<nt def='NT-S'>S</nt>?
<nt def="NT-Name">Name</nt>)*
</rhs>
</prod>
-->
</prodgroup>
</scrap>
<nt def='NT-Name'>Name</nt>
</p>
<vcnote id='vc-MixedChildrenUnique'>
<head></head>
<p>&mixed-content;
</p></vcnote>
<p>&mixed-content;
<eg>&lt;!ELEMENT p (#PCDATA|a|ul|b|i|em)*>
&lt;!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* >
&lt;!ELEMENT b (#PCDATA)></eg></p>
</div3>
</div2>
<div2 id='attdecls'>
<head></head>
<p>
<termref def="dt-attr"></termref><termref def="dt-element"></termref><termref def="dt-stag"></termref><termref def="dt-eetag"></termref><titleref href='sec-starttags'></titleref>
<ulist>
<item><p>a) </p></item>
<item><p>b) </p></item>
<item><p>c) <termref def="dt-default">&default-value;</termref></p></item>
</ulist>
</p>
<p>
<termdef id="dt-attdecl" term="Attribute-List Declaration">
<term></term>()&default-value;
<scrap lang='ebnf'>
<head></head>
<prod id='NT-AttlistDecl'><lhs>AttlistDecl</lhs>
<rhs>'&lt;!ATTLIST' <nt def='NT-S'>S</nt>
<nt def='NT-Name'>Name</nt>
<nt def='NT-AttDef'>AttDef</nt>*
<nt def='NT-S'>S</nt>? '&gt;'</rhs>
</prod>
<prod id='NT-AttDef'><lhs>AttDef</lhs>
<rhs><nt def='NT-S'>S</nt> <nt def='NT-Name'>Name</nt>
<nt def='NT-S'>S</nt> <nt def='NT-AttType'>AttType</nt>
<nt def='NT-S'>S</nt> <nt def='NT-Default'>Default</nt></rhs>
</prod>
</scrap>
<nt def='NT-AttlistDecl'>AttlistDecl</nt><nt def="NT-Name">Name</nt>&at-user-option;XML&processor;&error;
<nt def='NT-AttDef'>AttDef</nt><nt def='NT-Name'>Name</nt>
</termdef>
</p>
<p>
<nt def='NT-AttlistDecl'>AttlistDecl</nt><termref def='dt-interop'></termref>DTDXML&processor;&at-user-option;&error;
</p>
<div3 id='sec-attribute-types'>
<head></head>
<p>
XML&string;&token;&string;&string;&token;
<scrap lang='ebnf'>
<head>Attribute Types</head>
<prodgroup pcw4="14" pcw5="11.5">
<prod id='NT-AttType'><lhs>AttType</lhs>
<rhs><nt def='NT-StringType'>StringType</nt>
| <nt def='NT-TokenizedType'>TokenizedType</nt>
| <nt def='NT-EnumeratedType'>EnumeratedType</nt>
</rhs>
</prod>
<prod id='NT-StringType'><lhs>StringType</lhs>
<rhs>'CDATA'</rhs>
</prod>
<prod id='NT-TokenizedType'><lhs>TokenizedType</lhs>
<rhs>'ID'</rhs>
<vc def='id'/>
<vc def='one-id-per-el'/>
<vc def='id-default'/>
<rhs>| 'IDREF'</rhs>
<vc def='idref'/>
<rhs>| 'IDREFS'</rhs>
<vc def='idref'/>
<rhs>| 'ENTITY'</rhs>
<vc def='entname'/>
<rhs>| 'ENTITIES'</rhs>
<vc def='entname'/>
<rhs>| 'NMTOKEN'</rhs>
<vc def='nmtok'/>
<rhs>| 'NMTOKENS'</rhs>
<vc def='nmtok'/></prod>
</prodgroup>
</scrap>
</p>
<vcnote id='id' >
<head>ID</head>
<p>
<code>Name</code>&match;XMLID&identify;
</p>
</vcnote>
<vcnote id='one-id-per-el'>
<head>11ID</head>
<p>
ID
</p>
</vcnote>
<vcnote id='id-default'>
<head>ID&default;</head>
<p>
ID&default;<code>#IMPLIED</code><code>#REQUIRED</code>
</p>
</vcnote>
<vcnote id='idref'>
<head>IDREF</head>
<p>
<kw>IDREF</kw><nt def="NT-Name">Name</nt>&match;<kw>IDREFS</kw><nt def="NT-Names">Names</nt>&match;<nt def='NT-Name'>Name</nt>XMLID&match;<kw>IDREF</kw>ID&match;
</p>
</vcnote>
<vcnote id='entname'>
<head></head>
<p>
<kw>ENTITY</kw><nt def="NT-Name">Name</nt>&match;<kw>ENTITIES</kw><nt def="NT-Names">Names</nt>&match;<nt def="NT-Name">Name</nt><termref def="dt-doctype">DTD</termref><termref def="dt-unparsed">&unparsed-entity;</termref>&match;
</p>
</vcnote>
<vcnote id='nmtok'>
<head>&token;</head>
<p>
<kw>NMTOKEN</kw><nt def="NT-Nmtoken">Nmtoken</nt>&match;&string;<kw>NMTOKENS</kw><nt def="NT-Nmtokens">Nmtokens</nt>&match;&string;
</p>
</vcnote>
<p>
XML&processor;&application;<titleref href="AVNormalize"></titleref>
</p>
<p>
<termdef id='dt-enumerated' term='Enumerated Attribute Values'><term></term></termdef>2
<scrap lang='ebnf'>
<head></head>
<prod id='NT-EnumeratedType'><lhs>EnumeratedType</lhs>
<rhs><nt def='NT-NotationType'>NotationType</nt>
| <nt def='NT-Enumeration'>Enumeration</nt>
</rhs></prod>
<prod id='NT-NotationType'><lhs>NotationType</lhs>
<rhs>'NOTATION'
<nt def='NT-S'>S</nt>
'('
<nt def='NT-S'>S</nt>?
<nt def='NT-Name'>Name</nt>
(<nt def='NT-S'>S</nt>? '|' <nt def='NT-S'>S</nt>? <nt def='NT-Name'>Name</nt>)*
<nt def='NT-S'>S</nt>? ')'
</rhs>
<vc def='notatn' /></prod>
<prod id='NT-Enumeration'><lhs>Enumeration</lhs>
<rhs>'(' <nt def='NT-S'>S</nt>?
<nt def='NT-Nmtoken'>Nmtoken</nt>
(<nt def='NT-S'>S</nt>? '|'
<nt def='NT-S'>S</nt>?
<nt def='NT-Nmtoken'>Nmtoken</nt>)*
<nt def='NT-S'>S</nt>?
')'</rhs>
<vc def='enum'/></prod>
</scrap>
</p>
<vcnote id='notatn'>
<head></head>
<p><titleref href='Notations'></titleref>&match;
</p>
</vcnote>
<vcnote id='enum'>
<head></head>
<p>
<nt def='NT-Nmtoken'>Nmtoken</nt>&token;&match;
</p>
</vcnote>
<p>
<termref def='dt-interop'></termref><nt def='NT-Nmtoken'>Nmtoken</nt>
</p>
</div3>
<div3 id='sec-attr-defaults'>
<head>&default;</head>
<p>
<termref def="dt-attdecl"></termref>XML&processor;
<scrap lang='ebnf'>
<head>&default;</head>
<prodgroup pcw4="14" pcw5="11.5">
<prod id='NT-Default'><lhs>Default</lhs>
<rhs>'#REQUIRED'
|&nbsp;'#IMPLIED' </rhs>
<rhs>| (('#FIXED' <nt def='NT-S'>S</nt>)? <nt def='NT-AttValue'>AttValue</nt>)</rhs>
<vc def='defattrvalid'/>
<wfc def="CleanAttrVals"/>
</prod>
</prodgroup>
</scrap>
<!-- improved by bosak
<scrap lang='ebnf'>
<head>Attribute Defaults</head>
<prod id='NT-Default'><lhs>Default</lhs>
<rhs>'#REQUIRED'
|&nbsp;'#IMPLIED' </rhs>
<vc def='defattrvalid'/>
<wfc def="CleanAttrVals"/>
<rhs>| (('#FIXED' S)? <nt def='NT-AttValue'>AttValue</nt>)</rhs>
</prod>
</scrap>-->
</p>
<vcnote id='defattrvalid'>
<head>&default;</head>
<p>
&default-value;
</p>
</vcnote>
<p>
<kw>#REQUIRED</kw><termref def="dt-stag"></termref>XML&processor;<termref def="dt-valid">&valid;</termref><kw>#IMPLIED</kw>XML&processor;&application;
</p>
<p>
<termdef id="dt-default" term="Attribute Default">
<kw>#REQUIRED</kw><kw>#IMPLIED</kw><nt def='NT-AttValue'>AttValue</nt><term>&default-value;</term><kw>#FIXED</kw>&default-value;<termref def="dt-valid">&valid;</termref>&default-value;&default-value;XML&processor;
</termdef></p>
<p>
<eg>&lt;!ATTLIST termdef
id ID #REQUIRED
name CDATA #IMPLIED>
&lt;!ATTLIST list
type (bullets|ordered|glossary) "ordered">
&lt;!ATTLIST form
method CDATA #FIXED "POST"></eg></p>
</div3>
<div3 id='AVNormalize'>
<head></head>
<p>
XML&processor;&application;
<ulist>
<item>
<p>a) ()&string;&space-character;(#x20)(<titleref xml-link="simple" href="sec-line-ends"></titleref>)
</p></item>
<item>
<p>b) &parsed-entity;&error;
</p></item>
<item>
<p>c) <kw>CDATA</kw>&string;&space-character;(#x20)
</p></item>
</ulist>
&non-validating;&parser;<kw>CDATA</kw></p>
</div3>
</div2>
<div2 id='sec-condition-sect'>
<head></head>
<p>
<termdef id='dt-cond-section' term='conditional section'>
<term></term><termref def='dt-doctype'>&subset;</termref>DTD
</termdef>
<scrap lang='ebnf'>
<head></head>
<prodgroup pcw2="9" pcw4="14.5">
<prod id='NT-conditionalSect'><lhs>conditionalSect</lhs>
<rhs><nt def='NT-includeSect'>includeSect</nt>
| <nt def='NT-ignoreSect'>ignoreSect</nt>
</rhs>
</prod>
<prod id='NT-includeSect'><lhs>includeSect</lhs>
<rhs>'&lt;![' S? 'INCLUDE' S? '['
<!-- (<nt def='NT-markupdecl'>markupdecl</nt>
| <nt def="NT-conditionalSect">conditionalSect</nt>
| <nt def="NT-S">S</nt>)*
-->
<nt def="NT-extSubset">extSubset</nt>
']]&gt;'
</rhs>
</prod>
<prod id='NT-ignoreSect'><lhs>ignoreSect</lhs>
<rhs>'&lt;![' S? 'IGNORE' S? '['
<nt def="NT-ignoreSectContents">ignoreSectContents</nt>*
']]&gt;'</rhs>
</prod>
<prod id='NT-ignoreSectContents'><lhs>ignoreSectContents</lhs>
<rhs><nt def='NT-Ignore'>Ignore</nt>
('&lt;![' <nt def='NT-ignoreSectContents'>ignoreSectContents</nt>
']]&gt;'
<nt def='NT-Ignore'>Ignore</nt>)*</rhs></prod>
<prod id='NT-Ignore'><lhs>Ignore</lhs>
<rhs><nt def='NT-Char'>Char</nt>* -
(<nt def='NT-Char'>Char</nt>* ('&lt;![' | ']]&gt;')
<nt def='NT-Char'>Char</nt>*)
</rhs></prod>
<!--<rhs>
((<nt def='NT-SkipLit'>SkipLit</nt>
| <nt def='NT-Comment'>Comment</nt>
| <nt def='NT-PI'>PI</nt>) -
(<nt def='NT-Char'>Char</nt>* ']]&gt;' <nt def='NT-Char'>Char</nt>*))
| ('&lt;![' <nt def='NT-ignoreSectContents'>ignoreSectContents</nt>*
']]&gt;')
</rhs>
<rhs>
| (<nt def='NT-Char'>Char</nt> - (']' | [&lt;'"]))
</rhs>
<rhs>
| ('&lt;!' (<nt def='NT-Char'>Char</nt> - ('-' | '[')))
</rhs>-->
</prodgroup>
</scrap>
</p>
<p>DTD&subset;&subset;
</p>
<p>
<code>INCLUDE</code>XML&processor;<code>IGNORE</code>(IGNORE)()<code>INCLUDE</code><code>IGNORE</code>
</p>
<p>
XML&processor;
</p>
<p>
<eg>&lt;!ENTITY % draft 'INCLUDE' >
&lt;!ENTITY % final 'IGNORE' >
&lt;![%draft;[
&lt;!ELEMENT book (comments*, title, body, supplements?)>
]]&gt;
&lt;![%final;[
&lt;!ELEMENT book (title, body, supplements?)>
]]&gt;
</eg>
</p>
</div2>
<!--
<div2 id='sec-pass-to-app'>
<head>XML Processor Treatment of Logical Structure</head>
<p>When an XML processor encounters a start-tag, it must make
at least the following information available to the application:
<ulist>
<item>
<p>the element type's generic identifier</p>
</item>
<item>
<p>the names of attributes known to apply to this element type
(validating processors must make available names of all attributes
declared for the element type; non-validating processors must
make available at least the names of the attributes for which
values are specified.
</p>
</item>
</ulist>
</p>
</div2>
-->
</div1>
<!-- &Entities; -->
<div1 id='sec-physical-struct'>
<head></head>
<p>
<termdef id="dt-entity" term="Entity">
XML<term></term><term></term>()<termref def='dt-doctype'>DTD&subset;</termref><term></term>&identified;
</termdef>
<!-- Added for CFG -->
<!-- obscurity amputated by TWB -->
<!-- entire sentence amputated by CMSMcQ: no one but NO ONE is
ready for entities declared as
<!ENTITY foo "http://www.foo.com/bar.xml#id">
and it's pointless to suggest that it's possible
under current circumstances. -->
<!-- An entity may be stored in, -->
<!--but need not be coterminous with, -->
<!-- but need not comprise the whole of, -->
<!-- a single physical storage object such as a file or -->
<!-- database field. -->
<!-- End sentence added for CFG -->XML<termref def="dt-docent"></termref><termref def="dt-xml-proc">XML&processor;</termref></p>
<p>&parsed-entity;&unparsed-entity;<termdef id="dt-parsedent" term="Text Entity"><term>&parsed-entity;</term>&parsed-entity;<termref def='dt-repltext'>&replacement-text;</termref><termref def="dt-text"></termref>
</termdef>
</p>
<p>
<termdef id="dt-unparsed" term="Unparsed Entity">
<term>&unparsed-entity;</term><termref def='dt-text'></termref>XML&unparsed-entity;<termref def="dt-notation"></termref>&identified;&identifier;XML&processor;&application;XML&unparsed-entity;
</termdef>
</p>
<p>&parsed-entity;&unparsed-entity;<kw>ENTITY</kw><kw>ENTITIES</kw></p>
<p>
<termdef id='gen-entity' term='general entity'><term></term>&parsed-entity;&TR-or-Rec;<emph></emph></termdef><termdef id='dt-PE' term='Parameter entity'>DTD&parsed-entity;</termdef></p>
<!--
<div2 id='sec-synchro'>
<head>Logical and Physical Structures</head>
<p>The logical and physical structures (elements and entities)
in an XML document must
be properly nested.
<termref def='dt-stag'>Tags</termref> and <termref def='dt-element'>elements</termref> must
each begin and end in the same <termref def='dt-entity'>entity</termref>, but may
refer to other
entities internally; <termref def='dt-comment'>comments</termref>,
<termref def='dt-pi'>processing instructions</termref>,
<termref def='dt-charref'>character
references</termref>, and
<termref def='dt-entref'>entity references</termref> must each be contained entirely
within a single entity. Entities must each contain an integral number
of elements, comments, processing instructions, and references,
possibly together with character data not contained within any element
in the entity, or else they must contain non-textual data, which by
definition contains no elements.</p></div2>
-->
<div2 id='sec-references'>
<head></head>
<p>
<termdef id="dt-charref" term="Character Reference">
<term></term>ISO/IEC 10646
<scrap lang='ebnf'>
<head></head>
<prod id='NT-CharRef'><lhs>CharRef</lhs>
<rhs>'&amp;#' [0-9]+ ';' </rhs>
<rhs>| '&hcro;' [0-9a-fA-F]+ ';'</rhs>
<wfc def="wf-Legalchar"/>
</prod>
</scrap>
<wfcnote id="wf-Legalchar">
<head></head>
<p><nt def="NT-Char">Char</nt></p>
</wfcnote>
"<code>&amp;#x</code>" "<code>;</code>" ISO/IEC 10646 16
<!--: letter -->
"<code>&amp;#</code>" "<code>;</code>" 10
</termdef>
</p>
<p>
<termdef id="dt-entref" term="Entity Reference">
<term></term></termdef><termdef id='dt-GERef' term='General Entity Reference'>(<code>&amp;</code>)(<code>;</code>)</termdef><termdef id='dt-PERef' term='Parameter-entity reference'><term></term>(<code>%</code>)(<code>;</code>)
</termdef>
</p>
<scrap lang="ebnf">
<head></head>
<prod id='NT-Reference'><lhs>Reference</lhs>
<rhs><nt def='NT-EntityRef'>EntityRef</nt>
| <nt def='NT-CharRef'>CharRef</nt></rhs></prod>
<prod id='NT-EntityRef'><lhs>EntityRef</lhs>
<rhs>'&amp;' <nt def='NT-Name'>Name</nt> ';'</rhs>
<wfc def='wf-entdeclared'/>
<vc def='vc-entdeclared'/>
<wfc def='textent'/>
<wfc def='norecursion'/>
</prod>
<prod id='NT-PEReference'><lhs>PEReference</lhs>
<rhs>'%' <nt def='NT-Name'>Name</nt> ';'</rhs>
<wfc def='wf-entdeclared'/>
<vc def='vc-entdeclared'/>
<wfc def='textent'/>
<wfc def='norecursion'/>
<wfc def='indtd'/>
</prod>
</scrap>
<wfcnote id='wf-entdeclared'>
<head></head>
<p>DTDDTD&subset; "<code>standalone='yes'</code>" <nt def='NT-Name'>Name</nt> <termref def="dt-match">&match;</termref>&well-formed;&magicents; &default-value;</p>
<p>&subset;&non-validating;&processor;<titleref href='include-if-valid'></titleref>&well-formed;
</p>
</wfcnote>
<vcnote id="vc-entdeclared">
<head></head>
<p>
&subset;"<code>standalone='no'</code>" <nt def='NT-Name'>Name</nt> <termref def="dt-match">&match;</termref>&valid;<titleref href="sec-escapes"></titleref> &magicents;&default-value;
</p>
</vcnote>
<!-- FINAL EDIT: is this duplication too clumsy? -->
<wfcnote id='textent'>
<head>&parsed-entity;</head>
<p>
<termref def="dt-unparsed">&unparsed-entity;</termref>&unparsed-entity;<kw>ENTITY</kw><kw>ENTITIES</kw> <termref def="dt-attrval"></termref>
</p>
</wfcnote>
<wfcnote id='norecursion'>
<head></head>
<p>&parsed-entity;</p>
</wfcnote>
<wfcnote id='indtd'>
<head>DTD</head>
<p>
<termref def='dt-doctype'>DTD</termref>
<!--
In the external DTD subset, a parameter-entity reference is
recognized only at the locations where
the nonterminal <nt def="NT-PEReference">PEReference</nt> or the
special operator <code>%</code> appears in a production of the
grammar. In the internal subset, parameter-entity references
are recognized only when they match
the <nt def="NT-InternalPERef">InternalPERef</nt> non-terminal
in the production for <nt def="NT-markupdecl">markupdecl</nt>.
-->
</p>
</wfcnote>
<p>
<eg>Type &lt;key>less-than&lt;/key> (&hcro;3C;) to save options.
This document was prepared on &amp;docdate; and
is classified &amp;security-level;.</eg>
</p>
<p>
<eg>&lt;!ENTITY % ISOLat2
SYSTEM "http://www.xml.com/iso/isolat2-xml.entities" >
%ISOLat2;
</eg>
</p>
</div2>
<div2 id='sec-entity-decl'>
<head></head>
<p>
<termdef id="dt-entdecl" term="entity declaration">
<scrap lang='ebnf'>
<head></head>
<prodgroup pcw2="5" pcw4="18.5">
<prod id='NT-EntityDecl'><lhs>EntityDecl</lhs>
<rhs><nt def="NT-GEDecl">GEDecl</nt></rhs><com></com>
<rhs>| <nt def="NT-PEDecl">PEDecl</nt></rhs><com></com>
</prod>
<prod id='NT-GEDecl'><lhs>GEDecl</lhs>
<rhs>'&lt;!ENTITY' <nt def='NT-S'>S</nt> <nt def='NT-Name'>Name</nt>
<nt def='NT-S'>S</nt> <nt def='NT-EntityDef'>EntityDef</nt>
<nt def='NT-S'>S</nt>? '&gt;'</rhs>
</prod>
<prod id='NT-PEDecl'><lhs>PEDecl</lhs>
<rhs>'&lt;!ENTITY' <nt def='NT-S'>S</nt> '%' <nt def='NT-S'>S</nt>
<nt def='NT-Name'>Name</nt> <nt def='NT-S'>S</nt>
<nt def='NT-PEDef'>PEDef</nt> <nt def='NT-S'>S</nt>? '&gt;'</rhs>
<com></com>
</prod>
<prod id='NT-EntityDef'><lhs>EntityDef</lhs>
<rhs><nt def='NT-EntityValue'>EntityValue</nt>
</rhs>
<!--<wfc def="WF-EntityValue"/>-->
<rhs>| <nt def='NT-ExternalDef'>ExternalDef</nt></rhs>
<!--<wfc def="WF-External"/>-->
</prod>
<!-- FINAL EDIT: what happened to WFs here? -->
<prod id='NT-PEDef'><lhs>PEDef</lhs>
<rhs><nt def='NT-EntityValue'>EntityValue</nt>
| <nt def='NT-ExternalID'>ExternalID</nt></rhs></prod>
</prodgroup>
</scrap>
<nt def='NT-Name'>Name</nt> <termref def="dt-entref"></termref>&identify;&unparsed-entity;<kw>ENTITY</kw> <kw>ENTITIES</kw>&identify;&at-user-option;XML&processor;
</termdef>
</p>
<!--
<wfcnote id="WF-Entityvalue">
<head>Well-Formed Internal Entity</head>
<p>General entities defined by an <nt
def="NT-EntityValue">EntityValue</nt> must be well-formed, as defined
in section <specref ref="wf-entities"/>.
</p>
</wfcnote>
<wfcnote id="WF-External">
<head>Well-Formed External Entity</head>
<p>General text entities defined by an <nt
def="NT-ExternalDef">ExternalDef</nt>, must be well-formed, as defined
in the section on <titleref xml-link="simple"
href="wf-entities">well-formed entities.</titleref>.</p>
</wfcnote>
-->
<div3 id='sec-internal-ent'>
<head></head>
<p>
<termdef id='dt-internent' term="Internal Entity Replacement Text">
<nt def='NT-EntityValue'>EntityValue</nt><term></term></termdef><termref def='dt-repltext'>&replacement-text;</termref><termref def='dt-litentval'>&literal;</termref><titleref href='intern-replacement'>&replacement-text;</titleref>
<!-- redundant -TWB
Within the <nt def="NT-EntityValue">EntityValue</nt>,
parameter-entity references and character references are recognized
and expanded immediately.
General-entity references within the
replacement text are not recognized
at the time the entity declaration is parsed, though they may be
recognized when the entity itself is referred to.
-->
</p>
<p>
<termref def="dt-parsedent">&parsed-entity;</termref>
</p>
<p>
<eg>&lt;!ENTITY Pub-Status "This is a pre-release of the specification."></eg></p>
</div3>
<div3 id='sec-external-ent'>
<head></head>
<p>
<termdef id="dt-extent" term="External Entity">
<term></term>
<scrap lang='ebnf'>
<head></head>
<prod id='NT-ExternalDef'><lhs>ExternalDef</lhs>
<rhs><nt def='NT-ExternalID'>ExternalID</nt>
<nt def='NT-NDataDecl'>NDataDecl</nt>?</rhs></prod>
<prod id='NT-ExternalID'><lhs>ExternalID</lhs>
<rhs>'SYSTEM' <nt def='NT-S'>S</nt>
<nt def='NT-SystemLiteral'>SystemLiteral</nt></rhs>
<rhs>| 'PUBLIC' <nt def='NT-S'>S</nt>
<nt def='NT-PubidLiteral'>PubidLiteral</nt>
<nt def='NT-S'>S</nt>
<nt def='NT-SystemLiteral'>SystemLiteral</nt>
</rhs>
</prod>
<prod id='NT-NDataDecl'><lhs>NDataDecl</lhs>
<rhs><nt def='NT-S'>S</nt> 'NDATA' <nt def='NT-S'>S</nt>
<nt def='NT-Name'>Name</nt></rhs>
<vc def='not-declared'/></prod>
</scrap>
<nt def='NT-NDataDecl'>NDataDecl</nt> <termref def="dt-unparsed">&unparsed-entity;</termref>&parsed-entity;</termdef>
</p>
<vcnote id='not-declared'>
<head></head>
<p>
<nt def='NT-Name'>Name</nt> <termref def="dt-notation"></termref>&match;
</p>
</vcnote>
<p>
<termdef id="dt-sysid" term="System Identifier">
<kw>SYSTEM</kw> <nt def='NT-SystemLiteral'>SystemLiteral</nt> <term>&identifier;</term>URI</termdef>URI("<code>#</code>")&identifier;URI&identifier;&identifier;XML&processor;&error;&TR-or-Rec;(DTDXML&application;)URIDTD&subset;URI&subset;URI&subset;
</p>
<p>
<termdef id="dt-pubid" term="Public identifier">
&identifier;<term>&identifier;</term>
</termdef>
XML&processor;&identifier;URIXML&processor;&literal;URI&match;&identifier;&string;&space-character;(#x20)
</p>
<p>
<eg>&lt;!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
&lt;!ENTITY open-hatch
PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN"
"http://www.textuality.com/boilerplate/OpenHatch.xml">
&lt;!ENTITY hatch-pic
SYSTEM "../grafix/OpenHatch.gif"
NDATA gif ></eg></p>
</div3>
</div2>
<div2 id='TextEntities'>
<head>&parsed-entity;</head>
<div3 id='sec-TextDecl'>
<head></head>
<p>&parsed-entity;<term></term>
<scrap lang='ebnf'>
<head></head>
<prodgroup pcw4="12.5" pcw5="13">
<prod id='NT-TextDecl'><lhs>TextDecl</lhs>
<rhs>&xmlpio;
<nt def='NT-VersionInfo'>VersionInfo</nt>?
<nt def='NT-EncodingDecl'>EncodingDecl</nt>
<nt def='NT-S'>S</nt>? &pic;</rhs>
<!-- <wfc def='wfc-xmldecliteral'/> -->
<!-- <wfc def='wfc-no-nonleading-encdec'/> -->
</prod>
</prodgroup>
</scrap>
</p>
<p>&parsed-entity;</p>
<p>&parsed-entity;</p>
</div3>
<div3 id='wf-entities'>
<head>&well-formed;&parsed-entity;</head>
<p><nt def='NT-document'>document</nt>&match;&well-formed;<nt def='NT-ExtParsedEnt'>ExtParsedEnt</nt>&match;&parsed-entity;&well-formed;<nt def='NT-ExtPE'>ExtPE</nt>&match;&well-formed;
<scrap lang='ebnf'>
<head>&well-formed;&parsed-entity;</head>
<prod id='NT-ExtParsedEnt'><lhs>ExtParsedEnt</lhs>
<rhs><nt def='NT-TextDecl'>TextDecl</nt>?
<nt def='NT-content'>content</nt></rhs>
</prod>
<prod id='NT-ExtPE'><lhs>ExtPE</lhs>
<rhs><nt def='NT-TextDecl'>TextDecl</nt>?
<nt def='NT-extSubset'>extSubset</nt></rhs>
</prod>
</scrap>
&replacement-text;<nt def='NT-content'>content</nt>&match;&parsed-entity;&well-formed;DTD&well-formed;
</p>
<p>&well-formed;XML<termref def='dt-stag'></termref><termref def='dt-etag'></termref><termref def="dt-empty"></termref><termref def='dt-element'></termref><termref def='dt-comment'></termref><termref def='dt-pi'></termref><termref def='dt-charref'></termref><termref def='dt-entref'></termref></p>
</div3>
<div3 id='charencoding'>
<head></head>
<p>XML&parsed-entity;XML&processor;UTF-8UTF-16
<!--
It is recognized that for some purposes, the use of additional
ISO/IEC 10646 planes other than the Basic Multilingual Plane
may be required.
A facility for handling characters in these planes is therefore a
desirable characteristic in XML processors and applications.
-->
</p>
<p>UTF-16ISO/IEC 10646EUnicodeB&byte-order-mark;(ZERO WIDTH NO-BREAK SPACE#xFEFF)XML&markup;XML&processor;UTF-8UTF-16</p>
<p>XML&processor;UTF-8UTF-16XML&processor;UTF-8UTF-16&parsed-entity;<titleref href='sec-TextDecl'></titleref>
<scrap lang='ebnf'>
<head></head>
<prod id='NT-EncodingDecl'><lhs>EncodingDecl</lhs>
<rhs><nt def="NT-S">S</nt>
'encoding' <nt def='NT-Eq'>Eq</nt>
('"' <nt def='NT-EncName'>EncName</nt> '"' | "'"
<nt def='NT-EncName'>EncName</nt> "'")
</rhs>
</prod>
<prod id='NT-EncName'><lhs>EncName</lhs>
<rhs>[A-Za-z] ([A-Za-z0-9._] | '-')*</rhs>
<com></com>
</prod>
</scrap>
<termref def='dt-docent'></termref><termref def="dt-xmldecl">XML</termref><nt def="NT-EncName">EncName</nt>
</p>
<!-- FINAL EDIT: check name of IANA and charset names -->
<p><code>UTF-8</code><code>UTF-16</code><code>ISO-10646-UCS-2</code><code>ISO-10646-UCS-4</code>UnicodeISO/IEC 10646<code>ISO-8859-1</code><code>ISO-8859-9</code>ISO 8859<code>ISO-2022-JP</code><code>Shift_JIS</code><code>EUC-JP</code>JIS X-0208-1997XML&processor;Internet Assigned Numbers Authority (IANA)(<emph>charset</emph>s)&processor;</p>
<p>XML<termref def="dt-error">&error;</termref>
</p>
<p>&byte-order-mark;UTF-8</p>
<p><!-- XML processors should make an effort to use all available
information, internal and external, to aid in detecting an entity's correct
encoding. Such information may include, but is not limited to:
<ulist><item><p>An HTTP header</p></item>
<item><p>A MIME header obtained other than through HTTP</p></item>
<item><p>Metadata provided by the native OS file system or by document
management software</p></item>
<item><p>The bit patterns at the front of an entity, which may
be analyzed to determine if
the application of any known encoding yields a valid encoding
declaration. See <titleref href='sec-guessing'>the appendix on
autodetection of character sets</titleref>
for a fuller description.</p></item></ulist> -->
XML&processor;&application;<termref def='dt-fatal'>&fatal-error;</termref>
<!--
inform the application of this fact and
may
allow the application to
request either that the entity should be treated as an <termref
def="dt-unparsed">unparsed entity</termref>, or that processing should
cease.-->
</p>
<p>
<eg>&lt;?xml encoding='UTF-8'?>
&lt;?xml encoding='EUC-JP'?></eg></p>
</div3>
</div2>
<div2 id='entproc'>
<head>XML&processor;</head>
<p>&unparsed-entity;<termref def='dt-xml-proc'>XML&processor;</termref>
<glist>
<gitem><label></label>
<def><p><termref def='dt-stag'></termref><termref def='dt-etag'></termref><nt def='NT-content'>content</nt></p></def>
</gitem>
<gitem>
<label></label>
<def><p><termref def='dt-stag'></termref><termref def='dt-attdecl'></termref>&default-value;<nt def='NT-AttValue'>AttValue</nt></p></def></gitem>
<gitem>
<label></label>
<def><p><nt def='NT-Name'>Name</nt><code>ENTITY</code><code>ENTITIES</code>&space;&token;</p>
</def></gitem>
<gitem><label></label>
<def><p><termref def='dt-litentval'>&literal;</termref><nt def='NT-EntityValue'>EntityValue</nt></p></def></gitem>
<gitem><label>DTD</label>
<def><p><termref def='dt-doctype'>DTD</termref>&subset;&subset;<nt def='NT-EntityValue'>EntityValue</nt><nt def="NT-AttValue">AttValue</nt></p></def>
</gitem>
</glist></p>
<!-- border value changed by bosak -->
<htable border='1' cellpadding='7' align='center'>
<!-- tbody wrapper added by bosak -->
<htbody>
<tr><td bgcolor='&cellback;' rowspan='2' colspan='1'></td>
<td bgcolor='&cellback;' align='center' valign='bottom' colspan='4'></td>
<td bgcolor='&cellback;' rowspan='2' align='center'></td>
</tr>
<tr align='center' valign='bottom'>
<td bgcolor='&cellback;'></td>
<td bgcolor='&cellback;'>&newline;</td>
<td bgcolor='&cellback;'>&newline;&parsed-entity;&newline;</td>
<td bgcolor='&cellback;'>&unparsed-entity;</td>
</tr>
<tr align='center' valign='middle'>
<!--<td bgcolor='&cellback;' rowspan='4'>Recognition
Context</td>-->
<td bgcolor='&cellback;' align='right'>&newline;</td>
<td bgcolor='&cellback;'><titleref href='not-recognized'>&newline;</titleref></td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
<td bgcolor='&cellback;'><titleref href='include-if-valid'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
</tr>
<tr align='center' valign='middle'>
<td bgcolor='&cellback;' align='right'>&newline;</td>
<td bgcolor='&cellback;'><titleref href='not-recognized'>&newline;</titleref></td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
</tr>
<tr align='center' valign='middle'>
<td bgcolor='&cellback;' align='right'>&newline;</td>
<td bgcolor='&cellback;'><titleref href='not-recognized'>&newline;</titleref></td>
<td bgcolor='&cellback;'><titleref href='not-recognized'></titleref></td>
<td bgcolor='&cellback;'><titleref href='not-recognized'></titleref></td>
<td bgcolor='&cellback;'><titleref href='notify'></titleref></td>
<td bgcolor='&cellback;'><titleref href='not-recognized'>&newline;</titleref></td>
</tr>
<tr align='center' valign='middle'>
<td bgcolor='&cellback;' align='right'>&newline;</td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
<td bgcolor='&cellback;'><titleref href='bypass'>&bypass;</titleref></td>
<td bgcolor='&cellback;'><titleref href='bypass'>&bypass;</titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='included'></titleref></td>
</tr>
<tr align='center' valign='middle'>
<td bgcolor='&cellback;' align='right'>DTD&newline;</td>
<td bgcolor='&cellback;'><titleref href='as-PE'>PE&newline;</titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
<td bgcolor='&cellback;'><titleref href='forbidden'></titleref></td>
</tr>
</htbody>
</htable>
<div3 id='not-recognized'>
<head></head>
<p>DTD<code>%</code>DTD<nt def='NT-content'>content</nt>&markup;&unparsed-entity;
</p>
</div3>
<div3 id='included'>
<head></head>
<p><termdef id="dt-include" term="Include"><termref def='dt-repltext'>&replacement-text;</termref><term></term>&replacement-text;<termref def='dt-chardata'></termref>()<termref def="dt-markup">&markup;</termref>&markup;&escape;(&magicents;)&replacement-text;(&string;"<code>AT&amp;amp;T;</code>""<code>AT&amp;T;</code>")<term></term>
</termdef></p>
</div3>
<div3 id='include-if-valid'>
<head></head>
<p>&validity;<termref def="dt-valid"></termref>XML&processor;&parsed-entity;&replacement-text;<termref def="dt-include"></termref>XML&validity;&replacement-text;<termref def="dt-may"></termref></p>
<p>SGMLXML&application;()&parsed-entity;
</p>
</div3>
<div3 id='forbidden'>
<head></head>
<p><termref def='dt-fatal'>&fatal-error;</termref>
<ulist>
<item><p>a) <termref def='dt-unparsed'>&unparsed-entity;</termref>
</p></item>
<item><p>b) DTD<nt def='NT-EntityValue'>EntityValue</nt><nt def="NT-AttValue">AttValue</nt></p></item>
<item><p>c) </p>
</item>
</ulist>
</p>
</div3>
<div3 id='notify'>
<head></head>
<p><termref def='dt-unparsed'>&unparsed-entity;</termref><kw>ENTITY</kw><kw>ENTITIES</kw>&token;&processor;&application;<termref def="dt-notation"></termref><termref def='dt-sysid'></termref>&identifier;()<termref def='dt-pubid'></termref>&identifier;</p>
</div3>
<div3 id='bypass'>
<head>&bypass;</head>
<p><nt def='NT-EntityValue'>EntityValue</nt></p>
</div3>
<div3 id='as-PE'>
<head>PE</head>
<p>&parsed-entity;&validity;<titleref href='include-if-valid'></titleref>DTD<termref def='dt-repltext'>&replacement-text;</termref>&space-character;(#x20)&replacement-text;DTD&token;
</p>
</div3>
<!--
<div3 id='gen-char-entproc'>
<head>General and Character Entity Processing</head>
<p>General-entity and character references are recognized in three
contexts: wherever the nonterminal <nt def='NT-content'>content</nt> may
appear, at any point within the nonterminal
<nt def='NT-AttValue'>AttValue</nt>,
and within the
<termref def='dt-litentval'>literal entity value</termref>
(<nt def='NT-EntityValue'>EntityValue</nt>)
of an internal entity declaration.
This section discusses the first two cases; the third
is discussed <titleref href='intern-replacement'>below</titleref>.
When an <termref def="dt-xml-proc">XML processor</termref> encounters
such a reference, or the name of an unparsed entity as the value
of an <kw>ENTITY</kw> or <kw>ENTITIES</kw> attribute, then:
<olist>
<item><p>In all cases, the XML processor may
inform the application of the reference's occurrence and its identifier
(for an entity reference, the name; for a character
reference,
the character number in decimal, hexadecimal, or binary form).</p></item>
<item><p>For both character and entity references, the processor must
remove the reference itself from the <termref def="dt-text">text</termref> data
before passing the data to the application.
</p></item>
<item><p>For character references, the processor must
pass the character indicated
to the application in
place of the reference.
</p></item>
<item><p>For an external entity, the processor must inform the
application of the entity's <termref def="dt-sysid">system
identifier</termref>, and <termref def="dt-pubid">public identifier</termref>
if any.
All strings
of white space in the public identifier must be normalized to single space characters (#x20),
and leading and trailing white space must be removed.</p></item>
<item><p>If the external entity is binary, the processor must inform the
application of the associated <termref def="dt-notation">notation</termref>
name, and the notation's associated <termref def='dt-sysid'>system</termref>
and <termref def='dt-pubid'>public</termref> (if any)
identifiers.</p></item>
<item><p><termdef id="dt-include" term="Include">For an internal
(parsed) entity, the processor must <term>include</term> the
entity; that is, retrieve its replacement text
and process it as a part of the document
(i.e. as <nt def="NT-content">content</nt> or <nt
def="NT-AttValue">AttValue</nt>, whichever was being processed when
the reference was recognized), passing the result to the application
in place of the reference. The replacement text may contain both
<termref def='dt-chardata'>character data</termref>
and <termref def="dt-markup">markup</termref>, which must be recognized in
the usual way, except that the replacement text of entities used to escape
markup delimiters (the entities &magicents;) is always treated as
data. (The string "<code>AT&amp;amp;T;</code>" expands to
"<code>AT&amp;T;</code>" since the ampersand replacing "<code>&amp;amp;</code>"
is not recognized
as an entity-reference delimiter.) </termdef></p>
<p>Since the entity may contain other entity references,
an XML processor may have to repeat the inclusion process recursively.</p>
</item>
<item><p>If the entity is an external parsed entity, then in order to
<termref def="dt-valid">validate</termref> the XML document, the processor must
<termref def="dt-include">include</termref> the content of the
entity.</p></item>
<item><p>If the entity is an external parsed entity, and the processor is not
attempting to <termref def="dt-valid">validate</termref> the XML document, the
processor <termref def="dt-may">may</termref>, but need not, <termref
def="dt-include">include</termref> the entity's content.</p>
<p>This rule is based on the recognition that the automatic inclusion
provided by the SGML and XML entity mechanism, primarily designed
to support modularity in authoring, is not necessarily
appropriate for other applications, in particular document browsing.
Browsers, for example, when encountering an external parsed entity reference,
might choose to provide a visual indication of the entity's
presence and retrieve it for display only on demand.
</p></item>
</olist>
</p>
<p><termdef id="dt-escape" term="escape">Entity and character
references can both be used to <term>escape</term> the left angle bracket,
ampersand, and other delimiters. A set of general entities
(&magicents;) is specified for this purpose.
Numeric character references may also be used; they are
expanded immediately when recognized, and must be treated as
character data, so the numeric character references
"<code>&amp;#60;</code>" and "<code>&amp;#38;</code>" may be used to
escape <code>&lt;</code> and <code>&amp;</code> when they occur
in character data.</termdef></p>
</div3>
<div3 id='PE-proc'>
<head>Parameter Entity Processing</head>
<p>Parameter-entity references are only recognized in the
<termref def='dt-doctype'>DTD</termref>.
Their processing, when they appear
within the
<termref def='dt-litentval'>literal entity value</termref>
(<nt def='NT-EntityValue'>EntityValue</nt>)
of an internal entity declaration,
is discussed <titleref href='intern-replacement'>below</titleref>.
They have these intended uses:
<olist>
<item><p>as a replacement for one or more complete markup declarations</p></item>
<item><p>as a replacement for one or more complete "groups" in
element declarations</p></item>
<item><p>as a replacement for one or more complete "tokens" in
markup declarations</p></item>
</olist>
</p>
<p>The constraints requiring that PE replacement texts be properly nested
with <titleref href='vc-PEinMarkupDecl'>markup declarations</titleref>
and <titleref href='vc-PEinGroup'>content groups</titleref>
govern the first two usages.</p>
<p>To support the third intended usage,
when an XML processor encounters a parameter-entity reference
(outside of the
<termref def='dt-litentval'>literal entity value</termref> in an entity
declaration),
it must <termref def="dt-include">include</termref>
the named entity, but first expand its
<termref def='dt-repltext'>replacement text</termref> by attaching
space (#x20) characters to its beginning and the end, before
processing it.</p>
<p>The DTD text must match the relevant
rules of this specification's grammar after all parameter-entity
references have been expanded.
<!-In addition, parameter entities referred to in specific
contexts are required to satisfy certain constraints in their
replacement text; for example, a parameter entity referred to within
the internal DTD subset must match the rule for <nt
def="NT-markupdecl">markupdecl</nt>. ->
</p>
</div3>
-->
</div2>
<div2 id='intern-replacement'>
<head>&replacement-text;</head>
<p><termdef id="dt-litentval" term='Literal Entity Value'><term>&literal;</term>&string;<nt def='NT-EntityValue'>EntityValue</nt>&match;</termdef><termdef id='dt-repltext' term='Replacement Text'><term>&replacement-text;</term>&parameter;</termdef></p>
<p>&literal;<!-- replacement text -->(<nt def='NT-EntityValue'>EntityValue</nt>)&parameter;<!-- replacement text. -->&literal;<termref def='dt-include'></termref>&replacement-text;()&parameter;<emph>&replacement-text;</emph>&literal;,
<!-- in the replacement text that is to be included. -->
<eg><![CDATA[<!ENTITY % pub "&#xc9;ditions Gallimard" >
<!ENTITY rights "All rights reserved" >
<!ENTITY book "La Peste: Albert Camus,
&#xA9; 1947 %pub;. &rights;" >]]></eg>
&replacement-text;"<code>book</code>"
<eg>La Peste: Albert Camus,
&#169; 1947 &#201;ditions Gallimard. &amp;rights;</eg>
"<code>&amp;book;</code>""<code>&amp;rights;</code>"</p>
<p>
<!-- interaction = -->
<titleref href='sec-entexpand'></titleref>
</p>
<!-- Replaced by the above -TB
<p>Implementors of XML processors need to know the rules for
expansion of references in more detail. These rules only come into
play when the replacement text for an internal entity itself contains
other references.
<olist>
<item><p>In the replacement text of an internal entity, parameter-entity
references and character references in the replacement text
are recognized and resolved
when the entity declaration is parsed,
before the replacement text is stored in
the processor's symbol table.
General-entity references in the replacement text are not
resolved when the entity declaration is parsed.</p></item>
<item><p>In the document, when a general-entity reference is
resolved, its replacement text is parsed. Character references
encountered in the replacement text are
resolved immediately; general-entity references encountered in the
replacement text may be resolved or left unresolved, as described
<titleref href="entproc">above</titleref>.
Character and general-entity references must be
contained entirely within the entity's replacement text.
</p></item>
</olist>
</p>
<p>Simple character references do not suffice to escape delimiters
within the replacement text of an internal entity: they will be
expanded when the entity declaration is parsed, before the replacement
text is stored in the symbol table. When the entity itself is
referred to, the replacement text will be parsed again, and the
delimiters (no longer character references)
will be recognized as delimiters. To escape the
characters &magicents; in an entity replacement text, use
a general-entity reference or a doubly-escaped character reference.
See <titleref href='sec-entexpand'>the appendix on expansion
of entity references</titleref>
for detailed examples.</p>
-->
</div2>
<div2 id='sec-predefined-ent'>
<head>Predefined Entities</head>
<p><termdef id="dt-escape" term="escape">
&left-angle-bracket;<term>&escape;</term>&magicents;"<code>&amp;#60;</code>""<code>&amp;#38;</code>"<code>&lt;</code><code>&amp;</code>&escape;</termdef></p>
<p>XML&processor;<termref def='dt-interop'></termref>&valid;XML&replacement-text;&escape;
<eg><![CDATA[<!ENTITY lt "&#38;#60;">
<!ENTITY gt "&#62;">
<!ENTITY amp "&#38;#38;">
<!ENTITY apos "&#39;">
<!ENTITY quot "&#34;">
]]></eg>
"<code>lt</code>""<code>amp</code>""<code>&lt;</code>""<code>&amp;</code>"&well-formed;&escape;
</p>
</div2>
<div2 id='Notations'>
<head>Notation Declarations</head>
<p>
<termdef id="dt-notation" term="Notation">
<term>Notation</term><termref def="dt-extent">&unparsed-entity;</termref>&identify;<termref def="dt-pi"></termref>&application;&identify;</termdef></p>
<p><termdef id="dt-notdecl" term="Notation Declaration">
<term>Notation Declaration</term>&identifier;&identifier;&application;XML&processor;
<scrap lang='ebnf'>
<head>Notation Declarations</head>
<prod id='NT-NotationDecl'><lhs>NotationDecl</lhs>
<rhs>'&lt;!NOTATION' <nt def='NT-S'>S</nt> <nt def='NT-Name'>Name</nt>
<nt def='NT-S'>S</nt>
(<nt def='NT-ExternalID'>ExternalID</nt> |
<nt def='NT-PublicID'>PublicID</nt>)
<nt def='NT-S'>S</nt>? '>'</rhs></prod>
<prod id='NT-PublicID'><lhs>PublicID</lhs>
<rhs>'PUBLIC' <nt def='NT-S'>S</nt>
<nt def='NT-PubidLiteral'>PubidLiteral</nt>
</rhs></prod>
</scrap>
</termdef></p>
<p>XML&processor;&identifier;&application;&identifier;<termref def="dt-sysid">&identifier;</termref>&application;&processor;(XML&processor;&application;XML&error;)</p>
</div2>
<div2 id='sec-doc-entity'>
<head>Document Entity</head>
<p><termdef id="dt-docent" term="Document Entity"><term>Document Entity</term>&root;<termref def="dt-xml-proc">XML&processor;</termref></termdef>&TR-or-Rec;XML&processor;&processor;&stream;</p>
</div2>
</div1>
<!-- &Conformance; -->
<div1 id='sec-conformance'>
<head>Conformance</head>
<p><termref def="dt-xml-proc">XML&processor;</termref>&validating;&non-validating;</p>
<p>&validating;&non-validating;&TR-or-Rec;&well-formed;</p>
<p><termdef id="dt-validating" term="Validating Processor"><term>&validating;&processor;</term><termref def="dt-doctype">DTD</termref>&TR-or-Rec;&validity;
</termdef>
</p>
</div1>
<div1 id='sec-notation'>
<head>Notation</head>
<p>XML Extended Backus-Naur Form (EBNF)
<eg>symbol ::= expression</eg></p>
<p>&string;&literal;
<!--* The distinction between symbols which can and cannot be
recognized using simple regular expressions may be used to set the
boundary between an implementation's lexical scanner and its parser,
but this specification neither constrains the placement of that
boundary nor presupposes that all implementations will have one. *-->
</p>
<p>&string;&match;
<glist>
<gitem>
<label><code>#xN</code></label>
<def><p><code>N</code>16ISO/IEC 10646(UCS-4)&code-value;2&match;<code>#xN</code>&code-value;<!-- bit string -->XML
</p></def>
</gitem>
<gitem>
<label><code>[a-zA-Z]</code>, <code>[#xN-#xN]</code></label>
<def><p>()<termref def='dt-character'></termref>&match;</p></def>
</gitem>
<gitem>
<label><code>[^a-z]</code>, <code>[^#xN-#xN]</code></label>
<def><p><emph></emph><termref def='dt-character'></termref>&match;</p></def>
</gitem>
<gitem>
<label><code>[^abc]</code>, <code>[^#xN#xN#xN]</code></label>
<def><p><termref def='dt-character'></termref>&match;</p></def>
</gitem>
<gitem>
<label><code>"string"</code></label>
<def><p>&double-quote;&string;&literal;<termref def="dt-match">&match;</termref>&string;&literal;&match;</p></def>
</gitem>
<gitem>
<label><code>'string'</code></label>
<def><p>&single-quote;&string;&literal;<termref def="dt-match">&match;</termref>&string;&literal;&match;</p></def>
</gitem>
</glist>
<code>A</code><code>B</code>
<glist>
<gitem>
<label>(<code>expression</code>)</label>
<def><p><code>expression</code></p></def>
</gitem>
<gitem>
<label><code>A?</code></label>
<def><p><code>A</code>&match;(<code>A</code>)</p></def>
</gitem>
<gitem>
<label><code>A B</code></label>
<def><p><code>A</code><code>B</code>&match;
</p></def>
</gitem>
<gitem>
<label><code>A | B</code></label>
<def><p><code>A</code><code>B</code>&match;
</p></def>
</gitem>
<gitem>
<label><code>A - B</code></label>
<def><p><code>A</code>&match;<code>B</code>&match;&string;&match;</p></def>
</gitem>
<gitem>
<label><code>A+</code></label>
<def><p><code>A</code>1&match;</p></def>
</gitem>
<gitem>
<label><code>A*</code></label>
<def><p><code>A</code>0&match;</p></def>
</gitem>
<!-- DEATH TO %'s
<gitem>
<label><code>%a</code></label>
<def><p>specifies that <emph>in the external DTD subset</emph> a
<termref def='dt-param-entity'>parameter entity</termref> may occur in the
text at the position where <code>a</code> may occur; if so, its
replacement text must match <code>S? a S?</code>. If
the expression <code>a</code> is governed by a suffix operator, then
the suffix operator determines both the maximum number of parameter-entity
references allowed and the number of occurrences of <code>a</code>
in the replacement text of the parameter entities: <code>%a*</code>
means that <code>a</code> must occur zero or more times, and
that some of its occurrences may be replaced by references to
parameter entities whose replacement text must contain zero or
more occurrences of <code>a</code>; it is thus a more compact way
of writing <code>%(a*)*</code>.
Similarly, <code>%a+</code> means that <code>a</code>
must occur one or more times, and may be replaced by
parameter entities with replacement text matching
<code>S? (a S?)+</code>.
The recognition of parameter entities in the internal subset is much more
highly constrained.
</p></def>
</gitem>
-->
</glist>
<glist>
<gitem>
<label><code>/* ... */</code></label>
<def><p></p></def>
</gitem>
<gitem>
<label><code>[ wfc: ... ]</code></label>
<def><p>&well-formed;<termref def="dt-wellformed">&well-formed;</termref>&identify;</p></def>
</gitem>
<gitem>
<label><code>[ vc: ... ]</code></label>
<def><p>&validity;<termref def="dt-valid">&valid;</termref>&identify;
</p></def>
</gitem>
</glist>
</p></div1>