Commit 287b0860 authored by gerd

Renamed `Entry_content to `Entry_element_content.

Added `Entry_content with new meaning. See DEV.xml.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@741 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent ea112755
......@@ -59,6 +59,10 @@ for PXP; if you are looking for the stable distribution, please go
<p>Removed: Pxp_core_types_type, Pxp_type_anchor. Pxp_core_types
now has three submodules A, S, I taking over their roles.</p>
<p>Removed: E_pinstr_member. Instead, E_pinstr events are emitted</p>
<p>Renaming, and addition: `Entry_content has been renamed to
`Entry_element_content. A new `Entry_content with different
semantics has been added, now conforming to a standard production.
</p>
</li>
<li><p><em>1.2.0test*:</em> New ~minimization option for the
[write] and [display] methods (user wish).</p>
......
......@@ -198,8 +198,6 @@ where [<entid>] is the entity ID object.
{3:nondocs The wrapping for non-document entities}
CHECK: THAT DOES NOT MATCH THE CODE!
The XML specification demands that external XML entities (that are
referenced from a document entity or another external entity) comply
with this grammar (excerpt from the W3C definition):
......@@ -325,7 +323,13 @@ parse a closed document. Note that the emitted event stream includes
the wrapping for documents as described in {!Intro_events.docs}.
The entry point [`Entry_content] is for non-document external entities,
as described in {!Intro_events.nondocs}.
as described in {!Intro_events.nondocs}. There is a similar entry
point, [`Entry_element_content], which additionally enforces some
constraints on the node structure. In particular, there must be a single
top-level element so that the enforced node structure looks like a
document. We do not recommend using [`Entry_element_content]; instead,
use [`Entry_document], and remove the document wrapping in a postprocessing
step.
The entry point [`Entry_expr] reads a single node (see {!Pxp_types.entry}
for details). It is recommended to use {!Pxp_ev_parser.process_expr}
......
......@@ -160,6 +160,11 @@ class virtual core_parser
* internal or external subset of the DTD.
*)
val mutable permit_any_content = false
(* Used with `Entry_content to allow entity references outside
elements and to turn off other parsing restrictions
*)
method private only_whitespace data =
(* Checks that the string "data" contains only whitespace. On failure,
* Validation_error is raised.
......@@ -615,7 +620,7 @@ class virtual core_parser
* <!DOCTYPE..> and an element)
* "entry_declarations": parses an "external DTD subset", i.e. a sequence
* of declarations
* "entry_content": parses a single element (no <!DOCTYPE...> allowed);
* "entry_element_content": parses a single element (no <!DOCTYPE...> allowed);
* the element need not be the root element of the
* DTD
*
......@@ -642,7 +647,7 @@ class virtual core_parser
entry_document():
Begin_entity
$ {{ n_entities_open <- n_entities_open + 1; }}
doc_xmldecl_then_misc_then_prolog_then_rest()
doc_xmldecl_then_misc_then_prolog_then_body()
{{
(* Note: In pull parsing mode, the following code won't be executed! *)
()
......@@ -660,61 +665,86 @@ entry_document():
* assume UTF8, and they are right...)
*/
doc_xmldecl_then_misc_then_prolog_then_rest():
doc_xmldecl_then_misc_then_prolog_then_body():
pl:PI_xml
$ {{ context.manager # current_entity # process_xmldecl pl;
if process_xmldecl then self # event_document_xmldecl pl;
}}
misc()* doc_prolog_then_rest()
misc()* doc_prolog_then_body()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
misc() misc()* doc_prolog_then_rest()
misc() misc()* doc_prolog_then_body()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
doctypedecl() misc()* contents_start()
doctypedecl() misc()* body_start()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
contents_start()
body_start()
{{ () }}
doc_prolog_then_rest():
doctypedecl() misc()* contents_start()
doc_prolog_then_body():
doctypedecl() misc()* body_start()
{{ () }}
| contents_start()
| body_start()
{{ () }}
entry_content():
entry_element_content():
Begin_entity
$ {{ n_entities_open <- n_entities_open + 1; }}
el_xmldecl_then_misc_then_rest()
el_xmldecl_then_misc_then_body()
{{
(* Note: In pull parsing mode the following code won't be executed! *)
()
}}
entry_content():
Begin_entity
$ {{ permit_any_content <- true;
n_entities_open <- n_entities_open + 1;
}}
el_xmldecl_then_entity_body()
{{
(* Note: In pull parsing mode the following code won't be executed! *)
()
}}
/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
el_xmldecl_then_misc_then_rest():
/* See comment for doc_xmldecl_then_misc_then_prolog_then_body. */
el_xmldecl_then_misc_then_body():
pl:PI_xml
$ {{ context.manager # current_entity # process_xmldecl pl; }}
misc()* contents_start()
misc()* body_start()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
misc() misc()* contents_start()
misc() misc()* body_start()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
contents_start()
body_start()
{{ () }}
el_xmldecl_then_entity_body():
pl:PI_xml
$ {{ context.manager # current_entity # process_xmldecl pl; }}
entity_body()
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
entity_body()
{{ () }}
entry_declarations():
/* Parses a sequence of declarations given by an entity. As side-effect,
* the parsed declarations are put into the dtd object.
......@@ -722,13 +752,13 @@ entry_declarations():
* Note: The following Begin_entity is not counted because this entity
* will certainly be closed when pull parsing mode is entered.
*/
Begin_entity decl_xmldecl_then_rest()
Begin_entity decl_xmldecl_then_body()
{{ () }}
| Eof
{{ () }}
decl_xmldecl_then_rest():
decl_xmldecl_then_body():
/* Note: This rule is also called from declaration()! */
pl:PI_xml
$ {{ context.manager # current_entity # process_xmldecl pl;
......@@ -980,7 +1010,7 @@ declaration():
let old_p_internal_subset = p_internal_subset in
p_internal_subset <- false;
}}
decl_xmldecl_then_rest()
decl_xmldecl_then_body()
{{ (* Restore the old value of 'p_internal_subset'. *)
p_internal_subset <- old_p_internal_subset;
()
......@@ -1664,7 +1694,7 @@ notationdecl():
*/
contents_start():
body_start():
/* parses <element>...</element> misc*, i.e. exactly one element followed
* optionally by white space or processing instructions.
* The element is entered into the global variables as follows:
......@@ -1699,6 +1729,26 @@ contents_start():
start_tag() [parse_fn]()
{{ () }}
entity_body():
/* The body of an external entity, i.e. content. */
End_entity
{{ self # init_for_xml_body
(context.manager # current_entity :> entity_id);
n_entities_open <- n_entities_open - 1;
raise End_of_parsing
}}
| $ {{
self # init_for_xml_body
(context.manager # current_entity :> entity_id);
let parse_fn =
if pull_counter < 0 then
parse_content_push
else
parse_content_pull in
}}
node_tag() [parse_fn]()
{{ () }}
entry_expr():
$ {{ self # init_for_xml_body
......@@ -1711,7 +1761,7 @@ entry_expr_content():
start_tag()
$ {{ begin try
while n_tags_open > 0 do
parse_content yy_current yy_get_next
parse_node_tag yy_current yy_get_next
done
with Not_found ->
yy_position := "expr";
......@@ -1743,12 +1793,12 @@ entry_expr_space():
content_push():
content() content()*
node_tag() node_tag()*
{{ () }}
content_pull():
content()
node_tag()
$ {{ pull_counter <- pull_counter - 1;
if pull_counter <= 0 then begin
pull_counter <- pull_counter_limit;
......@@ -1762,7 +1812,7 @@ content_pull():
content_pull()
{{ () }}
content():
node_tag():
/* parses: start tags, end tags, content, or processing
* instructions. That the tags are properly nested is dynamically checked.
* As a result, recognized elements are added to their parent elements,
......@@ -1782,7 +1832,7 @@ content():
| comment()
{{ () }}
| Begin_entity
$ {{ if n_tags_open = 0 then
$ {{ if n_tags_open = 0 && not permit_any_content then
raise(WF_error("Entity reference not allowed here"));
n_entities_open <- n_entities_open + 1;
}}
......@@ -1797,7 +1847,7 @@ content():
}}
/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
/* See comment for doc_xmldecl_then_misc_then_prolog_then_body. */
init_inner_entity():
pl:PI_xml
......@@ -1806,7 +1856,7 @@ init_inner_entity():
{{ () }}
| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
content()
node_tag()
{{ () }}
......@@ -2036,7 +2086,7 @@ char_data():
*/
data:CharData
{{
if n_tags_open = 0 then
if n_tags_open = 0 && not permit_any_content then
(* only white space is allowed *)
self # only_whitespace data
else
......@@ -2052,7 +2102,7 @@ char_data():
{{ process_curly_brace RRcurly }}
| data:Cdata
{{
if n_tags_open = 0 then
if n_tags_open = 0 && not permit_any_content then
raise (WF_error("CDATA section not allowed here"));
self # event_char_data data
}}
......@@ -2061,7 +2111,7 @@ cref():
/* Parses &#...; and adds the character to the top element of elstack. */
code:CRef
{{
if n_tags_open = 0 then
if n_tags_open = 0 && not permit_any_content then
(* No surrounding element: character references are not allowed *)
raise(WF_error("Character reference not allowed here"));
self # event_char_data
......@@ -2125,6 +2175,8 @@ comment():
parse_entry_document context.current context.get_next
| `Entry_declarations flags ->
parse_entry_declarations context.current context.get_next
| `Entry_element_content flags ->
parse_entry_element_content context.current context.get_next
| `Entry_content flags ->
parse_entry_content context.current context.get_next
| `Entry_expr flags ->
......
......@@ -67,6 +67,7 @@ object
val mutable pull_counter : int
val mutable p_internal_subset : bool
val mutable ns_scope : Pxp_dtd.namespace_scope option
val mutable permit_any_content : bool
method parse : context -> extended_entry -> unit
......
......@@ -2256,9 +2256,14 @@ val solidify :
* The event stream may be either:
* - A document event stream (as generated by [`Entry_document]).
* In this case [`Document d] is returned.
* - A content event stream (as generated by [`Entry_content]).
* - A content event stream with top-level element (as generated by
* [`Entry_element_content]).
* In this case [`Node n] is returned.
*
* Note that there is no way to solidify a content event stream
* that does not have a single top-level element (i.e. as parsed by
* [`Entry_content]). An attempt will result in an exception.
*
* Document streams contain a DTD. The found DTD is used for the
* node tree. Content streams, on the contrary, do not contain DTDs.
* In this case, an empty DTD is created (in well-formedness mode).
......
......@@ -83,7 +83,7 @@ object (self)
| None -> ()
);
if n_tags_open = 0 then begin
if ep_root_element_seen then
if ep_root_element_seen && not permit_any_content then
raise(WF_error("Document must consist of only one toplevel element"));
ep_root_element_seen <- true;
lit_root := name
......@@ -245,8 +245,9 @@ let process_entity
match entry with
`Entry_document _ -> Document
| `Entry_declarations _ -> failwith "Pxp_yacc.process_entity: bad entry point"
| `Entry_content _ -> Content
| `Entry_expr _ -> Content
| `Entry_content _ -> Content
| `Entry_element_content _ -> Content
| `Entry_expr _ -> Content
in
let en = mgr # current_entity in
let gen_att_events = Some(cfg.escape_attributes <> None) in
......@@ -350,6 +351,7 @@ let create_pull_parser cfg entry mgr =
`Entry_document _ -> Document
| `Entry_declarations _ -> failwith "Pxp_yacc.process_entity: bad entry point"
| `Entry_content _ -> Content
| `Entry_element_content _ -> Content
| `Entry_expr _ -> Content
in
let en = mgr # current_entity in
......
......@@ -66,14 +66,21 @@ val process_entity :
*
* The entry point to the parsing rules can be specified as follows:
* - [`Entry_document]:
* This entry point corresponds to the grammar production for documents.
* The first generated event is always [E_start_doc];
* it contains the whole DTD as an object (no events are generated
* during DTD parsing; only the wholly parsed DTD is passed back). The
* events for the contents follow, terminated by [E_end_doc] and then
* events for the XML body follow, terminated by [E_end_doc] and then
* [E_end_of_stream].
* - [`Entry_content]:
* Only events for contents are generated. They are terminated
* This entry point corresponds to the grammar production for
* external entities (XML declaration followed by any sequence of
* content). The emitted events are terminated
* by [E_end_of_stream].
* - [`Entry_element_content]:
* There is no corresponding grammar production in the XML standard.
* The input is an optional XML declaration followed by [misc* element misc*].
* The emitted events are terminated by [E_end_of_stream].
* - [`Entry_declarations]:
* Currently not supported. (But see {!Pxp_dtd_parser} for functions
* parsing DTDs.)
......
......@@ -651,7 +651,7 @@ let parse_content_entity ?id_index cfg src dtd spec =
~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
~id_index:(id_index :> 'ext index option)
~use_document_entity:false
~entry:(`Entry_content []) (* Entry point of the grammar *)
~entry:(`Entry_element_content []) (* Entry point of the grammar *)
~init_lexer:Content (* The initially used lexer *)
in
match pobj # root with
......@@ -685,7 +685,7 @@ let parse_wfcontent_entity cfg src spec =
~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
~id_index:None
~use_document_entity:false
~entry:(`Entry_content []) (* Entry point of the grammar *)
~entry:(`Entry_element_content []) (* Entry point of the grammar *)
~init_lexer:Content (* The initially used lexer *)
in
match pobj # root with
......
......@@ -184,7 +184,9 @@ val parse_content_entity :
* Despite its name, this function {b cannot} parse the [content]
* production defined in the XML specification! This is a misnomer
* I'm sorry about. The [content] production would allow parsing
* a list of elements and other node kinds.
* a list of elements and other node kinds. Also, this function
* corresponds to the event entry point [`Entry_element_content] and
* not [`Entry_content].
*
* If the optional argument [id_index] is present, the parser adds
* any ID attribute to the passed index. An index is required to detect
......
......@@ -216,6 +216,7 @@ type entry =
`Extend_dtd_fully | `Parse_xml_decl ] list
| `Entry_declarations of [ `Val_mode_dtd | `Extend_dtd_fully ] list
| `Entry_content of [ `Dummy ] list
| `Entry_element_content of [ `Dummy ] list
| `Entry_expr of [ `Dummy ] list
]
......
......@@ -577,6 +577,7 @@ type entry =
[ `Entry_document of [ `Val_mode_dtd | `Extend_dtd_fully |
`Parse_xml_decl ] list
| `Entry_declarations of [ `Val_mode_dtd | `Extend_dtd_fully ] list
| `Entry_element_content of [ `Dummy ] list
| `Entry_content of [ `Dummy ] list
| `Entry_expr of [ `Dummy ] list
]
......@@ -585,8 +586,14 @@ type entry =
* must have a DOCTYPE and may have a DTD.
* - [`Entry_declarations]: The parser reads the external subset
* of a DTD
* - [`Entry_content]: The parser reads an entity containing contents,
* i.e. "misc* element misc*".
* - [`Entry_element_content]:
* The parser reads an entity containing contents, but there must
* be one top element, i.e. "misc* element misc*". At the beginning,
* there can be an XML declaration as for external entities.
* - [`Entry_content]:
* The parser reads an entity containing contents, but without the
* restriction of having a top element. At the beginning,
* there can be an XML declaration as for external entities.
* - [`Entry_expr]: The parser reads a single element, a single
* processing instruction or a single comment, or whitespace, whatever is
* found. In contrast to the other entry points, the expression
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment