Commit 4542d28a authored by gerd's avatar gerd

Continued documentation.

Added a few features:
Pxp_dtd.Entity.lookup,  node method entity_id, Pxp_event.close_entities.
Restructured Pxp_types stuff: get rid of Pxp_core_types_type, Pxp_type_anchor.
Pxp_core_types with submodules A,S,I.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@738 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent d5d3b0f7
......@@ -53,6 +53,11 @@ for PXP; if you are looking for the stable distribution, please go
<li>
<p><em>1.2.1:</em> Revised documentation</p>
<p>Addition: Pxp_event.unwrap_document</p>
<p>Addition: Pxp_dtd.Entity.lookup</p>
<p>Addition: node method entity_id</p>
<p>Addition: Pxp_event.close_entities</p>
<p>Removed: Pxp_core_types_type, Pxp_type_anchor. Pxp_core_types
has now three submodules A, S, I taking over the roles</p>
</li>
<li><p><em>1.2.0test*:</em> New ~minimization option for the
[write] and [display] methods (user wish).</p>
......
......@@ -100,6 +100,7 @@ odoc: html/pic/done
-I ../../src/pxp-engine \
-load ../../src/pxp-engine/pxp_engine.dump \
-d html/ref \
-colorize-code \
-css-style style.css
......
......@@ -3,7 +3,7 @@ include $(TOP_DIR)/Makefile.rules
PACKAGES = netstring
OBJ = pxp_lexing.cmo pxp_type_anchor.cmo \
OBJ = pxp_lexing.cmo \
pxp_core_types.cmo pxp_lexer_types.cmo \
pxp_lex_aux.cmo pxp_lexers.cmo \
pxp_lib.cmo pxp_aux.cmo pxp_reader.cmo \
......@@ -16,7 +16,7 @@ OBJ = pxp_lexing.cmo pxp_type_anchor.cmo \
pxp_yacc.cmo pxp_marshal.cmo pxp_codewriter.cmo
DOC = pxp_types.mli pxp_document.mli pxp_dtd.mli pxp_tree_parser.mli \
pxp_core_types_type.mli pxp_ev_parser.mli \
pxp_core_types.mli pxp_ev_parser.mli \
pxp_event.mli pxp_dtd_parser.mli pxp_codewriter.mli \
pxp_marshal.mli pxp_yacc.mli pxp_reader.mli \
intro_trees.txt intro_extensions.txt intro_namespaces.txt \
......
......@@ -376,7 +376,7 @@ open while parsing is in progress. It is expected by the user to
continue calling [push] until the end of the stream is reached (at
least until [Some E_end_of_stream], [Some E_error], or [None] is
returned by [pull]). See the description of
{!Pxp_ev_parser.create_pull_parser} for a way of prematurely closing
{!Pxp_ev_parser.close_entities} for a way of prematurely closing
the parser for the exceptional cases where parsing cannot go on until
the final parser state is reached.
......
......@@ -453,6 +453,12 @@ let config =
}
]}
Note that the "super root node" is sometimes called "root node" in
various XML standards giving semantical model of XML. For PXP the
name "super root node" is preferred because this node type is not
obligatory, and the top-most element node can also be considered as
root of the tree.
{3:whitespace Controlling whitespace}
......
......@@ -556,7 +556,7 @@ let book =
>>
]}
[book] is a function [unit -> ]{!Pxp_types.event}. One can call it to
[book] is a function [unit -> ]{!Pxp_types.event}[ option]. One can call it to
pull the events out of it one after the other:
{[
......
......@@ -9,7 +9,7 @@
(* Lexing *)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
open Pxp_lexers
open Pxp_lib
......
......@@ -343,7 +343,7 @@ class virtual core_parser
* define them
*)
method private virtual init_for_xml_body : unit -> unit
method private virtual init_for_xml_body : entity_id -> unit
method private virtual event_document_xmldecl :
Pxp_lexer_types.prolog_token list -> unit
......@@ -1688,7 +1688,8 @@ contents_start():
* object only back via a global variable). The only solution is to
* modify the object that has been passed to the parsing function directly.
*/
$ {{ self # init_for_xml_body();
$ {{ self # init_for_xml_body
(context.manager # current_entity :> entity_id);
let parse_fn =
if pull_counter < 0 then
parse_content_push
......@@ -1700,7 +1701,9 @@ contents_start():
entry_expr():
$ {{ self # init_for_xml_body(); }}
$ {{ self # init_for_xml_body
(context.manager # current_entity :> entity_id);
}}
entry_expr_content()
{{ () }}
......
......@@ -81,7 +81,7 @@ object
method private pop_src_norm_mapping : unit -> unit
method private virtual init_for_xml_body : unit -> unit
method private virtual init_for_xml_body : entity_id -> unit
method private virtual event_document_xmldecl :
Pxp_lexer_types.prolog_token list -> unit
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -4,7 +4,7 @@
*)
open Pxp_aux
open Pxp_core_types
open Pxp_core_types.I
module Graph = struct
......
......@@ -60,6 +60,6 @@ type dfa_definition =
dfa_null : bool; (* Whether dfa_start member of dfa_stops *)
}
val dfa_of_regexp_content_model : Pxp_core_types.regexp_spec -> dfa_definition
val dfa_of_regexp_content_model : Pxp_core_types.I.regexp_spec -> dfa_definition
(* Computes the DFA or raises Not_found if it does not exist *)
This diff is collapsed.
This diff is collapsed.
......@@ -4,7 +4,7 @@
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
open Pxp_lexers
open Pxp_entity
......@@ -1195,7 +1195,7 @@ and dtd_notation the_name the_xid init_encoding =
object (self)
val name = the_name
val xid = (the_xid : ext_id)
val encoding = (init_encoding : Pxp_core_types.rep_encoding)
val encoding = (init_encoding : Pxp_core_types.I.rep_encoding)
method name = name
method ext_id = xid
method encoding = encoding
......@@ -1236,7 +1236,7 @@ and proc_instruction the_target the_value init_encoding =
object (self)
val target = the_target
val value = (the_value : string)
val encoding = (init_encoding : Pxp_core_types.rep_encoding)
val encoding = (init_encoding : Pxp_core_types.I.rep_encoding)
initializer
match target with
......@@ -1326,12 +1326,20 @@ module Entity = struct
| Entity(make,resolver) ->
make dtd (* resolver ignored *)
let entity_id ent = (ent :> < >)
let entity_id ent = (ent :> entity_id)
class fake = object end
class fake = object method pxp_magic_coercion() : unit = raise Not_found end
let create_entity_id () = new fake
let lookup eid =
try
let () = eid#pxp_magic_coercion() in
assert false
with
| Not_found -> invalid_arg "Pxp_dtd.Event.lookup"
| Pxp_entity.Coerced_entity e -> e
end
This diff is collapsed.
......@@ -24,7 +24,7 @@ object (self)
* because this is impossible for `Entry_declarations.
*)
method private init_for_xml_body() =
method private init_for_xml_body _ =
assert false
method private event_document_xmldecl xmldecl =
......
......@@ -5,7 +5,7 @@
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
open Pxp_aux
open Pxp_reader
......@@ -250,7 +250,58 @@ let update_other_lines v tok =
;;
class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
class type entity =
object
method pxp_magic_coercion : unit -> unit
method is_ndata : bool
method name : string
method lex_id : lexers
method set_lex_id : lexers -> unit
method line : int
method column : int
method set_line_column : int -> int -> unit
method encoding : rep_encoding
method set_manager : prelim_entity_manager -> unit
method counts_as_external : bool
method set_counts_as_external : unit
method lexer_obj : lexer_obj
method resolver : resolver option
method open_entity : ?gen_att_events:bool -> bool -> lexers -> unit
method close_entity : lexers
method is_open : bool
method replacement_text : (string * bool)
method xml_declaration : (string * string) list option
method set_debugging_mode : bool -> unit
method full_name : string
method next_token : token
method next_ignored_token : token
method process_xmldecl : prolog_token list -> unit
method process_missing_xmldecl : unit
method ext_id : ext_id
method resolver_id : resolver_id
method notation : string
end
and prelim_entity_manager =
object
method current_entity : entity
method pop_entity : unit -> unit
method push_entity : entity -> unit
end
class type v_entity =
object
inherit entity
val v : entity entity_variables
end
exception Coerced_entity of entity
class virtual entity_base the_dtd the_name the_swarner the_warner init_encoding =
object (self)
(* This class prescribes the type of all entity objects. Furthermore,
* the default 'next_token' mechanism is implemented.
......@@ -259,6 +310,8 @@ class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
val v = make_variables
the_dtd the_name the_swarner the_warner init_encoding
method virtual pxp_magic_coercion : unit -> unit
method is_ndata = false
(* Returns if this entity is an NDATA (unparsed) entity *)
......@@ -285,9 +338,7 @@ class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
( match manager with
None -> assert false
| Some m -> m
: < current_entity : entity;
pop_entity : unit -> unit;
push_entity : entity -> unit >
: prelim_entity_manager
)
method set_manager m = manager <- Some m
......@@ -354,9 +405,6 @@ class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
*)
method lexer_obj = v.lexobj
method xml_declaration =
(* return the (name,value) pairs of the initial <?xml name=value ...?>
* processing instruction.
......@@ -682,7 +730,7 @@ class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
;;
class ndata_entity the_name the_ext_id the_notation init_encoding =
class ndata_entity the_name the_ext_id the_notation init_encoding : entity =
object (self)
(* An NDATA entity is very restricted; more or less you can only find out
* its external ID and its notation.
......@@ -693,6 +741,9 @@ class ndata_entity the_name the_ext_id the_notation init_encoding =
val mutable notation = the_notation
val encoding = (init_encoding : rep_encoding)
method pxp_magic_coercion() =
raise (Coerced_entity (self :> entity))
method name = (name : string)
method ext_id = (ext_id : ext_id)
method notation = (notation : string)
......@@ -769,10 +820,6 @@ class ndata_entity the_name the_ext_id the_notation init_encoding =
( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
: (string * bool) )
method lexer_obj =
( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
: lexer_obj )
method next_token =
( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
: token )
......@@ -792,14 +839,13 @@ class ndata_entity the_name the_ext_id the_notation init_encoding =
end
;;
class external_entity the_resolver the_dtd the_name the_swarner the_warner
the_ext_id
the_system_base
the_p_special_empty_entities
init_encoding
class external_entity
the_resolver the_dtd the_name the_swarner the_warner
the_ext_id the_system_base the_p_special_empty_entities
init_encoding : v_entity
=
object (self)
inherit entity
inherit entity_base
the_dtd the_name the_swarner the_warner
init_encoding
as super
......@@ -840,6 +886,9 @@ class external_entity the_resolver the_dtd the_name the_swarner the_warner
v.counts_as_external <- true;
method pxp_magic_coercion() =
raise (Coerced_entity (self :> entity))
method private set_encoding e =
assert resolver_is_open;
resolver # change_encoding e
......@@ -1030,11 +1079,27 @@ class external_entity the_resolver the_dtd the_name the_swarner the_warner
end
;;
(*
class external_entity the_resolver the_dtd the_name the_swarner the_warner
the_ext_id
the_system_base
the_p_special_empty_entities
init_encoding : entity
=
object (self)
inherit external_entity_base
the_resolver the_dtd the_name the_swarner the_warner
the_ext_id the_system_base the_p_special_empty_entities
init_encoding
end
;;
*)
class document_entity the_resolver the_dtd the_name the_swarner the_warner
the_ext_id
the_system_base
init_encoding
init_encoding : entity
=
object (self)
inherit external_entity the_resolver the_dtd the_name the_swarner
......@@ -1053,6 +1118,9 @@ class document_entity the_resolver the_dtd the_name the_swarner the_warner
method counts_as_external = false
(* Document entities count never as external! *)
method pxp_magic_coercion() =
raise (Coerced_entity (self :> entity))
end
;;
......@@ -1060,7 +1128,7 @@ class document_entity the_resolver the_dtd the_name the_swarner the_warner
class internal_entity the_dtd the_name the_swarner the_warner the_literal_value
the_p_internal_subset
init_is_parameter_entity
init_encoding
init_encoding : entity
=
(* An internal entity uses a "literal entity value" as character source.
* This value is first expanded and preprocessed, i.e. character and
......@@ -1073,7 +1141,7 @@ class internal_entity the_dtd the_name the_swarner the_warner the_literal_value
*)
object (self)
inherit entity
inherit entity_base
the_dtd the_name the_swarner the_warner
init_encoding
as super
......@@ -1212,6 +1280,10 @@ class internal_entity the_dtd the_name the_swarner the_warner the_literal_value
method resolver = (None : resolver option)
method pxp_magic_coercion() =
raise (Coerced_entity (self :> entity))
end
;;
......@@ -1242,7 +1314,7 @@ class internal_entity the_dtd the_name the_swarner the_warner the_literal_value
type section_state = P_bof | P_normal of int | P_pre_eof | P_eof
(* P_normal n: The number n is the number of open inner entities *)
class entity_section (init_ent:entity) =
class entity_section (init_ent:entity) : entity =
object (self)
val ent = init_ent
val mutable state = P_bof
......@@ -1331,8 +1403,9 @@ object (self)
method process_missing_xmldecl = ()
method ext_id = ent # ext_id
method notation = ent # notation
end
;;
(* class entity_manager: has been moved to Pxp_entity_manager *)
method pxp_magic_coercion() =
raise (Coerced_entity (self :> entity))
end
;;
......@@ -15,10 +15,9 @@ open Pxp_aux
(* The subclass event_parser for the event-based interface: *)
class any_entity_id = object end ;;
class event_parser init_dtd init_config init_event_handler
init_want_start_doc init_pull_counter lit_root =
let null_entity_id = Pxp_dtd.Entity.create_entity_id() in
object (self)
inherit core_parser init_dtd init_config init_pull_counter
......@@ -31,7 +30,7 @@ object (self)
val mutable xml_standalone = false
val mutable ep_root_element_seen = false
val mutable ep_elstack = stack_create (None,"","",new any_entity_id)
val mutable ep_elstack = stack_create (None,"","",null_entity_id)
val mutable ep_early_events = []
......@@ -44,7 +43,7 @@ object (self)
raise(WF_error("Root element is not closed"));
method private init_for_xml_body() =
method private init_for_xml_body _ =
if not init_done then begin
if config.recognize_standalone_declaration then
dtd # set_standalone_declaration xml_standalone;
......@@ -315,8 +314,13 @@ let process_expr
;;
let create_pull_parser
cfg entry mgr =
let close_entities mgr =
let top = mgr#top_entity in
mgr # pop_entity_until top;
if top # is_open then ignore(top # close_entity)
let create_pull_parser cfg entry mgr =
(* Do control inversion with a queue serving as buffer, and a very special
* kind of continuations
......
......@@ -22,8 +22,8 @@
*)
open Pxp_types
open Pxp_dtd
open Pxp_types
val create_entity_manager :
?is_document:bool -> (* default: true *)
......@@ -153,8 +153,12 @@ val process_expr :
* hard to change!
*)
val close_entities : Pxp_entity_manager.entity_manager -> unit
(** Closes all entities managed by this entity manager, and frees
operating system resources like open files.
*)
val create_pull_parser :
?close:((unit -> unit) ref) ->
config ->
entry ->
Pxp_entity_manager.entity_manager ->
......@@ -180,13 +184,4 @@ val create_pull_parser :
* let next = create_pull_parser cfg entry mng in
* let stream = Stream.from(fun _ -> next())
* ]}
*
* The optional argument [close] may be set to a variable, and the
* pull parser sets this variable to a function that closes the event
* stream immediately when invoked. This implies that all resources
* of the operating system (like files) are closed. The token stream
* is immediately ended (i.e. [None] is returned). The [close] argument
* exists to allow users to stop parsing at any point. If you can ensure
* to read from the event stream until [E_end_of_stream] or [E_error] is
* encountered, it is not required to care of closing the parser engine.
*)
......@@ -116,7 +116,8 @@ let pfilter p get_ev =
;;
type filter = (unit -> event option) -> (unit -> event option)
type pull_fn = unit -> event option
type filter = pull_fn -> pull_fn
let norm_cdata_filter get_ev =
let q = Queue.create () in
......@@ -149,7 +150,7 @@ let norm_cdata_filter get_ev =
let drop_ignorable_whitespace_filter get_ev =
let found_dtd = ref None in
let found_dtd = ref (None : Pxp_dtd.dtd option) in
let elements = Stack.create() in
let pos = ref("",0,0) in
......@@ -228,7 +229,7 @@ let unwrap_document pull =
let get_doc_details() =
if not !first_event_done then (
match pull() with
| E_start_doc(v,dtd) ->
| Some(E_start_doc(v,dtd)) ->
doc_details := Some(v,dtd);
first_event_done := true
| _ ->
......@@ -248,7 +249,7 @@ let unwrap_document pull =
doc_details := Some(v,dtd);
first_event_done := true;
false
| E_end_doc | E_start_super | E_end_super | E_end_of_stream ->
| E_end_doc _ | E_start_super | E_end_super | E_end_of_stream ->
false
| E_error e ->
raise e
......@@ -611,8 +612,8 @@ let string_of_event e =
match e with
| E_start_doc(v,dtd) ->
sprintf "E_start_doc(%s,<%d>)\n" v (Oo.id dtd)
| E_end_doc ->
"E_end_doc\n"
| E_end_doc name ->
sprintf "E_end_doc(%s)" name
| E_start_tag(name,attlist,scope_opt,entid) ->
sprintf "E_start_tag(%s,%s,%s,<%d>)"
name
......@@ -641,5 +642,5 @@ let string_of_event e =
| E_error e ->
sprintf "E_error(%s)" (Pxp_types.string_of_exn e)
| E_end_of_stream ->
"E_end_of_stream\n"
"E_end_of_stream"
;;
......@@ -3,10 +3,13 @@
*
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
class dummy_entity = object end
class dummy_entity =
object
method pxp_magic_coercion() : unit = raise Not_found
end
let dummy_entity = ( new dummy_entity : entity_id )
......
......@@ -41,10 +41,7 @@ type prolog_token =
| Pro_eof
type entity_id = < >
(* The class without properties; but you can still compare if two objects
* are the same.
*)
type entity_id = < pxp_magic_coercion : unit -> unit >
type token =
| Begin_entity (* Beginning of entity *)
......@@ -183,7 +180,7 @@ let string_of_tok tok =
class type lexer_factory =
object
method encoding : Pxp_core_types.rep_encoding
method encoding : Pxp_core_types.I.rep_encoding
method open_source : Pxp_reader.lexer_source -> lexer_obj
method open_string : string -> lexer_obj
method open_string_inplace : string -> lexer_obj
......@@ -192,7 +189,7 @@ end
and lexer_obj =
object
method factory : lexer_factory
method encoding : Pxp_core_types.rep_encoding
method encoding : Pxp_core_types.I.rep_encoding
method open_source : Pxp_reader.lexer_source -> unit
method open_string : string -> unit
method open_string_inplace : string -> unit
......
......@@ -27,9 +27,12 @@ type prolog_token =
| Pro_string of string (* "..." or '...' *)
| Pro_eof
type entity_id = < >
(* The class without properties; but you can still compare if two objects
* are the same.
type entity_id = < pxp_magic_coercion : unit -> unit >
(* User code should only compare [entity_id] objects. The
[pxp_magic_coercion] function will either raise
- [Not_found] if the [entity_id] is not connected with a real entity, or
- [Pxp_entity.Coerced_entity e] if the [entity_id] points to the entity
[e]
*)
type token =
......@@ -137,7 +140,7 @@ val string_of_tok : token -> string
*)
class type lexer_factory =
object
method encoding : Pxp_core_types.rep_encoding
method encoding : Pxp_core_types.I.rep_encoding
(** The (announced) character encoding of the scanned strings *)
method open_source : Pxp_reader.lexer_source -> lexer_obj
......@@ -160,7 +163,7 @@ object
method factory : lexer_factory
(** The [lexer_factory] that created this [lexer_obj] *)
method encoding : Pxp_core_types.rep_encoding
method encoding : Pxp_core_types.I.rep_encoding
(** The character encoding of the scanned strings *)
method open_source : Pxp_reader.lexer_source -> unit
......
......@@ -5,7 +5,7 @@
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
let lexer_factories = Hashtbl.create 100;;
......
......@@ -5,7 +5,7 @@
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_lexer_types
val get_lexer_factory : rep_encoding -> lexer_factory
......
......@@ -3,7 +3,7 @@
*
*)
open Pxp_core_types
open Pxp_core_types.I
open Pxp_document
open Pxp_dtd
open Pxp_aux
......
......@@ -4,11 +4,11 @@
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
open Pxp_core_types;;
open Pxp_core_types.I;;
open Netchannels;;
exception Not_competent = Pxp_core_types.Not_competent;;
exception Not_resolvable (* of exn *) = Pxp_core_types.Not_resolvable;;
exception Not_competent = Pxp_core_types.I.Not_competent;;
exception Not_resolvable (* of exn *) = Pxp_core_types.I.Not_resolvable;;
type lexer_source =
{ lsrc_lexbuf : Lexing.lexbuf Lazy.t;
......
......@@ -21,7 +21,9 @@
*)
open Pxp_core_types;;
open Pxp_core_types.I;;
(** {fixpxpcoretypes true} *) (* Set back to false at the end of the file *)
(** {2 Types and exceptions} *)
......@@ -974,3 +976,6 @@ val lookup_system_id_as_string :
resolver;;
(* Same as the equally named class *)
(** {fixpxpcoretypes false} *)
......@@ -87,6 +87,7 @@ class ['ext] tree_parser
transform_dtd id_index
=
let make_pool_string = pool_string init_config.name_pool in
let null_id = Pxp_dtd.Entity.create_entity_id() in
object (self)