Commit b8498edb authored by gerd's avatar gerd

Revised namespace handling: There are now namespace

scope objects. Retrieving the display prefix is now easier.
Method "display" to write the document with display prefixes.
The namespace_info stuff has been removed.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@690 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent 2296ae60
......@@ -23,14 +23,6 @@ end;;
module Str_hashtbl = Hashtbl.Make(HashedString);;
module StringOrd = struct
type t = string
let compare = (compare : string -> string -> int)
end;;
module StringMap = Map.Make(StringOrd);;
(* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
let character ?swarner enc warner k =
assert (k>=0);
if (k >= 0xd800 & k < 0xe000) or (k >= 0xfffe & k <= 0xffff) or k > 0x10ffff
......
......@@ -174,14 +174,34 @@ class virtual core_parser
* by the subclass, but this is completely voluntary
*)
val mutable ns_stack = Stack.create()
(* Stack of previous ns_scope, ns_cache, ns_default_normprefix *)
val mutable ns_scope = None
(* The current namespace_scope *)
val mutable ns_cache = StringMap.empty
(* The cache mapping display prefixes to normprefixes *)
val mutable ns_default_normprefix = ""
(* The default normprefix, or "" if none *)
(*
val mutable src_norm_mapping = [ "xml", "xml" ]
(* Namespace processing: Contains pairs (srcprefix, normprefix).
* srcprefix = "!" is used as guard.
*)
val mutable default_normprefix = ""
(* Namespace_processing: The default normprefix, or "" if none *)
*)
method private init_ns_processing (mng:namespace_manager) =
let scope =
new namespace_scope_impl
mng None [ "xml", mng # get_primary_uri "xml" ] in
ns_scope <- Some scope;
ns_cache <- StringMap.empty;
ns_default_normprefix <- ""
method private push_src_norm_mapping (mng:namespace_manager) name attlist =
(* [mng]: namespace manager
......@@ -189,6 +209,10 @@ class virtual core_parser
* [attlist]: source attribute list
* returns quadruple (src_prefix, localname, norm_name, norm_attlist)
*)
(* Save state: *)
Stack.push (ns_scope, ns_cache, ns_default_normprefix) ns_stack;
let split_attlist =
List.map
(fun (name, value) -> namespace_split name, value)
......@@ -211,15 +235,12 @@ class virtual core_parser
)
split_attlist;
(* Apply xmlns_attlist: *)
src_norm_mapping <- ( "!", default_normprefix ) :: src_norm_mapping;
(* add guard *)
let mapping = ref [] in
List.iter
(fun (srcprefix, uri) ->
let normprefix =
mng # lookup_or_add_namespace srcprefix uri in
src_norm_mapping <- (srcprefix, normprefix) :: src_norm_mapping;
mapping := (srcprefix, uri) :: !mapping;
)
!xmlns_attlist;
......@@ -228,15 +249,25 @@ class virtual core_parser
None -> ()
| Some "" ->
(* Delete default namespace: *)
default_normprefix <- "";
ns_default_normprefix <- "";
mapping := ("", "") :: !mapping;
| Some uri ->
let normprefix =
try mng # get_normprefix uri
with Not_found ->
mng # lookup_or_add_namespace "default" uri
in
default_normprefix <- normprefix
ns_default_normprefix <- normprefix;
mapping := ("", uri) :: !mapping;
);
(* Create new scope: *)
let scope =
if !mapping = [] then
ns_scope
else
Some(new namespace_scope_impl mng ns_scope !mapping) in
ns_scope <- scope;
(* Normalize the regular_attlist: *)
let norm_attlist =
......@@ -262,20 +293,12 @@ class virtual core_parser
(prefix, localname, norm_name, norm_attlist)
method private pop_src_norm_mapping () =
(* Pop until the guard is found *)
let rec pop m =
match m with
[] ->
assert false
| ("!",d)::m' ->
default_normprefix <- d;
src_norm_mapping <- m'
| (_,_)::m' ->
pop m'
in
pop src_norm_mapping
let (scope, cache, default_normprefix) = Stack.pop ns_stack in
ns_scope <- scope;
ns_cache <- cache;
ns_default_normprefix <- default_normprefix
method private normalize_namespace_prefix
......@@ -284,18 +307,28 @@ class virtual core_parser
raise(Namespace_error("Found several colons in a name"));
if prefix = "" then begin
(* No prefix *)
if apply_default && default_normprefix <> "" then
default_normprefix ^ ":" ^ localname
if apply_default && ns_default_normprefix <> "" then
ns_default_normprefix ^ ":" ^ localname
else
localname
end
else begin
(* Prefix exists *)
let normprefix =
try List.assoc prefix src_norm_mapping
try
StringMap.find prefix ns_cache
with
Not_found ->
raise(Namespace_error ("Namespace prefix not declared: " ^ prefix))
let scope =
match ns_scope with Some s -> s | None -> assert false in
let np =
try scope # normprefix_of_display_prefix prefix
with
Not_found ->
raise(Namespace_error ("Namespace prefix not declared: " ^ prefix))
in
ns_cache <- StringMap.add prefix np ns_cache;
np
in
normprefix ^ ":" ^ localname
end
......
......@@ -66,13 +66,14 @@ object
val pull_counter_limit : int
val mutable pull_counter : int
val mutable p_internal_subset : bool
val mutable src_norm_mapping : (string * string ) list
val mutable default_normprefix : string
val mutable ns_scope : Pxp_dtd.namespace_scope option
method parse : context -> extended_entry -> unit
method private only_whitespace : string -> unit
method private init_ns_processing : Pxp_dtd.namespace_manager -> unit
method private push_src_norm_mapping :
namespace_manager -> string -> (string * string) list ->
(string * string * string *
......
......@@ -4,6 +4,8 @@
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
module StringMap = Map.Make(String);;
type private_id = Pxp_type_anchor.private_id
type ext_id = Pxp_type_anchor.ext_id =
......
......@@ -10,6 +10,8 @@
*)
module type CORE_TYPES = sig
module StringMap : Map.S with type key = string
type ext_id = Pxp_type_anchor.ext_id =
System of string
| Public of (string * string)
......
......@@ -4,6 +4,7 @@
*)
open Pxp_aux
open Pxp_core_types
module Graph = struct
......
This diff is collapsed.
This diff is collapsed.
......@@ -27,6 +27,8 @@ type validation_record =
;;
(* class type? *)
class namespace_manager =
object (self)
val uri_of_prefix = Hashtbl.create 10 (* not unique *)
......@@ -98,6 +100,91 @@ object (self)
;;
class type namespace_scope =
object
method namespace_manager : namespace_manager
method parent_scope : namespace_scope option
method declaration : (string * string) list
method effective_declaration : (string * string) list
method display_prefix_of_uri : string -> string
method display_prefix_of_normprefix : string -> string
method uri_of_display_prefix : string -> string
method normprefix_of_display_prefix : string -> string
end
;;
module StrSet = Set.Make(String);;
class namespace_scope_impl mng parent_opt decl : namespace_scope =
object(self)
method namespace_manager = mng
method parent_scope = parent_opt
method declaration = decl
method effective_declaration =
let rec collect visible d s =
match d with
| ("", "") :: d' ->
if StrSet.mem "" visible then
collect visible d' s (* no effect *)
else
collect (StrSet.add "" visible) d' s (* hide inner default *)
| (dp, uri) :: d' ->
if StrSet.mem dp visible then
collect visible d' s
else
(dp, uri) :: collect (StrSet.add dp visible) d' s
| [] ->
( match s # parent_scope with
Some s' ->
collect visible s'#declaration s'
| None ->
[]
)
in
collect StrSet.empty self#declaration (self : #namespace_scope :> namespace_scope)
method display_prefix_of_uri uri =
try
fst(List.find (fun (p,u) -> u = uri) decl)
with
Not_found ->
( match parent_opt with
Some pa -> pa # display_prefix_of_uri uri
| None -> raise Not_found
)
method display_prefix_of_normprefix np =
let uris = mng # get_uri_list np in
try
fst(List.find (fun (p,u) -> List.mem u uris) decl)
with
Not_found ->
( match parent_opt with
Some pa -> pa # display_prefix_of_normprefix np
| None -> raise Not_found
)
method uri_of_display_prefix dp =
try
List.assoc dp decl
with
Not_found ->
( match parent_opt with
Some pa -> pa # uri_of_display_prefix dp
| None -> raise Not_found
)
method normprefix_of_display_prefix dp =
let uri = self # uri_of_display_prefix dp in
mng # get_normprefix uri
end
;;
class dtd ?swarner the_warner init_encoding =
object (self)
val mutable root = (None : string option)
......
......@@ -52,7 +52,7 @@ class namespace_manager :
(* This class manages mappings from URIs to normalized prefixes. For every
* namespace a namespace_manager object contains a set of mappings
* uri1 |-> np, uri2 |-> np, ..., uriN |-> np.
* The normalized prefix np is characterical of the namespace, and
* The normalized prefix np is characterstic of the namespace, and
* identifies the namespace uniquely.
* The first URI uri1 is the primary URI, the other URIs are aliases.
* The following operations are supported:
......@@ -125,6 +125,85 @@ class namespace_manager :
;;
(** The recursive class type [namespace_scope] represents the original
* namespace declarations found in the XML text. A single [namespace_scope]
* object contains a list of declared namespaces
* {[ [ (dp1, uri1); (dp2; uri2); ... ] ]}
* corresponding to the "xmlns"-type declarations found in a single
* XML element:
* {[ <element xmlns:dp1="uri1" xmlns:dp2="uri2" ... > ]}
* For the declaration of a default namespace [xmlns="uri"] the pair
* [("",uri)] must be included in the list. The special pair [("","")]
* means that the former default namespace is "undeclared".
*
* Furthermore, the [namespace_scope] object may have a parent
* [namespace_scope], representing the namespace declarations in the
* surrounding XML text.
*
* The [namespace_scope] objects are connected with the [namespace_manager]
* to allow translations from the namespace prefixes found in the XML
* text (also called "display prefixes" from now on) to the normalized
* prefixes stored in the [namespace_manager], and vice versa.
*
* The [namespace_scope] objects are intentionally immutable in order to
* allow memory sharing.
*)
class type namespace_scope =
object
method namespace_manager : namespace_manager
(** Returns the [namespace_manager] to which this scope object is
* connected
*)
method parent_scope : namespace_scope option
(** Returns the parent object, if any *)
method declaration : (string * string) list
(** Returns the list of namespace declarations of this scope (i.e.
* the declarations in parent objects are not considered). The
* list contains pairs [ (display_prefix, uri) ].
*)
method effective_declaration : (string * string) list
(** Returns the list of namespace declarations of this scope and
* all parent scopes. The list contains pairs [ (display_prefix, uri) ].
* Prefixes hidden by earlier declarations are suppressed in the list
*)
method display_prefix_of_uri : string -> string
(** Translates the URI to the corresponding display prefix as declared
* in this object or any parent object. Raises [Not_found] when the
* declaration cannot be found.
*)
method display_prefix_of_normprefix : string -> string
(** Translates the normalized prefix to the corresponding display
* prefix as declared in this object or any parent object. Raises
* [Not_found] when the declaration cannot be found, or the
* normalized prefix is unknown to the namespace manager.
*)
method uri_of_display_prefix : string -> string
(** Translates the display prefix to the corresponding URI as
* declared in this object or any parent object. Raises
* [Not_found] when the declaration cannot be found.
*)
method normprefix_of_display_prefix : string -> string
(** Translates the display prefix to the corresponding normalized
* prefix as declared in this object or any parent object. Raises
* [Not_found] when the declaration cannot be found, or the
* namespace manager does not know the namespace.
*)
end
;;
class namespace_scope_impl :
namespace_manager ->
namespace_scope option ->
(string * string) list ->
namespace_scope
(** An implementation of [namespace_scope]. New scopes are created by
* {[ new namespace_scope_impl mng parent_opt decl ]}
* where [mng] is the namespace managaer, [parent_opt] is the optional
* parent scope, and [decl] is the declaration list.
*)
class dtd :
(* Creation:
* new dtd
......
......@@ -50,6 +50,11 @@ object (self)
event_handler (E_start_doc(xml_version,
xml_standalone,
dtd));
(* Init namespace processing, if necessary: *)
( match config.enable_namespace_processing with
None -> ()
| Some mng -> self # init_ns_processing mng
);
init_done <- true;
List.iter event_handler ep_early_events;
ep_early_events <- [];
......@@ -94,6 +99,8 @@ object (self)
(* enabled namespaces *)
let (src_prefix, localname, norm_name, norm_attlist) =
self # push_src_norm_mapping mng name attlist in
let scope = match ns_scope with Some s -> s | None -> assert false
in
let mixed_attlist =
List.map (fun (orig_prefix, localname, norm_name, value) ->
(if orig_prefix = ""
......@@ -104,7 +111,7 @@ object (self)
if not emptiness then
stack_push (position, name, norm_name, tag_beg_entid) ep_elstack;
event_handler(E_ns_start_tag
(name,norm_name,mixed_attlist,tag_beg_entid));
(name,norm_name,mixed_attlist,scope,tag_beg_entid));
if emptiness then (
self # pop_src_norm_mapping();
event_handler(E_ns_end_tag(name,norm_name,tag_beg_entid));
......@@ -477,7 +484,7 @@ let drop_ignorable_whitespace_filter get_ev =
let ign_ws = has_ignorable_ws name in
Stack.push ign_ws elements;
ev
| Some(E_ns_start_tag(_,name,_,_)) ->
| Some(E_ns_start_tag(_,name,_,_,_)) ->
let ign_ws = has_ignorable_ws name in
Stack.push ign_ws elements;
ev
......
......@@ -245,6 +245,11 @@ object (self)
early_material <- [];
(* Move the super root or the emulation to the stack: *)
stack_push (super_root, "", (self :> entity_id)) elstack;
(* Init namespace processing, if necessary: *)
( match config.enable_namespace_processing with
None -> ()
| Some mng -> self # init_ns_processing mng
);
init_done <- true;
end
......@@ -311,14 +316,9 @@ object (self)
spec dtd norm_name norm_attlist
in
if config.enable_namespace_info then begin
let info =
new namespace_info_impl
src_prefix
element
( ("!", default_normprefix) :: src_norm_mapping) in
element # set_namespace_info (Some info);
end;
let scope = match ns_scope with Some s -> s | None -> assert false
in
element # set_namespace_scope scope;
element
......
......@@ -12,6 +12,7 @@ open Pxp_entity_manager
open Pxp_reader
open Netchannels
type config =
{ warner : collect_warnings;
swarner : symbolic_warnings option;
......@@ -33,7 +34,6 @@ type config =
(* enable_name_pool_for_notation_names : bool; *)
enable_name_pool_for_pinstr_targets : bool;
enable_namespace_processing : namespace_manager option;
enable_namespace_info : bool;
escape_contents :
(Pxp_lexer_types.token -> entity_manager -> string) option;
escape_attributes :
......@@ -63,7 +63,6 @@ let default_config =
enable_name_pool_for_pinstr_targets = false;
enable_name_pool_for_attribute_values = false;
enable_namespace_processing = None;
enable_namespace_info = false;
escape_contents = None;
escape_attributes = None;
debugging_mode = false;
......@@ -223,7 +222,7 @@ type event =
| E_start_tag of (string * (string * string) list *
Pxp_lexer_types.entity_id)
| E_ns_start_tag of (string * string * (string * string * string) list *
Pxp_lexer_types.entity_id)
namespace_scope * Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_ns_end_tag of (string * string * Pxp_lexer_types.entity_id)
| E_char_data of string
......
......@@ -338,18 +338,6 @@ type config =
* E_end_tag, respectively.
*)
enable_namespace_info : bool;
(* Whether to set the namespace_info slot of elements.
* This option has only an effect if enable_namespace_processing is
* non-None.
*
* Warning! This option requires a lot of memory!
*
* Default: false
*
* Event-based parser: this option is ignored.
*)
(* Experimental stuff: *)
escape_contents :
......@@ -654,7 +642,7 @@ type event =
| E_start_tag of (string * (string * string) list *
Pxp_lexer_types.entity_id)
| E_ns_start_tag of (string * string * (string * string * string) list *
Pxp_lexer_types.entity_id)
Pxp_dtd.namespace_scope * Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_ns_end_tag of (string * string * Pxp_lexer_types.entity_id)
| E_char_data of string
......@@ -673,10 +661,11 @@ type event =
* <name attlist>
* only used in non-namespace mode
*
* E_ns_start_tag (orig_name, norm_name, attlist, entid)
* E_ns_start_tag (orig_name, norm_name, attlist, scope, entid)
* only used in namespace mode; orig_name is the element as found
* in the XML text; norm_name is the normalized element name;
* attlist consists of triples (orig_name, norm_name, value).
* [scope] is the namespace scope object.
*
* E_end_tag (name, entid):
* </name>
......
......@@ -35,7 +35,6 @@ type config = Pxp_types.config =
enable_name_pool_for_attribute_values : bool;
enable_name_pool_for_pinstr_targets : bool;
enable_namespace_processing : Pxp_dtd.namespace_manager option;
enable_namespace_info : bool;
escape_contents :
(Pxp_lexer_types.token -> Pxp_entity_manager.entity_manager ->
string) option;
......@@ -96,7 +95,7 @@ type event = Pxp_types.event =
| E_end_doc
| E_start_tag of (string * (string * string) list * Pxp_lexer_types.entity_id)
| E_ns_start_tag of (string * string * (string * string * string) list *
Pxp_lexer_types.entity_id)
Pxp_dtd.namespace_scope * Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_ns_end_tag of (string * string * Pxp_lexer_types.entity_id)
| E_char_data of string
......
......@@ -49,7 +49,6 @@ type config = Pxp_types.config =
enable_name_pool_for_attribute_values : bool;
enable_name_pool_for_pinstr_targets : bool;
enable_namespace_processing : Pxp_dtd.namespace_manager option;
enable_namespace_info : bool;
escape_contents :
(Pxp_lexer_types.token -> Pxp_entity_manager.entity_manager ->
string) option;
......@@ -154,7 +153,7 @@ type event = Pxp_types.event =
| E_end_doc
| E_start_tag of (string * (string * string) list * Pxp_lexer_types.entity_id)
| E_ns_start_tag of (string * string * (string * string * string) list *
Pxp_lexer_types.entity_id)
Pxp_dtd.namespace_scope * Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_ns_end_tag of (string * string * Pxp_lexer_types.entity_id)
| E_char_data of string
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment