Commit 1633e8b3 authored by gerd's avatar gerd

Moved event functions to new module Pxp_event, there

are also new functions.
	Extended E_pinstr and E_pinstr_member by entity_id field.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@707 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent ac91bb99
......@@ -48,7 +48,7 @@ let event_list ?(del_pinstr_member=true) ?(keep_positions = true) generator =
let rec unroll() =
match generator() with
None -> []
| Some (E_pinstr_member(_,_)) when del_pinstr_member -> unroll()
| Some (E_pinstr_member(_,_,_)) when del_pinstr_member -> unroll()
| Some (E_position(e,l,c)) when not keep_positions -> unroll()
| Some e -> e :: unroll()
in
......@@ -72,9 +72,9 @@ let print_event =
"E_end_tag name=" ^ name
| E_char_data data ->
"E_char_data data=\"" ^ String.escaped data ^ "\""
| E_pinstr (target, value) ->
| E_pinstr (target, value, _) ->
"E_pinstr target=" ^ target ^ ",value=" ^ value
| E_pinstr_member (target, value) ->
| E_pinstr_member (target, value, _) ->
"E_pinstr_member target=" ^ target ^ ",value=" ^ value
| E_comment data ->
"E_comment data=\"" ^ String.escaped data ^ "\""
......@@ -127,11 +127,11 @@ let rec compare_event_lists l1 l2 =
| E_char_data(data1),
E_char_data(data2) ->
data1 = data2
| E_pinstr(target1,value1),
E_pinstr(target2,value2) ->
| E_pinstr(target1,value1,_),
E_pinstr(target2,value2,_) ->
target1 = target2 && value1 = value2
| E_pinstr_member(target1,value1),
E_pinstr_member(target2,value2) ->
| E_pinstr_member(target1,value1,_),
E_pinstr_member(target2,value2,_) ->
true
(* Don't compare; E_pinstr_member do not have a defined
* order
......
......@@ -10,7 +10,7 @@ OBJ = pxp_lexing.cmo pxp_type_anchor.cmo \
pxp_dfa.cmo \
pxp_entity.cmo pxp_dtd.cmo \
pxp_entity_manager.cmo \
pxp_types.cmo pxp_document.cmo \
pxp_types.cmo pxp_event.cmo pxp_document.cmo \
pxp_core_parser.cmo pxp_tree_parser.cmo pxp_ev_parser.cmo \
pxp_dtd_parser.cmo \
pxp_yacc.cmo pxp_marshal.cmo pxp_codewriter.cmo
......
......@@ -367,6 +367,7 @@ class virtual core_parser
(string*int*int) option ->
string ->
string ->
entity_id ->
unit
method private virtual event_comment :
......@@ -956,7 +957,7 @@ declaration():
| notationdecl()
{{ () }}
| pi: PI
{{ let target, value = pi in
{{ let target, value, ent_id = pi in
let pi = new proc_instruction target value config.encoding in
dtd # add_pinstr pi
}}
......@@ -2077,13 +2078,13 @@ pi():
else
None
in
let target0,value = pi in
let target0,value,ent_id = pi in
let target =
if config.enable_name_pool_for_pinstr_targets
then make_pool_string target0
else target0 in
self # event_pinstr position target value
self # event_pinstr position target value ent_id
}}
......
......@@ -105,6 +105,7 @@ object
(string*int*int) option ->
string ->
string ->
entity_id ->
unit
method private virtual event_comment :
......
......@@ -4101,7 +4101,7 @@ let solidify ?dtd cfg spec next_ev : 'ext solid_xml =
);
pos := None
| Some (E_pinstr(target,value)) ->
| Some (E_pinstr(target,value,_)) ->
(* A PI may occur everywhere between start_doc and end_doc. *)
if !doc_state = End_seen then unexpected "E_pinstr";
if !doc_state = Null then doc_state := NA;
......@@ -4122,7 +4122,7 @@ let solidify ?dtd cfg spec next_ev : 'ext solid_xml =
);
pos := None
| Some (E_pinstr_member(target,value)) ->
| Some (E_pinstr_member(target,value,_)) ->
if !doc_state = End_seen then unexpected "E_pinstr";
if !doc_state = Null then doc_state := NA;
let pi = new proc_instruction target value !eff_dtd#encoding in
......@@ -4190,13 +4190,11 @@ type 'ext flux_state =
]
class dummy = object end ;;
let liquefy_node ?(omit_end = false) ?(omit_positions = false)
(init_fstate : 'ext flux_state) =
let fstate = ref init_fstate in
let eid = new dummy in
let eid = Pxp_dtd.Entity.create_entity_id() in
let rec generate arg =
match !fstate with
`Node_start n ->
......@@ -4238,7 +4236,7 @@ let liquefy_node ?(omit_end = false) ?(omit_positions = false)
(fun target ->
List.map
(fun pi ->
E_pinstr_member(target,pi#value)
E_pinstr_member(target,pi#value,eid)
)
(n # pinstr target)
)
......@@ -4255,7 +4253,7 @@ let liquefy_node ?(omit_end = false) ?(omit_positions = false)
let (entity,line,colpos) = n # position in
let pos = E_position(entity,line,colpos) in
let value = (List.hd (n # pinstr target)) # value in
let ev = E_pinstr(target,value) in
let ev = E_pinstr(target,value,eid) in
let out =
if omit_positions then [ ev ] else [ pos; ev ] in
fstate := `Output(out, !fstate);
......@@ -4321,6 +4319,7 @@ let liquefy_node ?(omit_end = false) ?(omit_positions = false)
let liquefy_doc ?(omit_end = false) ?(omit_positions = false)
(doc : 'ext document) =
let eid = Pxp_dtd.Entity.create_entity_id() in
let fstate = ref `Start in
let rec generate arg =
match !fstate with
......@@ -4331,7 +4330,7 @@ let liquefy_doc ?(omit_end = false) ?(omit_positions = false)
(fun target ->
List.map
(fun pi ->
E_pinstr_member(target,pi#value)
E_pinstr_member(target,pi#value,eid)
)
(doc # pinstr target)
)
......
......@@ -219,7 +219,7 @@ let update_content_lines v tok =
LineEnd _ ->
v.line <- v.line + 1;
v.column <- 0;
| (PI(_,_)|PI_xml _|Cdata _) ->
| (PI(_,_,_)|PI_xml _|Cdata _) ->
count_lines v.linecount v.lexobj#lexeme;
update_lines v;
| _ ->
......@@ -500,11 +500,13 @@ class virtual entity the_dtd the_name the_swarner the_warner init_encoding =
cd
(* If there are CRLF sequences in a PI value, normalize them, too *)
| PI(name,value) as pi ->
| PI(name,value,_) as pi ->
if v.normalize_newline then
PI(name, normalize_line_separators v.lfactory value)
PI(name,
normalize_line_separators v.lfactory value,
(self :> entity_id))
else
pi
PI(name, value, (self :> entity_id))
(* Attribute values: If they are already normalized, they are turned
* into Attval_nl_normalized. This is detected by other code.
......
......@@ -159,7 +159,7 @@ object (self)
event_handler(E_char_data(data))
method private event_pinstr position target value =
method private event_pinstr position target value ent_id =
if config.enable_pinstr_nodes &&
(n_tags_open > 0 || config.enable_super_root_node) then begin
let ev_list =
......@@ -167,14 +167,14 @@ object (self)
Some(e,l,c) -> [ E_position(e,l,c) ]
| None -> []
)
@ [ E_pinstr(target,value) ] in
@ [ E_pinstr(target,value,ent_id) ] in
if init_done then
List.iter event_handler ev_list
else
ep_early_events <- ep_early_events @ ev_list
end
else begin
let ev = E_pinstr_member(target,value) in
let ev = E_pinstr_member(target,value,ent_id) in
if init_done then
event_handler ev
else
......@@ -417,110 +417,3 @@ let create_pull_parser
record_error exn;
return_result
;;
type 'a filter = ('a -> event option) -> ('a -> event option)
let norm_cdata_filter get_ev =
let q = Queue.create () in
let rec get_ev' thing =
try
Queue.pop q
with
Queue.Empty ->
let ev = get_ev thing in
match ev with
Some (E_char_data s) ->
if s = "" then
get_ev' thing
else
gather_string [s] thing
| _ ->
ev
and gather_string sl thing =
let ev = get_ev thing in
match ev with
Some (E_char_data s) ->
gather_string (s :: sl) thing
| _ ->
Queue.add (Some(E_char_data(String.concat "" (List.rev sl)))) q;
Queue.add ev q;
get_ev' thing
in
get_ev'
;;
let drop_ignorable_whitespace_filter get_ev =
let found_dtd = ref None in
let elements = Stack.create() in
let pos = ref("",0,0) in
let has_ignorable_ws elname =
match !found_dtd with
Some dtd ->
( try
let el = dtd # element elname in
let cm = el # content_model in
( match cm with
Regexp _ -> true
| Mixed ml -> not (List.mem MPCDATA ml)
| _ -> false
)
with
Undeclared -> false
| Validation_error _ -> false (* element not found *)
)
| None ->
false
in
let pop() =
try
ignore(Stack.pop elements)
with
Stack.Empty ->
failwith "Pxp_ev_parser.drop_ignorable_whitespace_filter: bad event stream"
in
let rec get_ev' thing =
let ev = get_ev thing in
match ev with
Some(E_start_doc(_,dtd)) ->
if !found_dtd <> None then
failwith "Pxp_ev_parser.drop_ignorable_whitespace_filter: More than one E_start_doc event";
found_dtd := Some dtd;
ev
| Some(E_position(e,line,col)) ->
pos := (e,line,col);
ev
| Some(E_start_tag(name,_,_,_)) ->
let ign_ws = has_ignorable_ws name in
Stack.push ign_ws elements;
ev
| Some(E_end_tag(name,_)) ->
pop();
ev
| Some(E_char_data s) ->
let ign_ws = try Stack.top elements with Stack.Empty -> true in
if ign_ws then (
if not (Pxp_lib.only_whitespace s) then
let (e,line,col) = !pos in
let where = "In entity " ^ e ^ ", at line " ^
string_of_int line ^ ", position " ^
string_of_int col ^ ":\n" in
raise(At(where,WF_error("Data not allowed here")))
else
(* drop this event, and continue with next: *)
get_ev' thing
)
else ev
| _ ->
ev
in
get_ev'
;;
......@@ -137,7 +137,7 @@ val create_pull_parser :
config ->
entry ->
Pxp_entity_manager.entity_manager ->
('a -> event option)
(unit -> event option)
(* let next_event = create_pull_parser cfg entry mng in
* let ev = next_event()
*
......@@ -151,46 +151,13 @@ val create_pull_parser :
* error happens. Only the E_error event is generated (as last event).
*
* To create a stream of events, just do:
* let stream = Stream.from(create_pull_parser cfg entry mng)
* let next = create_pull_parser cfg entry mng in
* let stream = Stream.from(fun _ -> next())
*)
(**********************************************************************)
(* Filters *)
(**********************************************************************)
(* Filters are currently only available for the pull model (which is
* the more general one).
*
* Example:
* let stream = Stream.from
* (norm_cdata_filter(create_pull_parser cfg entry mng))
*)
type 'a filter = ('a -> event option) -> ('a -> event option)
val norm_cdata_filter : 'a filter
(* This filter
* - removes empty E_char_data events
* - concatenates adjacent E_char_data events
* but does not touch any other parts of the event stream.
*)
val drop_ignorable_whitespace_filter : 'a filter
(* This filter
* - checks whether character data between elements in a
* "regexp" or "non-PCDATA mixed" content model consists
* only of whitespace, and
* - removes these whitespace characters from the event stream.
* If the check fails, a WF_Error will be raised.
*
* This filter works only if the DTD found in the event stream
* actually contains element declarations. This is usually enabled
* by including the `Extend_dtd_fully or `Val_mode_dtd options to
* the [entry] passed to the [create_pull_parser] call. Furthermore,
* there must be an E_start_doc event.
*
* This filter does not perform any other validation checks.
*)
(* Filters have been moved to Pxp_event! *)
(* TODO: ID filter that creates an ID index *)
(* For conversions from trees to event streams, and vice versa,
* see Pxp_document.
*)
This diff is collapsed.
(* $Id$
* ----------------------------------------------------------------------
* PXP: The polymorphic XML parser for Objective Caml.
* Copyright by Gerd Stolpmann. See LICENSE for details.
*)
open Pxp_types
open Pxp_dtd
(**********************************************************************)
(* Event streams and lists *)
(**********************************************************************)
val to_list : (unit -> event option) -> event list
(* Fetch all events from the event stream, and return the corresponding
* list of events.
*)
val of_list : event list -> (unit -> event option)
(* Pull the events from the input list *)
val concat : (unit -> event option) list ->
(unit -> event option)
(* Pull the events from the streams in turn *)
val iter : (event -> unit) -> (unit -> event option) -> unit
(* Iterates over the events of the stream and calls the function *)
(* Missing: map, fold, ... *)
val extract : event -> (unit -> event option) -> (unit -> event option)
(* let next' = extract e next:
* Extracts a subexpression from the stream [next] prepended by [e].
* A subexpression consists of either
* - a single data, comment, PI, or error event
* - a start tag, either of an element, a super root, or a document,
* until the corresponding end tag
* - a position event followed by a subexpression
* The returned stream contains all events of the subexpression.
* When the extracted stream is read, the original stream is read, too.
*
* Example:
* let l = [ E_pinstr; E_start_tag; E_data; E_start_tag; E_end_tag;
* E_comment; E_end_tag; E_data ];;
* let g = of_list l;;
* g();;
* let Some e = g();; (* e = E_start_tag *)
* let g' = extract e g;;
* g'();; (* returns Some E_start_tag *)
* ...
* g'();; (* returns Some E_end_tag *)
* g'();; (* returns None, end of subexpression *)
* g();; (* returns Some E_data *)
* g();; (* returns None *)
*)
(**********************************************************************)
(* Filters *)
(**********************************************************************)
type filter = (unit -> event option) -> (unit -> event option)
val norm_cdata_filter : filter
(* This filter
* - removes empty E_char_data events
* - concatenates adjacent E_char_data events
* but does not touch any other parts of the event stream.
*)
val drop_ignorable_whitespace_filter : filter
(* This filter
* - checks whether character data between elements in a
* "regexp" or "non-PCDATA mixed" content model consists
* only of whitespace, and
* - removes these whitespace characters from the event stream.
* If the check fails, a WF_Error will be raised.
*
* This filter works only if the DTD found in the event stream
* actually contains element declarations. This is usually enabled
* by including the `Extend_dtd_fully or `Val_mode_dtd options to
* the [entry] passed to the [create_pull_parser] call. Furthermore,
* there must be an E_start_doc event.
*
* This filter does not perform any other validation checks.
*)
val pfilter : (event -> bool) -> filter
(* Filters an event stream by a predicate
*
* Example: Remove comments:
* pfilter (function E_comment _ -> false | _ -> true) g
*)
(* Missing: ID check *)
(**********************************************************************)
(* Printing *)
(**********************************************************************)
type dtd_style =
[ `Ignore
| `Include
| `Reference
]
val write_events :
?default:string -> (* Default: none *)
?dtd_style:dtd_style -> (* Default: `Include *)
output_stream ->
encoding ->
rep_encoding ->
(unit -> event option) ->
unit
(* Writes the events to the [output_stream]. The events must be encoded
* as indicated by the [rep_encoding] argument, but the output is written
* as specified by the [encoding] argument.
*
* The normalized namespace prefixes are declared as needed. Additionally,
* one can set the default namespace by passing [default], which must be
* the normalized prefix of the default namespace.
*
* For E_doc_start events, the DTD may be written. This is controlled by
* [dtd_style]:
* - `Ignore: No DOCTYPE clause is written
* - `Include: The DOCTYPE clause is written, and the DTD is included
* in the internal subset
* - `Reference: The DOCTYPE clause is written as a reference to an
* external DTD
*)
val display_events :
?dtd_style:dtd_style -> (* Default: `Include *)
output_stream ->
encoding ->
rep_encoding ->
(unit -> event option) ->
unit
(* Writes the events to the [output_stream]. The events must be encoded
* as indicated by the [rep_encoding] argument, but the output is written
* as specified by the [encoding] argument.
*
* Namespace prefixes are declared as defined in the namespace scopes.
* Missing prefixes are invented on the fly.
*
* The way the DTD is printed can be set as in [write_events].
*)
......@@ -160,7 +160,7 @@
let len_param = String.length pi - s_len in
(* It is possible that len_param = -1 *)
if len_param >= 1 then
PI(s_name, String.sub pi s_len len_param)
PI(s_name, String.sub pi s_len len_param, dummy_entity)
else
PI(s_name, "")
PI(s_name, "", dummy_entity)
......@@ -90,7 +90,7 @@ type token =
| Tag_beg of (string*entity_id) (* <name *)
| Tag_end of (string*entity_id) (* </name *)
| PI of (string*string) (* <?name ... ?> *)
| PI of (string*string*entity_id) (* <?name ... ?> *)
| PI_xml of (prolog_token list) (* <?xml ...?> *)
| Cdata of string (* <![CDATA[...]]> *)
| CRef of int (* &#digits; *)
......
......@@ -76,7 +76,7 @@ type token =
| Tag_beg of (string*entity_id) (* <name *)
| Tag_end of (string*entity_id) (* </name *)
| PI of (string*string) (* <?name ... ?> *)
| PI of (string*string*entity_id) (* <?name ... ?> *)
| PI_xml of (prolog_token list) (* <?xml ...?> *)
| Cdata of string (* <![CDATA[...]]> *)
| CRef of int (* &#digits; *)
......
......@@ -422,7 +422,7 @@ object (self)
else
current_data <- data :: current_data
method private event_pinstr position target value =
method private event_pinstr position target value ent_id =
(* position: The position of the processing instruction
* target: The name following <?
* value: The string following the name
......
......@@ -224,8 +224,8 @@ type event =
Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_char_data of string
| E_pinstr of (string * string)
| E_pinstr_member of (string * string)
| E_pinstr of (string * string * Pxp_lexer_types.entity_id)
| E_pinstr_member of (string * string * Pxp_lexer_types.entity_id)
| E_comment of string
| E_start_super
| E_end_super
......
......@@ -643,8 +643,8 @@ type event =
Pxp_lexer_types.entity_id)
| E_end_tag of (string * Pxp_lexer_types.entity_id)
| E_char_data of string
| E_pinstr of (string * string)
| E_pinstr_member of (string * string)
| E_pinstr of (string * string * Pxp_lexer_types.entity_id)
| E_pinstr_member of (string * string * Pxp_lexer_types.entity_id)
| E_comment of string
| E_start_super
| E_end_super
......@@ -696,6 +696,14 @@ type event =
* E_error(exn):
* this last event indicates that the parser has terminated with
* error
*
* Note Pxp_lexer_types.entity_id: currently, this is just < >, i.e.
* the class type without properties. It is planned, however, that
* one can at least query the base URI of the entity. The best
* way of dealing with this parameter for now:
* - When parsing events, ignore it
* - When creating events, use Pxp_dtd.Entity.create_entity_id to
* generate new IDs.
*)
......
......@@ -953,7 +953,7 @@ let generate_event_generator
let target_expr = generate_for_string_expr target in
let value_expr = generate_for_string_expr value in
let loc = mkloc p1 p2 in
[ `Single, <:expr< Pxp_types.E_pinstr($target_expr$,$value_expr$) >> ]
[ `Single, <:expr< Pxp_types.E_pinstr($target_expr$,$value_expr$,_eid) >> ]
| (`Super subnodes,p1,p2) ->
let subnodes_expr = generate_for_nodelist_expr nsmode subnodes in
let loc = mkloc p1 p2 in
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment