Commit 2296ae60 authored by gerd's avatar gerd

Pxp_reader: Exceptions are now also in Pxp_types.

	class [combine]: removed preferred subresolvers, due
to problems with the semantics. The resolver is now always
searched from the begining.
	class [resolve_as_file]: option base_url_defaults_to_cwd
is now false by default. [true] exposes a number of problematic
effects.
	Added tests for class [combine].


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@689 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent e20bb5aa
......@@ -458,6 +458,272 @@ let t025 () =
true
;;
(**********************************************************************)
(* t03X: Combination *)
let t030() =
(* Combine a catalog SYSTEM ID with "resolve_as_file" *)
let file_pwd = "file://" ^ Sys.getcwd() ^ "/" in
let res_a =
new lookup_system_id_as_file
[ "foo", "t_a.dat" ] in
let res_b =
new resolve_as_file
~base_url_defaults_to_cwd:false
() in
let res_c =
new combine [ res_a; res_b ] in
res_c # init_rep_encoding `Enc_iso88591;
res_c # init_warner None (new drop_warnings);
let lex_c1_src = res_c # open_rid { null_rid with
rid_system = Some "foo";
rid_system_base = Some file_pwd;
} in
let lex_c1 = Lazy.force lex_c1_src.lsrc_lexbuf in
assert(nextchar lex_c1 = Some 'a');
(* The following works because catalogs ignore system_base: *)
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_system = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
assert(nextchar lex_c1' = Some 'a');
res_c' # close_in;
(* But this does not work, because system_base is not absolute: *)
( try
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
res_c' # close_in;
assert false
with
Not_resolvable Neturl.Malformed_URL -> ()
);
res_c # close_in;
let lex_c2_src = res_c # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base = Some file_pwd;
} in
let lex_c2 = Lazy.force lex_c2_src.lsrc_lexbuf in
assert(nextchar lex_c2 = Some 'b');
(* The following works because catalogs ignore system_base: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_system = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_system = Some "t_a.dat";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
res_c # close_in;
true
;;
let t031() =
(* Combine a catalog PUBLIC ID with "resolve_as_file" *)
let file_pwd = "file://" ^ Sys.getcwd() ^ "/" in
let res_a =
new lookup_public_id_as_file
[ "foo", "t_a.dat" ] in
let res_b =
new resolve_as_file
~base_url_defaults_to_cwd:false
() in
let res_c =
new combine [ res_a; res_b ] in
res_c # init_rep_encoding `Enc_iso88591;
res_c # init_warner None (new drop_warnings);
let lex_c1_src = res_c # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base = Some file_pwd;
} in
let lex_c1 = Lazy.force lex_c1_src.lsrc_lexbuf in
assert(nextchar lex_c1 = Some 'a');
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
assert(nextchar lex_c1' = Some 'a');
res_c' # close_in;
(* But this does not work, because system_base is None: *)
( try
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
res_c' # close_in;
assert false
with
Not_resolvable Not_found -> ()
);
res_c # close_in;
let lex_c2_src = res_c # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base = Some file_pwd;
} in
let lex_c2 = Lazy.force lex_c2_src.lsrc_lexbuf in
assert(nextchar lex_c2 = Some 'b');
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_system = Some "t_a.dat";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
res_c # close_in;
true
;;
let t032() =
(* Combine a mixed PUBLIC/SYSTEM ID catalog with "resolve_as_file" *)
let file_pwd = "file://" ^ Sys.getcwd() ^ "/" in
let res_a =
new lookup_id_as_file
[ Public("foo", file_pwd ^ "foo"), "t_a.dat" ] in
let res_b =
new resolve_as_file
~base_url_defaults_to_cwd:false
() in
let res_c =
new norm_system_id
(new combine [ res_a; res_b ]) in
res_c # init_rep_encoding `Enc_iso88591;
res_c # init_warner None (new drop_warnings);
let lex_c1_src = res_c # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base = Some file_pwd;
} in
let lex_c1 = Lazy.force lex_c1_src.lsrc_lexbuf in
assert(nextchar lex_c1 = Some 'a');
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
assert(nextchar lex_c1' = Some 'a');
res_c' # close_in;
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c1'_src = res_c' # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c1' = Lazy.force lex_c1'_src.lsrc_lexbuf in
res_c' # close_in;
res_c # close_in;
let lex_c2_src = res_c # open_rid { null_rid with
rid_system = Some "t_b.dat";
rid_system_base = Some file_pwd;
} in
let lex_c2 = Lazy.force lex_c2_src.lsrc_lexbuf in
assert(nextchar lex_c2 = Some 'b');
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_public = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
(* This is expected to work: *)
let res_c' = res_c # clone in
let lex_c2'_src = res_c' # open_rid { null_rid with
rid_system = Some "foo";
rid_system_base =
res_c#active_id.rid_system;
} in
let lex_c2' = Lazy.force lex_c2'_src.lsrc_lexbuf in
assert(nextchar lex_c2' = Some 'a');
res_c' # close_in;
res_c # close_in;
true
;;
(**********************************************************************)
let test f n =
......@@ -482,3 +748,7 @@ test t022 "022";;
test t023 "023";;
test t024 "024";;
test t025 "025";;
test t030 "030";;
test t031 "031";;
test t032 "032";;
......@@ -232,6 +232,10 @@ exception Method_not_applicable of string
exception Namespace_method_not_applicable of string
exception Not_competent
exception Not_resolvable of exn
let rec string_of_exn x0 =
match x0 with
At (s, x) ->
......@@ -252,6 +256,10 @@ let rec string_of_exn x0 =
"INTERNAL ERROR (method `" ^ mname ^ "' not applicable)"
| Parsing.Parse_error ->
"SYNTAX ERROR"
| Not_competent ->
"NO COMPETENT RESOLVER FOUND"
| Not_resolvable x ->
"NOT RESOLVABLE: " ^ string_of_exn x
| _ ->
"Other exception: " ^ Printexc.to_string x0
;;
......
......@@ -267,6 +267,12 @@ module type CORE_TYPES = sig
* (New in PXP 1.1)
*)
exception Not_competent
(* The resolver cannot open this kind of entity ID *)
exception Not_resolvable of exn
(* While opening the entity, the nested exception occurred *)
val string_of_exn : exn -> string
(* Converts a PXP exception into a readable string *)
......
......@@ -7,8 +7,8 @@
open Pxp_core_types;;
open Netchannels;;
exception Not_competent;;
exception Not_resolvable of exn;;
exception Not_competent = Pxp_core_types.Not_competent;;
exception Not_resolvable (* of exn *) = Pxp_core_types.Not_resolvable;;
type lexer_source =
{ lsrc_lexbuf : Lexing.lexbuf Lazy.t;
......@@ -703,7 +703,8 @@ class resolve_as_file
?(system_encoding = `Enc_utf8)
?(map_private_id = (fun _ -> raise Not_competent))
?(open_private_id = (fun _ -> raise Not_competent))
?(base_url_defaults_to_cwd = true)
?(base_url_defaults_to_cwd = false)
?(not_resolvable_if_not_found = true)
()
=
......@@ -813,6 +814,9 @@ class resolve_as_file
~out_enc: system_encoding
path_utf8 in
(* May raise Malformed_code *)
if (not not_resolvable_if_not_found) && not(Sys.file_exists path) then
raise Not_competent;
(new input_channel(open_in_bin path), None, None)
(* May raise Sys_error *)
......@@ -1045,9 +1049,8 @@ type combination_mode =
;;
class combine ?prefer ?mode rl =
class combine ?mode rl =
object (self)
val prefered_resolver = prefer
val mode = mode
val resolvers = (rl : resolver list)
val mutable internal_encoding = `Enc_utf8
......@@ -1117,9 +1120,7 @@ class combine ?prefer ?mode rl =
if active_resolver <> None then failwith "Pxp_reader.combine # open_rid";
let r, lb =
match prefered_resolver with
None -> find_competent_resolver resolvers
| Some r -> find_competent_resolver (r :: resolvers)
find_competent_resolver resolvers
in
active_resolver <- Some r;
lb
......@@ -1149,12 +1150,11 @@ class combine ?prefer ?mode rl =
let c =
match active_resolver with
None ->
new combine ?prefer:None ?mode
new combine ?mode
(List.map (fun q -> q # clone) resolvers)
| Some r ->
let r' = r # clone in
new combine
?prefer:(Some r')
?mode
(List.map
(fun q -> if q == r then r' else q # clone)
......
......@@ -350,6 +350,7 @@ class resolve_as_file :
?map_private_id: (private_id -> Neturl.url) ->
?open_private_id: (private_id -> in_channel * encoding option) ->
?base_url_defaults_to_cwd: bool ->
?not_resolvable_if_not_found:bool ->
unit ->
resolver;;
......@@ -387,13 +388,16 @@ class resolve_as_file :
* from and to get the character encoding. The URL is taken into account
* when subsequently relative SYSTEM IDs must be resolved.
*
* Option ~base_url_defaults_to_cwd: If true (the default), relative URLs
* Option ~base_url_defaults_to_cwd: If true, relative URLs
* are interpreted relative to the current working directory at the time
* the class is instantiated, but only if there is no parent URL, i.e.
* rid_system_base=None. If false, such URLs cannot be resolved. This
* option is selected by default because of backward compatibility.
* rid_system_base=None. If false (the default), such URLs cannot be resolved.
* In general, it is better to set this option to false, and to
* initialize rid_system_base properly.
*
* Option ~not_resolvable_if_not_found: If true (the default),
* "File not found" errors stop the resolution process. If false,
* "File not found" is treated as [Not_competent].
*)
val make_file_url :
......@@ -428,6 +432,9 @@ class lookup_id :
(* The general catalog class. The catalog argument specifies pairs (xid,r)
* mapping external IDs xid to subresolvers r. The subresolver is invoked
* if an entity with the corresponding xid is to be opened.
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*)
......@@ -440,6 +447,9 @@ class lookup_id_as_file :
* to files. The file is read if an entity with the corresponding xid is
* to be opened.
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*
* ~fixenc: Overrides the encoding of the file contents. By default, the
* standard rule is applied to find out the encoding of the file.
*)
......@@ -453,6 +463,9 @@ class lookup_id_as_string :
(* The catalog argument specifies pairs (xid,s) mapping external IDs xid
* to strings s. The string is read if an entity with the corresponding
* xid is to be opened.
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*)
......@@ -463,6 +476,7 @@ class lookup_public_id :
(* This is the generic builder for PUBLIC id catalog resolvers: The catalog
* argument specifies pairs (pubid, r) mapping PUBLIC identifiers to
* subresolvers.
*
* The subresolver is invoked if an entity with the corresponding PUBLIC
* id is to be opened.
*)
......@@ -479,6 +493,13 @@ class lookup_public_id_as_file :
* filenames must already be encoded in the character set the system uses
* for filenames.
*
* Note: This class does not enable the resolution of inner IDs of PUBLIC
* entities by relative SYSTEM names. To get this effect, use
* the class lookup_id, and feed it with combined
* Public(pubid,sysid) identifiers. In this case, the entity has both
* a PUBLIC and a SYSTEM ID, and resolution of inner relative SYSTEM
* names works.
*
* ~fixenc: Overrides the encoding of the file contents. By default, the
* standard rule is applied to find out the encoding of the file.
*)
......@@ -510,7 +531,10 @@ class lookup_system_id :
* Important note: Two SYSTEM IDs are considered as equal if they are
* equal in their string representation. (This may not what you want
* and may cause trouble... However, I currently do not know how to
* implement a "semantic" comparison logic.)
* implement a "semantical" comparison logic.)
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*)
......@@ -524,6 +548,9 @@ class lookup_system_id_as_file :
* filenames must already be encoded in the character set the system uses
* for filenames.
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*
* ~fixenc: Overrides the encoding of the file contents. By default, the
* standard rule is applied to find out the encoding of the file.
*)
......@@ -538,6 +565,9 @@ class lookup_system_id_as_string :
* pairs (sysid, text) mapping SYSTEM identifiers to XML text (which must
* begin with <?xml ...?>).
*
* Note: SYSTEM IDs are simply compared literally, without making
* relative IDs absolute. See norm_system_id below for this function.
*
* ~fixenc: Overrides the encoding of the strings.
*)
......@@ -549,7 +579,7 @@ class norm_system_id : resolver -> resolver
* Normalization includes:
* - Relative URLs are made absolute. If this fails, the problematic
* relative URL will be rejected.
* - .. and . and // are removed
* - .. and . and // in the middle of URLs are removed
* - Escaping of reserved characters is normalized
*
* Normalization is recommended for catalogs, e.g.
......@@ -590,7 +620,8 @@ class rewrite_system_id :
* The class normalizes URLs as norm_system_id does, before the match
* is tried.
*
* By default, URLs that do not match any pattern are rejected.
* By default, URLs that do not match any pattern are rejected
* (Not_competent).
*
* The rewritten URL is only visible within the passed subresolver.
* If the opened entity accesses other entities by relative URLs,
......@@ -628,7 +659,6 @@ type combination_mode =
class combine :
?prefer:resolver ->
?mode:combination_mode ->
resolver list ->
resolver;;
......@@ -652,8 +682,6 @@ class combine :
* resolvers are cloned and again combined. If the 'clone' method is
* invoked after 'open_rid' (i.e. while the resolver is open), only the
* active resolver is cloned.
*
* ~prefer: This is an internally used option.
*)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment