Commit 32dcf141 authored by gerd's avatar gerd

new: return the internal lexbuf of a lexer_obj


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@744 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent b0a94536
......@@ -66,6 +66,8 @@ for PXP; if you are looking for the stable distribution, please go
<p>Improvement: The parser also accepts a BOM as UTF-8 sequence.
Also, the autodetection of the encoding for UTF-16 has been
enhanced</p>
<p>Fix: Pxp_marshal module also keeps namespace scope objects</p>
<p>Addition: method lexbuf in lexer_obj</p>
</li>
<li><p><em>1.2.0test*:</em> New ~minimization option for the
[write] and [display] methods (user wish).</p>
......
......@@ -9,7 +9,7 @@
(**********************************************************************)
open Pxp_yacc
open Pxp_ev_parser
open Pxp_lexer_types
open Pxp_types
open Expr
......@@ -19,30 +19,8 @@ open Printf
(* dump_event: dumps a single parsing event *)
(* Print a one-line human-readable description of a parsing event.
   Each [E_*] constructor is rendered with its payload via [printf]. *)
let dump_event ev =
  match ev with
  | E_start_doc (version, standalone, _dtd) ->
      printf "E_start_doc version=%s standalone=%b\n" version standalone
  | E_end_doc ->
      printf "E_end_doc\n"
  | E_start_tag (name, attlist, _) ->
      (* Render the attribute list as space-separated name=value pairs *)
      let attrs = List.map (fun (n, v) -> n ^ "=" ^ v) attlist in
      printf "E_start_tag %s %s\n" name (String.concat " " attrs)
  | E_end_tag (name, _) ->
      printf "E_end_tag %s\n" name
  | E_char_data data ->
      printf "E_char_data %s\n" data
  | E_pinstr (target, data) ->
      printf "E_pinstr %s %s\n" target data
  | E_comment data ->
      printf "E_comment %s\n" data
  | E_position (entity, line, col) ->
      printf "E_position %s line=%d col=%d\n" entity line col
  | E_error err ->
      printf "E_error %s\n" (Printexc.to_string err)
  | E_end_of_stream ->
      printf "E_end_of_stream\n"
;;
(* Print the standard textual rendering of a parsing event. *)
let dump_event e =
  e |> Pxp_event.string_of_event |> print_endline
(* parse: prints the events while parsing the passed string *)
......@@ -54,7 +32,6 @@ let parse s =
(create_entity_manager default_config (from_string s))
dump_event;
flush stdout
;;
(* curly_parse: demonstrates how to use escape_contents. The character
......@@ -99,7 +76,10 @@ let curly_parse s =
line_col := add_col n !line_col;
tok
in
let lexbuf = mng # current_lexbuf in
let lexbuf =
match mng # current_lexer_obj # lexbuf with
| `Ocamllex lexbuf -> lexbuf
| `Netulex _ -> failwith "Netulex lexbuf not supported" in
let value = topexpr scan lexbuf in
printf "Result of expression: %d\n" value;
mng # update_line_column !line_col;
......@@ -168,11 +148,8 @@ let curly_parse s =
* This is currently very experimental!
*)
class any_entity_id = object end ;;
(* An entity ID is an object without properties except identity *)
let rec_curly_parse s =
let ent_id_guard = new any_entity_id in
let ent_id_guard = Pxp_dtd.Entity.create_entity_id() in
let base_config = default_config in
let rec escape ent_id tok mng =
......
......@@ -883,7 +883,10 @@ the character following the right curly brace.
{[
let parse_number mng =
let lexbuf = mng # current_lexbuf in (* FIXME: DOES NOT EXIST ANYMORE *)
let lexbuf =
match mng # current_lexer_obj # lexbuf with
| `Ocamllex lexbuf -> lexbuf
| `Netulex _ -> failwith "Netulex lexbufs not supported" in
match Lex.scan_number lexbuf with
| `Int n ->
let s = string_of_int n in
......@@ -906,6 +909,12 @@ the character following the right curly brace.
failwith "Unexpected EOF"
]}
Due to the way PXP works internally, the method [mng # current_lexer_obj
# lexbuf] can return two different kinds of lexical buffers. [`Ocamllex]
means it is a [Lexing.lexbuf] buffer. This type of buffer is used for
all 8 bit encodings, and if the special [pxp-lex-utf8] lexer is used.
The lexer [pxp-ulex-utf8], however, will return a [Netulex]-style buffer.
Finally, we enable to use our escaping functions in the config record:
{[
......
......@@ -178,6 +178,11 @@ let string_of_tok tok =
| DQuote -> "DQuote"
| ERef_att _ -> "ERef_att"
(* The internal buffer of a lexer: either a standard [Lexing.lexbuf]
   (ocamllex-based lexers) or a [Netulex.Ulexing.lexbuf] (ulex-based
   lexers). Which variant is returned depends on the lexer in use. *)
type lexbuf =
[ `Ocamllex of Lexing.lexbuf
| `Netulex of Netulex.Ulexing.lexbuf
]
class type lexer_factory =
object
method encoding : Pxp_core_types.I.rep_encoding
......@@ -218,6 +223,7 @@ object
method lexeme : string
method lexeme_strlen : int
method sub_lexeme : int -> int -> string
method lexbuf : lexbuf
end
......
......@@ -136,6 +136,12 @@ val string_of_lexers : lexers -> string
val string_of_tok : token -> string
(** The internal buffer of a lexer. [`Ocamllex] wraps a standard
    [Lexing.lexbuf] as used by ocamllex-generated lexers, while
    [`Netulex] wraps a [Netulex.Ulexing.lexbuf] as used by ulex-based
    lexers. Which variant a lexer returns depends on its implementation.
 *)
type lexbuf =
[ `Ocamllex of Lexing.lexbuf
| `Netulex of Netulex.Ulexing.lexbuf
]
(** The [lexer_factory] creates lexers for a certain character encoding.
*)
class type lexer_factory =
......@@ -223,6 +229,11 @@ object
*
* For some implementations, this function is very inefficient.
*)
method lexbuf : lexbuf
(** Direct access to the lexbuf. Depending on the lexer, different
lexbuf types can be returned
*)
end
......
......@@ -101,6 +101,8 @@ object(self)
Netconversion.ustring_of_uarray `Enc_${encoding} ~pos ~len ua
)
method lexbuf = `Ocamllex lexbuf
method scan_document =
fun () ->
......
......@@ -125,6 +125,8 @@ object(self)
method sub_lexeme pos len =
Ulexing.utf8_sub_lexeme lexbuf pos len
method lexbuf = `Netulex lexbuf
method scan_document =
fun () ->
......
......@@ -330,6 +330,8 @@ object(self)
let ua = Netconversion.uarray_of_ustring `Enc_${encoding} s in
Netconversion.ustring_of_uarray `Enc_${encoding} ~pos ~len ua
method lexbuf = `Ocamllex lexbuf
method scan_document =
fun () ->
${scan_document}.scan_document
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment