Commit 3d9078ad authored by gerd's avatar gerd

Finished pxp-pp.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@697 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent 00518123
......@@ -7,16 +7,18 @@ with_lex=1
with_wlex=1
with_wlex_compat=1
with_ulex=1
with_pp=1
lexlist="utf8,iso88591,iso88592,iso88593,iso88594,iso88595,iso88596,iso88597,iso88598,iso88599,iso885910,iso885913,iso885914,iso885915,iso885916"
version="1.1.95test1"
version="1.1.95test2"
exec_suffix=""
help_lex="Enable/disable ocamllex-based lexical analyzer for the -lexlist encodings"
help_wlex="Enable/disable wlex-based lexical analyzer for UTF-8"
help_wlex_compat="Enable/disable wlex-style compatibility package for UTF-8 and ISO-8859-1"
help_ulex="Enable/disable ulex-based lexical analyzer for UTF-8"
help_pp="Enable/disable the build of the preprocessor (pxp-pp)"
options="lex wlex wlex_compat ulex"
options="lex wlex wlex_compat ulex pp"
lexlist_options="utf8 usascii iso88591 iso88592 iso88593 iso88594 iso88595 iso88596 iso88597 iso88598 iso88599 iso885910 iso885913 iso885914 iso885915 iso885916 koi8r windows1250 windows1251 windows1252 windows1253 windows1254 windows1255 windows1256 windows1257 windows1258 cp437 cp737 cp775 cp850 cp852 cp855 cp856 cp857 cp860 cp861 cp862 cp863 cp864 cp865 cp866 cp869 cp874 cp1006 macroman"
print_options () {
......@@ -189,6 +191,7 @@ if [ $with_wlex -gt 0 ]; then
echo "not found"
echo "wlex support is disabled"
with_wlex=0
with_wlex_compat=0
fi
fi
......@@ -206,6 +209,12 @@ if [ $with_ulex -gt 0 ]; then
fi
fi
# pxp-pp is compiled against ulex, so the preprocessor cannot be built
# once ulex has been switched off or was not found above:
if test $with_ulex -eq 0; then
    with_pp=0
fi
######################################################################
# Check Lexing.lexbuf type
......@@ -229,6 +238,25 @@ fi
rm -f tmp.*
######################################################################
# Check type of camlp4 locations
#
# A tiny test program calls Stdpp.raise_with_loc with a pair of integers
# as location. If ocamlc accepts it, camlp4 uses the "old style" location
# type and no extra flag is needed; otherwise the "new style" is assumed
# and -DOCAML_NEW_LOC is recorded in camlp4_loc.

printf "%s" "Checking type of camlp4 location... "
cat >tmp.ml <<'EOF'
open Stdpp;;
raise_with_loc (0,0) Not_found;;
EOF
if ocamlc -c -I +camlp4 tmp.ml >/dev/null 2>&1; then
    camlp4_loc=""
    echo "old style"
else
    camlp4_loc="-DOCAML_NEW_LOC"
    echo "new style"
fi
# Remove the probe source and whatever the compiler produced from it.
rm -f tmp.*
######################################################################
# Pregenerated wlex lexers
......@@ -286,7 +314,10 @@ print_options
echo
pkglist="pxp pxp-engine"
# Base packages; pxp-pp is appended below when the preprocessor is enabled
if [ $with_pp -gt 0 ]; then
pkglist="$pkglist pxp-pp"
fi
genpkglist=""
# Generated packages
......@@ -405,6 +436,7 @@ ALLGENPKGLIST = $allgenpkglist
EXEC_SUFFIX = $exec_suffix
LEXBUF_307 = $lexbuf_307
LEX_OPT = $lex_opt
CAMLP4_LOC = $camlp4_loc
_EOF_
######################################################################
......
......@@ -85,8 +85,8 @@ The following text is valid XML:
The first element has the expanded name (namespace1,a) while the second element
has the expanded name (namespace2,a); so the elements have different types. As
already pointed out, PXP does not support the expanded names directly (there is
some support for them in elements, but not in attributes). Alternatively, the
already pointed out, PXP does not support the expanded names directly.
Alternatively, the
XML text is transformed while it is being parsed such that the prefixes become
unique. In this example, the transformed text would read:
......@@ -124,6 +124,52 @@ because PXP normalizes any prefixes for namespace1 or namespace2 to the
preferred prefixes "x" and "y".
</p>
<p>Since PXP-1.1.95, the namespace support has been extended. In
addition to prefix normalization, the parser now also stores the
scoping structure of the namespaces (in the namespace_scope
objects). More or less, this means that the parser remembers
which elements have which "xmlns" attributes. There are two
important applications of this feature:</p>
<p>First, it is now possible to look up the namespace URI when
only the original, non-normalized namespace prefix is known.
A number of XML standards, e.g. XSchema, use namespace prefixes
within data nodes. Of course, these prefixes are not normalized
by PXP, but simply remain as they are when the XML text is
parsed. To get the URI of such a prefix p in the context of node
n, just call
<code>
n # namespace_scope # uri_of_display_prefix p
</code>
In PXP terminology, the non-normalized prefixes are now called
"display prefixes".</p>
<p>The other application is that it is now even possible to
retrieve the original "display" prefix of node names, e.g.
<code>
n # display_prefix
</code>
returns it. However, the display prefix is only guessed in the
sense that when there are several prefixes bound to the same
URI, one of the prefixes may be taken. For instance, in
<code><![CDATA[
<x:a xmlns:x="sample" xmlns:y="sample"/>
]]></code>
both "x" and "y" are bound to the same URI "sample", and
the display_prefix method now selects one of the prefixes
at random.</p>
<p>It is now even possible to output the parsed XML text
with original namespace structure: The "display" method
outputs XML text where the namespaces are declared as in the
original XML text.</p>
<p>Regarding the "xmlns" attributes, PXP treats them in a very special
way. Not only may their declarations be omitted from the DTD; such declarations
would not even be applied to the actual "xmlns" attributes. For example,
......
......@@ -58,6 +58,17 @@ the runtime part of wlex, and not the "wlex" command itself.</p>
<p>-with-wlex-compat</p>
<p>Creates a compatibility package pxp-wlex that includes lexers
for UTF8 and ISO-8859-1 (may be required to build old software)</p>
</li>
<li>
<p>-with-ulex</p>
<p>Enables the lexical analyzer that works for UTF-8 as internal encoding, and that is based on Alain Frisch's ulex tool. It
is relatively small, but a bit slower than the ocamllex-based lexers.
ulex will supersede wlex soon.</p>
</li>
<li>
<p>-with-pp</p>
<p>Enables the PXP preprocessor (installed as package pxp-pp).
See the file PREPROCESSOR for details. The preprocessor also requires ulex.</p>
</li>
<li>
<p>-lexlist &lt;list-of-encodings&gt;</p>
......
......@@ -23,7 +23,7 @@ installrel = $$HOME/homepage/ocaml-programming.de/packages/documentation/pxp/ind
.PHONY: all
all: README INSTALL ABOUT-FINDLIB SPEC EXTENSIONS
all: README INSTALL ABOUT-FINDLIB SPEC EXTENSIONS PREPROCESSOR
README: README.xml common.xml config.xml readme.dtd
$(readme) -text README.xml >README
......@@ -40,6 +40,9 @@ SPEC: SPEC.xml common.xml config.xml readme.dtd
EXTENSIONS: EXTENSIONS.xml common.xml config.xml readme.dtd
$(readme) -text EXTENSIONS.xml >EXTENSIONS
PREPROCESSOR: PREPROCESSOR.xml common.xml config.xml readme.dtd
$(readme) -text PREPROCESSOR.xml >PREPROCESSOR
DEV: DEV.xml common.xml config.xml readme.dtd
$(readme) -text DEV.xml >DEV
#$(readme) -html DEV.xml >$(installdev)
......
This diff is collapsed.
Available options for godi.conf:
- GODI_PXP_WITH_WLEX: Whether to build the pxp-wlex
lexer (yes/no)
GODI_PXP_WITH_WLEX
......@@ -3,7 +3,7 @@
GODI_PLIST= yes
.include "../../mk/godi.pkg.mk"
VERSION= 1.1.95test1
VERSION= 1.1.95test2
PKGNAME= godi-pxp-${VERSION}
DISTNAME= trunk
DISTFILES=
......@@ -13,15 +13,25 @@ MAINTAINER= gerd@gerd-stolpmann.de
# Home page of the upstream project.
HOMEPAGE= http://www.ocaml-programming.de/
COMMENT= PXP is an advanced XML parser
GODI_PXP_WITH_WLEX?=yes
DEPENDS+= godi-ocaml>=3.07:../../godi/godi-ocaml
DEPENDS+= godi-ocamlnet>0.97.1:../../godi/godi-ocamlnet
DEPENDS+= godi-wlex>=20021107:../../godi/godi-wlex
DEPENDS+= godi-ulex>=0:../../godi/godi-ulex
BUILD_DEPENDS+= godi-findlib>=1.0:../../godi/godi-findlib
.if ${GODI_PXP_WITH_WLEX} == "yes"
DEPENDS+= godi-wlex>=20021107:../../godi/godi-wlex
.endif
PATH:= ${LOCALBASE}/bin:${PATH}
HAS_CONFIGURE= yes
.if ${GODI_PXP_WITH_WLEX} == "yes"
CONFIGURE_ARGS+= -with-wlex -with-wlex-compat
.else
CONFIGURE_ARGS+= -without-wlex -without-wlex-compat
.endif
# ocamlfind must install into the pkg-lib directory, not into site-lib.
# Use the build time configuration file:
......
......@@ -16,6 +16,8 @@
@findlib pxp-lex-iso885915
@findlib pxp-lex-iso885916
@findlib pxp-lex-utf8
@findlib pxp-wlex
@findlib pxp-wlex-utf8
@optional @findlib pxp-wlex
@optional @findlib pxp-wlex-utf8
@findlib pxp-ulex
@findlib pxp-pp
@deepdir doc/godi-pxp
......@@ -37,6 +37,7 @@ f doc/INSTALL
f doc/README
f doc/DEV
f doc/EXTENSIONS
f doc/PREPROCESSOR
f doc/RELEASE-NOTES
f doc/SPEC
f doc/design.txt
......@@ -256,6 +257,12 @@ f gensrc-pre/pxp-wlex-utf8/pxp_wlex_utf8_01.mll
f gensrc-pre/pxp-wlex-utf8/pxp_wlex_utf8_01.ml.306
f gensrc-pre/pxp-wlex-utf8/pxp_wlex_utf8_01.ml.307
d src/pxp-pp
f src/pxp-pp/Makefile
f src/pxp-pp/META.in
f src/pxp-pp/PPSPEC
f src/pxp-pp/pxp_pp.ml
d rtests
f rtests/Makefile
f rtests/README
......
include ../Makefile.conf
-include ../Makefile.conf
.PHONY: all
all: toploops
......
......@@ -6,11 +6,14 @@ clean:
distclean:
$(MAKE) -C pxp distclean
$(MAKE) -C pxp-engine distclean
$(MAKE) -C pxp-pp distclean
CLEAN:
$(MAKE) -C pxp CLEAN
$(MAKE) -C pxp-engine CLEAN
$(MAKE) -C pxp-pp CLEAN
uninstall:
$(MAKE) -C pxp uninstall
$(MAKE) -C pxp-engine uninstall
$(MAKE) -C pxp-pp uninstall
......@@ -20,7 +20,7 @@ depend:
$(OCAMLDEP) *.ml *.mli >depend
clean:
rm -f $(CLEAN_LIST) pxp_core_parser.ml pxp_lib.ml
rm -f $(CLEAN_LIST) pxp_core_parser.ml pxp_lib.ml pxp_lexing.ml
CLEAN: clean
......
......@@ -1313,6 +1313,13 @@ module Entity = struct
~name ~xid ~resolver dtd
| Entity(make,resolver) ->
make dtd (* resolver ignored *)
(* Coerces the entity object to the empty object type < >, hiding all of
 * its methods; the result is used as the abstract ID of the entity. *)
let entity_id ent = (ent :> < >)
(* A class without any methods or fields. *)
class fake = object end
(* Returns a new instance of [fake], i.e. an ID value that was not
 * obtained from an entity. *)
let create_entity_id () = new fake
end
......@@ -751,6 +751,14 @@ module Entity : sig
Pxp_entity.entity
(* Creates an external entity that reads from the passed source *)
val entity_id : Pxp_entity.entity -> Pxp_lexer_types.entity_id
(* Returns the abstract entity ID *)
val create_entity_id : unit -> Pxp_lexer_types.entity_id
(* Create a new abstract entity ID. This ID can be used wherever
* an entity_id is expected but no entity is available.
*)
end
;;
......
# findlib package description for pxp-pp.
# NOTE(review): @VERSION@ looks like a placeholder that is substituted
# when the final META file is generated -- confirm against the build rules.
description = "Preprocessor for PXP"
version = "@VERSION@"
# At runtime, we need at least pxp-engine.
requires = "camlp4,pxp-engine"
# At preprocess time (predicate "syntax"), we need netstring and ulex:
requires(syntax) = "camlp4,netstring,ulex"
# The toploop is the combination of both:
requires(syntax,toploop) = "camlp4,netstring,ulex,pxp-engine"
# Specification of stand-alone preprocessor call:
archive(syntax,preprocessor) = "pxp_pp.cma"
# Specification for the toploop (same archive as for the preprocessor):
archive(syntax,toploop) = "pxp_pp.cma"
# Compile the preprocessor source with camlp4o syntax; $< is pxp_pp.ml.
pxp_pp.cmo: pxp_pp.ml
	ocamlfind ocamlc -c -package netstring,ulex,camlp4.quotations,camlp4.macro -syntax camlp4o $<
# Build, install and clean-up rules for the pxp-pp preprocessor archive.

TOP_DIR = ../..
include $(TOP_DIR)/Makefile.rules

# findlib packages required to compile pxp_pp.ml.
PACKAGES = netstring,ulex,camlp4.quotations,camlp4.macro
# pxp_pp.ml is itself written in camlp4o syntax.
OCAMLC_OPTIONS += -syntax camlp4o
# CAMLP4_LOC is written by the configure script: it is either empty or
# -DOCAML_NEW_LOC, and is handed to the preprocessor through -ppopt.
OCAMLC_OPTIONS += -ppopt "$(CAMLP4_LOC)"

# These targets are commands, not files; declaring them phony keeps them
# working even if a file with the same name appears in this directory.
.PHONY: all opt clean CLEAN distclean install uninstall

all: pxp_pp.cma

# Intentionally empty: this directory only builds the bytecode archive.
opt:

pxp_pp.cma: pxp_pp.cmo
	$(OCAMLC) -a -o $@ pxp_pp.cmo

clean:
	rm -f $(CLEAN_LIST)

CLEAN: clean

# Additionally removes the generated META and depend files.
distclean: clean
	rm -f META depend

install:
	$(OCAMLFIND) install pxp-pp pxp_pp.cma META

uninstall:
	$(OCAMLFIND) remove pxp-pp
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment