Commit f7cb270f authored by gerd's avatar gerd

Invocations of 'do_perm32', 'do_perm48', and 'do_perm64' have been
manually inlined. This speeds the algorithm up from 55 sec per
1MB CBC-encryption to 38 sec.


git-svn-id: https://godirepo.camlcity.org/svn/lib-cryptgps/trunk@4 b101cce4-44db-0310-b718-db4b2d8d2e05
parent 103d6e17
(* $Id: crypt_des.ml,v 1.2 1999/06/17 15:24:24 gerd Exp $
(* $Id: crypt_des.ml,v 1.3 1999/06/17 15:57:02 gerd Exp $
* ----------------------------------------------------------------------
*
*)
......@@ -498,35 +498,84 @@ module Cryptsystem : Cryptsystem_64.T =
let s7 = Lazy.force sbox7 in
let s8 = Lazy.force sbox8 in
(* [f (k48_0,k48_1) l32_0 l32_1 r32_0 r32_1] performs one round step.
 * It transforms the right half (r32_0,r32_1) under the round key
 * (k48_0,k48_1) and returns the resulting pair xor-ed with the left half
 * (l32_0,l32_1).
 * The helpers do_perm48 and do_perm32 and the tables xperm, pboxperm and
 * s1..s8 are bound outside this excerpt.
 *)
let f (k48_0,k48_1) l32_0 l32_1 r32_0 r32_1 =
(* computes new right half *)
(* Run the right half through the xperm table.
 * NOTE(review): the sic marker presumably flags that a 48-bit permutation
 * routine is applied to a 32-bit value here -- confirm with do_perm48.
 *)
let (r48_0, r48_1) = do_perm48 xperm (r32_0,r32_1) in (* sic! *)
(* Mix in the two halves of the round key. *)
let r'48_0 = k48_0 lxor r48_0 in
let r'48_1 = k48_1 lxor r48_1 in
(* Cut each half into four 6-bit fields (shifts 18, 12, 6, 0; mask 63)
 * and use them as indices into s1..s4 and s5..s8 respectively.
 * The topmost field is not masked, so this assumes each half fits into
 * 24 bits -- TODO confirm.
 *)
let x0 = s1.( r'48_0 lsr 18 ) in
let x1 = s2.( (r'48_0 lsr 12) land 63 ) in
let x2 = s3.( (r'48_0 lsr 6) land 63 ) in
let x3 = s4.( r'48_0 land 63 ) in
let x4 = s5.( r'48_1 lsr 18 ) in
let x5 = s6.( (r'48_1 lsr 12) land 63 ) in
let x6 = s7.( (r'48_1 lsr 6) land 63 ) in
let x7 = s8.( r'48_1 land 63 ) in
(* The looked-up values are combined with lor and no shifting, so the
 * table entries presumably already carry their bits in final position
 * -- verify against the sbox definitions.
 *)
let y0 = x0 lor x1 lor x2 lor x3 in
let y1 = x4 lor x5 lor x6 lor x7 in
(* Permute the combined pair through pboxperm, then xor with the left half. *)
let p0,p1 = do_perm32 pboxperm (y0,y1) in
(p0 lxor l32_0, p1 lxor l32_1)
in
(* [do_rounds i blast0 blast1 last0 last1] iterates while i < 16. Each
 * step takes the round key k.(i), uses (blast0,blast1) as left half and
 * (last0,last1) as right half, and recurses with the old right half as
 * the new left half and the freshly computed pair as the new right half.
 * Once i reaches 16 the four words are run through the fperm table in
 * the order last0, last1, blast0, blast1, giving a quadruple.
 * NOTE(review): this text comes from a rendered diff without +/- markers.
 * The lines that still call f and do_perm64 directly appear to be the
 * pre-change versions of the inlined code that follows them -- confirm
 * against the actual file.
 *)
let rec do_rounds i blast0 blast1 last0 last1 =
if i < 16 then
let r0,r1 = f k.(i) blast0 blast1 last0 last1 in
do_rounds (i+1) last0 last1 r0 r1
(* Bind the names that the inlined round body below expects. *)
let k48_0, k48_1 = k.(i) in
let l32_0 = blast0 in
let l32_1 = blast1 in
let r32_0 = last0 in
let r32_1 = last1 in
(* --------- manually inlined code ---------- *)
(* OLD: let (r48_0, r48_1) = do_perm48 xperm (r32_0,r32_1) in *)
(* Six lookups into xperm at base offsets 0, 256, ..., 1280. The indices
 * are taken from r32_0 and then r32_1 as: lsr 16, (lsr 8) land 0xff, and
 * land 0xff. Each entry is a pair; the first components are OR-ed into
 * r48_0 and the second components into r48_1.
 * The lsr 16 index is not masked, so this assumes the inputs fit into
 * 24 bits -- TODO confirm.
 *)
let (a0,b0) = xperm.( r32_0 lsr 16 ) in
let (a1,b1) = xperm.( 256 + ((r32_0 lsr 8) land 0xff)) in
let (a2,b2) = xperm.( 512 + (r32_0 land 0xff) ) in
let (a3,b3) = xperm.( 768 + ( r32_1 lsr 16 ) ) in
let (a4,b4) = xperm.( 1024 + ((r32_1 lsr 8) land 0xff) ) in
let (a5,b5) = xperm.( 1280 + (r32_1 land 0xff) ) in
let r48_0 = a0 lor a1 lor a2 lor a3 lor a4 lor a5 in
let r48_1 = b0 lor b1 lor b2 lor b3 lor b4 lor b5 in
(* ------------------ end -------------------- *)
(* Mix in the round key, then index s1..s8 with the eight 6-bit fields
 * (shifts 18, 12, 6, 0; mask 63) and OR the results per half.
 *)
let r'48_0 = k48_0 lxor r48_0 in
let r'48_1 = k48_1 lxor r48_1 in
let x0 = s1.( r'48_0 lsr 18 ) in
let x1 = s2.( (r'48_0 lsr 12) land 63 ) in
let x2 = s3.( (r'48_0 lsr 6) land 63 ) in
let x3 = s4.( r'48_0 land 63 ) in
let x4 = s5.( r'48_1 lsr 18 ) in
let x5 = s6.( (r'48_1 lsr 12) land 63 ) in
let x6 = s7.( (r'48_1 lsr 6) land 63 ) in
let x7 = s8.( r'48_1 land 63 ) in
let y0 = x0 lor x1 lor x2 lor x3 in
let y1 = x4 lor x5 lor x6 lor x7 in
(* --------- manually inlined code ---------- *)
(* OLD: let p0,p1 = do_perm32 pboxperm (y0,y1) in *)
(* Four lookups into pboxperm at base offsets 0, 256, 512, 768, indexed
 * by y0 lsr 8, y0 land 0xff, y1 lsr 8 and y1 land 0xff. The pairs are
 * OR-ed component-wise into p0 and p1.
 * The lsr 8 indices are not masked, so this assumes y0 and y1 fit into
 * 16 bits -- TODO confirm.
 *)
let (a0,b0) = pboxperm.( y0 lsr 8 ) in
let (a1,b1) = pboxperm.( 256 + (y0 land 0xff)) in
let (a2,b2) = pboxperm.( 512 + (y1 lsr 8) ) in
let (a3,b3) = pboxperm.( 768 + (y1 land 0xff)) in
let p0 = a0 lor a1 lor a2 lor a3 in
let p1 = b0 lor b1 lor b2 lor b3 in
(* ------------------ end -------------------- *)
(* Next round: the old right half becomes the left half, and the permuted
 * pair xor-ed with the old left half becomes the right half.
 *)
do_rounds (i+1) last0 last1 (p0 lxor l32_0) (p1 lxor l32_1)
else
do_perm64 fperm (last0, last1, blast0, blast1)
(* --------- manually inlined code ---------- *)
(* OLD: do_perm64 fperm (last0, last1, blast0, blast1) *)
(* Eight lookups into fperm at base offsets 0, 256, ..., 1792, indexed by
 * the lsr 8 part and the land 0xff part of last0, last1, blast0 and
 * blast1 in that order. Each entry is a quadruple; the result is the
 * component-wise OR of all eight entries.
 *)
let (a0,b0,c0,d0) = fperm.( last0 lsr 8 ) in
let (a1,b1,c1,d1) = fperm.( 256 + (last0 land 0xff)) in
let (a2,b2,c2,d2) = fperm.( 512 + (last1 lsr 8) ) in
let (a3,b3,c3,d3) = fperm.( 768 + (last1 land 0xff) ) in
let (a4,b4,c4,d4) = fperm.( 1024 + (blast0 lsr 8) ) in
let (a5,b5,c5,d5) = fperm.( 1280 + (blast0 land 0xff) ) in
let (a6,b6,c6,d6) = fperm.( 1536 + (blast1 lsr 8) ) in
let (a7,b7,c7,d7) = fperm.( 1792 + (blast1 land 0xff) ) in
(a0 lor a1 lor a2 lor a3 lor a4 lor a5 lor a6 lor a7,
b0 lor b1 lor b2 lor b3 lor b4 lor b5 lor b6 lor b7,
c0 lor c1 lor c2 lor c3 lor c4 lor c5 lor c6 lor c7,
d0 lor d1 lor d2 lor d3 lor d4 lor d5 lor d6 lor d7)
(* ------------------ end -------------------- *)
in
let (l00, l01, r00, r01) = do_perm64 iperm x in
(* --------- manually inlined code ---------- *)
(* OLD: let (l00, l01, r00, r01) = do_perm64 iperm x in *)
let (a,b,c,d) = x in
let (a0,b0,c0,d0) = iperm.( a lsr 8 ) in
let (a1,b1,c1,d1) = iperm.( 256 + (a land 0xff)) in
let (a2,b2,c2,d2) = iperm.( 512 + (b lsr 8) ) in
let (a3,b3,c3,d3) = iperm.( 768 + (b land 0xff) ) in
let (a4,b4,c4,d4) = iperm.( 1024 + (c lsr 8) ) in
let (a5,b5,c5,d5) = iperm.( 1280 + (c land 0xff) ) in
let (a6,b6,c6,d6) = iperm.( 1536 + (d lsr 8) ) in
let (a7,b7,c7,d7) = iperm.( 1792 + (d land 0xff) ) in
let l00 = a0 lor a1 lor a2 lor a3 lor a4 lor a5 lor a6 lor a7 in
let l01 = b0 lor b1 lor b2 lor b3 lor b4 lor b5 lor b6 lor b7 in
let r00 = c0 lor c1 lor c2 lor c3 lor c4 lor c5 lor c6 lor c7 in
let r01 = d0 lor d1 lor d2 lor d3 lor d4 lor d5 lor d6 lor d7 in
(* ------------------ end -------------------- *)
do_rounds 0 l00 l01 r00 r01
......@@ -707,6 +756,11 @@ module Cryptmodes = Cryptmodes_64.Make_modes(Cryptsystem)
* history:
*
* $Log: crypt_des.ml,v $
* Revision 1.3 1999/06/17 15:57:02 gerd
* Invocations of 'do_perm32', 'do_perm48', and 'do_perm64' have
* been manually inlined. This speeds the algorithm up from 55 sec per
* 1MB CBC-encryption to 38 sec.
*
* Revision 1.2 1999/06/17 15:24:24 gerd
* Instead of calling 'f' sequentially, there is now a loop. This
* prevents the compiler from inlining the 16 invocations of 'f', which
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment