From 626f1ee196fe06514d66771ff0e3f82d7686af25 Mon Sep 17 00:00:00 2001 From: matz Date: Fri, 1 Jul 2005 03:25:46 +0000 Subject: [PATCH] * missing/crypt.c: replaced with 4.4BSD version. * missing/erf.c: ditto. * missing/vsnprintf.c: removed the third provision from the old BSD license. [ruby-core:05177] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8687 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 9 + missing/crypt.c | 1154 ++++++++++++++++++++++++++++++++++--------- missing/erf.c | 461 ++++++++++++++--- missing/vsnprintf.c | 6 +- 4 files changed, 1311 insertions(+), 319 deletions(-) diff --git a/ChangeLog b/ChangeLog index b231cfe1be..d309b05f17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri Jul 1 11:34:08 2005 Yukihiro Matsumoto + + * missing/crypt.c: replaced with 4.4BSD version. + + * missing/erf.c: ditto. + + * missing/vsnprintf.c: removed the third provision from the old + BSD license. [ruby-core:05177] + Fri Jul 1 01:45:21 2005 Nobuyoshi Nakada * enum.c (enum_min, enum_max): must not return Qundef. diff --git a/missing/crypt.c b/missing/crypt.c index 9f9b562c36..ee5af0c67c 100644 --- a/missing/crypt.c +++ b/missing/crypt.c @@ -1,276 +1,962 @@ -/* From Andy Tanenbaum's book "Computer Networks", - rewritten in C -*/ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Tom Truscott. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ -struct block { - unsigned char b_data[64]; -}; +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)crypt.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ -struct ordering { - unsigned char o_data[64]; -}; +#include +#include +#include -static struct block key; +/* + * UNIX password, and DES, encryption. + * By Tom Truscott, trt@rti.rti.org, + * from algorithms by Robert W. Baldwin and James Gillogly. + * + * References: + * "Mathematical Cryptology for Computer Scientists and Mathematicians," + * by Wayne Patterson, 1987, ISBN 0-8476-7438-X. + * + * "Password Security: A Case History," R. Morris and Ken Thompson, + * Communications of the ACM, vol. 22, pp. 594-597, Nov. 1979. + * + * "DES will be Totally Insecure within Ten Years," M.E. Hellman, + * IEEE Spectrum, vol. 16, pp. 32-39, July 1979. + */ -static struct ordering InitialTr = { - 58,50,42,34,26,18,10, 2,60,52,44,36,28,20,12, 4, - 62,54,46,38,30,22,14, 6,64,56,48,40,32,24,16, 8, - 57,49,41,33,25,17, 9, 1,59,51,43,35,27,19,11, 3, - 61,53,45,37,29,21,13, 5,63,55,47,39,31,23,15, 7, -}; +/* ===== Configuration ==================== */ -static struct ordering FinalTr = { - 40, 8,48,16,56,24,64,32,39, 7,47,15,55,23,63,31, - 38, 6,46,14,54,22,62,30,37, 5,45,13,53,21,61,29, - 36, 4,44,12,52,20,60,28,35, 3,43,11,51,19,59,27, - 34, 2,42,10,50,18,58,26,33, 1,41, 9,49,17,57,25, -}; +/* + * define "MUST_ALIGN" if your compiler cannot load/store + * long integers at arbitrary (e.g. odd) memory locations. + * (Either that or never pass unaligned addresses to des_cipher!) + */ +#if !defined(vax) +#define MUST_ALIGN +#endif -static struct ordering swap = { - 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48, - 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, - 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, -}; +#ifdef CHAR_BITS +#if CHAR_BITS != 8 + #error C_block structure assumes 8 bit characters +#endif +#endif -static struct ordering KeyTr1 = { - 57,49,41,33,25,17, 9, 1,58,50,42,34,26,18, - 10, 2,59,51,43,35,27,19,11, 3,60,52,44,36, - 63,55,47,39,31,23,15, 7,62,54,46,38,30,22, - 14, 6,61,53,45,37,29,21,13, 5,28,20,12, 4, -}; +/* + * define "LONG_IS_32_BITS" only if sizeof(long)==4. + * This avoids use of bit fields (your compiler may be sloppy with them). + */ +#if !defined(cray) +#define LONG_IS_32_BITS +#endif -static struct ordering KeyTr2 = { - 14,17,11,24, 1, 5, 3,28,15, 6,21,10, - 23,19,12, 4,26, 8,16, 7,27,20,13, 2, - 41,52,31,37,47,55,30,40,51,45,33,48, - 44,49,39,56,34,53,46,42,50,36,29,32, -}; +/* + * define "B64" to be the declaration for a 64 bit integer. + * XXX this feature is currently unused, see "endian" comment below. + */ +#if defined(cray) +#define B64 long +#endif +#if defined(convex) +#define B64 long long +#endif -static struct ordering etr = { - 32, 1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9, - 8, 9,10,11,12,13,12,13,14,15,16,17, - 16,17,18,19,20,21,20,21,22,23,24,25, - 24,25,26,27,28,29,28,29,30,31,32, 1, -}; +/* + * define "LARGEDATA" to get faster permutations, by using about 72 kilobytes + * of lookup tables. This speeds up des_setkey() and des_cipher(), but has + * little effect on crypt(). + */ +#if defined(notdef) +#define LARGEDATA +#endif -static struct ordering ptr = { - 16, 7,20,21,29,12,28,17, 1,15,23,26, 5,18,31,10, - 2, 8,24,14,32,27, 3, 9,19,13,30, 6,22,11, 4,25, -}; +/* compile with "-DSTATIC=int" when profiling */ +#ifndef STATIC +#define STATIC static +#endif +STATIC init_des(), init_perm(), permute(); +#ifdef DEBUG +STATIC prtab(); +#endif -static unsigned char s_boxes[8][64] = { -{ 14, 4,13, 1, 2,15,11, 8, 3,10, 6,12, 5, 9, 0, 7, - 0,15, 7, 4,14, 2,13, 1,10, 6,12,11, 9, 5, 3, 8, - 4, 1,14, 8,13, 6, 2,11,15,12, 9, 7, 3,10, 5, 0, - 15,12, 8, 2, 4, 9, 1, 7, 5,11, 3,14,10, 0, 6,13, -}, +/* ==================================== */ -{ 15, 1, 8,14, 6,11, 3, 4, 9, 7, 2,13,12, 0, 5,10, - 3,13, 4, 7,15, 2, 8,14,12, 0, 1,10, 6, 9,11, 5, - 0,14, 7,11,10, 4,13, 1, 5, 8,12, 6, 9, 3, 2,15, - 13, 8,10, 1, 3,15, 4, 2,11, 6, 7,12, 0, 5,14, 9, -}, +/* + * Cipher-block representation (Bob Baldwin): + * + * DES operates on groups of 64 bits, numbered 1..64 (sigh). One + * representation is to store one bit per byte in an array of bytes. Bit N of + * the NBS spec is stored as the LSB of the Nth byte (index N-1) in the array. + * Another representation stores the 64 bits in 8 bytes, with bits 1..8 in the + * first byte, 9..16 in the second, and so on. The DES spec apparently has + * bit 1 in the MSB of the first byte, but that is particularly noxious so we + * bit-reverse each byte so that bit 1 is the LSB of the first byte, bit 8 is + * the MSB of the first byte. Specifically, the 64-bit input data and key are + * converted to LSB format, and the output 64-bit block is converted back into + * MSB format. + * + * DES operates internally on groups of 32 bits which are expanded to 48 bits + * by permutation E and shrunk back to 32 bits by the S boxes. To speed up + * the computation, the expansion is applied only once, the expanded + * representation is maintained during the encryption, and a compression + * permutation is applied only at the end. To speed up the S-box lookups, + * the 48 bits are maintained as eight 6 bit groups, one per byte, which + * directly feed the eight S-boxes. Within each byte, the 6 bits are the + * most significant ones. The low two bits of each byte are zero. (Thus, + * bit 1 of the 48 bit E expansion is stored as the "4"-valued bit of the + * first byte in the eight byte representation, bit 2 of the 48 bit value is + * the "8"-valued bit, and so on.) In fact, a combined "SPE"-box lookup is + * used, in which the output is the 64 bit result of an S-box lookup which + * has been permuted by P and expanded by E, and is ready for use in the next + * iteration. Two 32-bit wide tables, SPE[0] and SPE[1], are used for this + * lookup. Since each byte in the 48 bit path is a multiple of four, indexed + * lookup of SPE[0] and SPE[1] is simple and fast. The key schedule and + * "salt" are also converted to this 8*(6+2) format. The SPE table size is + * 8*64*8 = 4K bytes. + * + * To speed up bit-parallel operations (such as XOR), the 8 byte + * representation is "union"ed with 32 bit values "i0" and "i1", and, on + * machines which support it, a 64 bit value "b64". This data structure, + * "C_block", has two problems. First, alignment restrictions must be + * honored. Second, the byte-order (e.g. little-endian or big-endian) of + * the architecture becomes visible. + * + * The byte-order problem is unfortunate, since on the one hand it is good + * to have a machine-independent C_block representation (bits 1..8 in the + * first byte, etc.), and on the other hand it is good for the LSB of the + * first byte to be the LSB of i0. We cannot have both these things, so we + * currently use the "little-endian" representation and avoid any multi-byte + * operations that depend on byte order. This largely precludes use of the + * 64-bit datatype since the relative order of i0 and i1 are unknown. It + * also inhibits grouping the SPE table to look up 12 bits at a time. (The + * 12 bits can be stored in a 16-bit field with 3 low-order zeroes and 1 + * high-order zero, providing fast indexing into a 64-bit wide SPE.) On the + * other hand, 64-bit datatypes are currently rare, and a 12-bit SPE lookup + * requires a 128 kilobyte table, so perhaps this is not a big loss. + * + * Permutation representation (Jim Gillogly): + * + * A transformation is defined by its effect on each of the 8 bytes of the + * 64-bit input. For each byte we give a 64-bit output that has the bits in + * the input distributed appropriately. The transformation is then the OR + * of the 8 sets of 64-bits. This uses 8*256*8 = 16K bytes of storage for + * each transformation. Unless LARGEDATA is defined, however, a more compact + * table is used which looks up 16 4-bit "chunks" rather than 8 8-bit chunks. + * The smaller table uses 16*16*8 = 2K bytes for each transformation. This + * is slower but tolerable, particularly for password encryption in which + * the SPE transformation is iterated many times. The small tables total 9K + * bytes, the large tables total 72K bytes. + * + * The transformations used are: + * IE3264: MSB->LSB conversion, initial permutation, and expansion. + * This is done by collecting the 32 even-numbered bits and applying + * a 32->64 bit transformation, and then collecting the 32 odd-numbered + * bits and applying the same transformation. Since there are only + * 32 input bits, the IE3264 transformation table is half the size of + * the usual table. + * CF6464: Compression, final permutation, and LSB->MSB conversion. + * This is done by two trivial 48->32 bit compressions to obtain + * a 64-bit block (the bit numbering is given in the "CIFP" table) + * followed by a 64->64 bit "cleanup" transformation. (It would + * be possible to group the bits in the 64-bit block so that 2 + * identical 32->32 bit transformations could be used instead, + * saving a factor of 4 in space and possibly 2 in time, but + * byte-ordering and other complications rear their ugly head. + * Similar opportunities/problems arise in the key schedule + * transforms.) + * PC1ROT: MSB->LSB, PC1 permutation, rotate, and PC2 permutation. + * This admittedly baroque 64->64 bit transformation is used to + * produce the first code (in 8*(6+2) format) of the key schedule. + * PC2ROT[0]: Inverse PC2 permutation, rotate, and PC2 permutation. + * It would be possible to define 15 more transformations, each + * with a different rotation, to generate the entire key schedule. + * To save space, however, we instead permute each code into the + * next by using a transformation that "undoes" the PC2 permutation, + * rotates the code, and then applies PC2. Unfortunately, PC2 + * transforms 56 bits into 48 bits, dropping 8 bits, so PC2 is not + * invertible. We get around that problem by using a modified PC2 + * which retains the 8 otherwise-lost bits in the unused low-order + * bits of each byte. The low-order bits are cleared when the + * codes are stored into the key schedule. + * PC2ROT[1]: Same as PC2ROT[0], but with two rotations. + * This is faster than applying PC2ROT[0] twice, + * + * The Bell Labs "salt" (Bob Baldwin): + * + * The salting is a simple permutation applied to the 48-bit result of E. + * Specifically, if bit i (1 <= i <= 24) of the salt is set then bits i and + * i+24 of the result are swapped. The salt is thus a 24 bit number, with + * 16777216 possible values. (The original salt was 12 bits and could not + * swap bits 13..24 with 36..48.) + * + * It is possible, but ugly, to warp the SPE table to account for the salt + * permutation. Fortunately, the conditional bit swapping requires only + * about four machine instructions and can be done on-the-fly with about an + * 8% performance penalty. + */ -{ 10, 0, 9,14, 6, 3,15, 5, 1,13,12, 7,11, 4, 2, 8, - 13, 7, 0, 9, 3, 4, 6,10, 2, 8, 5,14,12,11,15, 1, - 13, 6, 4, 9, 8,15, 3, 0,11, 1, 2,12, 5,10,14, 7, - 1,10,13, 0, 6, 9, 8, 7, 4,15,14, 3,11, 5, 2,12, -}, +typedef union { + unsigned char b[8]; + struct { +#if defined(LONG_IS_32_BITS) + /* long is often faster than a 32-bit bit field */ + long i0; + long i1; +#else + long i0: 32; + long i1: 32; +#endif + } b32; +#if defined(B64) + B64 b64; +#endif +} C_block; -{ 7,13,14, 3, 0, 6, 9,10, 1, 2, 8, 5,11,12, 4,15, - 13, 8,11, 5, 6,15, 0, 3, 4, 7, 2,12, 1,10,14, 9, - 10, 6, 9, 0,12,11, 7,13,15, 1, 3,14, 5, 2, 8, 4, - 3,15, 0, 6,10, 1,13, 8, 9, 4, 5,11,12, 7, 2,14, -}, +/* + * Convert twenty-four-bit long in host-order + * to six bits (and 2 low-order zeroes) per char little-endian format. + */ +#define TO_SIX_BIT(rslt, src) { \ + C_block cvt; \ + cvt.b[0] = src; src >>= 6; \ + cvt.b[1] = src; src >>= 6; \ + cvt.b[2] = src; src >>= 6; \ + cvt.b[3] = src; \ + rslt = (cvt.b32.i0 & 0x3f3f3f3fL) << 2; \ + } -{ 2,12, 4, 1, 7,10,11, 6, 8, 5, 3,15,13, 0,14, 9, - 14,11, 2,12, 4, 7,13, 1, 5, 0,15,10, 3, 9, 8, 6, - 4, 2, 1,11,10,13, 7, 8,15, 9,12, 5, 6, 3, 0,14, - 11, 8,12, 7, 1,14, 2,13, 6,15, 0, 9,10, 4, 5, 3, -}, +/* + * These macros may someday permit efficient use of 64-bit integers. + */ +#define ZERO(d,d0,d1) d0 = 0, d1 = 0 +#define LOAD(d,d0,d1,bl) d0 = (bl).b32.i0, d1 = (bl).b32.i1 +#define LOADREG(d,d0,d1,s,s0,s1) d0 = s0, d1 = s1 +#define OR(d,d0,d1,bl) d0 |= (bl).b32.i0, d1 |= (bl).b32.i1 +#define STORE(s,s0,s1,bl) (bl).b32.i0 = s0, (bl).b32.i1 = s1 +#define DCL_BLOCK(d,d0,d1) long d0, d1 -{ 12, 1,10,15, 9, 2, 6, 8, 0,13, 3, 4,14, 7, 5,11, - 10,15, 4, 2, 7,12, 9, 5, 6, 1,13,14, 0,11, 3, 8, - 9,14,15, 5, 2, 8,12, 3, 7, 0, 4,10, 1,13,11, 6, - 4, 3, 2,12, 9, 5,15,10,11,14, 1, 7, 6, 0, 8,13, -}, +#if defined(LARGEDATA) + /* Waste memory like crazy. Also, do permutations in line */ +#define LGCHUNKBITS 3 +#define CHUNKBITS (1<>4]; OR(D,D0,D1,*tp); p += (1< 0); + STORE(D,D0,D1,*out); +} +#endif /* LARGEDATA */ - while (n-- > 0) { - data->b_data[n] = x.b_data[t->o_data[n] - 1]; - } + +/* ===== (mostly) Standard DES Tables ==================== */ + +static unsigned char IP[] = { /* initial permutation */ + 58, 50, 42, 34, 26, 18, 10, 2, + 60, 52, 44, 36, 28, 20, 12, 4, + 62, 54, 46, 38, 30, 22, 14, 6, + 64, 56, 48, 40, 32, 24, 16, 8, + 57, 49, 41, 33, 25, 17, 9, 1, + 59, 51, 43, 35, 27, 19, 11, 3, + 61, 53, 45, 37, 29, 21, 13, 5, + 63, 55, 47, 39, 31, 23, 15, 7, +}; + +/* The final permutation is the inverse of IP - no table is necessary */ + +static unsigned char ExpandTr[] = { /* expansion operation */ + 32, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 8, 9, + 8, 9, 10, 11, 12, 13, + 12, 13, 14, 15, 16, 17, + 16, 17, 18, 19, 20, 21, + 20, 21, 22, 23, 24, 25, + 24, 25, 26, 27, 28, 29, + 28, 29, 30, 31, 32, 1, +}; + +static unsigned char PC1[] = { /* permuted choice table 1 */ + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4, +}; + +static unsigned char Rotates[] = { /* PC1 rotation schedule */ + 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, +}; + +/* note: each "row" of PC2 is left-padded with bits that make it invertible */ +static unsigned char PC2[] = { /* permuted choice table 2 */ + 9, 18, 14, 17, 11, 24, 1, 5, + 22, 25, 3, 28, 15, 6, 21, 10, + 35, 38, 23, 19, 12, 4, 26, 8, + 43, 54, 16, 7, 27, 20, 13, 2, + + 0, 0, 41, 52, 31, 37, 47, 55, + 0, 0, 30, 40, 51, 45, 33, 48, + 0, 0, 44, 49, 39, 56, 34, 53, + 0, 0, 46, 42, 50, 36, 29, 32, +}; + +static unsigned char S[8][64] = { /* 48->32 bit substitution tables */ + /* S[1] */ + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13, + /* S[2] */ + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9, + /* S[3] */ + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12, + /* S[4] */ + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14, + /* S[5] */ + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3, + /* S[6] */ + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13, + /* S[7] */ + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12, + /* S[8] */ + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11, +}; + +static unsigned char P32Tr[] = { /* 32-bit permutation function */ + 16, 7, 20, 21, + 29, 12, 28, 17, + 1, 15, 23, 26, + 5, 18, 31, 10, + 2, 8, 24, 14, + 32, 27, 3, 9, + 19, 13, 30, 6, + 22, 11, 4, 25, +}; + +static unsigned char CIFP[] = { /* compressed/interleaved permutation */ + 1, 2, 3, 4, 17, 18, 19, 20, + 5, 6, 7, 8, 21, 22, 23, 24, + 9, 10, 11, 12, 25, 26, 27, 28, + 13, 14, 15, 16, 29, 30, 31, 32, + + 33, 34, 35, 36, 49, 50, 51, 52, + 37, 38, 39, 40, 53, 54, 55, 56, + 41, 42, 43, 44, 57, 58, 59, 60, + 45, 46, 47, 48, 61, 62, 63, 64, +}; + +static unsigned char itoa64[] = /* 0..63 => ascii-64 */ + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + +/* ===== Tables that are initialized at run time ==================== */ + + +static unsigned char a64toi[128]; /* ascii-64 => 0..63 */ + +/* Initial key schedule permutation */ +static C_block PC1ROT[64/CHUNKBITS][1< final permutation table */ +static C_block CF6464[64/CHUNKBITS][1<= 0; ) { + if ((t = (unsigned char)setting[i]) == '\0') + t = '.'; + encp[i] = t; + num_iter = (num_iter<<6) | a64toi[t]; + } + setting += 4; + encp += 4; + salt_size = 4; + break; + default: + num_iter = 25; + salt_size = 2; + } + + salt = 0; + for (i = salt_size; --i >= 0; ) { + if ((t = (unsigned char)setting[i]) == '\0') + t = '.'; + encp[i] = t; + salt = (salt<<6) | a64toi[t]; + } + encp += salt_size; + if (des_cipher((char *)&constdatablock, (char *)&rsltblock, + salt, num_iter)) + return (NULL); + + /* + * Encode the 64 cipher bits as 11 ascii characters. + */ + i = ((long)((rsltblock.b[0]<<8) | rsltblock.b[1])<<8) | rsltblock.b[2]; + encp[3] = itoa64[i&0x3f]; i >>= 6; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; encp += 4; + i = ((long)((rsltblock.b[3]<<8) | rsltblock.b[4])<<8) | rsltblock.b[5]; + encp[3] = itoa64[i&0x3f]; i >>= 6; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; encp += 4; + i = ((long)((rsltblock.b[6])<<8) | rsltblock.b[7])<<2; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; + + encp[3] = 0; + + return (cryptresult); } -static void rotate(struct block *key) -{ - register unsigned char *p = key->b_data; - register unsigned char *ep = &(key->b_data[55]); - int data0 = key->b_data[0], data28 = key->b_data[28]; - while (p++ < ep) *(p-1) = *p; - key->b_data[27] = (char) data0; - key->b_data[55] = (char) data28; +/* + * The Key Schedule, filled in by des_setkey() or setkey(). + */ +#define KS_SIZE 16 +static C_block KS[KS_SIZE]; + +/* + * Set up the key schedule from the key. + */ +des_setkey(key) + register const char *key; +{ + register DCL_BLOCK(K, K0, K1); + register C_block *ptabp; + register int i; + static int des_ready = 0; + + if (!des_ready) { + init_des(); + des_ready = 1; + } + + PERM6464(K,K0,K1,(unsigned char *)key,(C_block *)PC1ROT); + key = (char *)&KS[0]; + STORE(K&~0x03030303L, K0&~0x03030303L, K1, *(C_block *)key); + for (i = 1; i < 16; i++) { + key += sizeof(C_block); + STORE(K,K0,K1,*(C_block *)key); + ptabp = (C_block *)PC2ROT[Rotates[i]-1]; + PERM6464(K,K0,K1,(unsigned char *)key,ptabp); + STORE(K&~0x03030303L, K0&~0x03030303L, K1, *(C_block *)key); + } + return (0); } -static struct ordering *EP = &etr; - -static void f(int i, struct block *key, struct block *a, struct block *x) +/* + * Encrypt (or decrypt if num_iter < 0) the 8 chars at "in" with abs(num_iter) + * iterations of DES, using the the given 24-bit salt and the pre-computed key + * schedule, and store the resulting 8 chars at "out" (in == out is permitted). + * + * NOTE: the performance of this routine is critically dependent on your + * compiler and machine architecture. + */ +des_cipher(in, out, salt, num_iter) + const char *in; + char *out; + long salt; + int num_iter; { - struct block e, ikey, y; - int k; - register unsigned char *p, *q, *r; + /* variables that we want in registers, most important first */ +#if defined(pdp11) + register int j; +#endif + register long L0, L1, R0, R1, k; + register C_block *kp; + register int ks_inc, loop_count; + C_block B; - e = *a; - transpose(&e, EP, 48); - for (k = rots[i]; k; k--) rotate(key); - ikey = *key; - transpose(&ikey, &KeyTr2, 48); - p = &(y.b_data[48]); - q = &(e.b_data[48]); - r = &(ikey.b_data[48]); - while (p > y.b_data) { - *--p = *--q ^ *--r; - } - q = x->b_data; - for (k = 0; k < 8; k++) { - register int xb, r; + L0 = salt; + TO_SIX_BIT(salt, L0); /* convert to 4*(6+2) format */ - r = *p++ << 5; - r += *p++ << 3; - r += *p++ << 2; - r += *p++ << 1; - r += *p++; - r += *p++ << 4; +#if defined(vax) || defined(pdp11) + salt = ~salt; /* "x &~ y" is faster than "x & y". */ +#define SALT (~salt) +#else +#define SALT salt +#endif - xb = s_boxes[k][r]; +#if defined(MUST_ALIGN) + B.b[0] = in[0]; B.b[1] = in[1]; B.b[2] = in[2]; B.b[3] = in[3]; + B.b[4] = in[4]; B.b[5] = in[5]; B.b[6] = in[6]; B.b[7] = in[7]; + LOAD(L,L0,L1,B); +#else + LOAD(L,L0,L1,*(C_block *)in); +#endif + LOADREG(R,R0,R1,L,L0,L1); + L0 &= 0x55555555L; + L1 &= 0x55555555L; + L0 = (L0 << 1) | L1; /* L0 is the even-numbered input bits */ + R0 &= 0xaaaaaaaaL; + R1 = (R1 >> 1) & 0x55555555L; + L1 = R0 | R1; /* L1 is the odd-numbered input bits */ + STORE(L,L0,L1,B); + PERM3264(L,L0,L1,B.b, (C_block *)IE3264); /* even bits */ + PERM3264(R,R0,R1,B.b+4,(C_block *)IE3264); /* odd bits */ - *q++ = (char) (xb >> 3) & 1; - *q++ = (char) (xb>>2) & 1; - *q++ = (char) (xb>>1) & 1; - *q++ = (char) (xb & 1); - } - transpose(x, &ptr, 32); + if (num_iter >= 0) + { /* encryption */ + kp = &KS[0]; + ks_inc = sizeof(*kp); + } + else + { /* decryption */ + num_iter = -num_iter; + kp = &KS[KS_SIZE-1]; + ks_inc = -sizeof(*kp); + } + + while (--num_iter >= 0) { + loop_count = 8; + do { + +#define SPTAB(t, i) (*(long *)((unsigned char *)t + i*(sizeof(long)/4))) +#if defined(gould) + /* use this if B.b[i] is evaluated just once ... */ +#define DOXOR(x,y,i) x^=SPTAB(SPE[0][i],B.b[i]); y^=SPTAB(SPE[1][i],B.b[i]); +#else +#if defined(pdp11) + /* use this if your "long" int indexing is slow */ +#define DOXOR(x,y,i) j=B.b[i]; x^=SPTAB(SPE[0][i],j); y^=SPTAB(SPE[1][i],j); +#else + /* use this if "k" is allocated to a register ... */ +#define DOXOR(x,y,i) k=B.b[i]; x^=SPTAB(SPE[0][i],k); y^=SPTAB(SPE[1][i],k); +#endif +#endif + +#define CRUNCH(p0, p1, q0, q1) \ + k = (q0 ^ q1) & SALT; \ + B.b32.i0 = k ^ q0 ^ kp->b32.i0; \ + B.b32.i1 = k ^ q1 ^ kp->b32.i1; \ + kp = (C_block *)((char *)kp+ks_inc); \ + \ + DOXOR(p0, p1, 0); \ + DOXOR(p0, p1, 1); \ + DOXOR(p0, p1, 2); \ + DOXOR(p0, p1, 3); \ + DOXOR(p0, p1, 4); \ + DOXOR(p0, p1, 5); \ + DOXOR(p0, p1, 6); \ + DOXOR(p0, p1, 7); + + CRUNCH(L0, L1, R0, R1); + CRUNCH(R0, R1, L0, L1); + } while (--loop_count != 0); + kp = (C_block *)((char *)kp-(ks_inc*KS_SIZE)); + + + /* swap L and R */ + L0 ^= R0; L1 ^= R1; + R0 ^= L0; R1 ^= L1; + L0 ^= R0; L1 ^= R1; + } + + /* store the encrypted (or decrypted) result */ + L0 = ((L0 >> 3) & 0x0f0f0f0fL) | ((L1 << 1) & 0xf0f0f0f0L); + L1 = ((R0 >> 3) & 0x0f0f0f0fL) | ((R1 << 1) & 0xf0f0f0f0L); + STORE(L,L0,L1,B); + PERM6464(L,L0,L1,B.b, (C_block *)CF6464); +#if defined(MUST_ALIGN) + STORE(L,L0,L1,B); + out[0] = B.b[0]; out[1] = B.b[1]; out[2] = B.b[2]; out[3] = B.b[3]; + out[4] = B.b[4]; out[5] = B.b[5]; out[6] = B.b[6]; out[7] = B.b[7]; +#else + STORE(L,L0,L1,*(C_block *)out); +#endif + return (0); } -void definekey(char *k) -{ - key = *((struct block *) k); - transpose(&key, &KeyTr1, 56); +/* + * Initialize various tables. This need only be done once. It could even be + * done at compile time, if the compiler were capable of that sort of thing. + */ +STATIC +init_des() +{ + register int i, j; + register long k; + register int tableno; + static unsigned char perm[64], tmp32[32]; /* "static" for speed */ + + /* + * table that converts chars "./0-9A-Za-z"to integers 0-63. + */ + for (i = 0; i < 64; i++) + a64toi[itoa64[i]] = i; + + /* + * PC1ROT - bit reverse, then PC1, then Rotate, then PC2. + */ + for (i = 0; i < 64; i++) + perm[i] = 0; + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + k += Rotates[0]-1; + if ((k%28) < Rotates[0]) k -= 28; + k = PC1[k]; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[i] = k; + } +#ifdef DEBUG + prtab("pc1tab", perm, 8); +#endif + init_perm(PC1ROT, perm, 8, 8); + + /* + * PC2ROT - PC2 inverse, then Rotate (once or twice), then PC2. + */ + for (j = 0; j < 2; j++) { + unsigned char pc2inv[64]; + for (i = 0; i < 64; i++) + perm[i] = pc2inv[i] = 0; + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + pc2inv[k-1] = i+1; + } + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + k += j; + if ((k%28) <= j) k -= 28; + perm[i] = pc2inv[k]; + } +#ifdef DEBUG + prtab("pc2tab", perm, 8); +#endif + init_perm(PC2ROT[j], perm, 8, 8); + } + + /* + * Bit reverse, then initial permutation, then expansion. + */ + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + k = (j < 2)? 0: IP[ExpandTr[i*6+j-2]-1]; + if (k > 32) + k -= 32; + else if (k > 0) + k--; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[i*8+j] = k; + } + } +#ifdef DEBUG + prtab("ietab", perm, 8); +#endif + init_perm(IE3264, perm, 4, 8); + + /* + * Compression, then final permutation, then bit reverse. + */ + for (i = 0; i < 64; i++) { + k = IP[CIFP[i]-1]; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[k-1] = i+1; + } +#ifdef DEBUG + prtab("cftab", perm, 8); +#endif + init_perm(CF6464, perm, 8, 8); + + /* + * SPE table + */ + for (i = 0; i < 48; i++) + perm[i] = P32Tr[ExpandTr[i]-1]; + for (tableno = 0; tableno < 8; tableno++) { + for (j = 0; j < 64; j++) { + k = (((j >> 0) &01) << 5)| + (((j >> 1) &01) << 3)| + (((j >> 2) &01) << 2)| + (((j >> 3) &01) << 1)| + (((j >> 4) &01) << 0)| + (((j >> 5) &01) << 4); + k = S[tableno][k]; + k = (((k >> 3)&01) << 0)| + (((k >> 2)&01) << 1)| + (((k >> 1)&01) << 2)| + (((k >> 0)&01) << 3); + for (i = 0; i < 32; i++) + tmp32[i] = 0; + for (i = 0; i < 4; i++) + tmp32[4 * tableno + i] = (k >> i) & 01; + k = 0; + for (i = 24; --i >= 0; ) + k = (k<<1) | tmp32[perm[i]-1]; + TO_SIX_BIT(SPE[0][tableno][j], k); + k = 0; + for (i = 24; --i >= 0; ) + k = (k<<1) | tmp32[perm[i+24]-1]; + TO_SIX_BIT(SPE[1][tableno][j], k); + } + } } -void encrypt(char *blck, int edflag) +/* + * Initialize "perm" to represent transformation "p", which rearranges + * (perhaps with expansion and/or contraction) one packed array of bits + * (of size "chars_in" characters) into another array (of size "chars_out" + * characters). + * + * "perm" must be all-zeroes on entry to this routine. + */ +STATIC +init_perm(perm, p, chars_in, chars_out) + C_block perm[64/CHUNKBITS][1<= 0; i--) { - int j = edflag ? i : 15 - i; - register int k; - struct block b, x; - - b = *p; - for (k = 31; k >= 0; k--) { - p->b_data[k] = b.b_data[k + 32]; - } - f(j, &key, p, &x); - for (k = 31; k >= 0; k--) { - p->b_data[k+32] = b.b_data[k] ^ x.b_data[k]; - } - } - transpose(p, &swap, 64); - transpose(p, &FinalTr, 64); + for (k = 0; k < chars_out*8; k++) { /* each output bit position */ + l = p[k] - 1; /* where this bit comes from */ + if (l < 0) + continue; /* output bit is always 0 */ + i = l>>LGCHUNKBITS; /* which chunk this bit comes from */ + l = 1<<(l&(CHUNKBITS-1)); /* mask for this bit */ + for (j = 0; j < (1<>3] |= 1<<(k&07); + } + } } -char *crypt(char *pw, char *salt) +/* + * "setkey" routine (for backwards compatibility) + */ +setkey(key) + register const char *key; { + register int i, j, k; + C_block keyblock; - char pwb[66]; - static char result[16]; - register char *p = pwb; - struct ordering new_etr; - register int i; - - while (*pw && p < &pwb[64]) { - register int j = 7; - - while (j--) { - *p++ = (*pw >> j) & 01; - } - pw++; - *p++ = 0; - } - while (p < &pwb[64]) *p++ = 0; - - definekey(p = pwb); - - while (p < &pwb[66]) *p++ = 0; - - new_etr = etr; - EP = &new_etr; - for (i = 0; i < 2; i++) { - register char c = *salt++; - register int j; - - result[i] = c; - if ( c > 'Z') c -= 6 + 7 + '.'; /* c was a lower case letter */ - else if ( c > '9') c -= 7 + '.';/* c was upper case letter */ - else c -= '.'; /* c was digit, '.' or '/'. */ - /* now, 0 <= c <= 63 */ - for (j = 0; j < 6; j++) { - if ((c >> j) & 01) { - int t = 6*i + j; - int temp = new_etr.o_data[t]; - new_etr.o_data[t] = new_etr.o_data[t+24]; - new_etr.o_data[t+24] = (char) temp; - } - } - } - - if (result[1] == 0) result[1] = result[0]; - - for (i = 0; i < 25; i++) encrypt(pwb,0); - EP = &etr; - - p = pwb; - pw = result+2; - while (p < &pwb[66]) { - register int c = 0; - register int j = 6; - - while (j--) { - c <<= 1; - c |= *p++; - } - c += '.'; /* becomes >= '.' */ - if (c > '9') c += 7; /* not in [./0-9], becomes upper */ - if (c > 'Z') c += 6; /* not in [A-Z], becomes lower */ - *pw++ = (char) c; - } - *pw = 0; - return result; + for (i = 0; i < 8; i++) { + k = 0; + for (j = 0; j < 8; j++) { + k <<= 1; + k |= (unsigned char)*key++; + } + keyblock.b[i] = k; + } + return (des_setkey((char *)keyblock.b)); } + +/* + * "encrypt" routine (for backwards compatibility) + */ +encrypt(block, flag) + register char *block; + int flag; +{ + register int i, j, k; + C_block cblock; + + for (i = 0; i < 8; i++) { + k = 0; + for (j = 0; j < 8; j++) { + k <<= 1; + k |= (unsigned char)*block++; + } + cblock.b[i] = k; + } + if (des_cipher((char *)&cblock, (char *)&cblock, 0L, (flag ? -1: 1))) + return (1); + for (i = 7; i >= 0; i--) { + k = cblock.b[i]; + for (j = 7; j >= 0; j--) { + *--block = k&01; + k >>= 1; + } + } + return (0); +} + +#ifdef DEBUG +STATIC +prtab(s, t, num_rows) + char *s; + unsigned char *t; + int num_rows; +{ + register int i, j; + + (void)printf("%s:\n", s); + for (i = 0; i < num_rows; i++) { + for (j = 0; j < 8; j++) { + (void)printf("%3d", t[i*8+j]); + } + (void)printf("\n"); + } + (void)printf("\n"); +} +#endif diff --git a/missing/erf.c b/missing/erf.c index e30a90051e..541972667a 100644 --- a/missing/erf.c +++ b/missing/erf.c @@ -1,91 +1,392 @@ -/* erf.c -reference - Haruhiko Okumura: C-gengo niyoru saishin algorithm jiten - (New Algorithm handbook in C language) (Gijyutsu hyouron - sha, Tokyo, 1991) p.227 [in Japanese] */ -#include -#include +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ -#ifdef _WIN32 -# include -# if !defined __MINGW32__ || defined __NO_ISOCEXT -# ifndef isnan -# define isnan(x) _isnan(x) -# endif -# ifndef isinf -# define isinf(x) (!_finite(x) && !_isnan(x)) -# endif -# ifndef finite -# define finite(x) _finite(x) -# endif -# endif +#ifndef lint +static char sccsid[] = "@(#)erf.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +/* Modified Nov 30, 1992 P. McILROY: + * Replaced expansions for x >= 1.25 (error 1.7ulp vs ~6ulp) + * Replaced even+odd with direct calculation for x < .84375, + * to avoid destructive cancellation. + * + * Performance of erfc(x): + * In 300000 trials in the range [.83, .84375] the + * maximum observed error was 3.6ulp. + * + * In [.84735,1.25] the maximum observed error was <2.5ulp in + * 100000 runs in the range [1.2, 1.25]. + * + * In [1.25,26] (Not including subnormal results) + * the error is < 1.7ulp. + */ + +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * + * Method: + * 1. Reduce x to |x| by erf(-x) = -erf(x) + * 2. For x in [0, 0.84375] + * erf(x) = x + x*P(x^2) + * erfc(x) = 1 - erf(x) if x<=0.25 + * = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375] + * where + * 2 2 4 20 + * P = P(x ) = (p0 + p1 * x + p2 * x + ... + p10 * x ) + * is an approximation to (erf(x)-x)/x with precision + * + * -56.45 + * | P - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fixed + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 3. For x in [0.84375,1.25], let s = x - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = c + P1(s)/Q1(s) + * erfc(x) = (1-c) - P1(s)/Q1(s) + * |P1/Q1 - (erf(x)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 4. For x in [1.25, 2]; [2, 4] + * erf(x) = 1.0 - tiny + * erfc(x) = (1/x)exp(-x*x-(.5*log(pi) -.5z + R(z)/S(z)) + * + * Where z = 1/(x*x), R is degree 9, and S is degree 3; + * + * 5. For x in [4,28] + * erf(x) = 1.0 - tiny + * erfc(x) = (1/x)exp(-x*x-(.5*log(pi)+eps + zP(z)) + * + * Where P is degree 14 polynomial in 1/(x*x). + * + * Notes: + * Here 4 and 5 make use of the asymptotic series + * exp(-x*x) + * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ); + * x*sqrt(pi) + * + * where for z = 1/(x*x) + * P(z) ~ z/2*(-1 + z*3/2*(1 + z*5/2*(-1 + z*7/2*(1 +...)))) + * + * Thus we use rational approximation to approximate + * erfc*x*exp(x*x) ~ 1/sqrt(pi); + * + * The error bound for the target function, G(z) for + * the interval + * [4, 28]: + * |eps + 1/(z)P(z) - G(z)| < 2**(-56.61) + * for [2, 4]: + * |R(z)/S(z) - G(z)| < 2**(-58.24) + * for [1.25, 2]: + * |R(z)/S(z) - G(z)| < 2**(-58.12) + * + * 6. For inf > x >= 28 + * erf(x) = 1 - tiny (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) + * + * 7. Special cases: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +#if defined(vax) || defined(tahoe) +#define _IEEE 0 +#define TRUNC(x) (double) (float) (x) +#else +#define _IEEE 1 +#define TRUNC(x) *(((int *) &x) + 1) &= 0xf8000000 +#define infnan(x) 0.0 #endif -static double q_gamma(double, double, double); +#ifdef _IEEE_LIBM +/* + * redefining "___function" to "function" in _IEEE_LIBM mode + */ +#include "ieee_libm.h" +#endif -/* Incomplete gamma function - 1 / Gamma(a) * Int_0^x exp(-t) t^(a-1) dt */ -static double p_gamma(a, x, loggamma_a) - double a, x, loggamma_a; -{ - int k; - double result, term, previous; - - if (x >= 1 + a) return 1 - q_gamma(a, x, loggamma_a); - if (x == 0) return 0; - result = term = exp(a * log(x) - x - loggamma_a) / a; - for (k = 1; k < 1000; k++) { - term *= x / (a + k); - previous = result; result += term; - if (result == previous) return result; - } - fprintf(stderr, "erf.c:%d:p_gamma() could not converge.", __LINE__); - return result; -} - -/* Incomplete gamma function - 1 / Gamma(a) * Int_x^inf exp(-t) t^(a-1) dt */ -static double q_gamma(a, x, loggamma_a) - double a, x, loggamma_a; -{ - int k; - double result, w, temp, previous; - double la = 1, lb = 1 + x - a; /* Laguerre polynomial */ - - if (x < 1 + a) return 1 - p_gamma(a, x, loggamma_a); - w = exp(a * log(x) - x - loggamma_a); - result = w / lb; - for (k = 2; k < 1000; k++) { - temp = ((k - 1 - a) * (lb - la) + (k + x) * lb) / k; - la = lb; lb = temp; - w *= (k - 1 - a) / k; - temp = w / (la * lb); - previous = result; result += temp; - if (result == previous) return result; - } - fprintf(stderr, "erf.c:%d:q_gamma() could not converge.", __LINE__); - return result; -} - -#define LOG_PI_OVER_2 0.572364942924700087071713675675 /* log_e(PI)/2 */ +static double +tiny = 1e-300, +half = 0.5, +one = 1.0, +two = 2.0, +c = 8.45062911510467529297e-01, /* (float)0.84506291151 */ +/* + * Coefficients for approximation to erf in [0,0.84375] + */ +p0t8 = 1.02703333676410051049867154944018394163280, +p0 = 1.283791670955125638123339436800229927041e-0001, +p1 = -3.761263890318340796574473028946097022260e-0001, +p2 = 1.128379167093567004871858633779992337238e-0001, +p3 = -2.686617064084433642889526516177508374437e-0002, +p4 = 5.223977576966219409445780927846432273191e-0003, +p5 = -8.548323822001639515038738961618255438422e-0004, +p6 = 1.205520092530505090384383082516403772317e-0004, +p7 = -1.492214100762529635365672665955239554276e-0005, +p8 = 1.640186161764254363152286358441771740838e-0006, +p9 = -1.571599331700515057841960987689515895479e-0007, +p10= 1.073087585213621540635426191486561494058e-0008; +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +static double +pa0 = -2.362118560752659485957248365514511540287e-0003, +pa1 = 4.148561186837483359654781492060070469522e-0001, +pa2 = -3.722078760357013107593507594535478633044e-0001, +pa3 = 3.183466199011617316853636418691420262160e-0001, +pa4 = -1.108946942823966771253985510891237782544e-0001, +pa5 = 3.547830432561823343969797140537411825179e-0002, +pa6 = -2.166375594868790886906539848893221184820e-0003, +qa1 = 1.064208804008442270765369280952419863524e-0001, +qa2 = 5.403979177021710663441167681878575087235e-0001, +qa3 = 7.182865441419627066207655332170665812023e-0002, +qa4 = 1.261712198087616469108438860983447773726e-0001, +qa5 = 1.363708391202905087876983523620537833157e-0002, +qa6 = 1.198449984679910764099772682882189711364e-0002; +/* + * log(sqrt(pi)) for large x expansions. + * The tail (lsqrtPI_lo) is included in the rational + * approximations. +*/ +static double + lsqrtPI_hi = .5723649429247000819387380943226; +/* + * lsqrtPI_lo = .000000000000000005132975581353913; + * + * Coefficients for approximation to erfc in [2, 4] +*/ +static double +rb0 = -1.5306508387410807582e-010, /* includes lsqrtPI_lo */ +rb1 = 2.15592846101742183841910806188e-008, +rb2 = 6.24998557732436510470108714799e-001, +rb3 = 8.24849222231141787631258921465e+000, +rb4 = 2.63974967372233173534823436057e+001, +rb5 = 9.86383092541570505318304640241e+000, +rb6 = -7.28024154841991322228977878694e+000, +rb7 = 5.96303287280680116566600190708e+000, +rb8 = -4.40070358507372993983608466806e+000, +rb9 = 2.39923700182518073731330332521e+000, +rb10 = -6.89257464785841156285073338950e-001, +sb1 = 1.56641558965626774835300238919e+001, +sb2 = 7.20522741000949622502957936376e+001, +sb3 = 9.60121069770492994166488642804e+001; +/* + * Coefficients for approximation to erfc in [1.25, 2] +*/ +static double +rc0 = -2.47925334685189288817e-007, /* includes lsqrtPI_lo */ +rc1 = 1.28735722546372485255126993930e-005, +rc2 = 6.24664954087883916855616917019e-001, +rc3 = 4.69798884785807402408863708843e+000, +rc4 = 7.61618295853929705430118701770e+000, +rc5 = 9.15640208659364240872946538730e-001, +rc6 = -3.59753040425048631334448145935e-001, +rc7 = 1.42862267989304403403849619281e-001, +rc8 = -4.74392758811439801958087514322e-002, +rc9 = 1.09964787987580810135757047874e-002, +rc10 = -1.28856240494889325194638463046e-003, +sc1 = 9.97395106984001955652274773456e+000, +sc2 = 2.80952153365721279953959310660e+001, +sc3 = 2.19826478142545234106819407316e+001; +/* + * Coefficients for approximation to erfc in [4,28] + */ +static double +rd0 = -2.1491361969012978677e-016, /* includes lsqrtPI_lo */ +rd1 = -4.99999999999640086151350330820e-001, +rd2 = 6.24999999772906433825880867516e-001, +rd3 = -1.54166659428052432723177389562e+000, +rd4 = 5.51561147405411844601985649206e+000, +rd5 = -2.55046307982949826964613748714e+001, +rd6 = 1.43631424382843846387913799845e+002, +rd7 = -9.45789244999420134263345971704e+002, +rd8 = 6.94834146607051206956384703517e+003, +rd9 = -5.27176414235983393155038356781e+004, +rd10 = 3.68530281128672766499221324921e+005, +rd11 = -2.06466642800404317677021026611e+006, +rd12 = 7.78293889471135381609201431274e+006, +rd13 = -1.42821001129434127360582351685e+007; double erf(x) - double x; + double x; { - if (!finite(x)) { - if (isnan(x)) return x; /* erf(NaN) = NaN */ - return (x>0 ? 1.0 : -1.0); /* erf(+-inf) = +-1.0 */ - } - if (x >= 0) return p_gamma(0.5, x * x, LOG_PI_OVER_2); - else return - p_gamma(0.5, x * x, LOG_PI_OVER_2); + double R,S,P,Q,ax,s,y,z,r,fabs(),exp(); + if(!finite(x)) { /* erf(nan)=nan */ + if (isnan(x)) + return(x); + return (x > 0 ? one : -one); /* erf(+/-inf)= +/-1 */ + } + if ((ax = x) < 0) + ax = - ax; + if (ax < .84375) { + if (ax < 3.7e-09) { + if (ax < 1.0e-308) + return 0.125*(8.0*x+p0t8*x); /*avoid underflow */ + return x + p0*x; + } + y = x*x; + r = y*(p1+y*(p2+y*(p3+y*(p4+y*(p5+ + y*(p6+y*(p7+y*(p8+y*(p9+y*p10))))))))); + return x + x*(p0+r); + } + if (ax < 1.25) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x)-one; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if (x>=0) + return (c + P/Q); + else + return (-c - P/Q); + } + if (ax >= 6.0) { /* inf>|x|>=6 */ + if (x >= 0.0) + return (one-tiny); + else + return (tiny-one); + } + /* 1.25 <= |x| < 6 */ + z = -ax*ax; + s = -one/z; + if (ax < 2.0) { + R = rc0+s*(rc1+s*(rc2+s*(rc3+s*(rc4+s*(rc5+ + s*(rc6+s*(rc7+s*(rc8+s*(rc9+s*rc10))))))))); + S = one+s*(sc1+s*(sc2+s*sc3)); + } else { + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+ + s*(rb6+s*(rb7+s*(rb8+s*(rb9+s*rb10))))))))); + S = one+s*(sb1+s*(sb2+s*sb3)); + } + y = (R/S -.5*s) - lsqrtPI_hi; + z += y; + z = exp(z)/ax; + if (x >= 0) + return (one-z); + else + return (z-one); } -double erfc(x) - double x; +double erfc(x) + double x; { - if (!finite(x)) { - if (isnan(x)) return x; /* erfc(NaN) = NaN */ - return (x>0 ? 0.0 : 2.0); /* erfc(+-inf) = 0.0, 2.0 */ - } - if (x >= 0) return q_gamma(0.5, x * x, LOG_PI_OVER_2); - else return 1 + p_gamma(0.5, x * x, LOG_PI_OVER_2); + double R,S,P,Q,s,ax,y,z,r,fabs(),__exp__D(); + if (!finite(x)) { + if (isnan(x)) /* erfc(NaN) = NaN */ + return(x); + else if (x > 0) /* erfc(+-inf)=0,2 */ + return 0.0; + else + return 2.0; + } + if ((ax = x) < 0) + ax = -ax; + if (ax < .84375) { /* |x|<0.84375 */ + if (ax < 1.38777878078144568e-17) /* |x|<2**-56 */ + return one-x; + y = x*x; + r = y*(p1+y*(p2+y*(p3+y*(p4+y*(p5+ + y*(p6+y*(p7+y*(p8+y*(p9+y*p10))))))))); + if (ax < .0625) { /* |x|<2**-4 */ + return (one-(x+x*(p0+r))); + } else { + r = x*(p0+r); + r += (x-half); + return (half - r); + } + } + if (ax < 1.25) { /* 0.84375 <= |x| < 1.25 */ + s = ax-one; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if (x>=0) { + z = one-c; return z - P/Q; + } else { + z = c+P/Q; return one+z; + } + } + if (ax >= 28) /* Out of range */ + if (x>0) + return (tiny*tiny); + else + return (two-tiny); + z = ax; + TRUNC(z); + y = z - ax; y *= (ax+z); + z *= -z; /* Here z + y = -x^2 */ + s = one/(-z-y); /* 1/(x*x) */ + if (ax >= 4) { /* 6 <= ax */ + R = s*(rd1+s*(rd2+s*(rd3+s*(rd4+s*(rd5+ + s*(rd6+s*(rd7+s*(rd8+s*(rd9+s*(rd10 + +s*(rd11+s*(rd12+s*rd13)))))))))))); + y += rd0; + } else if (ax >= 2) { + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+ + s*(rb6+s*(rb7+s*(rb8+s*(rb9+s*rb10))))))))); + S = one+s*(sb1+s*(sb2+s*sb3)); + y += R/S; + R = -.5*s; + } else { + R = rc0+s*(rc1+s*(rc2+s*(rc3+s*(rc4+s*(rc5+ + s*(rc6+s*(rc7+s*(rc8+s*(rc9+s*rc10))))))))); + S = one+s*(sc1+s*(sc2+s*sc3)); + y += R/S; + R = -.5*s; + } + /* return exp(-x^2 - lsqrtPI_hi + R + y)/x; */ + s = ((R + y) - lsqrtPI_hi) + z; + y = (((z-s) - lsqrtPI_hi) + R) + y; + r = __exp__D(s, y)/x; + if (x>0) + return r; + else + return two-r; } diff --git a/missing/vsnprintf.c b/missing/vsnprintf.c index 266290a139..e3362a91f9 100644 --- a/missing/vsnprintf.c +++ b/missing/vsnprintf.c @@ -13,11 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. *