diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/conf/files.sh3 src/sys/arch/sh3/conf/files.sh3 --- src.orig/sys/arch/sh3/conf/files.sh3 2008-03-20 11:57:41.000000000 +0900 +++ src/sys/arch/sh3/conf/files.sh3 2008-03-20 12:03:40.000000000 +0900 @@ -37,6 +37,9 @@ file arch/sh3/sh3/mmu.c file arch/sh3/sh3/mmu_sh3.c sh3 file arch/sh3/sh3/mmu_sh4.c sh4 +file arch/sh3/sh3/fpu.c +file arch/sh3/sh3/fpu_sh3.c sh3 +file arch/sh3/sh3/fpu_sh4.c sh4 file arch/sh3/sh3/clock.c file arch/sh3/sh3/devreg.c sh3 & sh4 file arch/sh3/sh3/interrupt.c diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/files.fpu src/sys/arch/sh3/fpu/files.fpu --- src.orig/sys/arch/sh3/fpu/files.fpu 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/files.fpu 2007-01-31 11:48:27.000000000 +0900 @@ -0,0 +1,14 @@ +# $NetBSD$ + +# FPU emulation files. + +file arch/sh3/fpu/fpu_emu.c +file arch/sh3/fpu/fpu_add.c +file arch/sh3/fpu/fpu_compare.c +file arch/sh3/fpu/fpu_div.c +file arch/sh3/fpu/fpu_explode.c +file arch/sh3/fpu/fpu_implode.c +file arch/sh3/fpu/fpu_mul.c +file arch/sh3/fpu/fpu_sqrt.c +file arch/sh3/fpu/fpu_subr.c + diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_add.c src/sys/arch/sh3/fpu/fpu_add.c --- src.orig/sys/arch/sh3/fpu/fpu_add.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_add.c 2007-01-31 11:48:51.000000000 +0900 @@ -0,0 +1,228 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_add.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU add (return x + y).
+ *
+ * To subtract, negate y and call add.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/fpu.h>
+#include <machine/reg.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+
+struct fpn *
+fpu_add(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
+	u_int r0, r1, r2, r3;
+	int rd;
+
+	/*
+	 * Put the `heavier' operand on the right (see fpu_emu.h).
+	 * Then we will have one of the following cases, taken in the
+	 * following order:
+	 *
+	 * - y = NaN.  Implied: if only one is a signalling NaN, y is.
+	 *	The result is y.
+	 * - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
+	 *	case was taken care of earlier).
+	 *	If x = -y, the result is NaN.  Otherwise the result
+	 *	is y (an Inf of whichever sign).
+	 * - y is 0.  Implied: x = 0.
+	 *	If x and y differ in sign (one positive, one negative),
+	 *	the result is +0 except when rounding to -Inf.  If same:
+	 *	+0 + +0 = +0; -0 + -0 = -0.
+	 * - x is 0.  Implied: y != 0.
+	 *	Result is y.
+	 * - other.  Implied: both x and y are numbers.
+	 *	Do addition a la Hennessy & Patterson.
+	 */
+	DPRINTF(FPE_REG, ("fpu_add:\n"));
+	DUMPFPN(FPE_REG, x);
+	DUMPFPN(FPE_REG, y);
+	DPRINTF(FPE_REG, ("=>\n"));
+	ORDER(x, y);
+	if (ISNAN(y)) {
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISINF(y)) {
+		if (ISINF(x) && x->fp_sign != y->fp_sign) {
+			fe->fe_cx |= FPSCR_VXISI;
+			return (fpu_newnan(fe));
+		}
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	rd = ((fe->fe_fpscr) & FPSCR_RN);
+	if (ISZERO(y)) {
+		if (rd != FSR_RD_RM)	/* only -0 + -0 gives -0 */
+			y->fp_sign &= x->fp_sign;
+		else			/* any -0 operand gives -0 */
+			y->fp_sign |= x->fp_sign;
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	if (ISZERO(x)) {
+		DUMPFPN(FPE_REG, y);
+		return (y);
+	}
+	/*
+	 * We really have two numbers to add, although their signs may
+	 * differ.  Make the exponents match, by shifting the smaller
+	 * number right (e.g., 1.011 => 0.1011) and increasing its
+	 * exponent (2^3 => 2^4).  Note that we do not alter the exponents
+	 * of x and y here.
+	 */
+	r = &fe->fe_f3;
+	r->fp_class = FPC_NUM;
+	if (x->fp_exp == y->fp_exp) {
+		r->fp_exp = x->fp_exp;
+		r->fp_sticky = 0;
+	} else {
+		if (x->fp_exp < y->fp_exp) {
+			/*
+			 * Try to avoid subtract case iii (see below).
+			 * This also guarantees that x->fp_sticky = 0.
+			 */
+			SWAP(x, y);
+		}
+		/* now x->fp_exp > y->fp_exp */
+		r->fp_exp = x->fp_exp;
+		r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp);
+	}
+	r->fp_sign = x->fp_sign;
+	if (x->fp_sign == y->fp_sign) {
+		FPU_DECL_CARRY
+
+		/*
+		 * The signs match, so we simply add the numbers.  The result
+		 * may be `supernormal' (as big as 1.111...1 + 1.111...1, or
+		 * 11.111...0).  If so, a single bit shift-right will fix it
+		 * (but remember to adjust the exponent).
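+		 * For example, with 4-bit mantissas, 1.111 + 1.111 = 11.110;
+		 * the one-bit right shift turns this back into 1.111 with
+		 * the exponent bumped by one (the 0 shifted out is harmless).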
+ */ + /* r->fp_mant = x->fp_mant + y->fp_mant */ + FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]); + FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]); + FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]); + FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]); + if ((r->fp_mant[0] = r0) >= FP_2) { + (void) fpu_shr(r, 1); + r->fp_exp++; + } + } else { + FPU_DECL_CARRY + + /* + * The signs differ, so things are rather more difficult. + * H&P would have us negate the negative operand and add; + * this is the same as subtracting the negative operand. + * This is quite a headache. Instead, we will subtract + * y from x, regardless of whether y itself is the negative + * operand. When this is done one of three conditions will + * hold, depending on the magnitudes of x and y: + * case i) |x| > |y|. The result is just x - y, + * with x's sign, but it may need to be normalized. + * case ii) |x| = |y|. The result is 0 (maybe -0) + * so must be fixed up. + * case iii) |x| < |y|. We goofed; the result should + * be (y - x), with the same sign as y. + * We could compare |x| and |y| here and avoid case iii, + * but that would take just as much work as the subtract. + * We can tell case iii has occurred by an overflow. + * + * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0. + */ + /* r->fp_mant = x->fp_mant - y->fp_mant */ + FPU_SET_CARRY(y->fp_sticky); + FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]); + FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]); + FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]); + FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]); + if (r0 < FP_2) { + /* cases i and ii */ + if ((r0 | r1 | r2 | r3) == 0) { + /* case ii */ + r->fp_class = FPC_ZERO; + r->fp_sign = rd == FSR_RD_RM; + return (r); + } + } else { + /* + * Oops, case iii. This can only occur when the + * exponents were equal, in which case neither + * x nor y have sticky bits set. Flip the sign + * (to y's sign) and negate the result to get y - x. + */ +#ifdef DIAGNOSTIC + if (x->fp_exp != y->fp_exp || r->fp_sticky) + panic("fpu_add"); +#endif + r->fp_sign = y->fp_sign; + FPU_SUBS(r3, 0, r3); + FPU_SUBCS(r2, 0, r2); + FPU_SUBCS(r1, 0, r1); + FPU_SUBC(r0, 0, r0); + } + r->fp_mant[3] = r3; + r->fp_mant[2] = r2; + r->fp_mant[1] = r1; + r->fp_mant[0] = r0; + if (r0 < FP_1) + fpu_norm(r); + } + DUMPFPN(FPE_REG, r); + return (r); +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_arith.h src/sys/arch/sh3/fpu/fpu_arith.h --- src.orig/sys/arch/sh3/fpu/fpu_arith.h 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_arith.h 2007-02-02 15:23:05.000000000 +0900 @@ -0,0 +1,178 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_arith.h	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Extended-precision arithmetic.
+ *
+ * We hold the notion of a `carry register', which may or may not be a
+ * machine carry bit or register.  On the SPARC, it is just the machine's
+ * carry bit.
+ *
+ * In the worst case, you can compute the carry from x+y as
+ *	(unsigned)(x + y) < (unsigned)x
+ * and from x+y+c as
+ *	((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0)
+ * for example.
+ */
+
+
+#ifndef FPE_USE_ASM
+
+/* set up for extended-precision arithmetic */
+#define	FPU_DECL_CARRY	quad_t fpu_carry, fpu_tmp;
+
+/*
+ * We have three kinds of add:
+ *	add with carry:					  r = x + y + c
+ *	add (ignoring current carry) and set carry:	c'r = x + y + 0
+ *	add with carry and set carry:			c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+#define	FPU_ADDC(r, x, y) \
+	(r) = (x) + (y) + (!!fpu_carry)
+#define	FPU_ADDS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) + (quad_t)(y); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_ADDCS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) + (quad_t)(y) + (!!fpu_carry); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_SUBC(r, x, y) \
+	(r) = (x) - (y) - (!!fpu_carry)
+#define	FPU_SUBS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) - (quad_t)(y); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+#define	FPU_SUBCS(r, x, y) \
+	{ \
+		fpu_tmp = (quad_t)(x) - (quad_t)(y) - (!!fpu_carry); \
+		(r) = (u_int)fpu_tmp; \
+		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \
+	}
+
+#define	FPU_GET_CARRY(r)	(r) = (!!fpu_carry)
+#define	FPU_SET_CARRY(v)	fpu_carry = ((v) != 0)
+
+#else
+/* set up for extended-precision arithmetic */
+#define	FPU_DECL_CARRY
+
+/*
+ * We have three kinds of add:
+ *	add with carry:					  r = x + y + c
+ *	add (ignoring current carry) and set carry:	c'r = x + y + 0
+ *	add with carry and set carry:			c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+/*
+ * NB: the carry here is the SH T bit; nothing that clobbers T may be
+ * allowed to slip in between two consecutive macros.
+ */
+#define	FPU_ADDC(r, x, y) \
+	__asm volatile( \
+		"addc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+#define	FPU_ADDS(r, x, y) \
+	__asm volatile( \
+		"clrt		\n\t" \
+		"addc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+#define	FPU_ADDCS(r, x, y) \
+	__asm volatile( \
+		"addc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+#define	FPU_SUBC(r, x, y) \
+	__asm volatile( \
+		"subc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+#define	FPU_SUBS(r, x, y) \
+	__asm volatile( \
+		"clrt		\n\t" \
+		"subc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+#define	FPU_SUBCS(r, x, y) \
+	__asm volatile( \
+		"subc	%2, %0" \
+		: "=r"(r) : "0"(x), "r"(y))
+
+#define	FPU_GET_CARRY(r) \
+	__asm volatile( \
+		"movt	%0" \
+		: "=r"(r))
+#define	FPU_SET_CARRY(v) \
+do { \
+	int __tmp; \
+	__asm volatile( \
+		"clrt		\n\t" \
+		"negc	%1, %0" \
+		: "=r"(__tmp) : "r"(v)); \
+} while (/*CONSTCOND*/0)
+
+#define	FPU_SHL1_BY_ADD	/* shift left 1 faster by ADDC than (a<<1)|(b>>31) */
+#endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_compare.c src/sys/arch/sh3/fpu/fpu_compare.c
--- src.orig/sys/arch/sh3/fpu/fpu_compare.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_compare.c	2007-01-31 19:14:16.000000000 +0900
@@ -0,0 +1,159 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_compare.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * FCMP/EQ and FCMP/GT instructions.
+ *
+ * These rely on the fact that our internal wide format is achieved by
+ * adding zero bits to the end of narrower mantissas.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+
+#include <machine/fpu.h>
+#include <machine/reg.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * Perform a compare instruction (with or without unordered exception).
+ * The result is returned as one of the FCMP_* values.
+ *
+ * If either operand is NaN, the result is unordered.  For ordered, this
+ * causes an NV exception.  Everything else is ordered:
+ *	|Inf| > |numbers| > |0|.
+ * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0),
+ * so we get this directly.  Note, however, that two zeros compare equal
+ * regardless of sign, while everything else depends on sign.
+ *
+ * Incidentally, two Infs of the same sign compare equal (per the 80387
+ * manual---it would be nice if the SPARC documentation were more
+ * complete).
+ */
+int
+fpu_compare(struct fpemu *fe)
+{
+	struct fpn *a, *b, *r;
+	int res;
+
+	a = &fe->fe_f1;
+	b = &fe->fe_f2;
+	r = &fe->fe_f3;
+	res = 0;
+
+	if (ISNAN(a) || ISNAN(b)) {
+		/*
+		 * In any case, we already got an exception for signalling
+		 * NaNs; here we may replace that one with an identical
+		 * exception, but so what?
+		 */
+		if (ISSNAN(a) || ISSNAN(b))
+			res = FCMP_INVALID;
+		else	/* qNaN */
+			res = FCMP_UO;
+		goto done;
+	}
+
+	/*
+	 * Must handle both-zero early to avoid sign goofs.  Otherwise,
+	 * at most one is 0, and if the signs differ we are done.
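+	 * (E.g. a = -0, b = 3: the signs already differ, so we can answer
+	 * a < b without ever comparing magnitudes.)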
+ */ + if (ISZERO(a) && ISZERO(b)) { + res = FCMP_EQ; + goto done; + } + if (a->fp_sign) { /* a < 0 (or -0) */ + if (!b->fp_sign) { /* b >= 0 (or if a = -0, b > 0) */ + res = FCMP_LT; + goto done; + } + } else { /* a > 0 (or +0) */ + if (b->fp_sign) { /* b <= -0 (or if a = +0, b < 0) */ + res = FCMP_GT; + goto done; + } + } + + /* + * Now the signs are the same (but may both be negative). All + * we have left are these cases: + * + * |a| < |b| [classes or values differ] + * |a| > |b| [classes or values differ] + * |a| == |b| [classes and values identical] + * + * We define `diff' here to expand these as: + * + * |a| < |b|, a,b >= 0: a < b => LT + * |a| < |b|, a,b < 0: a > b => GT + * |a| > |b|, a,b >= 0: a > b => GT + * |a| > |b|, a,b < 0: a < b => LT + */ +#define opposite_cc(cc) ((cc) == FCMP_GT ? FCMP_LT : FCMP_GT) +#define diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) : (magnitude)) + if (a->fp_class < b->fp_class) { /* |a| < |b| */ + res = diff(FCMP_LT); + goto done; + } + if (a->fp_class > b->fp_class) { /* |a| > |b| */ + res = diff(FCMP_GT); + goto done; + } + /* now none can be 0: only Inf and numbers remain */ + if (ISINF(a)) { /* |Inf| = |Inf| */ + res = FCMP_EQ; + goto done; + } + fpu_sub(fe); + if (ISZERO(r)) + res = FCMP_EQ; + else if (r->fp_sign) + res = FCMP_LT; + else + res = FCMP_GT; +done: + return res; +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_div.c src/sys/arch/sh3/fpu/fpu_div.c --- src.orig/sys/arch/sh3/fpu/fpu_div.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_div.c 2007-01-31 12:14:44.000000000 +0900 @@ -0,0 +1,294 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_div.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU divide (return x / y).
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include <sys/systm.h>
+#endif
+
+#include <machine/fpu.h>
+#include <machine/reg.h>
+
+#include <sh3/fpu/fpu_arith.h>
+#include <sh3/fpu/fpu_emu.h>
+
+/*
+ * Division of normal numbers is done as follows:
+ *
+ * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e.
+ * If X and Y are the mantissas (1.bbbb's), the quotient is then:
+ *
+ *	q = (X / Y) * 2^((x exponent) - (y exponent))
+ *
+ * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y)
+ * will be in [0.5,2.0).  Moreover, it will be less than 1.0 if and only
+ * if X < Y.  In that case, it will have to be shifted left one bit to
+ * become a normal number, and the exponent decremented.  Thus, the
+ * desired exponent is:
+ *
+ *	left_shift = x->fp_mant < y->fp_mant;
+ *	result_exp = x->fp_exp - y->fp_exp - left_shift;
+ *
+ * The quotient mantissa X/Y can then be computed one bit at a time
+ * using the following algorithm:
+ *
+ *	Q = 0;			-- Initial quotient.
+ *	R = X;			-- Initial remainder,
+ *	if (left_shift)		-- but fixed up in advance.
+ *		R *= 2;
+ *	for (bit = FP_NMANT; --bit >= 0; R *= 2) {
+ *		if (R >= Y) {
+ *			Q |= 1 << bit;
+ *			R -= Y;
+ *		}
+ *	}
+ *
+ * The subtraction R -= Y always removes the uppermost bit from R (and
+ * can sometimes remove additional lower-order 1 bits); this proof is
+ * left to the reader.
+ *
+ * This loop correctly calculates the guard and round bits since they are
+ * included in the expanded internal representation.  The sticky bit
+ * is to be set if and only if any other bits beyond guard and round
+ * would be set.  From the above it is obvious that this is true if and
+ * only if the remainder R is nonzero when the loop terminates.
+ *
+ * Examining the loop above, we can see that the quotient Q is built
+ * one bit at a time ``from the top down''.  This means that we can
+ * dispense with the multi-word arithmetic and just build it one word
+ * at a time, writing each result word when it is done.
+ *
+ * Furthermore, since X and Y are both in [1.0,2.0), we know that,
+ * initially, R >= Y.  (Recall that, if X < Y, R is set to X * 2 and
+ * is therefore in [2.0,4.0).)  Thus Q is sure to have bit FP_NMANT-1
+ * set, and R can be set initially to either X - Y (when X >= Y) or
+ * 2X - Y (when X < Y).  In addition, comparing R and Y is difficult,
+ * so we will simply calculate R - Y and see if that underflows.
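+ * (A small worked example: X = 1.0 and Y = 1.1 in binary, i.e. 1/1.5.
+ * Here X < Y, so the quotient mantissa comes out as 1.0101... with the
+ * result exponent decremented by one: 1.0101...b / 2 = 2/3.)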
+ * This leads to the following revised version of the algorithm: + * + * R = X; + * bit = FP_1; + * D = R - Y; + * if (D >= 0) { + * result_exp = x->fp_exp - y->fp_exp; + * R = D; + * q = bit; + * bit >>= 1; + * } else { + * result_exp = x->fp_exp - y->fp_exp - 1; + * q = 0; + * } + * R <<= 1; + * do { + * D = R - Y; + * if (D >= 0) { + * q |= bit; + * R = D; + * } + * R <<= 1; + * } while ((bit >>= 1) != 0); + * Q[0] = q; + * for (i = 1; i < 4; i++) { + * q = 0, bit = 1 << 31; + * do { + * D = R - Y; + * if (D >= 0) { + * q |= bit; + * R = D; + * } + * R <<= 1; + * } while ((bit >>= 1) != 0); + * Q[i] = q; + * } + * + * This can be refined just a bit further by moving the `R <<= 1' + * calculations to the front of the do-loops and eliding the first one. + * The process can be terminated immediately whenever R becomes 0, but + * this is relatively rare, and we do not bother. + */ + +struct fpn * +fpu_div(struct fpemu *fe) +{ + struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; + u_int q, bit; + u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; + FPU_DECL_CARRY + + /* + * Since divide is not commutative, we cannot just use ORDER. + * Check either operand for NaN first; if there is at least one, + * order the signalling one (if only one) onto the right, then + * return it. Otherwise we have the following cases: + * + * Inf / Inf = NaN, plus NV exception + * Inf / num = Inf [i.e., return x] + * Inf / 0 = Inf [i.e., return x] + * 0 / Inf = 0 [i.e., return x] + * 0 / num = 0 [i.e., return x] + * 0 / 0 = NaN, plus NV exception + * num / Inf = 0 + * num / num = num (do the divide) + * num / 0 = Inf, plus DZ exception + */ + DPRINTF(FPE_REG, ("fpu_div:\n")); + DUMPFPN(FPE_REG, x); + DUMPFPN(FPE_REG, y); + DPRINTF(FPE_REG, ("=>\n")); + if (ISNAN(x) || ISNAN(y)) { + ORDER(x, y); + fe->fe_cx |= FPSCR_VXSNAN; + DUMPFPN(FPE_REG, y); + return (y); + } + /* + * Need to split the following out cause they generate different + * exceptions. + */ + if (ISINF(x)) { + if (x->fp_class == y->fp_class) { + fe->fe_cx |= FPSCR_VXIDI; + return (fpu_newnan(fe)); + } + DUMPFPN(FPE_REG, x); + return (x); + } + if (ISZERO(x)) { + fe->fe_cx |= FPSCR_ZX; + if (x->fp_class == y->fp_class) { + fe->fe_cx |= FPSCR_VXZDZ; + return (fpu_newnan(fe)); + } + DUMPFPN(FPE_REG, x); + return (x); + } + + /* all results at this point use XOR of operand signs */ + x->fp_sign ^= y->fp_sign; + if (ISINF(y)) { + x->fp_class = FPC_ZERO; + DUMPFPN(FPE_REG, x); + return (x); + } + if (ISZERO(y)) { + fe->fe_cx = FPSCR_ZX; + x->fp_class = FPC_INF; + DUMPFPN(FPE_REG, x); + return (x); + } + + /* + * Macros for the divide. See comments at top for algorithm. + * Note that we expand R, D, and Y here. + */ + +#define SUBTRACT /* D = R - Y */ \ + FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ + FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) + +#define NONNEGATIVE /* D >= 0 */ \ + ((int)d0 >= 0) + +#ifdef FPU_SHL1_BY_ADD +#define SHL1 /* R <<= 1 */ \ + FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ + FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) +#else +#define SHL1 \ + r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ + r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 +#endif + +#define LOOP /* do ... while (bit >>= 1) */ \ + do { \ + SHL1; \ + SUBTRACT; \ + if (NONNEGATIVE) { \ + q |= bit; \ + r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ + } \ + } while ((bit >>= 1) != 0) + +#define WORD(r, i) /* calculate r->fp_mant[i] */ \ + q = 0; \ + bit = 1 << 31; \ + LOOP; \ + (x)->fp_mant[i] = q + + /* Setup. Note that we put our result in x. 
*/ + r0 = x->fp_mant[0]; + r1 = x->fp_mant[1]; + r2 = x->fp_mant[2]; + r3 = x->fp_mant[3]; + y0 = y->fp_mant[0]; + y1 = y->fp_mant[1]; + y2 = y->fp_mant[2]; + y3 = y->fp_mant[3]; + + bit = FP_1; + SUBTRACT; + if (NONNEGATIVE) { + x->fp_exp -= y->fp_exp; + r0 = d0, r1 = d1, r2 = d2, r3 = d3; + q = bit; + bit >>= 1; + } else { + x->fp_exp -= y->fp_exp + 1; + q = 0; + } + LOOP; + x->fp_mant[0] = q; + WORD(x, 1); + WORD(x, 2); + WORD(x, 3); + x->fp_sticky = r0 | r1 | r2 | r3; + + DUMPFPN(FPE_REG, x); + return (x); +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_emu.c src/sys/arch/sh3/fpu/fpu_emu.c --- src.orig/sys/arch/sh3/fpu/fpu_emu.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_emu.c 2007-02-02 16:09:53.000000000 +0900 @@ -0,0 +1,665 @@ +/* $NetBSD$ */ + +/* + * Copyright 2001 Wasabi Systems, Inc. + * All rights reserved. + * + * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the NetBSD Project by + * Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + * or promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/signal.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+
+#include <machine/fpu.h>
+#include <machine/reg.h>
+#ifdef DDB
+#include <machine/db_machdep.h>
+#endif
+
+#include <sh3/fpu/fpu_emu.h>
+#include <sh3/fpu/fpu_extern.h>
+
+#ifdef DEBUG
+int fpe_debug = 0;
+
+/*
+ * Dump a `fpn' structure.
+ */
+void
+fpu_dumpfpn(struct fpn *fp)
+{
+	static const char *class[] = {
+		"SNAN", "QNAN", "ZERO", "NUM", "INF"
+	};
+
+	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
+	    fp->fp_sign ? '-' : ' ',
+	    fp->fp_mant[0], fp->fp_mant[1],
+	    fp->fp_mant[2], fp->fp_mant[3],
+	    fp->fp_exp);
+}
+#endif
+
+/*
+ * fpu_execute returns the following error numbers (0 = no error):
+ */
+#define	FPE		1	/* take a floating point exception */
+#define	NOTFPU		2	/* not an FPU instruction */
+#define	FAULT		3
+
+
+/*
+ * Emulate a floating-point instruction.
+ * Return zero for success, else signal number.
+ * (Typically: zero, SIGFPE, SIGILL, SIGSEGV)
+ */
+int
+fpu_emulate(struct trapframe *frame, struct fpreg *fpf)
+{
+	static uint16_t insn;
+	static struct fpemu fe;
+	static int lastill = 0;
+	int sig;
+
+	fe.fe_fpstate = fpf;
+	fe.fe_cx = 0;
+	fe.fe_rm = fpf->fpr_fpscr & FPSCR_RM;
+
+	if (copyin((void *)frame->tf_spc, &insn, sizeof(insn))) {
+#ifdef DEBUG
+		printf("fpu_emulate: fault reading opcode\n");
+#endif
+		return SIGSEGV;
+	}
+
+	DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n",
+	    insn, (void *)frame->tf_spc));
+
+	sig = 0;
+	switch (fpu_execute(frame, &fe, &insn)) {
+	case 0:
+		DPRINTF(FPE_EX, ("fpu_emulate: success\n"));
+		frame->tf_spc += 2;
+		break;
+
+	case FPE:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n"));
+		sig = SIGFPE;
+		break;
+
+	case FAULT:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n"));
+		sig = SIGSEGV;
+		break;
+
+	case NOTFPU:
+	default:
+		DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n"));
+#ifdef DEBUG
+		if (fpe_debug & FPE_EX) {
+			printf("fpu_emulate: illegal insn %x at %p:",
+			    insn, (void *)frame->tf_spc);
+#ifdef DDB
+			db_disasm((db_addr_t)(frame->tf_spc), 0);
+#endif
+		}
+#endif
+		/*
+		 * XXXX retry an illegal insn once due to cache issues.
+		 */
+		if (lastill == frame->tf_spc) {
+			sig = SIGILL;
+#if defined(DEBUG) && defined(DDB)
+			if (fpe_debug & FPE_EX)
+				Debugger();
+#endif
+		}
+		lastill = frame->tf_spc;
+		break;
+	}
+
+	return sig;
+}
+
+/* trapframe register offset table */
+static const int tf_regoff[16] = {
+	offsetof(struct trapframe, tf_r0),
+	offsetof(struct trapframe, tf_r1),
+	offsetof(struct trapframe, tf_r2),
+	offsetof(struct trapframe, tf_r3),
+	offsetof(struct trapframe, tf_r4),
+	offsetof(struct trapframe, tf_r5),
+	offsetof(struct trapframe, tf_r6),
+	offsetof(struct trapframe, tf_r7),
+	offsetof(struct trapframe, tf_r8),
+	offsetof(struct trapframe, tf_r9),
+	offsetof(struct trapframe, tf_r10),
+	offsetof(struct trapframe, tf_r11),
+	offsetof(struct trapframe, tf_r12),
+	offsetof(struct trapframe, tf_r13),
+	offsetof(struct trapframe, tf_r14),
+	offsetof(struct trapframe, tf_r15)
+};
+#define	tf_reg(tf, r)	(*(u_int *)((char *)(tf) + tf_regoff[(r)]))
+
+/* reject an odd register number for a double operand; uses `type' from scope */
+#define	check_xdreg(reg) \
+do { \
+	if (type == FTYPE_DBL) { \
+		if ((reg) & 1) { \
+			return NOTFPU; \
+		} \
+	} \
+} while (/*CONSTCOND*/0)
+
+
+/*
+ * Execute an FPU instruction (one that runs entirely in the FPU).
+ * On return, fs->fpr_fpscr will be modified to reflect the setting
+ * the hardware would have left.
+ *
+ * Note that we do not catch all illegal opcodes, so you can, for instance,
+ * multiply two integers this way.
+ */
+int
+fpu_execute(struct trapframe *tf, struct fpemu *fe, uint16_t *insn)
+{
+	union {
+		double d;
+		int i;
+		int a[2];
+	} buf;
+	struct fpn *fp;
+	struct fpreg *fs;
+	int *a;
+	vaddr_t addr;
+	int type, size, bank;
+	int ra, rb, rt, real_ra, real_rb;
+	int res, sett, t;
+	int fsr, cx, enable;
+	uint16_t instr = *insn;
+
+#if defined(DDB) && defined(DEBUG)
+	if (fpe_debug & FPE_EX) {
+		db_addr_t loc = tf->tf_spc;
+
+		printf("Trying to emulate: %p ", (void *)loc);
+		db_disasm(loc, 0);
+	}
+#endif
+
+	/* Setup work. */
+	fp = NULL;
+	fs = fe->fe_fpstate;
+	fe->fe_fpscr = fs->fpr_fpscr;
+
+	type = (fe->fe_fpscr & FPSCR_PR) ? FTYPE_DBL : FTYPE_SNG;
+	size = (fe->fe_fpscr & FPSCR_SZ) ? FSIZE_DBL : FSIZE_SNG;
+	bank = (fe->fe_fpscr & FPSCR_FR);
+	real_ra = ra = (instr >> 8) & 0xf;
+	real_rb = rb = (instr >> 4) & 0xf;
+	if (bank) {
+		ra += FPREGS_PER_BANK;
+		rb += FPREGS_PER_BANK;
+	}
+	rt = FPREG_INVALID;
+	sett = 0;
+	t = 0;
+
+	/*
+	 * `Decode' and execute instruction.
+	 */
+
+	switch ((instr >> 12) & 0xf) {
+	case 0x0:
+		switch (instr & 0xff) {
+		case 0x5a:	/* STS FPUL,Rn */
+			tf_reg(tf, real_ra) = fs->fpr_fpul;
+			break;
+		case 0x6a:	/* STS FPSCR,Rn */
+			tf_reg(tf, real_ra) = fe->fe_fpscr;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	case 0x4:
+		switch (instr & 0xff) {
+		case 0x52:	/* STS.L FPUL,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - 4;
+			buf.i = fs->fpr_fpul;
+			if (copyout(&buf.i, (void *)addr, sizeof(buf.i)))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0x56:	/* LDS.L @Rm+,FPUL */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (copyin((void *)addr, &buf.i, sizeof(buf.i)))
+				return FAULT;
+			fs->fpr_fpul = buf.i;
+			tf_reg(tf, real_ra) = addr + 4;
+			break;
+		case 0x5a:	/* LDS Rm,FPUL */
+			fs->fpr_fpul = tf_reg(tf, real_ra);
+			break;
+		case 0x62:	/* STS.L FPSCR,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - 4;
+			buf.i = fe->fe_fpscr;
+			if (copyout(&buf.i, (void *)addr, sizeof(buf.i)))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0x66:	/* LDS.L @Rm+,FPSCR */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (copyin((void *)addr, &buf.i, sizeof(buf.i)))
+				return FAULT;
+			fe->fe_fpscr = buf.i & FPSCR_MASK;
+			tf_reg(tf, real_ra) = addr + 4;
+			break;
+		case 0x6a:	/* LDS Rm,FPSCR */
+			fe->fe_fpscr = tf_reg(tf, real_ra) & FPSCR_MASK;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	case 0xf:
+		switch (instr & 0xf) {
+		case 0x0:	/* FADD FRm,FRn / FADD DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FADD\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_add(fe);
+			rt = ra;
+			break;
+		case 0x1:	/* FSUB FRm,FRn / FSUB DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_sub(fe);
+			rt = ra;
+			break;
+		case 0x2:	/* FMUL FRm,FRn / FMUL DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_mul(fe);
+			rt = ra;
+			break;
+		case 0x3:	/* FDIV FRm,FRn / FDIV DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fp = fpu_div(fe);
+			rt = ra;
+			break;
+		case 0x4:	/* FCMP/EQ FRm,FRn / FCMP/EQ DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FCMP/EQ\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			res = fpu_compare(fe);
+			if (res == FCMP_INVALID)
+				fe->fe_cx |= FP_V_BIT;
+			t = (res == FCMP_EQ) ? PSL_TBIT : 0;
+			sett = 1;
+			break;
+		case 0x5:	/* FCMP/GT FRm,FRn / FCMP/GT DRm,DRn */
+			DPRINTF(FPE_INSN, ("fpu_execute: FCMP/GT\n"));
+			check_xdreg(ra|rb);
+			fpu_explode(fe, &fe->fe_f1, type, ra);
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			res = fpu_compare(fe);
+			if (res == FCMP_INVALID || res == FCMP_UO)
+				fe->fe_cx |= FP_V_BIT;
+			t = (res == FCMP_GT) ? PSL_TBIT : 0;
+			sett = 1;
+			break;
+		case 0x6: /* FMOV.S @(R0,Rm),FRn / FMOV @(R0,Rm),DRn / FMOV @(R0,Rm),XDn */
+			addr = tf->tf_r0 + (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			break;
+		case 0x7: /* FMOV.S FRm,@(R0,Rn) / FMOV DRm,@(R0,Rn) / FMOV XDm,@(R0,Rn) */
+			addr = tf->tf_r0 + (vaddr_t)tf_reg(tf, real_ra);
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[DBL_LOWORD] = fs->fpr_fr[rb];
+				buf.a[DBL_HIWORD] = fs->fpr_fr[rb + 1];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			break;
+		case 0x8: /* FMOV.S @Rm,FRn / FMOV @Rm,DRn / FMOV @Rm,XDn */
+			addr = (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			break;
+		case 0x9: /* FMOV.S @Rm+,FRn / FMOV @Rm+,DRn / FMOV @Rm+,XDn */
+			addr = (vaddr_t)tf_reg(tf, real_rb);
+			if (copyin((void *)addr, &buf, size))
+				return FAULT;
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = buf.i;
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				a = &fs->fpr_fr[ra];
+				a[DBL_LOWORD] = buf.a[0];
+				a[DBL_HIWORD] = buf.a[1];
+			}
+			tf_reg(tf, real_rb) = addr + size;
+			break;
+		case 0xa: /* FMOV.S FRm,@Rn / FMOV DRm,@Rn / FMOV XDm,@Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra);
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[DBL_LOWORD] = fs->fpr_fr[rb];
+				buf.a[DBL_HIWORD] = fs->fpr_fr[rb + 1];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			break;
+		case 0xb: /* FMOV.S FRm,@-Rn / FMOV DRm,@-Rn / FMOV XDm,@-Rn */
+			addr = (vaddr_t)tf_reg(tf, real_ra) - size;
+			if (size == FSIZE_SNG) {
+				buf.i = fs->fpr_fr[rb];
+			} else {
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				buf.a[DBL_LOWORD] = fs->fpr_fr[rb];
+				buf.a[DBL_HIWORD] = fs->fpr_fr[rb + 1];
+			}
+			if (copyout(&buf, (void *)addr, size))
+				return FAULT;
+			tf_reg(tf, real_ra) = addr;
+			break;
+		case 0xc: /* FMOV FRm,FRn / FMOV DRm,DRn / FMOV XDm,DRn / FMOV DRm,XDn / FMOV XDm,XDn */
+			if (size == FSIZE_SNG) {
+				fs->fpr_fr[ra] = fs->fpr_fr[rb];
+			} else {
+				if (ra & 1)
+					ra = (ra ^ FP_BANK_BIT) & ~1; /* XDn */
+				if (rb & 1)
+					rb = (rb ^ FP_BANK_BIT) & ~1; /* XDm */
+				fs->fpr_fr[ra] = fs->fpr_fr[rb];
+				fs->fpr_fr[ra + 1] = fs->fpr_fr[rb + 1];
+			}
+			break;
+		case 0xd:
+			switch ((instr >> 4) & 0xf) {
+			case 0x0: /* FSTS FPUL,FRn */
+				fs->fpr_fr[ra] = fs->fpr_fpul;
+				break;
+			case 0x1: /* FLDS FRm,FPUL */
+				fs->fpr_fpul = fs->fpr_fr[ra];
+				break;
+			case 0x2: /* FLOAT FPUL,FRn / FLOAT FPUL,DRn */
+				fpu_explode(fe, &fe->fe_f1, FTYPE_INT,
+				    FPREG_FPUL);
+				fp = &fe->fe_f1;
+				rt = ra;
+				break;
+			case 0x3: /* FTRC FRm,FPUL / FTRC DRm,FPUL */
+				fpu_explode(fe, &fe->fe_f1, type, ra);
+				fp = &fe->fe_f1;
+				rt = FPREG_FPUL;
+				if (type == FTYPE_DBL)
+					fe->fe_rm = RM_RZ;
+				type = FTYPE_INT;
+				break;
+			case 0x4: /* FNEG FRn / FNEG DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FNEG\n"));
+				check_xdreg(ra);
+				fs->fpr_fr[ra] ^= (1 << 31);
+				break;
+			case 0x5: /* FABS FRn / FABS DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
+				check_xdreg(ra);
+				fs->fpr_fr[ra] &= ~(1 << 31);
+				break;
+			case 0x6: /* FSQRT FRn / FSQRT DRn */
+				DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n"));
+				check_xdreg(ra);
+				/* fpu_sqrt() takes its operand in fe_f2 */
+				fpu_explode(fe, &fe->fe_f2, type, ra);
+				fp = fpu_sqrt(fe);
+				rt = ra;
+				break;
+			case 0x8: /* FLDI0 FRn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				fs->fpr_fr[ra] = 0x00000000;
+				break;
+			case 0x9: /* FLDI1 FRn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				fs->fpr_fr[ra] = 0x3f800000;
+				break;
+			case 0xa: /* FCNVSD FPUL,DRn */
+				if (type != FTYPE_DBL)
+					return NOTFPU;
+				fpu_explode(fe, &fe->fe_f1, FTYPE_SNG,
+				    FPREG_FPUL);
+				fp = &fe->fe_f1;
+				rt = ra;
+				break;
+			case 0xb: /* FCNVDS DRm,FPUL */
+				if (type != FTYPE_DBL)
+					return NOTFPU;
+				fpu_explode(fe, &fe->fe_f1, type, ra);
+				fp = &fe->fe_f1;
+				rt = FPREG_FPUL;
+				type = FTYPE_SNG;
+				break;
+			case 0xe: /* FIPR FVm,FVn */
+				if (type != FTYPE_SNG)
+					return NOTFPU;
+				return NOTFPU; /* XXX */
+				break;
+			case 0xf:
+				switch ((instr >> 8) & 0xf) {
+				case 0x3: /* FSCHG */
+					fe->fe_fpscr ^= FPSCR_SZ;
+					break;
+				case 0xb: /* FRCHG */
+					fe->fe_fpscr ^= FPSCR_FR;
+					break;
+
+				case 0x1: case 0x5: case 0x9: case 0xd:
+					/* FTRV XMTRX,FVn */
+					return NOTFPU; /* XXX */
+					break;
+
+				default:
+					return NOTFPU;
+				}
+				break;
+
+			default:
+				return NOTFPU;
+			}
+			break;
+		case 0xe: /* FMAC FR0,FRm,FRn */
+			if (type != FTYPE_SNG)
+				return NOTFPU;
+			fpu_explode(fe, &fe->fe_f1, type,
+			    bank ? FPREGS_PER_BANK : 0);	/* FR0 */
+			fpu_explode(fe, &fe->fe_f2, type, rb);
+			fe->fe_f1 = *fpu_mul(fe);		/* FR0 * FRm */
+			fpu_explode(fe, &fe->fe_f2, type, ra);
+			fp = fpu_add(fe);
+			rt = ra;
+			break;
+
+		default:
+			return NOTFPU;
+		}
+		break;
+
+	default:
+		return NOTFPU;
+	}
+
+	if ((fp == NULL) && !sett) {
+		/* write back any FPSCR update (LDS, FSCHG, FRCHG) */
+		fs->fpr_fpscr = fe->fe_fpscr;
+		return 0;
+	}
+
+	/*
+	 * ALU operation is complete.  Collapse the result and then check
+	 * for exceptions.  If we got any, and they are enabled, do not
+	 * alter the destination register, just stop with an exception.
+	 * Otherwise set new current exceptions and accrue.
+	 */
+
+	if (fp)
+		fpu_implode(fe, fp, type, (u_int *)buf.a);
+
+	fsr = fe->fe_fpscr & ~(FP_CAUSE_MASK << FP_CAUSE_SHIFT);
+	enable = FP_E_BIT | ((fsr >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK);
+
+	/* V->E->Z->O/U/I */
+	if (fe->fe_cx & FP_V_BIT)
+		fe->fe_cx = FP_V_BIT;
+	else if (fe->fe_cx & FP_E_BIT)
+		fe->fe_cx = FP_E_BIT;
+	else if (fe->fe_cx & FP_Z_BIT)
+		fe->fe_cx = FP_Z_BIT;
+
+	cx = fe->fe_cx & enable;
+	fsr |= (cx & FP_CAUSE_MASK) << FP_CAUSE_SHIFT;
+	fsr |= (fe->fe_cx & FP_FLAG_MASK) << FP_FLAG_SHIFT;
+	fs->fpr_fpscr = fsr;
+
+	if (cx != 0)
+		return FPE;
+
+	if (sett) {
+		tf->tf_ssr &= ~PSL_TBIT;
+		tf->tf_ssr |= t;
+	}
+
+	if (fp) {
+		if ((rt >= 0) && (rt < 32)) {
+			fs->fpr_fr[rt] = buf.a[0];
+			if (type == FTYPE_DBL)
+				fs->fpr_fr[rt + 1] = buf.a[1];
+		} else if (rt == FPREG_FPUL) {
+			fs->fpr_fpul = buf.a[0];
+		} else {
+			panic("fpu_execute(): rt = %d", rt);
+		}
+	}
+	return 0;	/* success */
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_emu.h src/sys/arch/sh3/fpu/fpu_emu.h
--- src.orig/sys/arch/sh3/fpu/fpu_emu.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_emu.h	2007-02-02 15:44:05.000000000 +0900
@@ -0,0 +1,220 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fpu_emu.h 8.1 (Berkeley) 6/11/93 + */ + +/* + * Floating point emulator (tailored for SPARC, but structurally + * machine-independent). + * + * Floating point numbers are carried around internally in an `expanded' + * or `unpacked' form consisting of: + * - sign + * - unbiased exponent + * - mantissa (`1.' + 112-bit fraction + guard + round) + * - sticky bit + * Any implied `1' bit is inserted, giving a 113-bit mantissa that is + * always nonzero. Additional low-order `guard' and `round' bits are + * scrunched in, making the entire mantissa 115 bits long. This is divided + * into four 32-bit words, with `spare' bits left over in the upper part + * of the top word (the high bits of fp_mant[0]). An internal `exploded' + * number is thus kept within the half-open interval [1.0,2.0) (but see + * the `number classes' below). This holds even for denormalized numbers: + * when we explode an external denorm, we normalize it, introducing low-order + * zero bits, so that the rest of the code always sees normalized values. + * + * Note that a number of our algorithms use the `spare' bits at the top. + * The most demanding algorithm---the one for sqrt---depends on two such + * bits, so that it can represent values up to (but not including) 8.0, + * and then it needs a carry on top of that, so that we need three `spares'. + * + * The sticky-word is 32 bits so that we can use `OR' operators to goosh + * whole words from the mantissa into it. + * + * All operations are done in this internal extended precision. 
According + * to Hennesey & Patterson, Appendix A, rounding can be repeated---that is, + * it is OK to do a+b in extended precision and then round the result to + * single precision---provided single, double, and extended precisions are + * `far enough apart' (they always are), but we will try to avoid any such + * extra work where possible. + */ +struct fpn { + int fp_class; /* see below */ + int fp_sign; /* 0 => positive, 1 => negative */ + int fp_exp; /* exponent (unbiased) */ + int fp_sticky; /* nonzero bits lost at right end */ + u_int fp_mant[4]; /* 115-bit mantissa */ +}; + +#define FP_NMANT 115 /* total bits in mantissa (incl g,r) */ +#define FP_NG 2 /* number of low-order guard bits */ +#define FP_LG ((FP_NMANT - 1) & 31) /* log2(1.0) for fp_mant[0] */ +#define FP_LG2 ((FP_NMANT - 1) & 63) /* log2(1.0) for fp_mant[0] and fp_mant[1] */ +#define FP_QUIETBIT (1 << (FP_LG - 1)) /* Quiet bit in NaNs (0.5) */ +#define FP_1 (1 << FP_LG) /* 1.0 in fp_mant[0] */ +#define FP_2 (1 << (FP_LG + 1)) /* 2.0 in fp_mant[0] */ + +/* + * Number classes. Since zero, Inf, and NaN cannot be represented using + * the above layout, we distinguish these from other numbers via a class. + * In addition, to make computation easier and to follow Appendix N of + * the SPARC Version 8 standard, we give each kind of NaN a separate class. + */ +#define FPC_SNAN -2 /* signalling NaN (sign irrelevant) */ +#define FPC_QNAN -1 /* quiet NaN (sign irrelevant) */ +#define FPC_ZERO 0 /* zero (sign matters) */ +#define FPC_NUM 1 /* number (sign matters) */ +#define FPC_INF 2 /* infinity (sign matters) */ + +#define ISSNAN(fp) ((fp)->fp_class == FPC_SNAN) +#define ISQNAN(fp) ((fp)->fp_class == FPC_QNAN) +#define ISNAN(fp) ((fp)->fp_class < 0) +#define ISZERO(fp) ((fp)->fp_class == 0) +#define ISINF(fp) ((fp)->fp_class == FPC_INF) + +/* + * ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points + * to the `more significant' operand for our purposes. Appendix N says that + * the result of a computation involving two numbers are: + * + * If both are SNaN: operand 2, converted to Quiet + * If only one is SNaN: the SNaN operand, converted to Quiet + * If both are QNaN: operand 2 + * If only one is QNaN: the QNaN operand + * + * In addition, in operations with an Inf operand, the result is usually + * Inf. The class numbers are carefully arranged so that if + * (unsigned)class(op1) > (unsigned)class(op2) + * then op1 is the one we want; otherwise op2 is the one we want. + */ +#define ORDER(x, y) { \ + if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \ + SWAP(x, y); \ +} +#define SWAP(x, y) { \ + struct fpn *swap; \ + swap = (x), (x) = (y), (y) = swap; \ +} + +/* + * Emulator state. + */ +struct fpemu { + struct fpreg *fe_fpstate; /* registers, etc */ + int fe_fpscr; /* fpscr copy (modified during op) */ + int fe_cx; /* keep track of exceptions */ + int fe_rm; /* round mode */ + struct fpn fe_f1; /* operand 1 */ + struct fpn fe_f2; /* operand 2, if required */ + struct fpn fe_f3; /* available storage for result */ +}; + +/* + * Arithmetic functions. + * Each of these may modify its inputs (f1,f2) and/or the temporary. + * Each returns a pointer to the result and/or sets exceptions. + */ +struct fpn *fpu_add(struct fpemu *); +#define fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, fpu_add(fe)) +struct fpn *fpu_mul(struct fpemu *); +struct fpn *fpu_div(struct fpemu *); +struct fpn *fpu_sqrt(struct fpemu *); + +/* + * Other functions. + */ + +/* Perform a compare instruction (with or without unordered exception). 
*/ +int fpu_compare(struct fpemu *); + +/* compare result */ +#define FCMP_EQ 0 +#define FCMP_LT 1 +#define FCMP_GT 2 +#define FCMP_UO 3 /* Unorderd */ +#define FCMP_INVALID 4 + +/* Build a new Quiet NaN (sign=0, frac=all 1's). */ +struct fpn *fpu_newnan(struct fpemu *); + +/* + * Shift a number right some number of bits, taking care of round/sticky. + * Note that the result is probably not a well-formed number (it will lack + * the normal 1-bit mant[0]&FP_1). + */ +int fpu_shr(struct fpn *, int); + +void fpu_explode(struct fpemu *, struct fpn *, int, int); +void fpu_implode(struct fpemu *, struct fpn *, int, u_int *); + +/* FPU data types. */ +#define FTYPE_INT 0 /* data = 32-bit signed integer */ +#define FTYPE_SNG 1 /* data = 32-bit float */ +#define FTYPE_DBL 2 /* data = 64-bit double */ + +/* FPU fmov size */ +#define FSIZE_SNG 4 /* 32bit */ +#define FSIZE_DBL 8 /* 32bit pair (64bit) */ + +/* FPU register index */ +#define FPREG_INVALID (-1) +#define FPREG_FPUL (32) + +#if _BYTE_ORDER == _LITTLE_ENDIAN +#define DBL_LOWORD 1 +#define DBL_HIWORD 0 +#else +#define DBL_LOWORD 0 +#define DBL_HIWORD 1 +#endif + +#ifdef DEBUG +#define FPE_EX 0x1 +#define FPE_INSN 0x2 +#define FPE_OP 0x4 +#define FPE_REG 0x8 +extern int fpe_debug; +void fpu_dumpfpn(struct fpn *); +#define DPRINTF(x, y) if (fpe_debug & (x)) printf y +#define DUMPFPN(x, f) if (fpe_debug & (x)) fpu_dumpfpn((f)) +#else +#define DPRINTF(x, y) +#define DUMPFPN(x, f) +#endif diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_explode.c src/sys/arch/sh3/fpu/fpu_explode.c --- src.orig/sys/arch/sh3/fpu/fpu_explode.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_explode.c 2007-02-02 13:00:30.000000000 +0900 @@ -0,0 +1,243 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fpu_explode.c 8.1 (Berkeley) 6/11/93 + */ + +/* + * FPU subroutines: `explode' the machine's `packed binary' format numbers + * into our internal format. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* + * N.B.: in all of the following, we assume the FP format is + * + * --------------------------- + * | s | exponent | fraction | + * --------------------------- + * + * (which represents -1**s * 1.fraction * 2**exponent), so that the + * sign bit is way at the top (bit 31), the exponent is next, and + * then the remaining bits mark the fraction. A zero exponent means + * zero or denormalized (0.fraction rather than 1.fraction), and the + * maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN. + * + * Since the sign bit is always the topmost bit---this holds even for + * integers---we set that outside all the *tof functions. Each function + * returns the class code for the new number (but note that we use + * FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate). + */ + +/* + * int -> fpn. + */ +int +fpu_itof(struct fpn *fp, u_int i) +{ + + if (i == 0) + return (FPC_ZERO); + /* + * The value FP_1 represents 2^FP_LG, so set the exponent + * there and let normalization fix it up. Convert negative + * numbers to sign-and-magnitude. Note that this relies on + * fpu_norm()'s handling of `supernormals'; see fpu_subr.c. + */ + fp->fp_exp = FP_LG; + fp->fp_mant[0] = (int)i < 0 ? -i : i; + fp->fp_mant[1] = 0; + fp->fp_mant[2] = 0; + fp->fp_mant[3] = 0; + fpu_norm(fp); + return (FPC_NUM); +} + +#define mask(nbits) ((1L << (nbits)) - 1) + +/* + * All external floating formats convert to internal in the same manner, + * as defined here. Note that only normals get an implied 1.0 inserted. + */ +#define FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \ + if (exp == 0) { \ + if (allfrac == 0) \ + return (FPC_ZERO); \ + fp->fp_exp = 1 - expbias; \ + fp->fp_mant[0] = f0; \ + fp->fp_mant[1] = f1; \ + fp->fp_mant[2] = f2; \ + fp->fp_mant[3] = f3; \ + fpu_norm(fp); \ + return (FPC_NUM); \ + } \ + if (exp == (2 * expbias + 1)) { \ + if (allfrac == 0) \ + return (FPC_INF); \ + fp->fp_mant[0] = f0; \ + fp->fp_mant[1] = f1; \ + fp->fp_mant[2] = f2; \ + fp->fp_mant[3] = f3; \ + return (FPC_QNAN); \ + } \ + fp->fp_exp = exp - expbias; \ + fp->fp_mant[0] = FP_1 | f0; \ + fp->fp_mant[1] = f1; \ + fp->fp_mant[2] = f2; \ + fp->fp_mant[3] = f3; \ + return (FPC_NUM) + +/* + * 32-bit single precision -> fpn. + * We assume a single occupies at most (64-FP_LG) bits in the internal + * format: i.e., needs at most fp_mant[0] and fp_mant[1]. 
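+ * As a concrete check of the arithmetic (an illustration, not part of
+ * the original comment): FP_NMANT = 115 gives FP_LG = 18, and IEEE
+ * single precision has SNG_FRACBITS = 23, so SNG_SHIFT = 5 below and
+ *
+ *	f0 = frac >> 5;		-- the top 18 fraction bits
+ *	f1 = frac << 27;	-- the low 5 bits, left-justified
+ *
+ * which places the implied 1 (FP_1 = 1 << 18) immediately above the
+ * fraction in fp_mant[0].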
+ */
+int
+fpu_stof(struct fpn *fp, u_int i)
+{
+	int exp;
+	u_int frac, f0, f1;
+#define	SNG_SHIFT	(SNG_FRACBITS - FP_LG)
+
+	exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS);
+	frac = i & mask(SNG_FRACBITS);
+	f0 = frac >> SNG_SHIFT;
+	f1 = frac << (32 - SNG_SHIFT);
+	FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0);
+}
+
+/*
+ * 64-bit double -> fpn.
+ * We assume this uses at most (96-FP_LG) bits.
+ */
+int
+fpu_dtof(struct fpn *fp, u_int i, u_int j)
+{
+	int exp;
+	u_int frac, f0, f1, f2;
+#define	DBL_SHIFT	(DBL_FRACBITS - 32 - FP_LG)
+
+	exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS);
+	frac = i & mask(DBL_FRACBITS - 32);
+	f0 = frac >> DBL_SHIFT;
+	f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT);
+	f2 = j << (32 - DBL_SHIFT);
+	frac |= j;
+	FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0);
+}
+
+/*
+ * Explode the contents of a register / regpair / regquad.
+ * If the input is a signalling NaN, an NV (invalid) exception
+ * will be set.  (Note that nothing but NV can occur until ALU
+ * operations are performed.)
+ */
+void
+fpu_explode(struct fpemu *fe, struct fpn *fp, int type, int reg)
+{
+	u_int s, *space;
+
+	if (reg >= 0 && reg < 32) {
+		space = (u_int *)&fe->fe_fpstate->fpr_fr[reg];
+	} else if (reg == FPREG_FPUL) {
+		space = (u_int *)&fe->fe_fpstate->fpr_fpul;
+	} else {
+		panic("fpu_explode(): reg = %d", reg);
+		/*NOTREACHED*/
+	}
+	s = space[0];
+
+	fp->fp_sign = s >> 31;
+	fp->fp_sticky = 0;
+	switch (type) {
+	case FTYPE_INT:
+		s = fpu_itof(fp, s);
+		break;
+
+	case FTYPE_SNG:
+		s = fpu_stof(fp, s);
+		break;
+
+	case FTYPE_DBL:
+		if (reg == FPREG_FPUL) {
+			panic("fpu_explode(): FTYPE_DBL: reg == FPREG_FPUL");
+			/*NOTREACHED*/
+		}
+		s = fpu_dtof(fp, s, space[1]);
+		break;
+
+	default:
+		panic("fpu_explode: invalid type %d", type);
+		/*NOTREACHED*/
+	}
+
+	if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
+		/*
+		 * Input is a signalling NaN.  All operations that return
+		 * an input NaN operand put it through a ``NaN conversion'',
+		 * which basically just means ``turn on the quiet bit''.
+		 * We do this here so that all NaNs internally look quiet
+		 * (we can tell signalling ones by their class).
+		 */
+		fp->fp_mant[0] |= FP_QUIETBIT;
+		fe->fe_cx = FP_V_BIT;	/* assert invalid operand */
+		s = FPC_SNAN;
+	}
+	fp->fp_class = s;
+
+	DPRINTF(FPE_REG, ("fpu_explode: %%%c%d => ",
+		((type == FTYPE_INT) ? 'i' :
+		((type == FTYPE_SNG) ? 's' :
+		((type == FTYPE_DBL) ? 'd' : '?'))), reg));
+	DUMPFPN(FPE_REG, fp);
+	DPRINTF(FPE_REG, ("\n"));
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_extern.h src/sys/arch/sh3/fpu/fpu_extern.h
--- src.orig/sys/arch/sh3/fpu/fpu_extern.h	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_extern.h	2007-01-31 14:37:50.000000000 +0900
@@ -0,0 +1,81 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 1995 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +struct proc; +struct fpreg; +struct trapframe; +struct fpemu; +struct fpn; + +/* fpu.c */ +int fpu_emulate(struct trapframe *, struct fpreg *); +int fpu_execute(struct trapframe *, struct fpemu *, uint16_t *); + +/* fpu_add.c */ +struct fpn *fpu_add(struct fpemu *); + +/* fpu_compare.c */ +void fpu_compare(struct fpemu *, int); + +/* fpu_div.c */ +struct fpn *fpu_div(struct fpemu *); + +/* fpu_explode.c */ +int fpu_itof(struct fpn *, u_int); +int fpu_xtof(struct fpn *, u_int64_t); +int fpu_stof(struct fpn *, u_int); +int fpu_dtof(struct fpn *, u_int, u_int); +void fpu_explode(struct fpemu *, struct fpn *, int, int); + +/* fpu_implode.c */ +u_int fpu_ftoi(struct fpemu *, struct fpn *); +u_int fpu_ftox(struct fpemu *, struct fpn *, u_int *); +u_int fpu_ftos(struct fpemu *, struct fpn *); +u_int fpu_ftod(struct fpemu *, struct fpn *, u_int *); +void fpu_implode(struct fpemu *, struct fpn *, int, u_int *); + +/* fpu_mul.c */ +struct fpn *fpu_mul(struct fpemu *); + +/* fpu_sqrt.c */ +struct fpn *fpu_sqrt(struct fpemu *); + +/* fpu_subr.c */ +int fpu_shr(struct fpn *, int); +void fpu_norm(struct fpn *); +struct fpn *fpu_newnan(struct fpemu *); diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_implode.c src/sys/arch/sh3/fpu/fpu_implode.c --- src.orig/sys/arch/sh3/fpu/fpu_implode.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_implode.c 2007-02-01 19:02:05.000000000 +0900 @@ -0,0 +1,375 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93 + */ + +/* + * FPU subroutines: `implode' internal format numbers into the machine's + * `packed binary' format. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include + +#include +#include +#include + +static int round(struct fpemu *, struct fpn *); +static int toinf(struct fpemu *, int); + +/* + * Round a number (algorithm from Motorola MC68882 manual, modified for + * our internal format). Set inexact exception if rounding is required. + * Return true iff we rounded up. + * + * After rounding, we discard the guard and round bits by shifting right + * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). + * This saves effort later. + * + * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's + * responsibility to fix this if necessary. + */ +static int +round(struct fpemu *fe, struct fpn *fp) +{ + u_int m0, m1, m2, m3; + int gr, s; + FPU_DECL_CARRY; + + m0 = fp->fp_mant[0]; + m1 = fp->fp_mant[1]; + m2 = fp->fp_mant[2]; + m3 = fp->fp_mant[3]; + gr = m3 & 3; + s = fp->fp_sticky; + + /* mant >>= FP_NG */ + m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); + m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); + m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); + m0 >>= FP_NG; + + if ((gr | s) == 0) /* result is exact: no rounding needed */ + goto rounddown; + + fe->fe_cx |= FP_I_BIT; /* inexact */ + + /* Go to rounddown to round down; break to round up. */ + switch (fe->fe_rm) { + case RM_NEAREST: + /* + * Round only if guard is set (gr & 2). If guard is set, + * but round & sticky both clear, then we want to round + * but have a tie, so round to even, i.e., add 1 iff odd. + */ + if ((gr & 2) == 0) + goto rounddown; + if ((gr & 1) || fp->fp_sticky || (m3 & 1)) + break; + goto rounddown; + + case RM_ZERO: + default: + /* Round towards zero, i.e., down. 
*/ + goto rounddown; + } + + FPU_ADDS(m3, m3, 1); + FPU_ADDCS(m2, m2, 0); + FPU_ADDCS(m1, m1, 0); + FPU_ADDC(m0, m0, 0); + fp->fp_mant[0] = m0; + fp->fp_mant[1] = m1; + fp->fp_mant[2] = m2; + fp->fp_mant[3] = m3; + return (1); + +rounddown: + fp->fp_mant[0] = m0; + fp->fp_mant[1] = m1; + fp->fp_mant[2] = m2; + fp->fp_mant[3] = m3; + return (0); +} + +/* + * For overflow: return true if overflow is to go to +/-Inf, according + * to the sign of the overflowing result. If false, overflow is to go + * to the largest magnitude value instead. + */ +static int +toinf(struct fpemu *fe, int sign) +{ + int inf; + + /* look at rounding direction */ + switch (fe->fe_rm) { + case RM_NEAREST: /* the nearest value is always Inf */ + inf = 1; + break; + + default: + case RM_ZERO: /* toward 0 => never towards Inf */ + inf = 0; + break; + } + if (inf) + fe->fe_cx |= FP_O_BIT; + return (inf); +} + +/* + * fpn -> int (int value returned as return value). + * + * N.B.: this conversion always rounds towards zero (this is a peculiarity + * of the SPARC instruction set). + */ +u_int +fpu_ftoi(struct fpemu *fe, struct fpn *fp) +{ + u_int i; + int sign, exp; + + sign = fp->fp_sign; + switch (fp->fp_class) { + case FPC_ZERO: + return (0); + + case FPC_NUM: + /* + * If exp >= 2^32, overflow. Otherwise shift value right + * into last mantissa word (this will not exceed 0xffffffff), + * shifting any guard and round bits out into the sticky + * bit. Then ``round'' towards zero, i.e., just set an + * inexact exception if sticky is set (see round()). + * If the result is > 0x80000000, or is positive and equals + * 0x80000000, overflow; otherwise the last fraction word + * is the result. + */ + if ((exp = fp->fp_exp) >= 32) + break; + /* NB: the following includes exp < 0 cases */ + if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) + fe->fe_cx |= FP_U_BIT; + i = fp->fp_mant[3]; + if (i >= ((u_int)0x80000000 + sign)) + break; + return (sign ? -i : i); + + default: /* Inf, qNaN, sNaN */ + break; + } + /* overflow: replace any inexact exception with invalid */ + fe->fe_cx |= FP_O_BIT; + return (0x7fffffff + sign); +} + +/* + * fpn -> single (32 bit single returned as return value). + * We assume <= 29 bits in a single-precision fraction (1.f part). + */ +u_int +fpu_ftos(struct fpemu *fe, struct fpn *fp) +{ + u_int sign = fp->fp_sign << 31; + int exp; + +#define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ +#define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ + + /* Take care of non-numbers first. */ + if (ISNAN(fp)) { + /* + * Preserve upper bits of NaN, per SPARC V8 appendix N. + * Note that fp->fp_mant[0] has the quiet bit set, + * even if it is classified as a signalling NaN. + */ + (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); + exp = SNG_EXP_INFNAN; + goto done; + } + if (ISINF(fp)) + return (sign | SNG_EXP(SNG_EXP_INFNAN)); + if (ISZERO(fp)) + return (sign); + + /* + * Normals (including subnormals). Drop all the fraction bits + * (including the explicit ``implied'' 1 bit) down into the + * single-precision range. If the number is subnormal, move + * the ``implied'' 1 into the explicit range as well, and shift + * right to introduce leading zeroes. Rounding then acts + * differently for normals and subnormals: the largest subnormal + * may round to the smallest normal (1.0 x 2^minexp), or may + * remain subnormal. In the latter case, signal an underflow + * if the result was inexact or if underflow traps are enabled. 
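+	 *
+	 * As a worked illustration (not in the original text): with
+	 * SNG_EXP_BIAS = 127, an input with fp_exp = -127 gives exp = 0
+	 * and takes the subnormal path below, shifting right by
+	 * FP_NMANT - FP_NG - SNG_FRACBITS - 0 = 90 bits, so the fraction
+	 * (plus its two guard bits) ends up right-justified in
+	 * fp_mant[3], ready for round().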
+ * + * Rounding a normal, on the other hand, always produces another + * normal (although either way the result might be too big for + * single precision, and cause an overflow). If rounding a + * normal produces 2.0 in the fraction, we need not adjust that + * fraction at all, since both 1.0 and 2.0 are zero under the + * fraction mask. + * + * Note that the guard and round bits vanish from the number after + * rounding. + */ + if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ + /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ + (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); + if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) + return (sign | SNG_EXP(1) | 0); + fe->fe_cx |= FP_U_BIT; + return (sign | SNG_EXP(0) | fp->fp_mant[3]); + } + /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ + (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); +#ifdef DIAGNOSTIC + if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) + panic("fpu_ftos"); +#endif + if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) + exp++; + if (exp >= SNG_EXP_INFNAN) { + /* overflow to inf or to max single */ + if (toinf(fe, sign)) + return (sign | SNG_EXP(SNG_EXP_INFNAN)); + return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); + } +done: + /* phew, made it */ + return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); +} + +/* + * fpn -> double (32 bit high-order result returned; 32-bit low order result + * left in res[1]). Assumes <= 61 bits in double precision fraction. + * + * This code mimics fpu_ftos; see it for comments. + */ +u_int +fpu_ftod(struct fpemu *fe, struct fpn *fp, u_int *res) +{ + u_int sign = fp->fp_sign << 31; + int exp; + +#define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) +#define DBL_MASK (DBL_EXP(1) - 1) + + if (ISNAN(fp)) { + (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); + exp = DBL_EXP_INFNAN; + goto done; + } + if (ISINF(fp)) { + sign |= DBL_EXP(DBL_EXP_INFNAN); + goto zero; + } + if (ISZERO(fp)) { +zero: res[1] = 0; + return (sign); + } + + if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { + (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); + if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { + res[1] = 0; + return (sign | DBL_EXP(1) | 0); + } + fe->fe_cx |= FP_U_BIT; + exp = 0; + goto done; + } + (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); + if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) + exp++; + if (exp >= DBL_EXP_INFNAN) { + fe->fe_cx |= FPSCR_OX | FPSCR_UX; + if (toinf(fe, sign)) { + res[1] = 0; + return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); + } + res[1] = ~0; + return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); + } +done: + res[1] = fp->fp_mant[3]; + return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); +} + +/* + * Implode an fpn, writing the result into the given space. 
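+ * The space argument points at the destination register image: one
+ * word for FTYPE_INT and FTYPE_SNG, two words for FTYPE_DBL, where
+ * fpu_ftod() returns the high-order word (stored in space[0]) and
+ * leaves the low-order word in space[1].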
+ */
+void
+fpu_implode(struct fpemu *fe, struct fpn *fp, int type, u_int *space)
+{
+
+	switch (type) {
+	case FTYPE_INT:
+		space[0] = fpu_ftoi(fe, fp);
+		DPRINTF(FPE_REG, ("fpu_implode: int %x\n",
+			space[0]));
+		break;
+
+	case FTYPE_SNG:
+		space[0] = fpu_ftos(fe, fp);
+		DPRINTF(FPE_REG, ("fpu_implode: single %x\n",
+			space[0]));
+		break;
+
+	case FTYPE_DBL:
+		space[0] = fpu_ftod(fe, fp, space);
+		DPRINTF(FPE_REG, ("fpu_implode: double %x %x\n",
+			space[0], space[1]));
+		break;
+
+	default:
+		panic("fpu_implode: invalid type %d", type);
+		/*NOTREACHED*/
+	}
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_mul.c src/sys/arch/sh3/fpu/fpu_mul.c
--- src.orig/sys/arch/sh3/fpu/fpu_mul.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/fpu/fpu_mul.c	2007-01-31 12:15:04.000000000 +0900
@@ -0,0 +1,241 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fpu_mul.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Perform an FPU multiply (return x * y).
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+#if defined(DIAGNOSTIC)||defined(DEBUG)
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+
+/*
+ * The multiplication algorithm for normal numbers is as follows:
+ *
+ * The fraction of the product is built in the usual stepwise fashion.
+ * Each step consists of shifting the accumulator right one bit
+ * (maintaining any guard bits) and, if the next bit in y is set,
+ * adding the multiplicand (x) to the accumulator.
Then, in any case, + * we advance one bit leftward in y. Algorithmically: + * + * A = 0; + * for (bit = 0; bit < FP_NMANT; bit++) { + * sticky |= A & 1, A >>= 1; + * if (Y & (1 << bit)) + * A += X; + * } + * + * (X and Y here represent the mantissas of x and y respectively.) + * The resultant accumulator (A) is the product's mantissa. It may + * be as large as 11.11111... in binary and hence may need to be + * shifted right, but at most one bit. + * + * Since we do not have efficient multiword arithmetic, we code the + * accumulator as four separate words, just like any other mantissa. + * We use local variables in the hope that this is faster than memory. + * We keep x->fp_mant in locals for the same reason. + * + * In the algorithm above, the bits in y are inspected one at a time. + * We will pick them up 32 at a time and then deal with those 32, one + * at a time. Note, however, that we know several things about y: + * + * - the guard and round bits at the bottom are sure to be zero; + * + * - often many low bits are zero (y is often from a single or double + * precision source); + * + * - bit FP_NMANT-1 is set, and FP_1*2 fits in a word. + * + * We can also test for 32-zero-bits swiftly. In this case, the center + * part of the loop---setting sticky, shifting A, and not adding---will + * run 32 times without adding X to A. We can do a 32-bit shift faster + * by simply moving words. Since zeros are common, we optimize this case. + * Furthermore, since A is initially zero, we can omit the shift as well + * until we reach a nonzero word. + */ +struct fpn * +fpu_mul(struct fpemu *fe) +{ + struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; + u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m; + int sticky; + FPU_DECL_CARRY; + + /* + * Put the `heavier' operand on the right (see fpu_emu.h). + * Then we will have one of the following cases, taken in the + * following order: + * + * - y = NaN. Implied: if only one is a signalling NaN, y is. + * The result is y. + * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN + * case was taken care of earlier). + * If x = 0, the result is NaN. Otherwise the result + * is y, with its sign reversed if x is negative. + * - x = 0. Implied: y is 0 or number. + * The result is 0 (with XORed sign as usual). + * - other. Implied: both x and y are numbers. + * The result is x * y (XOR sign, multiply bits, add exponents). + */ + DPRINTF(FPE_REG, ("fpu_mul:\n")); + DUMPFPN(FPE_REG, x); + DUMPFPN(FPE_REG, y); + DPRINTF(FPE_REG, ("=>\n")); + + ORDER(x, y); + if (ISNAN(y)) { + y->fp_sign ^= x->fp_sign; + fe->fe_cx |= FPSCR_VXSNAN; + DUMPFPN(FPE_REG, y); + return (y); + } + if (ISINF(y)) { + if (ISZERO(x)) { + fe->fe_cx |= FPSCR_VXIMZ; + return (fpu_newnan(fe)); + } + y->fp_sign ^= x->fp_sign; + DUMPFPN(FPE_REG, y); + return (y); + } + if (ISZERO(x)) { + x->fp_sign ^= y->fp_sign; + DUMPFPN(FPE_REG, x); + return (x); + } + + /* + * Setup. In the code below, the mask `m' will hold the current + * mantissa byte from y. The variable `bit' denotes the bit + * within m. We also define some macros to deal with everything. 
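+	 * For illustration, a single STEP expands to roughly the
+	 * following (each line a multiword operation on the accumulator
+	 * A = a0..a3):
+	 *
+	 *	sticky |= a3 & 1; A >>= 1;	-- SHR1
+	 *	if (bit & m) A += X;		-- ADD, with carries
+	 *	bit <<= 1;
+	 *
+	 * i.e., exactly one iteration of the bitwise loop shown above.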
+ */ + x3 = x->fp_mant[3]; + x2 = x->fp_mant[2]; + x1 = x->fp_mant[1]; + x0 = x->fp_mant[0]; + sticky = a3 = a2 = a1 = a0 = 0; + +#define ADD /* A += X */ \ + FPU_ADDS(a3, a3, x3); \ + FPU_ADDCS(a2, a2, x2); \ + FPU_ADDCS(a1, a1, x1); \ + FPU_ADDC(a0, a0, x0) + +#define SHR1 /* A >>= 1, with sticky */ \ + sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \ + a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1 + +#define SHR32 /* A >>= 32, with sticky */ \ + sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0 + +#define STEP /* each 1-bit step of the multiplication */ \ + SHR1; if (bit & m) { ADD; }; bit <<= 1 + + /* + * We are ready to begin. The multiply loop runs once for each + * of the four 32-bit words. Some words, however, are special. + * As noted above, the low order bits of Y are often zero. Even + * if not, the first loop can certainly skip the guard bits. + * The last word of y has its highest 1-bit in position FP_NMANT-1, + * so we stop the loop when we move past that bit. + */ + if ((m = y->fp_mant[3]) == 0) { + /* SHR32; */ /* unneeded since A==0 */ + } else { + bit = 1 << FP_NG; + do { + STEP; + } while (bit != 0); + } + if ((m = y->fp_mant[2]) == 0) { + SHR32; + } else { + bit = 1; + do { + STEP; + } while (bit != 0); + } + if ((m = y->fp_mant[1]) == 0) { + SHR32; + } else { + bit = 1; + do { + STEP; + } while (bit != 0); + } + m = y->fp_mant[0]; /* definitely != 0 */ + bit = 1; + do { + STEP; + } while (bit <= m); + + /* + * Done with mantissa calculation. Get exponent and handle + * 11.111...1 case, then put result in place. We reuse x since + * it already has the right class (FP_NUM). + */ + m = x->fp_exp + y->fp_exp; + if (a0 >= FP_2) { + SHR1; + m++; + } + x->fp_sign ^= y->fp_sign; + x->fp_exp = m; + x->fp_sticky = sticky; + x->fp_mant[3] = a3; + x->fp_mant[2] = a2; + x->fp_mant[1] = a1; + x->fp_mant[0] = a0; + + DUMPFPN(FPE_REG, x); + return (x); +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_sqrt.c src/sys/arch/sh3/fpu/fpu_sqrt.c --- src.orig/sys/arch/sh3/fpu/fpu_sqrt.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_sqrt.c 2007-01-31 12:15:09.000000000 +0900 @@ -0,0 +1,417 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fpu_sqrt.c 8.1 (Berkeley) 6/11/93 + */ + +/* + * Perform an FPU square root (return sqrt(x)). + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#if defined(DIAGNOSTIC)||defined(DEBUG) +#include +#endif + +#include +#include + +#include +#include + +/* + * Our task is to calculate the square root of a floating point number x0. + * This number x normally has the form: + * + * exp + * x = mant * 2 (where 1 <= mant < 2 and exp is an integer) + * + * This can be left as it stands, or the mantissa can be doubled and the + * exponent decremented: + * + * exp-1 + * x = (2 * mant) * 2 (where 2 <= 2 * mant < 4) + * + * If the exponent `exp' is even, the square root of the number is best + * handled using the first form, and is by definition equal to: + * + * exp/2 + * sqrt(x) = sqrt(mant) * 2 + * + * If exp is odd, on the other hand, it is convenient to use the second + * form, giving: + * + * (exp-1)/2 + * sqrt(x) = sqrt(2 * mant) * 2 + * + * In the first case, we have + * + * 1 <= mant < 2 + * + * and therefore + * + * sqrt(1) <= sqrt(mant) < sqrt(2) + * + * while in the second case we have + * + * 2 <= 2*mant < 4 + * + * and therefore + * + * sqrt(2) <= sqrt(2*mant) < sqrt(4) + * + * so that in any case, we are sure that + * + * sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2 + * + * or + * + * 1 <= sqrt(n * mant) < 2, n = 1 or 2. + * + * This root is therefore a properly formed mantissa for a floating + * point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2 + * as above. This leaves us with the problem of finding the square root + * of a fixed-point number in the range [1..4). + * + * Though it may not be instantly obvious, the following square root + * algorithm works for any integer x of an even number of bits, provided + * that no overflows occur: + * + * let q = 0 + * for k = NBITS-1 to 0 step -1 do -- for each digit in the answer... + * x *= 2 -- multiply by radix, for next digit + * if x >= 2q + 2^k then -- if adding 2^k does not + * x -= 2q + 2^k -- exceed the correct root, + * q += 2^k -- add 2^k and adjust x + * fi + * done + * sqrt = q / 2^(NBITS/2) -- (and any remainder is in x) + * + * If NBITS is odd (so that k is initially even), we can just add another + * zero bit at the top of x. Doing so means that q is not going to acquire + * a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the + * final value in x is not needed, or can be off by a factor of 2, this is + * equivalant to moving the `x *= 2' step to the bottom of the loop: + * + * for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done + * + * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2). 
+ * (Since the algorithm is destructive on x, we will call x's initial
+ * value, for which q is some power of two times its square root, x0.)
+ *
+ * If we insert a loop invariant y = 2q, we can then rewrite this using
+ * C notation as:
+ *
+ *	q = y = 0; x = x0;
+ *	for (k = NBITS; --k >= 0;) {
+ * #if (NBITS is even)
+ *		x *= 2;
+ * #endif
+ *		t = y + (1 << k);
+ *		if (x >= t) {
+ *			x -= t;
+ *			q += 1 << k;
+ *			y += 1 << (k + 1);
+ *		}
+ * #if (NBITS is odd)
+ *		x *= 2;
+ * #endif
+ *	}
+ *
+ * If x0 is fixed point, rather than an integer, we can simply alter the
+ * scale factor between q and sqrt(x0).  As it happens, we can easily arrange
+ * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
+ *
+ * In our case, however, x0 (and therefore x, y, q, and t) are multiword
+ * integers, which adds some complication.  But note that q is built one
+ * bit at a time, from the top down, and is not used itself in the loop
+ * (we use 2q as held in y instead).  This means we can build our answer
+ * in an integer, one word at a time, which saves a bit of work.  Also,
+ * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
+ * `new' bits in y and we can set them with an `or' operation rather than
+ * a full-blown multiword add.
+ *
+ * We are almost done, except for one snag.  We must prove that none of our
+ * intermediate calculations can overflow.  We know that x0 is in [1..4)
+ * and therefore the square root in q will be in [1..2), but what about x,
+ * y, and t?
+ *
+ * We know that y = 2q at the beginning of each loop.  (The relation only
+ * fails temporarily while y and q are being updated.)  Since q < 2, y < 4.
+ * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6.
+ * Furthermore, we can prove with a bit of work that x never exceeds y by
+ * more than 2, so that even after doubling, 0 <= x < 8.  (This is left as
+ * an exercise to the reader, mostly because I have become tired of working
+ * on this comment.)
+ *
+ * If our floating point mantissas (which are of the form 1.frac) occupy
+ * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra.
+ * In fact, we want even one more bit (for a carry, to avoid compares), or
+ * three extra.  There is a comment in fpu_emu.h reminding maintainers of
+ * this, so we have some justification in assuming it.
+ */
+struct fpn *
+fpu_sqrt(struct fpemu *fe)
+{
+	struct fpn *x = &fe->fe_f1;
+	u_int bit, q, tt;
+	u_int x0, x1, x2, x3;
+	u_int y0, y1, y2, y3;
+	u_int d0, d1, d2, d3;
+	int e;
+	FPU_DECL_CARRY;
+
+	/*
+	 * Take care of special cases first.  In order:
+	 *
+	 *	sqrt(NaN) = NaN
+	 *	sqrt(+0) = +0
+	 *	sqrt(-0) = -0
+	 *	sqrt(x < 0) = NaN	(including sqrt(-Inf))
+	 *	sqrt(+Inf) = +Inf
+	 *
+	 * Then all that remains are numbers with mantissas in [1..2).
+	 */
+	DPRINTF(FPE_REG, ("fpu_sqrt:\n"));
+	DUMPFPN(FPE_REG, x);
+	DPRINTF(FPE_REG, ("=>\n"));
+	if (ISNAN(x)) {
+		fe->fe_cx |= FPSCR_VXSNAN;
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (ISZERO(x)) {
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+	if (x->fp_sign) {
+		fe->fe_cx |= FPSCR_VXSQRT;
+		return (fpu_newnan(fe));
+	}
+	if (ISINF(x)) {
+		DUMPFPN(FPE_REG, x);
+		return (x);
+	}
+
+	/*
+	 * Calculate result exponent.  As noted above, this may involve
+	 * doubling the mantissa.  We will also need to double x each
+	 * time around the loop, so we define a macro for this here, and
+	 * we break out the multiword mantissa.
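+	 * For example (an illustration only): e = -5 is odd, so the
+	 * mantissa is doubled and e >> 1 = -3 is the result exponent;
+	 * indeed sqrt(m * 2^-5) = sqrt(2m * 2^-6) = sqrt(2m) * 2^-3,
+	 * which is why the sign-extending right shift below is correct
+	 * for negative odd exponents as well.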
+ */ +#ifdef FPU_SHL1_BY_ADD +#define DOUBLE_X { \ + FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \ + FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \ +} +#else +#define DOUBLE_X { \ + x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \ + x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \ +} +#endif +#if (FP_NMANT & 1) != 0 +# define ODD_DOUBLE DOUBLE_X +# define EVEN_DOUBLE /* nothing */ +#else +# define ODD_DOUBLE /* nothing */ +# define EVEN_DOUBLE DOUBLE_X +#endif + x0 = x->fp_mant[0]; + x1 = x->fp_mant[1]; + x2 = x->fp_mant[2]; + x3 = x->fp_mant[3]; + e = x->fp_exp; + if (e & 1) /* exponent is odd; use sqrt(2mant) */ + DOUBLE_X; + /* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */ + x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */ + + /* + * Now calculate the mantissa root. Since x is now in [1..4), + * we know that the first trip around the loop will definitely + * set the top bit in q, so we can do that manually and start + * the loop at the next bit down instead. We must be sure to + * double x correctly while doing the `known q=1.0'. + * + * We do this one mantissa-word at a time, as noted above, to + * save work. To avoid `(1 << 31) << 1', we also do the top bit + * outside of each per-word loop. + * + * The calculation `t = y + bit' breaks down into `t0 = y0, ..., + * t3 = y3, t? |= bit' for the appropriate word. Since the bit + * is always a `new' one, this means that three of the `t?'s are + * just the corresponding `y?'; we use `#define's here for this. + * The variable `tt' holds the actual `t?' variable. + */ + + /* calculate q0 */ +#define t0 tt + bit = FP_1; + EVEN_DOUBLE; + /* if (x >= (t0 = y0 | bit)) { */ /* always true */ + q = bit; + x0 -= bit; + y0 = bit << 1; + /* } */ + ODD_DOUBLE; + while ((bit >>= 1) != 0) { /* for remaining bits in q0 */ + EVEN_DOUBLE; + t0 = y0 | bit; /* t = y + bit */ + if (x0 >= t0) { /* if x >= t then */ + x0 -= t0; /* x -= t */ + q |= bit; /* q += bit */ + y0 |= bit << 1; /* y += bit << 1 */ + } + ODD_DOUBLE; + } + x->fp_mant[0] = q; +#undef t0 + + /* calculate q1. note (y0&1)==0. */ +#define t0 y0 +#define t1 tt + q = 0; + y1 = 0; + bit = 1 << 31; + EVEN_DOUBLE; + t1 = bit; + FPU_SUBS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); /* d = x - t */ + if ((int)d0 >= 0) { /* if d >= 0 (i.e., x >= t) then */ + x0 = d0, x1 = d1; /* x -= t */ + q = bit; /* q += bit */ + y0 |= 1; /* y += bit << 1 */ + } + ODD_DOUBLE; + while ((bit >>= 1) != 0) { /* for remaining bits in q1 */ + EVEN_DOUBLE; /* as before */ + t1 = y1 | bit; + FPU_SUBS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); + if ((int)d0 >= 0) { + x0 = d0, x1 = d1; + q |= bit; + y1 |= bit << 1; + } + ODD_DOUBLE; + } + x->fp_mant[1] = q; +#undef t1 + + /* calculate q2. note (y1&1)==0; y0 (aka t0) is fixed. */ +#define t1 y1 +#define t2 tt + q = 0; + y2 = 0; + bit = 1 << 31; + EVEN_DOUBLE; + t2 = bit; + FPU_SUBS(d2, x2, t2); + FPU_SUBCS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); + if ((int)d0 >= 0) { + x0 = d0, x1 = d1, x2 = d2; + q |= bit; + y1 |= 1; /* now t1, y1 are set in concrete */ + } + ODD_DOUBLE; + while ((bit >>= 1) != 0) { + EVEN_DOUBLE; + t2 = y2 | bit; + FPU_SUBS(d2, x2, t2); + FPU_SUBCS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); + if ((int)d0 >= 0) { + x0 = d0, x1 = d1, x2 = d2; + q |= bit; + y2 |= bit << 1; + } + ODD_DOUBLE; + } + x->fp_mant[2] = q; +#undef t2 + + /* calculate q3. y0, t0, y1, t1 all fixed; y2, t2, almost done. 
*/ +#define t2 y2 +#define t3 tt + q = 0; + y3 = 0; + bit = 1 << 31; + EVEN_DOUBLE; + t3 = bit; + FPU_SUBS(d3, x3, t3); + FPU_SUBCS(d2, x2, t2); + FPU_SUBCS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); + ODD_DOUBLE; + if ((int)d0 >= 0) { + x0 = d0, x1 = d1, x2 = d2; + q |= bit; + y2 |= 1; + } + while ((bit >>= 1) != 0) { + EVEN_DOUBLE; + t3 = y3 | bit; + FPU_SUBS(d3, x3, t3); + FPU_SUBCS(d2, x2, t2); + FPU_SUBCS(d1, x1, t1); + FPU_SUBC(d0, x0, t0); + if ((int)d0 >= 0) { + x0 = d0, x1 = d1, x2 = d2; + q |= bit; + y3 |= bit << 1; + } + ODD_DOUBLE; + } + x->fp_mant[3] = q; + + /* + * The result, which includes guard and round bits, is exact iff + * x is now zero; any nonzero bits in x represent sticky bits. + */ + x->fp_sticky = x0 | x1 | x2 | x3; + DUMPFPN(FPE_REG, x); + return (x); +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/fpu/fpu_subr.c src/sys/arch/sh3/fpu/fpu_subr.c --- src.orig/sys/arch/sh3/fpu/fpu_subr.c 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/fpu/fpu_subr.c 2007-02-02 12:31:42.000000000 +0900 @@ -0,0 +1,222 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fpu_subr.c 8.1 (Berkeley) 6/11/93 + */ + +/* + * FPU subroutines. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#if defined(DIAGNOSTIC)||defined(DEBUG) +#include +#endif + +#include +#include + +#include +#include +#include + +/* + * Shift the given number right rsh bits. Any bits that `fall off' will get + * shoved into the sticky field; we return the resulting sticky. 
Note that + * shifting NaNs is legal (this will never shift all bits out); a NaN's + * sticky field is ignored anyway. + */ +int +fpu_shr(struct fpn *fp, int rsh) +{ + u_int m0, m1, m2, m3, s; + int lsh; + +#ifdef DIAGNOSTIC + if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp))) + panic("fpu_rightshift 1"); +#endif + + m0 = fp->fp_mant[0]; + m1 = fp->fp_mant[1]; + m2 = fp->fp_mant[2]; + m3 = fp->fp_mant[3]; + + /* If shifting all the bits out, take a shortcut. */ + if (rsh >= FP_NMANT) { +#ifdef DIAGNOSTIC + if ((m0 | m1 | m2 | m3) == 0) + panic("fpu_rightshift 2"); +#endif + fp->fp_mant[0] = 0; + fp->fp_mant[1] = 0; + fp->fp_mant[2] = 0; + fp->fp_mant[3] = 0; +#ifdef notdef + if ((m0 | m1 | m2 | m3) == 0) + fp->fp_class = FPC_ZERO; + else +#endif + fp->fp_sticky = 1; + return (1); + } + + /* Squish out full words. */ + s = fp->fp_sticky; + if (rsh >= 32 * 3) { + s |= m3 | m2 | m1; + m3 = m0, m2 = 0, m1 = 0, m0 = 0; + } else if (rsh >= 32 * 2) { + s |= m3 | m2; + m3 = m1, m2 = m0, m1 = 0, m0 = 0; + } else if (rsh >= 32) { + s |= m3; + m3 = m2, m2 = m1, m1 = m0, m0 = 0; + } + + /* Handle any remaining partial word. */ + if ((rsh &= 31) != 0) { + lsh = 32 - rsh; + s |= m3 << lsh; + m3 = (m3 >> rsh) | (m2 << lsh); + m2 = (m2 >> rsh) | (m1 << lsh); + m1 = (m1 >> rsh) | (m0 << lsh); + m0 >>= rsh; + } + fp->fp_mant[0] = m0; + fp->fp_mant[1] = m1; + fp->fp_mant[2] = m2; + fp->fp_mant[3] = m3; + fp->fp_sticky = s; + return (s); +} + +/* + * Force a number to be normal, i.e., make its fraction have all zero + * bits before FP_1, then FP_1, then all 1 bits. This is used for denorms + * and (sometimes) for intermediate results. + * + * Internally, this may use a `supernormal' -- a number whose fp_mant + * is greater than or equal to 2.0 -- so as a side effect you can hand it + * a supernormal and it will fix it (provided fp->fp_mant[3] == 0). + */ +void +fpu_norm(struct fpn *fp) +{ + u_int m0, m1, m2, m3, top, sup, nrm; + int lsh, rsh, exp; + + exp = fp->fp_exp; + m0 = fp->fp_mant[0]; + m1 = fp->fp_mant[1]; + m2 = fp->fp_mant[2]; + m3 = fp->fp_mant[3]; + + /* Handle severe subnormals with 32-bit moves. */ + if (m0 == 0) { + if (m1) + m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32; + else if (m2) + m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32; + else if (m3) + m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32; + else { + fp->fp_class = FPC_ZERO; + return; + } + } + + /* Now fix any supernormal or remaining subnormal. */ + nrm = FP_1; + sup = nrm << 1; + if (m0 >= sup) { + /* + * We have a supernormal number. We need to shift it right. + * We may assume m3==0. + */ + for (rsh = 1, top = m0 >> 1; top >= sup; rsh++) /* XXX slow */ + top >>= 1; + exp += rsh; + lsh = 32 - rsh; + m3 = m2 << lsh; + m2 = (m2 >> rsh) | (m1 << lsh); + m1 = (m1 >> rsh) | (m0 << lsh); + m0 = top; + } else if (m0 < nrm) { + /* + * We have a regular denorm (a subnormal number), and need + * to shift it left. + */ + for (lsh = 1, top = m0 << 1; top < nrm; lsh++) /* XXX slow */ + top <<= 1; + exp -= lsh; + rsh = 32 - lsh; + m0 = top | (m1 >> rsh); + m1 = (m1 << lsh) | (m2 >> rsh); + m2 = (m2 << lsh) | (m3 >> rsh); + m3 <<= lsh; + } + + fp->fp_exp = exp; + fp->fp_mant[0] = m0; + fp->fp_mant[1] = m1; + fp->fp_mant[2] = m2; + fp->fp_mant[3] = m3; +} + +/* + * Concoct a `fresh' Quiet NaN per Appendix N. + * As a side effect, we set NV (invalid) for the current exceptions. 
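+ * (The fraction below is all 1s: fp_mant[0] gets FP_1 - 1, i.e. every
+ * bit under the implied 1 including FP_QUIETBIT, and the remaining
+ * words get ~0, so the new NaN is already in quiet form.)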
+ */ +struct fpn * +fpu_newnan(struct fpemu *fe) +{ + struct fpn *fp; + + fp = &fe->fe_f3; + fp->fp_class = FPC_QNAN; + fp->fp_sign = 0; + fp->fp_mant[0] = FP_1 - 1; + fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0; + DUMPFPN(FPE_REG, fp); + return (fp); +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/Makefile src/sys/arch/sh3/include/Makefile --- src.orig/sys/arch/sh3/include/Makefile 2007-02-10 06:55:12.000000000 +0900 +++ src/sys/arch/sh3/include/Makefile 2007-02-10 11:17:30.000000000 +0900 @@ -7,7 +7,7 @@ cdefs.h coff_machdep.h cpu.h cputypes.h \ disklabel.h \ elf_machdep.h endian.h endian_machdep.h \ - float.h frame.h \ + float.h fpu.h frame.h \ ieee.h ieeefp.h \ int_const.h int_fmtio.h int_limits.h int_mwgwtypes.h int_types.h \ intr.h \ diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/cpu.h src/sys/arch/sh3/include/cpu.h --- src.orig/sys/arch/sh3/include/cpu.h 2008-03-23 13:30:39.000000000 +0900 +++ src/sys/arch/sh3/include/cpu.h 2008-03-23 13:48:11.000000000 +0900 @@ -60,6 +60,7 @@ int ci_mtx_oldspl; int ci_want_resched; int ci_idepth; + struct lwp *ci_fpulwp; /* current owner of FPU */ }; extern struct cpu_info cpu_info_store; diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/cputypes.h src/sys/arch/sh3/include/cputypes.h --- src.orig/sys/arch/sh3/include/cputypes.h 2008-04-29 11:39:26.000000000 +0900 +++ src/sys/arch/sh3/include/cputypes.h 2008-04-29 11:48:25.000000000 +0900 @@ -48,13 +48,14 @@ #define CPU_PRODUCT_7708R 3 #define CPU_PRODUCT_7709 4 #define CPU_PRODUCT_7709A 5 +#define CPU_PRODUCT_7706 6 /* SH4 series */ -#define CPU_PRODUCT_7750 6 -#define CPU_PRODUCT_7750S 7 -#define CPU_PRODUCT_7750R 8 -#define CPU_PRODUCT_7751 9 -#define CPU_PRODUCT_7751R 10 +#define CPU_PRODUCT_7750 7 +#define CPU_PRODUCT_7750S 8 +#define CPU_PRODUCT_7750R 9 +#define CPU_PRODUCT_7751 10 +#define CPU_PRODUCT_7751R 11 #ifndef _LOCORE diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/fpu.h src/sys/arch/sh3/include/fpu.h --- src.orig/sys/arch/sh3/include/fpu.h 1970-01-01 09:00:00.000000000 +0900 +++ src/sys/arch/sh3/include/fpu.h 2007-02-04 00:36:05.000000000 +0900 @@ -0,0 +1,169 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SH3_FPU_H_ +#define _SH3_FPU_H_ + +#if defined(_KERNEL) + +struct lwp; +struct ksiginfo; +struct trapframe; + +void sh_fpu_init(void); + +#ifdef SH3 +void sh3_fpu_enable(void); +void sh3_fpu_save_lwp(struct lwp *, int); +int sh3_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *); +#endif + +#ifdef SH4 +void sh4_fpu_enable(void); +void sh4_fpu_save_lwp(struct lwp *, int); +int sh4_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *); +#endif + +#ifdef SH4FPE +void sh4fpe_fpu_enable(void); +void sh4fpe_fpu_save_lwp(struct lwp *, int); +int sh4fpe_fpu_exception(struct lwp *, struct trapframe *, struct ksiginfo *); +#endif + +#if defined(SH3) && defined(SH4) +extern void (*__sh_fpu_enable)(void); +extern void (*__sh_fpu_save_lwp)(struct lwp *, int); +extern int (*__sh_fpu_exception)(struct lwp *, struct trapframe *, + struct ksiginfo *); + +#define sh_fpu_enable() (*__sh_fpu_enable)() +#define sh_fpu_save_lwp(l, d) (*__sh_fpu_save_lwp)(l, d) +#define sh_fpu_exception(l,t,s) (*__sh_fpu_exception)(l,t,s) +#define CPU_HAS_FPU (CPU_IS_SH4) + +#elif defined(SH3) + +#define sh_fpu_enable() sh3_fpu_enable() +#define sh_fpu_save_lwp(l, d) sh3_fpu_save_lwp(l, d) +#define sh_fpu_exception(l,t,s) sh3_fpu_exception(l,t,s) +#define CPU_HAS_FPU (0) /* XXX: SH3E */ + +#elif defined(SH4) + +#define sh_fpu_enable() sh4_fpu_enable() +#define sh_fpu_save_lwp(l, d) sh4_fpu_save_lwp(l, d) +#define sh_fpu_exception(l,t,s) sh4_fpu_exception(l,t,s) +#define CPU_HAS_FPU (1) + +#endif /* SH3 && SH4 */ + +#endif /* _KERNEL */ + +#if !defined(__ASSEMBLER__) +/* FPU control register access */ +static __inline int __unused +get_fpscr(void) +{ + int r; + + __asm volatile ("sts fpscr, %0" : "=r"(r)); + + return r; +} + +static __inline void __unused +set_fpscr(int r) +{ + + __asm volatile ("lds %0, fpscr" :: "r"(r)); +} + +static __inline int __unused +get_fpul(void) +{ + int r; + + __asm volatile ("sts fpul, %0" : "=r"(r)); + + return r; +} + +static __inline void __unused +set_fpul(int r) +{ + + __asm volatile ("lds %0, fpul" :: "r"(r)); +} +#endif /* !__ASSEMBLER__ */ + +/* + * FPU register definition + */ +#define FPREGS_PER_BANK 0x10 +#define FP_BANK_BIT 0x10 + +/* fpscr bit */ +#define FPSCR_RM (0x03 << 0) /* Round mode */ +#define RM_NEAREST (0x00 << 0) /* nearest (SH4 only) */ +#define RM_ZERO (0x01 << 0) /* round to zero */ +#define FPSCR_FLAG (0x1f << 2) /* FPU exception flag: VZOUI */ +#define FPSCR_ENABLE (0x1f << 7) /* FPU exception enable: VZOUI */ +#define FPSCR_CAUSE (0x3f << 12) /* FPU exception cause: EVZOUI */ +#define FPSCR_DN (0x01 << 18) /* Denormal mode: 0=denormal (SH4 only), 1=0 */ +#define FPSCR_PR (0x01 << 19) /* precision (SH4 only): 0=float, 1=double */ +#define FPSCR_SZ (0x01 << 20) /* fmov size (SH4 only): 0=32, 1=64 */ +#define FPSCR_FR (0x01 << 21) /* register bank (SH4 only) */ +#define FPSCR_MASK (0x003fffff) + +/* FPU exception flag/enable/cause bit */ +#define FP_I_BIT (1 << 0) /* inexact result (SH4 only) */ 
+#define FP_U_BIT (1 << 1) /* underflow (SH4 only) */ +#define FP_O_BIT (1 << 2) /* overflow (SH4 only) */ +#define FP_Z_BIT (1 << 3) /* divide by zero */ +#define FP_V_BIT (1 << 4) /* invalid operation */ +#define FP_E_BIT (1 << 5) /* FPU error (SH4 only) */ +#define FP_ALL_BIT (FP_I_BIT|FP_U_BIT|FP_O_BIT|FP_Z_BIT|FP_V_BIT) + +/* FPU exception flag/enable/cause shift bits */ +#define FP_FLAG_SHIFT 2 +#define FP_ENABLE_SHIFT 7 +#define FP_CAUSE_SHIFT 12 + +#define FP_FLAG_MASK FP_ALL_BIT +#define FP_ENABLE_MASK FP_ALL_BIT +#define FP_CAUSE_MASK (FP_ALL_BIT|FP_E_BIT) + +#define FP_FLAG(r) (((r) >> FP_FLAG_SHIFT) & FP_FLAG_MASK) +#define FP_ENABLE(r) (((r) >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK) +#define FP_CAUSE(r) (((r) >> FP_CAUSE_SHIFT) & FP_CAUSE_MASK) + +/* fpscr initial value */ +#define SH3_FPSCR_INIT (RM_ZERO|FPSCR_DN) +#define SH4_FPSCR_INIT (RM_NEAREST) + +#endif /* !_SH3_FPU_H_ */ diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/mcontext.h src/sys/arch/sh3/include/mcontext.h --- src.orig/sys/arch/sh3/include/mcontext.h 2008-04-29 11:39:26.000000000 +0900 +++ src/sys/arch/sh3/include/mcontext.h 2008-04-29 11:48:26.000000000 +0900 @@ -65,9 +65,43 @@ /* Convenience synonym */ #define _REG_SP _REG_R15 +#define _REG_FPSCR 0 +#define _REG_FPUL 1 +#define _REG_FR0 2 +#define _REG_FR1 3 +#define _REG_FR2 4 +#define _REG_FR3 5 +#define _REG_FR4 6 +#define _REG_FR5 7 +#define _REG_FR6 8 +#define _REG_FR7 9 +#define _REG_FR8 10 +#define _REG_FR9 11 +#define _REG_FR10 12 +#define _REG_FR11 13 +#define _REG_FR12 14 +#define _REG_FR13 15 +#define _REG_FR14 16 +#define _REG_FR15 17 +#define _REG_FR16 18 +#define _REG_FR17 19 +#define _REG_FR18 20 +#define _REG_FR19 21 +#define _REG_FR20 22 +#define _REG_FR21 23 +#define _REG_FR22 24 +#define _REG_FR23 25 +#define _REG_FR24 26 +#define _REG_FR25 27 +#define _REG_FR26 28 +#define _REG_FR27 29 +#define _REG_FR28 30 +#define _REG_FR29 31 +#define _REG_FR30 32 +#define _REG_FR31 33 + /* * FPU state description. - * XXX: kernel doesn't support FPU yet, so this is just a placeholder. */ typedef struct { int __fpr_fpscr; diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/pcb.h src/sys/arch/sh3/include/pcb.h --- src.orig/sys/arch/sh3/include/pcb.h 2008-04-29 11:39:26.000000000 +0900 +++ src/sys/arch/sh3/include/pcb.h 2008-04-29 11:48:26.000000000 +0900 @@ -33,12 +33,17 @@ #define _SH3_PCB_H_ #include +#include struct pcb { struct switchframe pcb_sf; /* kernel context for resume */ void * pcb_onfault; /* for copyin/out fault */ int pcb_faultbail; /* bail out before call uvm_fault. 
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/mcontext.h src/sys/arch/sh3/include/mcontext.h
--- src.orig/sys/arch/sh3/include/mcontext.h	2008-04-29 11:39:26.000000000 +0900
+++ src/sys/arch/sh3/include/mcontext.h	2008-04-29 11:48:26.000000000 +0900
@@ -65,9 +65,43 @@
 /* Convenience synonym */
 #define _REG_SP _REG_R15
 
+#define _REG_FPSCR	0
+#define _REG_FPUL	1
+#define _REG_FR0	2
+#define _REG_FR1	3
+#define _REG_FR2	4
+#define _REG_FR3	5
+#define _REG_FR4	6
+#define _REG_FR5	7
+#define _REG_FR6	8
+#define _REG_FR7	9
+#define _REG_FR8	10
+#define _REG_FR9	11
+#define _REG_FR10	12
+#define _REG_FR11	13
+#define _REG_FR12	14
+#define _REG_FR13	15
+#define _REG_FR14	16
+#define _REG_FR15	17
+#define _REG_FR16	18
+#define _REG_FR17	19
+#define _REG_FR18	20
+#define _REG_FR19	21
+#define _REG_FR20	22
+#define _REG_FR21	23
+#define _REG_FR22	24
+#define _REG_FR23	25
+#define _REG_FR24	26
+#define _REG_FR25	27
+#define _REG_FR26	28
+#define _REG_FR27	29
+#define _REG_FR28	30
+#define _REG_FR29	31
+#define _REG_FR30	32
+#define _REG_FR31	33
+
 /*
  * FPU state description.
- * XXX: kernel doesn't support FPU yet, so this is just a placeholder.
  */
 typedef struct {
 	int	__fpr_fpscr;
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/pcb.h src/sys/arch/sh3/include/pcb.h
--- src.orig/sys/arch/sh3/include/pcb.h	2008-04-29 11:39:26.000000000 +0900
+++ src/sys/arch/sh3/include/pcb.h	2008-04-29 11:48:26.000000000 +0900
@@ -33,12 +33,17 @@
 #define _SH3_PCB_H_
 
 #include
+#include
 
 struct pcb {
 	struct switchframe pcb_sf;	/* kernel context for resume */
 	void * pcb_onfault;		/* for copyin/out fault */
 	int pcb_faultbail;		/* bail out before call uvm_fault. */
+	struct cpu_info * volatile pcb_fpcpu;	/* CPU with our FP state */
+	__fpregset_t pcb_fpu;		/* floating point processor */
+	int pcb_fpu_flags;		/* floating point denormal flag */
 };
 
 extern struct pcb *curpcb;
+
 #endif /* !_SH3_PCB_H_ */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/proc.h src/sys/arch/sh3/include/proc.h
--- src.orig/sys/arch/sh3/include/proc.h	2008-02-16 07:22:38.000000000 +0900
+++ src/sys/arch/sh3/include/proc.h	2008-02-16 07:28:39.000000000 +0900
@@ -57,7 +57,7 @@
 };
 
 /* md_flags */
-#define MDP_USEDFPU	0x0001	/* has used the FPU */
+#define MDL_USEDFPU	0x0001	/* has used the FPU */
 
 struct lwp;
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/psl.h src/sys/arch/sh3/include/psl.h
--- src.orig/sys/arch/sh3/include/psl.h	2008-01-05 11:05:53.000000000 +0900
+++ src/sys/arch/sh3/include/psl.h	2008-01-05 11:22:22.000000000 +0900
@@ -45,16 +45,17 @@
 #define PSL_IMASK	0x000000f0	/* Interrupt Mask bit */
 #define PSL_QBIT	0x00000100	/* Q bit */
 #define PSL_MBIT	0x00000200	/* M bit */
+#define PSL_FDBIT	0x00008000	/* FD bit (SH4 only) */
 #define PSL_BL		0x10000000	/* Exception Block bit */
 #define PSL_RB		0x20000000	/* Register Bank bit */
 #define PSL_MD		0x40000000	/* Processor Mode bit */
 				/* 1 = kernel, 0 = user */
 #define PSL_MBO		0x00000000	/* must be one bits */
-#define PSL_MBZ		0x8ffffc0c	/* must be zero bits */
+#define PSL_MBZ		0x8fff7c0c	/* must be zero bits */
 
 #define PSL_USERSET	0
-#define PSL_USERSTATIC	(PSL_BL|PSL_RB|PSL_MD|PSL_IMASK|PSL_MBO|PSL_MBZ)
+#define PSL_USERSTATIC	(PSL_BL|PSL_RB|PSL_MD|PSL_FDBIT|PSL_IMASK|PSL_MBO|PSL_MBZ)
 
 #define KERNELMODE(sr)	((sr) & PSL_MD)
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/ptrace.h src/sys/arch/sh3/include/ptrace.h
--- src.orig/sys/arch/sh3/include/ptrace.h	2008-01-25 23:45:35.000000000 +0900
+++ src/sys/arch/sh3/include/ptrace.h	2008-02-02 23:48:22.000000000 +0900
@@ -36,8 +36,12 @@
 #define PT_GETREGS	(PT_FIRSTMACH + 1)
 #define PT_SETREGS	(PT_FIRSTMACH + 2)
+#define PT_GETFPREGS	(PT_FIRSTMACH + 3)
+#define PT_SETFPREGS	(PT_FIRSTMACH + 4)
 
 #define PT_MACHDEP_STRINGS \
 	"(unused)", \
 	"PT_GETREGS", \
-	"PT_SETREGS",
+	"PT_SETREGS", \
+	"PT_GETFPREGS", \
+	"PT_SETFPREGS",
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/include/reg.h src/sys/arch/sh3/include/reg.h
--- src.orig/sys/arch/sh3/include/reg.h	2005-12-11 21:18:58.000000000 +0900
+++ src/sys/arch/sh3/include/reg.h	2007-01-30 23:51:20.000000000 +0900
@@ -99,4 +99,10 @@
 	int	r_r0;
 };
 
+struct fpreg {
+	int	fpr_fpscr;
+	int	fpr_fpul;
+	int	fpr_fr[32];
+};
+
 #endif /* !_SH3_REG_H_ */
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/cache_sh3.c src/sys/arch/sh3/sh3/cache_sh3.c
--- src.orig/sys/arch/sh3/sh3/cache_sh3.c	2008-04-29 11:39:27.000000000 +0900
+++ src/sys/arch/sh3/sh3/cache_sh3.c	2008-04-29 11:48:26.000000000 +0900
@@ -79,6 +79,9 @@
 	case CPU_PRODUCT_7709A:
 		cache_size = 16 * 1024;
 		break;
+	case CPU_PRODUCT_7706:
+		cache_size = 16 * 1024;
+		break;
 	}
 
 	r = _reg_read_4(SH3_CCR);
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/exception.c src/sys/arch/sh3/sh3/exception.c
--- src.orig/sys/arch/sh3/sh3/exception.c	2008-02-23 10:46:34.000000000 +0900
+++ src/sys/arch/sh3/sh3/exception.c	2008-02-23 10:52:46.000000000 +0900
@@ -101,6 +101,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -208,6 +209,17 @@
 		ksi.ksi_addr = (void *)tf->tf_spc;
 		goto trapsignal;
 
+	case EXPEVT_FPU_DISABLE | EXP_USER: /* FALLTHROUGH */
+	case EXPEVT_FPU_SLOT_DISABLE | EXP_USER:
+		sh_fpu_enable();
+		break;
+
+	case EXPEVT_FPU | EXP_USER:
+		KSI_INIT_TRAP(&ksi);
+		if (sh_fpu_exception(l, tf, &ksi))
+			goto do_panic;
+		goto trapsignal;
+
 	default:
 		goto do_panic;
 	}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu.c src/sys/arch/sh3/sh3/fpu.c
--- src.orig/sys/arch/sh3/sh3/fpu.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu.c	2007-02-01 15:46:34.000000000 +0900
@@ -0,0 +1,70 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+#include
+
+#include
+
+#if defined(SH3) && defined(SH4)
+void (*__sh_fpu_enable)(void);
+void (*__sh_fpu_save_lwp)(struct lwp *, int);
+int (*__sh_fpu_exception)(struct lwp *, struct trapframe *, struct ksiginfo *);
+#endif /* SH3 && SH4 */
+
+void
+sh_fpu_init(void)
+{
+
+	/*
+	 * Assign function hooks but only if both SH3 and SH4 are defined.
+	 * They are called directly otherwise. See .
+	 */
+#if defined(SH3) && defined(SH4)
+	if (CPU_IS_SH3) {
+		__sh_fpu_enable = sh3_fpu_enable;
+		__sh_fpu_save_lwp = sh3_fpu_save_lwp;
+		__sh_fpu_exception = sh3_fpu_exception;
+	} else if (CPU_IS_SH4) {
+		__sh_fpu_enable = sh4_fpu_enable;
+		__sh_fpu_save_lwp = sh4_fpu_save_lwp;
+		__sh_fpu_exception = sh4_fpu_exception;
+	} else
+		panic("sh_fpu_init: unknown CPU type");
+#endif /* SH3 && SH4 */
+}
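
The <sh3/fpu.h> side of this hook arrangement is not included in the patch (the "See ." above lost its header name), but NetBSD's sh3 port already uses the same indirect-or-direct split for its MMU ops, so the callers presumably resolve along these lines. This is a sketch modeled on that precedent, not the patch's actual header:

#if defined(SH3) && defined(SH4)
/* both CPU types in one kernel: go through the hooks sh_fpu_init() fills */
#define	sh_fpu_enable()			(*__sh_fpu_enable)()
#define	sh_fpu_save_lwp(l, d)		(*__sh_fpu_save_lwp)((l), (d))
#define	sh_fpu_exception(l, tf, ksi)	(*__sh_fpu_exception)((l), (tf), (ksi))
#elif defined(SH3)
/* single-CPU-type kernel: the indirection compiles away to direct calls */
#define	sh_fpu_enable()			sh3_fpu_enable()
#define	sh_fpu_save_lwp(l, d)		sh3_fpu_save_lwp((l), (d))
#define	sh_fpu_exception(l, tf, ksi)	sh3_fpu_exception((l), (tf), (ksi))
#elif defined(SH4)
#define	sh_fpu_enable()			sh4_fpu_enable()
#define	sh_fpu_save_lwp(l, d)		sh4_fpu_save_lwp((l), (d))
#define	sh_fpu_exception(l, tf, ksi)	sh4_fpu_exception((l), (tf), (ksi))
#endif
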
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu_sh3.c src/sys/arch/sh3/sh3/fpu_sh3.c
--- src.orig/sys/arch/sh3/sh3/fpu_sh3.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu_sh3.c	2007-02-01 15:47:11.000000000 +0900
@@ -0,0 +1,76 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+#include
+#include
+
+#include
+#include
+
+/*
+ * SH3E FPU
+ */
+
+void
+sh3_fpu_enable(void)
+{
+
+	/* Nothing to do. */
+}
+
+/*ARGSUSED*/
+void
+sh3_fpu_save_lwp(struct lwp *l, int discard)
+{
+
+	panic("sh3_fpu_save_lwp: not supported");
+}
+
+int
+sh3_fpu_exception(struct lwp *l, struct trapframe *tf, struct ksiginfo *ksi)
+{
+
+	__unused(l);
+
+	ksi->ksi_signo = SIGFPE;
+	ksi->ksi_code = FPE_FLTINV;
+	ksi->ksi_addr = (void *)tf->tf_spc;
+
+	return 0;	/* trapsignal */
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/fpu_sh4.c src/sys/arch/sh3/sh3/fpu_sh4.c
--- src.orig/sys/arch/sh3/sh3/fpu_sh4.c	1970-01-01 09:00:00.000000000 +0900
+++ src/sys/arch/sh3/sh3/fpu_sh4.c	2007-02-04 18:49:14.000000000 +0900
@@ -0,0 +1,325 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+#include
+#include
+#include
+
+#include
+
+static inline int
+get_sr(void)
+{
+	int r;
+
+	__asm volatile ("stc sr, %0" : "=r"(r));
+
+	return r;
+}
+
+static inline void
+set_sr(int r)
+{
+
+	__asm volatile ("ldc %0, sr" :: "r"(r));
+}
+
+static void
+sh4_fpu_save_regs(__fpregset_t *fp)
+{
+	int fpscr;
+
+	fpscr = get_fpscr();
+
+	/* save FPU register */
+	set_fpscr((fpscr | FPSCR_FR) & ~FPSCR_SZ);
+	__asm volatile (
+		"mov %0, r4\n\t"
+		"fmov.s fr15, @-r4\n\t"
+		"fmov.s fr14, @-r4\n\t"
+		"fmov.s fr13, @-r4\n\t"
+		"fmov.s fr12, @-r4\n\t"
+		"fmov.s fr11, @-r4\n\t"
+		"fmov.s fr10, @-r4\n\t"
+		"fmov.s fr9, @-r4\n\t"
+		"fmov.s fr8, @-r4\n\t"
+		"fmov.s fr7, @-r4\n\t"
+		"fmov.s fr6, @-r4\n\t"
+		"fmov.s fr5, @-r4\n\t"
+		"fmov.s fr4, @-r4\n\t"
+		"fmov.s fr3, @-r4\n\t"
+		"fmov.s fr2, @-r4\n\t"
+		"fmov.s fr1, @-r4\n\t"
+		"fmov.s fr0, @-r4\n\t"
+		"frchg\n\t"
+		"fmov.s fr15, @-r4\n\t"
+		"fmov.s fr14, @-r4\n\t"
+		"fmov.s fr13, @-r4\n\t"
+		"fmov.s fr12, @-r4\n\t"
+		"fmov.s fr11, @-r4\n\t"
+		"fmov.s fr10, @-r4\n\t"
+		"fmov.s fr9, @-r4\n\t"
+		"fmov.s fr8, @-r4\n\t"
+		"fmov.s fr7, @-r4\n\t"
+		"fmov.s fr6, @-r4\n\t"
+		"fmov.s fr5, @-r4\n\t"
+		"fmov.s fr4, @-r4\n\t"
+		"fmov.s fr3, @-r4\n\t"
+		"fmov.s fr2, @-r4\n\t"
+		"fmov.s fr1, @-r4\n\t"
+		"fmov.s fr0, @-r4"
+		:: "r"(&fp->__fpr_regs[32]));
+
+	/* save FPU control register */
+	fp->__fpr_fpul = get_fpul();
+	fp->__fpr_fpscr = fpscr;
+
+	set_fpscr(fpscr);
+}
+
+static void
+sh4_fpu_load_regs(__fpregset_t *fp)
+{
+	int fpscr;
+
+	fpscr = get_fpscr();
+
+	/* load FPU registers */
+	set_fpscr(fpscr & ~(FPSCR_FR|FPSCR_SZ));
+	__asm volatile (
+		"mov %0, r4\n\t"
+		"fmov.s @r4+, fr0\n\t"
+		"fmov.s @r4+, fr1\n\t"
+		"fmov.s @r4+, fr2\n\t"
+		"fmov.s @r4+, fr3\n\t"
+		"fmov.s @r4+, fr4\n\t"
+		"fmov.s @r4+, fr5\n\t"
+		"fmov.s @r4+, fr6\n\t"
+		"fmov.s @r4+, fr7\n\t"
+		"fmov.s @r4+, fr8\n\t"
+		"fmov.s @r4+, fr9\n\t"
+		"fmov.s @r4+, fr10\n\t"
+		"fmov.s @r4+, fr11\n\t"
+		"fmov.s @r4+, fr12\n\t"
+		"fmov.s @r4+, fr13\n\t"
+		"fmov.s @r4+, fr14\n\t"
+		"fmov.s @r4+, fr15\n\t"
+		"frchg\n\t"
+		"fmov.s @r4+, fr0\n\t"
+		"fmov.s @r4+, fr1\n\t"
+		"fmov.s @r4+, fr2\n\t"
+		"fmov.s @r4+, fr3\n\t"
+		"fmov.s @r4+, fr4\n\t"
+		"fmov.s @r4+, fr5\n\t"
+		"fmov.s @r4+, fr6\n\t"
+		"fmov.s @r4+, fr7\n\t"
+		"fmov.s @r4+, fr8\n\t"
+		"fmov.s @r4+, fr9\n\t"
+		"fmov.s @r4+, fr10\n\t"
+		"fmov.s @r4+, fr11\n\t"
+		"fmov.s @r4+, fr12\n\t"
+		"fmov.s @r4+, fr13\n\t"
+		"fmov.s @r4+, fr14\n\t"
+		"fmov.s @r4+, fr15\n\t"
+		:: "r"(&fp->__fpr_regs[0]));
+
+	/* load FPU control register */
+	set_fpul(fp->__fpr_fpul);
+	set_fpscr(fp->__fpr_fpscr);
+}
+
+static void
+sh4_fpu_save_cpu(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct lwp *l;
+	struct pcb *pcb;
+	int s;
+	int sr;
+
+	s = _cpu_intr_suspend();
+	sr = get_sr();
+	set_sr(sr & ~PSL_FDBIT);
+
+	l = ci->ci_fpulwp;
+	if (l == NULL)
+		goto out;
+
+	pcb = &l->l_addr->u_pcb;
+
+	sh4_fpu_save_regs(&pcb->pcb_fpu);
+
+	pcb->pcb_fpcpu = NULL;
+	ci->ci_fpulwp = NULL;
+out:
+	set_sr(sr);
+	_cpu_intr_resume(s);
+}
+
+void
+sh4_fpu_enable(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct lwp *l = curlwp;
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	struct trapframe *tf = l->l_md.md_regs;
+	int s;
+	int sr;
+
+	KASSERT(pcb->pcb_fpcpu == NULL);
+	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
+		memset(&pcb->pcb_fpu, 0, sizeof(pcb->pcb_fpu));
+		pcb->pcb_fpu.__fpr_fpscr = SH4_FPSCR_INIT;
+		l->l_md.md_flags |= MDL_USEDFPU;
+	}
+
+	/*
+	 * If we own the CPU but FP is disabled, simply enable it and return.
+	 */
+	if (ci->ci_fpulwp == l) {
+		tf->tf_ssr &= ~PSL_FDBIT;
+		return;
+	}
+
+	s = _cpu_intr_suspend();
+	sr = get_sr();
+	set_sr(sr & ~PSL_FDBIT);
+
+	if (ci->ci_fpulwp != NULL) {
+		sh4_fpu_save_cpu();
+	}
+	KASSERT(ci->ci_fpulwp == NULL);
+
+	sh4_fpu_load_regs(&pcb->pcb_fpu);
+
+	tf->tf_ssr &= ~PSL_FDBIT;
+	ci->ci_fpulwp = l;
+	pcb->pcb_fpcpu = ci;
+
+	set_sr(sr);
+	_cpu_intr_resume(s);
+}
+
+void
+sh4_fpu_save_lwp(struct lwp *l, int discard)
+{
+	struct pcb *pcb = &l->l_addr->u_pcb;
+	struct cpu_info * const ci = curcpu();
+
+	/*
+	 * If it's already in the PCB, there's nothing to do.
+	 */
+	if (pcb->pcb_fpcpu == NULL)
+		return;
+
+	/*
+	 * If we simply need to discard the information, then we
+	 * don't need to save anything.
+	 */
+	if (discard) {
+#ifndef MULTIPROCESSOR
+		KASSERT(ci == pcb->pcb_fpcpu);
+#endif
+		KASSERT(l == pcb->pcb_fpcpu->ci_fpulwp);
+		pcb->pcb_fpcpu->ci_fpulwp = NULL;
+		pcb->pcb_fpcpu = NULL;
+		return;
+	}
+
+	/*
+	 * If the state is in the current CPU,
+	 * just flush the current CPU's state.
+	 */
+	if (ci->ci_fpulwp == l) {
+		sh4_fpu_save_cpu();
+		return;
+	}
+
+#ifdef MULTIPROCESSOR
+	/*
+	 * It must be on another CPU, flush it from there.
+	 */
+	/* XXX */
+#endif
+}
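
The two pointers introduced in pcb.h above implement the usual lazy-FPU protocol: an lwp's FP state lives either in its PCB (pcb_fpcpu == NULL) or live in the registers of exactly one CPU, in which case the per-CPU and per-lwp pointers name each other. A small assertion helper, written against the fields this patch adds (illustrative only, not part of the patch), states the invariant sh4_fpu_enable() and sh4_fpu_save_lwp() preserve:

static void
fpu_owner_check(struct lwp *l)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct cpu_info *ci = pcb->pcb_fpcpu;

	if (ci == NULL)
		return;			/* state is saved in the PCB */
	KASSERT(ci->ci_fpulwp == l);	/* live state: both sides agree */
}
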
+ */ + /* XXX */ +#endif +} + +int +sh4_fpu_exception(struct lwp *l, struct trapframe *tf, struct ksiginfo *ksi) +{ + static const int cause2sigcode[6] = { + FPE_FLTRES, /* FP_I_BIT */ + FPE_FLTUND, /* FP_U_BIT */ + FPE_FLTOVF, /* FP_O_BIT */ + FPE_FLTDIV, /* FP_Z_BIT */ + FPE_FLTINV, /* FP_V_BIT */ + FPE_FLTRES /* FP_E_BIT */ + }; + struct pcb *pcb = &l->l_addr->u_pcb; + int fpscr; + int cause; + int i; + + fpscr = get_fpscr(); + + cause = FP_CAUSE(fpscr); + cause &= FP_ENABLE(fpscr) | FP_E_BIT; + +#ifdef DEBUG + printf("fpscr = %x, cause = %x\n", fpscr, cause); +#endif + + ksi->ksi_signo = SIGFPE; + ksi->ksi_addr = (void *)tf->tf_spc; + + for (i = 0; i < __arraycount(cause2sigcode); i++) { + if (cause & (1 << i)) { + ksi->ksi_code = cause2sigcode[i]; + if (i == 5) { + /* FP_E_BIT: denormal exception */ + pcb->pcb_fpu_flags |= FP_E_BIT; + } + break; + } + } + if (i == __arraycount(cause2sigcode)) { + ksi->ksi_code = FPE_FLTINV; + } + + return 0; +} diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/interrupt.c src/sys/arch/sh3/sh3/interrupt.c --- src.orig/sys/arch/sh3/sh3/interrupt.c 2008-04-29 11:39:27.000000000 +0900 +++ src/sys/arch/sh3/sh3/interrupt.c 2008-04-29 11:48:26.000000000 +0900 @@ -76,6 +76,7 @@ #ifdef SH3 case CPU_PRODUCT_7709: case CPU_PRODUCT_7709A: + case CPU_PRODUCT_7706: _reg_write_2(SH7709_IPRC, 0); _reg_write_2(SH7709_IPRD, 0); _reg_write_2(SH7709_IPRE, 0); @@ -127,7 +128,7 @@ /* Priority */ intc_intr_priority(evtcode, level); - /* Sense select (SH7709, SH7709A only) XXX notyet */ + /* Sense select (SH7709, SH7709A, SH7706 only) XXX notyet */ return (ih); } diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/process_machdep.c src/sys/arch/sh3/sh3/process_machdep.c --- src.orig/sys/arch/sh3/sh3/process_machdep.c 2007-03-04 15:00:41.000000000 +0900 +++ src/sys/arch/sh3/sh3/process_machdep.c 2007-03-05 09:16:46.000000000 +0900 @@ -110,6 +110,7 @@ #include #include +#include #include #include @@ -189,6 +190,46 @@ } int +process_read_fpregs(struct lwp *l, struct fpreg *fpregs) +{ + __fpregset_t *fp; + int i; + + if (CPU_HAS_FPU) { + fp = &l->l_md.md_pcb->pcb_fpu; + + fpregs->fpr_fpscr = fp->__fpr_fpscr; + fpregs->fpr_fpul = fp->__fpr_fpul; + for (i = 0; i < __arraycount(fpregs->fpr_fr); i++) { + fpregs->fpr_fr[i] = fp->__fpr_regs[i]; + } + } else { + memset(fpregs, 0, sizeof(struct fpreg)); + } + + return (0); +} + +int +process_write_fpregs(struct lwp *l, const struct fpreg *fpregs) +{ + __fpregset_t *fp; + int i; + + if (CPU_HAS_FPU) { + fp = &l->l_md.md_pcb->pcb_fpu; + + fp->__fpr_fpscr = fpregs->fpr_fpscr; + fp->__fpr_fpul = fpregs->fpr_fpul; + for (i = 0; i < __arraycount(fp->__fpr_regs); i++) { + fp->__fpr_regs[i] = fpregs->fpr_fr[i]; + } + } + + return (0); +} + +int process_sstep(struct lwp *l, int sstep) { diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/sys/arch/sh3/sh3/sh3_machdep.c src/sys/arch/sh3/sh3/sh3_machdep.c --- src.orig/sys/arch/sh3/sh3/sh3_machdep.c 2008-04-29 11:39:28.000000000 +0900 +++ src/sys/arch/sh3/sh3/sh3_machdep.c 2008-04-29 11:48:27.000000000 +0900 @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include @@ -160,6 +161,9 @@ /* MMU access ops. */ sh_mmu_init(); + /* FPU access ops. */ + sh_fpu_init(); + /* Hardclock, RTC initialize. 
*/ machine_clock_init(); @@ -579,6 +583,7 @@ unsigned int *flags; { const struct trapframe *tf = l->l_md.md_regs; + const struct pcb *pcb = &l->l_addr->u_pcb; __greg_t *gr = mcp->__gregs; __greg_t ras_pc; @@ -612,8 +617,12 @@ *flags |= _UC_CPU; - /* FPU context is currently not handled by the kernel. */ - memset(&mcp->__fpregs, 0, sizeof (mcp->__fpregs)); + if ((l->l_md.md_flags & MDL_USEDFPU) != 0) { + sh_fpu_save_lwp(l, 0); + memcpy(&mcp->__fpregs, &pcb->pcb_fpu, sizeof(mcp->__fpregs)); + *flags |= _UC_FPU; + } else + memset(&mcp->__fpregs, 0, sizeof (mcp->__fpregs)); } int @@ -623,6 +632,7 @@ unsigned int flags; { struct trapframe *tf = l->l_md.md_regs; + struct pcb *pcb = &l->l_addr->u_pcb; const __greg_t *gr = mcp->__gregs; struct proc *p = l->l_proc; @@ -656,12 +666,10 @@ tf->tf_r15 = gr[_REG_R15]; } -#if 0 - /* XXX: FPU context is currently not handled by the kernel. */ - if (flags & _UC_FPU) { - /* TODO */; + if ((flags & _UC_FPU) != 0) { + sh_fpu_save_lwp(l, 1); + memcpy(&pcb->pcb_fpu, &mcp->__fpregs, sizeof(pcb->pcb_fpu)); } -#endif mutex_enter(p->p_lock); if (flags & _UC_SETSTACK) @@ -681,7 +689,7 @@ { struct trapframe *tf; - l->l_md.md_flags &= ~MDP_USEDFPU; + l->l_md.md_flags &= ~MDL_USEDFPU; tf = l->l_md.md_regs; @@ -702,6 +710,8 @@ tf->tf_r14 = 0; tf->tf_spc = pack->ep_entry; tf->tf_ssr = PSL_USERSET; + if (CPU_IS_SH4) + tf->tf_ssr |= PSL_FDBIT; /* disable FPU */ tf->tf_r15 = stack; } diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/csu/sh3_elf/crt0.c src/lib/csu/sh3_elf/crt0.c --- src.orig/lib/csu/sh3_elf/crt0.c 2004-08-27 06:16:41.000000000 +0900 +++ src/lib/csu/sh3_elf/crt0.c 2007-02-04 00:15:16.000000000 +0900 @@ -92,3 +92,12 @@ #endif /* LIBC_SCCS and not lint */ #include "common.c" + +#ifdef __SH4__ +#include + +int __fpscr_values[2] = { + SH4_FPSCR_INIT & ~(FPSCR_PR | FPSCR_SZ), /* float */ + (SH4_FPSCR_INIT | FPSCR_PR) & ~(FPSCR_SZ) /* double */ +}; +#endif diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/Makefile.inc src/lib/libc/arch/sh3/Makefile.inc --- src.orig/lib/libc/arch/sh3/Makefile.inc 2006-06-18 03:04:23.000000000 +0900 +++ src/lib/libc/arch/sh3/Makefile.inc 2007-02-26 22:57:38.000000000 +0900 @@ -2,5 +2,7 @@ SRCS+= __sigaction14_sigtramp.c __sigtramp2.S +#.if "${MKSOFTFLOAT}" != "no" CPPFLAGS+= -DSOFTFLOAT # -DSOFTFLOAT_NEED_FIXUNS .include +#.endif diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/Makefile.inc src/lib/libc/arch/sh3/gen/Makefile.inc --- src.orig/lib/libc/arch/sh3/gen/Makefile.inc 2006-06-28 23:46:32.000000000 +0900 +++ src/lib/libc/arch/sh3/gen/Makefile.inc 2007-02-26 22:57:38.000000000 +0900 @@ -18,6 +18,11 @@ SRCS+= nanf.c +#.if "${MKSOFTFLOAT}" == "no" +#SRCS+= fpgetmask.c fpgetround.c fpgetsticky.c \ +# fpsetmask.c fpsetround.c fpsetsticky.c +#.endif + SRCS.sh3.gen= Lint__setjmp.c Lint___setjmp14.c Lint___sigsetjmp14.c \ Lint_swapcontext.c diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fabs.c src/lib/libc/arch/sh3/gen/fabs.c --- src.orig/lib/libc/arch/sh3/gen/fabs.c 2006-05-21 11:51:15.000000000 +0900 +++ src/lib/libc/arch/sh3/gen/fabs.c 2007-02-04 00:35:25.000000000 +0900 @@ -35,12 +35,26 @@ * fabs(x) returns the absolute value of x. 
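
GCC's SH back end switches the FPU between single and double precision by loading one of two canned FPSCR images from __fpscr_values: entry 0 has FPSCR_PR clear (single precision), entry 1 has it set (double). The snippet below is illustrative only; fabs.c further down does the same thing for real. It assumes the set_fpscr() accessor from the patched <sh3/fpu.h>:

extern int __fpscr_values[2];

static void
enter_double_mode(void)
{
	/* FPSCR_PR set, FPSCR_SZ clear: 64-bit operations, 32-bit moves */
	set_fpscr(__fpscr_values[1]);
}
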
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/Makefile.inc src/lib/libc/arch/sh3/Makefile.inc
--- src.orig/lib/libc/arch/sh3/Makefile.inc	2006-06-18 03:04:23.000000000 +0900
+++ src/lib/libc/arch/sh3/Makefile.inc	2007-02-26 22:57:38.000000000 +0900
@@ -2,5 +2,7 @@
 
 SRCS+=	__sigaction14_sigtramp.c __sigtramp2.S
 
+#.if "${MKSOFTFLOAT}" != "no"
 CPPFLAGS+= -DSOFTFLOAT # -DSOFTFLOAT_NEED_FIXUNS
 
 .include
+#.endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/Makefile.inc src/lib/libc/arch/sh3/gen/Makefile.inc
--- src.orig/lib/libc/arch/sh3/gen/Makefile.inc	2006-06-28 23:46:32.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/Makefile.inc	2007-02-26 22:57:38.000000000 +0900
@@ -18,6 +18,11 @@
 
 SRCS+=	nanf.c
 
+#.if "${MKSOFTFLOAT}" == "no"
+#SRCS+=	fpgetmask.c fpgetround.c fpgetsticky.c \
+#	fpsetmask.c fpsetround.c fpsetsticky.c
+#.endif
+
 SRCS.sh3.gen=	Lint__setjmp.c Lint___setjmp14.c Lint___sigsetjmp14.c \
 		Lint_swapcontext.c
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fabs.c src/lib/libc/arch/sh3/gen/fabs.c
--- src.orig/lib/libc/arch/sh3/gen/fabs.c	2006-05-21 11:51:15.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fabs.c	2007-02-04 00:35:25.000000000 +0900
@@ -35,12 +35,26 @@
  * fabs(x) returns the absolute value of x.
  */
 
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include
+
 double	fabs(double x);
 
 double
 fabs(double x)
 {
+#if defined(SOFTFLOAT) || !defined(__SH4__)
 	if (x < 0)
 		x = -x;
+#else /* !SOFTFLOAT && __SH4__ */
+	extern int __fpscr_values[2];
+
+	set_fpscr(__fpscr_values[1]);
+	__asm volatile ("fabs %0" : "=f"(x) : "f"(x));
+#endif /* SOFTFLOAT || !__SH4__ */
 	return(x);
 }
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetmask.c src/lib/libc/arch/sh3/gen/fpgetmask.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetmask.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetmask.c	2007-01-30 22:09:10.000000000 +0900
@@ -0,0 +1,78 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpgetmask,_fpgetmask)
+#endif
+
+fp_except
+fpgetmask(void)
+{
+	fp_except mask = 0;
+	int r, e;
+
+	r = get_fpscr();
+
+#ifdef __SH4__
+	if ((r & FPSCR_DN) == 0)
+		mask |= FP_X_DNML;
+#endif
+
+	e = (r >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK;
+#ifdef __SH4__
+	if (e & FP_I_BIT)
+		mask |= FP_X_IMP;
+	if (e & FP_U_BIT)
+		mask |= FP_X_UFL;
+	if (e & FP_O_BIT)
+		mask |= FP_X_OFL;
+#endif
+	if (e & FP_Z_BIT)
+		mask |= FP_X_DZ;
+	if (e & FP_V_BIT)
+		mask |= FP_X_INV;
+
+	return mask;
+}
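
fpgetmask() above, together with fpsetmask() further below, gives userland the standard <ieeefp.h> view of these FPSCR fields. A minimal consumer looks like this (a sketch that assumes only the documented ieeefp(3) interface):

#include <ieeefp.h>
#include <stdio.h>

int
main(void)
{
	fp_except old;

	/* arm divide-by-zero and invalid-operation traps, keep the rest */
	old = fpsetmask(fpgetmask() | FP_X_DZ | FP_X_INV);
	printf("previous mask: %#x\n", (unsigned)old);

	/* 1.0 / 0.0 would now deliver SIGFPE instead of returning Inf */
	return 0;
}
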
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetround.c src/lib/libc/arch/sh3/gen/fpgetround.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetround.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetround.c	2007-01-30 22:09:13.000000000 +0900
@@ -0,0 +1,63 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpgetround,_fpgetround)
+#endif
+
+fp_rnd
+fpgetround(void)
+{
+#ifdef __SH4__
+	int r;
+
+	r = get_fpscr();
+
+	r &= FPSCR_RM;
+	if (r == RM_NEAREST)
+		return FP_RN;
+#endif
+	return FP_RZ;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpgetsticky.c src/lib/libc/arch/sh3/gen/fpgetsticky.c
--- src.orig/lib/libc/arch/sh3/gen/fpgetsticky.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpgetsticky.c	2007-02-01 16:16:58.000000000 +0900
@@ -0,0 +1,78 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpgetsticky,_fpgetsticky)
+#endif
+
+fp_except
+fpgetsticky(void)
+{
+	fp_except flags = 0;
+	int r, f;
+
+	r = get_fpscr();
+
+#if 0	/* SH doesn't have a denormal flag in fpscr. */
+	if (0)
+		flags |= FP_X_DNML;
+#endif
+
+	f = (r >> FP_FLAG_SHIFT) & FP_FLAG_MASK;
+#ifdef __SH4__
+	if (f & FP_I_BIT)
+		flags |= FP_X_IMP;
+	if (f & FP_U_BIT)
+		flags |= FP_X_UFL;
+	if (f & FP_O_BIT)
+		flags |= FP_X_OFL;
+#endif
+	if (f & FP_Z_BIT)
+		flags |= FP_X_DZ;
+	if (f & FP_V_BIT)
+		flags |= FP_X_INV;
+
+	return flags;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetmask.c src/lib/libc/arch/sh3/gen/fpsetmask.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetmask.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetmask.c	2007-02-04 00:38:15.000000000 +0900
@@ -0,0 +1,111 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpsetmask,_fpsetmask)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_except
+fpsetmask(mask)
+	fp_except mask;
+{
+	fp_except old = 0;
+	int o, n, e;
+	int b = 0;
+
+	o = get_fpscr();
+
+	/* new mask */
+	n = o;
+	n &= ~(FP_ENABLE_MASK << FP_ENABLE_SHIFT);
+
+#ifdef __SH4__
+	if (mask & FP_X_DNML)
+		n &= ~FPSCR_DN;
+	else
+#endif
+		n |= FPSCR_DN;
+
+#ifdef __SH4__
+	if (mask & FP_X_IMP)
+		b |= FP_I_BIT;
+	if (mask & FP_X_UFL)
+		b |= FP_U_BIT;
+	if (mask & FP_X_OFL)
+		b |= FP_O_BIT;
+#endif
+	if (mask & FP_X_DZ)
+		b |= FP_Z_BIT;
+	if (mask & FP_X_INV)
+		b |= FP_V_BIT;
+	n |= (b << FP_ENABLE_SHIFT);	/* enable FPU exception */
+	n &= ~(b << FP_FLAG_SHIFT);	/* clear FPU exception flags */
+
+	__set_fpscr(n);
+
+	/* old mask */
+#ifdef __SH4__
+	if ((o & FPSCR_DN) == 0)	/* DN */
+		old |= FP_X_DNML;
+#endif
+
+	e = (o >> FP_ENABLE_SHIFT) & FP_ENABLE_MASK;
+#ifdef __SH4__
+	if (e & FP_I_BIT)
+		old |= FP_X_IMP;
+	if (e & FP_U_BIT)
+		old |= FP_X_UFL;
+	if (e & FP_O_BIT)
+		old |= FP_X_OFL;
+#endif
+	if (e & FP_Z_BIT)
+		old |= FP_X_DZ;
+	if (e & FP_V_BIT)
+		old |= FP_X_INV;
+
+	return old;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetround.c src/lib/libc/arch/sh3/gen/fpsetround.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetround.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetround.c	2007-02-04 00:38:38.000000000 +0900
@@ -0,0 +1,88 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpsetround,_fpsetround)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_rnd
+fpsetround(rnd_dir)
+	fp_rnd rnd_dir;
+{
+	int old, new;
+	int r;
+
+	old = get_fpscr();
+
+	/* new dir */
+	new = old & ~FPSCR_RM;
+
+	switch (rnd_dir) {
+	case FP_RN:
+#ifdef __SH4__
+		r = RM_NEAREST;
+		break;
+#endif
+	case FP_RZ:
+	case FP_RM:
+	case FP_RP:
+	default:
+		r = RM_ZERO;
+		break;
+	}
+	new |= r;
+
+	__set_fpscr(new);
+
+	/* old dir */
+#ifdef __SH4__
+	old &= FPSCR_RM;
+	if (old == RM_NEAREST)
+		return FP_RN;
+#endif
+	return FP_RZ;
+}
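
Note the quirk fpsetround() above encodes: SH4's FPSCR.RM field knows only round-to-nearest and round-to-zero, so requests for FP_RM or FP_RP fall through to RM_ZERO, and on FPU-less SH3 cores every query reads back as FP_RZ. Callers that probe and restore, as in this sketch (standard ieeefp(3) interface only), therefore cannot assume they got the mode they asked for:

#include <ieeefp.h>
#include <stdio.h>

int
main(void)
{
	fp_rnd prev = fpsetround(FP_RP);	/* ask for round-to-plus-infinity */

	if (fpgetround() != FP_RP)
		printf("FP_RP unsupported; running with mode %d instead\n",
		    (int)fpgetround());
	fpsetround(prev);			/* restore the previous mode */
	return 0;
}
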
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/fpsetsticky.c src/lib/libc/arch/sh3/gen/fpsetsticky.c
--- src.orig/lib/libc/arch/sh3/gen/fpsetsticky.c	1970-01-01 09:00:00.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/fpsetsticky.c	2007-02-04 00:38:25.000000000 +0900
@@ -0,0 +1,103 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+
+#include
+#include
+
+#ifdef __weak_alias
+__weak_alias(fpsetsticky,_fpsetsticky)
+#endif
+
+void __set_fpscr(int fpscr);
+
+fp_except
+fpsetsticky(sticky)
+	fp_except sticky;
+{
+	fp_except old = 0;
+	int o, n, f;
+	int b = 0;
+
+	o = get_fpscr();
+
+	/* new flags */
+	n = o;
+
+#ifdef __SH4__
+	if (sticky & FP_X_IMP)
+		b |= FP_I_BIT;
+	if (sticky & FP_X_UFL)
+		b |= FP_U_BIT;
+	if (sticky & FP_X_OFL)
+		b |= FP_O_BIT;
+#endif
+	if (sticky & FP_X_DZ)
+		b |= FP_Z_BIT;
+	if (sticky & FP_X_INV)
+		b |= FP_V_BIT;
+
+	n &= ~(b << FP_FLAG_SHIFT);
+
+	__set_fpscr(n);
+
+	/* old flags */
+#if 0	/* SH doesn't have a denormal flag in fpscr. */
+	if (0)
+		old |= FP_X_DNML;
+#endif
+
+	f = (o >> FP_FLAG_SHIFT) & FP_FLAG_MASK;
+#ifdef __SH4__
+	if (f & FP_I_BIT)
+		old |= FP_X_IMP;
+	if (f & FP_U_BIT)
+		old |= FP_X_UFL;
+	if (f & FP_O_BIT)
+		old |= FP_X_OFL;
+#endif
+	if (f & FP_Z_BIT)
+		old |= FP_X_DZ;
+	if (f & FP_V_BIT)
+		old |= FP_X_INV;
+
+	return old;
+}
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libc/arch/sh3/gen/nanf.c src/lib/libc/arch/sh3/gen/nanf.c
--- src.orig/lib/libc/arch/sh3/gen/nanf.c	2005-04-16 07:39:11.000000000 +0900
+++ src/lib/libc/arch/sh3/gen/nanf.c	2007-01-29 17:14:01.000000000 +0900
@@ -10,8 +10,16 @@
 
 /* bytes for quiet NaN (IEEE single precision) */
 const union __float_u __nanf =
+#if defined(SOFTFLOAT)
 #if BYTE_ORDER == BIG_ENDIAN
 	{ { 0x7f, 0xa0, 0, 0 } };
 #else
 	{ { 0, 0, 0xa0, 0x7f } };
 #endif
+#else /* !SOFTFLOAT */
+#if BYTE_ORDER == BIG_ENDIAN
+	{ { 0x7f, 0xbf, 0xff, 0xff } };
+#else
+	{ { 0xff, 0xff, 0xbf, 0x7f } };
+#endif
+#endif /* SOFTFLOAT */
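
The nanf.c change is easy to misread without decoding the bytes: both images have sign 0 and exponent 0xff and differ only in the fraction. The softfloat library's default quiet NaN is 0x7fa00000, while the hardware FPU generates 0x7fbfffff, and __nanf has to match whichever FPU the binary actually runs on. A tiny check (illustrative only, not part of the patch):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	const unsigned int img[2] = { 0x7fa00000, 0x7fbfffff };
	float f;
	int i;

	for (i = 0; i < 2; i++) {
		memcpy(&f, &img[i], sizeof(f));	/* reinterpret the bits */
		/* exponent all ones plus nonzero fraction: both print "nan" */
		printf("%#010x -> %f\n", img[i], (double)f);
	}
	return 0;
}
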
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/_context_u.S src/lib/libpthread/arch/sh3/_context_u.S
--- src.orig/lib/libpthread/arch/sh3/_context_u.S	2006-01-05 02:44:53.000000000 +0900
+++ src/lib/libpthread/arch/sh3/_context_u.S	2007-02-04 00:39:44.000000000 +0900
@@ -36,6 +36,75 @@
  */
 #define NOTREACHED	trapa #0xc3
 
+/*
+ * FPU context save/restore
+ */
+#ifdef __SH4__
+#define FPUFLAG	_UC_FPU
+#define GETFP(uc) \
+	mov.l	.L_uc_fpregs_offset, r1 ; \
+	add	uc, r1	/* uc->uc_mcontext.__fpregs */ ; \
+	\
+	sts	fpscr, r0 ; \
+	sts	fpul, r2 ; \
+	mov.l	r0, @((_REG_FPSCR * 4), r1) ; \
+	mov.l	r2, @((_REG_FPUL * 4), r1) ; \
+	\
+	mov	#8, r2 ; \
+	swap.w	r2, r2 ; \
+	shll2	r2	/* FPSCR_FR */ ; \
+	tst	r2, r0 ; \
+	bf/s	1f ; \
+	add	#(_REG_FR0 * 4), r1 ; \
+	add	#(16 * 4), r1	/* bank1 */ ; \
+1:	shlr	r2	/* FPSCR_SZ */ ; \
+	not	r2, r2 ; \
+	and	r0, r2 ; \
+	lds	r2, fpscr ; \
+	fmov.s	fr12, @((12 * 4), r1) ; \
+	fmov.s	fr13, @((13 * 4), r1) ; \
+	fmov.s	fr14, @((14 * 4), r1) ; \
+	fmov.s	fr15, @((15 * 4), r1) ; \
+	lds	r0, fpscr
+
+#define SETFP(uc) \
+	mov.l	@(UC_FLAGS, uc), r0 ; \
+	mov.l	.L_uc_fpuflags, r2 ; \
+	tst	r2, r0 ; \
+	bt	.Lsetfp_end ; \
+	\
+	mov.l	.L_uc_fpregs_offset, r1 ; \
+	add	uc, r1	/* uc->uc_mcontext.__fpregs */ ; \
+	\
+	mov.l	@((_REG_FPSCR * 4), r1), r0 ; \
+	mov.l	@((_REG_FPUL * 4), r1), r2 ; \
+	lds	r2, fpul ; \
+	\
+	mov	#8, r2 ; \
+	swap.w	r2, r2 ; \
+	shll2	r2	/* FPSCR_FR */ ; \
+	tst	r2, r0 ; \
+	bf/s	2f ; \
+	add	#(_REG_FR0 * 4), r1 ; \
+	add	#(16 * 4), r1	/* bank1 */ ; \
+2:	shlr	r2	/* FPSCR_SZ */ ; \
+	not	r2, r2 ; \
+	and	r0, r2 ; \
+	lds	r2, fpscr ; \
+	fmov.s	@((12 * 4), r1), fr12 ; \
+	fmov.s	@((13 * 4), r1), fr13 ; \
+	fmov.s	@((14 * 4), r1), fr14 ; \
+	fmov.s	@((15 * 4), r1), fr15 ; \
+	lds	r0, fpscr ; \
+	\
+	mov.l	.L_uc_gregs_offset, r1 ; \
+	add	uc, r1	/* uc->uc_mcontext.__gregs */ ; \
+.Lsetfp_end:
+#else /* !__SH4__ */
+#define FPUFLAG	0
+#define GETFP(uc)
+#define SETFP(uc)
+#endif /* __SH4__ */
 
 /*
  * Only save/restore registers that are callee saved, i.e for which
@@ -64,13 +133,12 @@
 	sts	macl, r0 ; \
 	mov.l	r0, @((_REG_MACL * 4), r1) ; \
 	\
-	mov.l	.L_uc_flags, r0 ; \
-	\
 	add	#(_REG_R15 * 4), r1 ; \
 	mov.l	r15, @r1 ; \
 	\
-	/* XXX: FP registers fr12..fr15? */ \
+	GETFP(uc) ; \
 	\
+	mov.l	.L_uc_flags, r0 ; \
 	mov.l	r0, @(UC_FLAGS, uc)
@@ -99,7 +167,7 @@
 	lds	r0, mach ; \
 	lds	r2, macl ; \
 	\
-	/* XXX: FP registers fr12..fr15? */ \
+	SETFP(uc) ; \
 	\
 	mov.l	@((_REG_PR * 4), r1), r0 ; \
 	mov.l	@((_REG_PC * 4), r1), r2 ; \
@@ -186,8 +254,12 @@
 	.align	2
 .L_uc_gregs_offset:	.long	UC_REGS
-.L_uc_flags:	.long	_UC_USER | _UC_CPU
+.L_uc_fpregs_offset:	.long	UC_FPREGS
+.L_uc_flags:	.long	_UC_USER | _UC_CPU | FPUFLAG
 .L_uc_user:	.long	_UC_USER
+#ifdef __SH4__
+.L_uc_fpuflags:	.long	FPUFLAG
+#endif
 #ifndef PIC
 .L_setcontext:	.long	_C_LABEL(setcontext)
 #endif
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/genassym.cf src/lib/libpthread/arch/sh3/genassym.cf
--- src.orig/lib/libpthread/arch/sh3/genassym.cf	2008-04-29 11:37:22.000000000 +0900
+++ src/lib/libpthread/arch/sh3/genassym.cf	2008-04-29 11:46:38.000000000 +0900
@@ -72,3 +72,38 @@
 define _REG_R0		_REG_R0
 define _REG_R15		_REG_R15
 define _REG_SP		_REG_SP
+
+define _REG_FPSCR	_REG_FPSCR
+define _REG_FPUL	_REG_FPUL
+define _REG_FR0		_REG_FR0
+define _REG_FR1		_REG_FR1
+define _REG_FR2		_REG_FR2
+define _REG_FR3		_REG_FR3
+define _REG_FR4		_REG_FR4
+define _REG_FR5		_REG_FR5
+define _REG_FR6		_REG_FR6
+define _REG_FR7		_REG_FR7
+define _REG_FR8		_REG_FR8
+define _REG_FR9		_REG_FR9
+define _REG_FR10	_REG_FR10
+define _REG_FR11	_REG_FR11
+define _REG_FR12	_REG_FR12
+define _REG_FR13	_REG_FR13
+define _REG_FR14	_REG_FR14
+define _REG_FR15	_REG_FR15
+define _REG_FR16	_REG_FR16
+define _REG_FR17	_REG_FR17
+define _REG_FR18	_REG_FR18
+define _REG_FR19	_REG_FR19
+define _REG_FR20	_REG_FR20
+define _REG_FR21	_REG_FR21
+define _REG_FR22	_REG_FR22
+define _REG_FR23	_REG_FR23
+define _REG_FR24	_REG_FR24
+define _REG_FR25	_REG_FR25
+define _REG_FR26	_REG_FR26
+define _REG_FR27	_REG_FR27
+define _REG_FR28	_REG_FR28
+define _REG_FR29	_REG_FR29
+define _REG_FR30	_REG_FR30
+define _REG_FR31	_REG_FR31
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/lib/libpthread/arch/sh3/pthread_md.h src/lib/libpthread/arch/sh3/pthread_md.h
--- src.orig/lib/libpthread/arch/sh3/pthread_md.h	2008-02-16 07:21:54.000000000 +0900
+++ src/lib/libpthread/arch/sh3/pthread_md.h	2008-02-16 07:27:56.000000000 +0900
@@ -76,7 +76,7 @@
 	(uc)->uc_flags = ((uc)->uc_flags | _UC_CPU) & ~_UC_USER; \
 } while (/*CONSTCOND*/0)
 
-#if 0 /* no struct fpreg!!! */
+#ifdef __SH4__
 
 #define PTHREAD_UCONTEXT_TO_FPREG(freg, uc) \
 	memcpy((freg), &(uc)->uc_mcontext.__fpregs, sizeof(*(freg)));
diff --exclude=CVS --exclude='obj.*' --exclude=compile --exclude='*.user' -uNr src.orig/libexec/ld.elf_so/arch/sh3/rtld_start.S src/libexec/ld.elf_so/arch/sh3/rtld_start.S
--- src.orig/libexec/ld.elf_so/arch/sh3/rtld_start.S	2008-04-29 11:37:25.000000000 +0900
+++ src/libexec/ld.elf_so/arch/sh3/rtld_start.S	2008-04-29 11:46:43.000000000 +0900
@@ -30,6 +30,7 @@
  */
 
 #include
+#include
 
 	.text
 	.align	2
@@ -93,6 +94,25 @@
 	mov.l	r7,@-r15
 	sts.l	mach,@-r15
 	sts.l	macl,@-r15
+#if defined(__SH4__)
+	sts	fpscr,r4
+	mov	#8,r5
+	swap.w	r5,r5
+	shll	r5	/* FPSCR_SZ */
+	not	r5,r5
+	and	r4,r5
+	lds	r5,fpscr
+	fmov.s	fr11,@-r15
+	fmov.s	fr10,@-r15
+	fmov.s	fr9,@-r15
+	fmov.s	fr8,@-r15
+	fmov.s	fr7,@-r15
+	fmov.s	fr6,@-r15
+	fmov.s	fr5,@-r15
+	fmov.s	fr4,@-r15
+	sts.l	fpul,@-r15
+	mov.l	r4,@-r15
+#endif
 	sts.l	pr,@-r15
 
 	mov	r0,r4	/* copy of obj */
@@ -101,6 +121,26 @@
 	mov	r1,r5	/* copy of reloff */
 4:
 	lds.l	@r15+,pr	/* restore registers */
+#if defined(__SH4__)
+	mov.l	@r15+,r4
+	lds	r4,fpscr
+	mov	#8,r5
+	swap.w	r5,r5
+	shll	r5	/* FPSCR_SZ */
+	not	r5,r5
+	and	r4,r5
+	lds	r5,fpscr
+	lds.l	@r15+,fpul
+	fmov.s	@r15+,fr4
+	fmov.s	@r15+,fr5
+	fmov.s	@r15+,fr6
+	fmov.s	@r15+,fr7
+	fmov.s	@r15+,fr8
+	fmov.s	@r15+,fr9
+	fmov.s	@r15+,fr10
+	fmov.s	@r15+,fr11
+	lds	r4,fpscr
+#endif
 	lds.l	@r15+,macl
 	lds.l	@r15+,mach
 	mov.l	@r15+,r7
--- src.orig/gnu/dist/gcc4/gcc/config/sh/lib1funcs.asm	2006-04-20 18:49:36.000000000 +0900
+++ src/gnu/dist/gcc4/gcc/config/sh/lib1funcs.asm	2007-01-21 01:10:31.000000000 +0900
@@ -41,8 +41,13 @@
 #ifdef __ELF__
 #define LOCAL(X)	.L_##X
 #define FUNC(X)		.type X,@function
+#ifdef PIC
 #define HIDDEN_FUNC(X)	FUNC(X); .hidden X
 #define HIDDEN_ALIAS(X,Y)	ALIAS (X,Y); .hidden GLOBAL(X)
+#else
+#define HIDDEN_FUNC(X)	FUNC(X);
+#define HIDDEN_ALIAS(X,Y)	ALIAS (X,Y);
+#endif
 #define ENDFUNC0(X)	.Lfe_##X: .size X,.Lfe_##X-X
 #define ENDFUNC(X)	ENDFUNC0(X)
 #else
@@ -1035,7 +1040,7 @@
 #ifdef L_sdivsi3
 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
    sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__NetBSD__)
 !!
 !! Steve Chamberlain
 !! sac@cygnus.com
@@ -1486,7 +1491,7 @@
 #ifdef L_udivsi3
 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
    sh2e/sh3e code.  */
-#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined(__NetBSD__)
 
 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
 	.global	GLOBAL(udivsi3)
--- src.orig/distrib/sets/lists/comp/ad.sh3	2007-02-10 07:22:32.000000000 +0900
+++ src/distrib/sets/lists/comp/ad.sh3	2007-02-10 11:15:00.000000000 +0900
@@ -17,6 +17,7 @@
 ./usr/include/sh3/endian.h			comp-c-include
 ./usr/include/sh3/endian_machdep.h		comp-c-include
 ./usr/include/sh3/float.h			comp-c-include
+./usr/include/sh3/fpu.h				comp-c-include
 ./usr/include/sh3/frame.h			comp-c-include
 ./usr/include/sh3/ieee.h			comp-c-include
 ./usr/include/sh3/ieeefp.h			comp-c-include
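
Finally, the PT_GETFPREGS/PT_SETFPREGS requests added to ptrace.h earlier, backed by process_read_fpregs() and process_write_fpregs(), round out the debugger view of the new state. A minimal sketch of a debugger-side consumer, using only the struct fpreg this patch defines plus the standard ptrace(2) call:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>
#include <stdio.h>

/* pid must name a traced, stopped child */
static void
show_fpu_state(pid_t pid)
{
	struct fpreg fpr;

	if (ptrace(PT_GETFPREGS, pid, &fpr, 0) == -1) {
		perror("PT_GETFPREGS");
		return;
	}
	/* fpr_fr[] holds both register banks; fpscr/fpul ride alongside */
	printf("fpscr = %#x, fpul = %#x, fr0 = %#x\n",
	    fpr.fpr_fpscr, fpr.fpr_fpul, fpr.fpr_fr[0]);
}
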