Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

floatem.c

Go to the documentation of this file.
00001 00010 /*++ 00011 00012 Copyright (c) 1996 Intel Corporation 00013 00014 Module Name: 00015 00016 floatem.c 00017 00018 Abstract: 00019 00020 This module implements IA64 machine dependent floating point emulation 00021 functions to support the IEEE floating point standard. 00022 00023 Author: 00024 00025 Marius Cornea-Hasegan Sep-96 00026 00027 Environment: 00028 00029 Kernel mode only. 00030 00031 Revision History: 00032 00033 Modfied Jan. 97, Jan 98, Jun 98 (new API) 00034 00035 --*/ 00036 00037 #if 0 00038 /* #define this in floatem.c, fedefs.h and fesupport.c */ 00039 #define DEBUG_UNIX 00040 #endif 00041 00042 #include "ki.h" 00043 #include "fedefs.h" 00044 #include "fetypes.h" 00045 #include "fesupprt.h" 00046 #include "feproto.h" 00047 00048 #ifdef TRUE 00049 #undef TRUE 00050 #endif 00051 #define TRUE 0 00052 00053 #ifdef FALSE 00054 #undef FALSE 00055 #endif 00056 #define FALSE 1 00057 00058 #define FP_EMUL_ERROR -1 00059 #define FAULT_TO_TRAP 2 00060 #define SIMD_INSTRUCTION 4 00061 #define FPFLT 1 00062 #define FPTRAP 0 00063 #define FP_REG_EMIN -65534 00064 #define FP_REG_EMAX 65535 00065 #define N64 64 00066 00067 typedef struct _BUNDLE { 00068 EM_uint64_t BundleLow; 00069 EM_uint64_t BundleHigh; 00070 } BUNDLE; 00071 00072 #ifdef WIN32_OR_WIN64 00073 typedef struct __declspec(align(16)) _FLOAT128_TYPE { 00074 #else 00075 typedef struct _FLOAT128_TYPE { 00076 #endif 00077 EM_uint64_t loFlt64; 00078 EM_uint64_t hiFlt64; 00079 } FLOAT128_TYPE; 00080 00081 00082 #ifndef CONST_FORMAT 00083 00084 #ifndef WIN32_OR_WIN64 00085 #define CONST_FORMAT(num) num##LL 00086 #else 00087 #define CONST_FORMAT(num) ((EM_uint64_t)(num)) 00088 #endif 00089 00090 #endif 00091 00092 00093 // Functions (static or external) 00094 00095 int 00096 swa_trap (EM_opcode_sf_type sf, EM_uint64_t FPSR, EM_uint_t ISRlow); 00097 00098 EM_fp_reg_type 00099 FP128ToFPReg ( 00100 FLOAT128_TYPE f128 00101 ); 00102 00103 FLOAT128_TYPE 00104 FPRegToFP128 ( 00105 EM_fp_reg_type fpreg 00106 ); 00107 00108 FLOAT128_TYPE 00109 get_fp_register ( 00110 int reg, 00111 void *fp_state 00112 ); 00113 00114 void 00115 set_fp_register ( 00116 int reg, 00117 FLOAT128_TYPE value, 00118 void *fp_state 00119 ); 00120 00121 00122 void run_fms (EM_uint64_t *fpsr, 00123 FLOAT128_TYPE *d, FLOAT128_TYPE *a, FLOAT128_TYPE *b, FLOAT128_TYPE *c); 00124 void thmF (EM_uint64_t *fpsr, FLOAT128_TYPE *a, FLOAT128_TYPE *b, 00125 FLOAT128_TYPE *c); 00126 void thmL (EM_uint64_t *fpsr, FLOAT128_TYPE *a, FLOAT128_TYPE *s); 00127 00128 00129 00130 // Masks and patterns for the different faulting FP instruction types 00131 // Note: Fn_MIN_MASK and Fn_PATTERN need to be checked if new opcodes 00132 // are inserted in this function 00133 00134 00135 #define F1_MIN_MASK CONST_FORMAT(0x010000000000) 00136 #define F1_PATTERN CONST_FORMAT(0x010000000000) 00137 00138 #define F1_MASK CONST_FORMAT(0x01F000000000) 00139 00140 #define FMA_PATTERN CONST_FORMAT(0x010000000000) 00141 #define FMA_S_PATTERN CONST_FORMAT(0x011000000000) 00142 #define FMA_D_PATTERN CONST_FORMAT(0x012000000000) 00143 #define FPMA_PATTERN CONST_FORMAT(0x013000000000) 00144 00145 #define FMS_PATTERN CONST_FORMAT(0x014000000000) 00146 #define FMS_S_PATTERN CONST_FORMAT(0x015000000000) 00147 #define FMS_D_PATTERN CONST_FORMAT(0x016000000000) 00148 #define FPMS_PATTERN CONST_FORMAT(0x017000000000) 00149 00150 #define FNMA_PATTERN CONST_FORMAT(0x018000000000) 00151 #define FNMA_S_PATTERN CONST_FORMAT(0x019000000000) 00152 #define FNMA_D_PATTERN CONST_FORMAT(0x01A000000000) 00153 #define FPNMA_PATTERN CONST_FORMAT(0x01B000000000) 00154 00155 00156 #define F4_MIN_MASK CONST_FORMAT(0x018000000000) 00157 #define F4_PATTERN CONST_FORMAT(0x008000000000) 00158 00159 #define F4_MASK CONST_FORMAT(0x01F200001000) 00160 00161 #define FCMP_EQ_PATTERN CONST_FORMAT(0x008000000000) 00162 #define FCMP_LT_PATTERN CONST_FORMAT(0x009000000000) 00163 #define FCMP_LE_PATTERN CONST_FORMAT(0x008200000000) 00164 #define FCMP_UNORD_PATTERN CONST_FORMAT(0x009200000000) 00165 #define FCMP_EQ_UNC_PATTERN CONST_FORMAT(0x008000001000) 00166 #define FCMP_LT_UNC_PATTERN CONST_FORMAT(0x009000001000) 00167 #define FCMP_LE_UNC_PATTERN CONST_FORMAT(0x008200001000) 00168 #define FCMP_UNORD_UNC_PATTERN CONST_FORMAT(0x009200001000) 00169 00170 00171 #define F6_MIN_MASK CONST_FORMAT(0x019200000000) 00172 #define F6_PATTERN CONST_FORMAT(0x000200000000) 00173 00174 #define F6_MASK CONST_FORMAT(0x01F200000000) 00175 00176 #define FRCPA_PATTERN CONST_FORMAT(0x000200000000) 00177 #define FPRCPA_PATTERN CONST_FORMAT(0x002200000000) 00178 00179 00180 #define F7_MIN_MASK CONST_FORMAT(0x019200000000) 00181 #define F7_PATTERN CONST_FORMAT(0x001200000000) 00182 00183 #define F7_MASK CONST_FORMAT(0x01F200000000) 00184 00185 #define FRSQRTA_PATTERN CONST_FORMAT(0x001200000000) 00186 #define FPRSQRTA_PATTERN CONST_FORMAT(0x003200000000) 00187 00188 00189 #define F8_MIN_MASK CONST_FORMAT(0x018240000000) 00190 #define F8_PATTERN CONST_FORMAT(0x000000000000) 00191 00192 #define F8_MASK CONST_FORMAT(0x01E3F8000000) 00193 00194 #define FMIN_PATTERN CONST_FORMAT(0x0000A0000000) 00195 #define FMAX_PATTERN CONST_FORMAT(0x0000A8000000) 00196 #define FAMIN_PATTERN CONST_FORMAT(0x0000B0000000) 00197 #define FAMAX_PATTERN CONST_FORMAT(0x0000B8000000) 00198 #define FPMIN_PATTERN CONST_FORMAT(0x0020A0000000) 00199 #define FPMAX_PATTERN CONST_FORMAT(0x0020A8000000) 00200 #define FPAMIN_PATTERN CONST_FORMAT(0x0020B0000000) 00201 #define FPAMAX_PATTERN CONST_FORMAT(0x0020B8000000) 00202 #define FPCMP_EQ_PATTERN CONST_FORMAT(0x002180000000) 00203 #define FPCMP_LT_PATTERN CONST_FORMAT(0x002188000000) 00204 #define FPCMP_LE_PATTERN CONST_FORMAT(0x002190000000) 00205 #define FPCMP_UNORD_PATTERN CONST_FORMAT(0x002198000000) 00206 #define FPCMP_NEQ_PATTERN CONST_FORMAT(0x0021A0000000) 00207 #define FPCMP_NLT_PATTERN CONST_FORMAT(0x0021A8000000) 00208 #define FPCMP_NLE_PATTERN CONST_FORMAT(0x0021B0000000) 00209 #define FPCMP_ORD_PATTERN CONST_FORMAT(0x0021B8000000) 00210 00211 00212 #define F10_MIN_MASK CONST_FORMAT(0x018240000000) 00213 #define F10_PATTERN CONST_FORMAT(0x000040000000) 00214 00215 #define F10_MASK CONST_FORMAT(0x01E3F8000000) 00216 00217 #define FCVT_FX_PATTERN CONST_FORMAT(0x0000C0000000) 00218 #define FCVT_FXU_PATTERN CONST_FORMAT(0x0000C8000000) 00219 #define FCVT_FX_TRUNC_PATTERN CONST_FORMAT(0x0000D0000000) 00220 #define FCVT_FXU_TRUNC_PATTERN CONST_FORMAT(0x0000D8000000) 00221 #define FPCVT_FX_PATTERN CONST_FORMAT(0x0020C0000000) 00222 #define FPCVT_FXU_PATTERN CONST_FORMAT(0x0020C8000000) 00223 #define FPCVT_FX_TRUNC_PATTERN CONST_FORMAT(0x0020D0000000) 00224 #define FPCVT_FXU_TRUNC_PATTERN CONST_FORMAT(0x0020D8000000) 00225 00226 00227 00228 // minimum and maximum values of the exponent 00229 00230 #define EMIN_08_BITS -126 00231 #define EMIN_11_BITS -1022 00232 #define EMIN_15_BITS -16382 00233 #define EMIN_17_BITS -65534 00234 00235 00236 00237 int 00238 fp_emulate ( 00239 int trap_type, 00240 BUNDLE *pbundle, 00241 EM_int64_t *pipsr, 00242 EM_int64_t *pfpsr, 00243 EM_int64_t *pisr, 00244 EM_int64_t *ppreds, 00245 EM_int64_t *pifs, 00246 void *fp_state 00247 ) 00248 00249 { 00250 00251 EM_uint64_t BundleHigh; 00252 EM_uint64_t BundleLow; 00253 EM_uint_t ISRlow; 00254 EM_uint_t ei; 00255 EM_uint64_t OpCode; 00256 00257 EM_uint_t fault_ISR_code; 00258 EM_uint_t trap_ISR_code; 00259 00260 EM_uint64_t FPSR; 00261 EM_uint64_t FPSR1; 00262 EM_uint64_t CFM; 00263 00264 // arguments to emulation functions 00265 EM_opcode_sf_type sf; 00266 EM_pred_reg_specifier qp; 00267 EM_fp_reg_specifier f1; 00268 EM_fp_reg_specifier f2; 00269 EM_fp_reg_specifier f3; 00270 EM_fp_reg_specifier f4; 00271 EM_pred_reg_specifier p1; 00272 EM_pred_reg_specifier p2; 00273 00274 EM_opcode_pc_type opcode_pc; 00275 EM_sf_pc_type pc; 00276 EM_sf_rc_type rc; 00277 EM_uint_t wre; 00278 00279 int significand_size; 00280 00281 EM_uint_t fpa, fpa_lo, fpa_hi; 00282 EM_uint_t I_exc, I_exc_lo, I_exc_hi; 00283 EM_uint_t U_exc, U_exc_lo, U_exc_hi; 00284 EM_uint_t O_exc, O_exc_lo, O_exc_hi; 00285 EM_uint_t sign, sign_lo, sign_hi; 00286 EM_uint_t exponent, exponent_lo, exponent_hi; 00287 EM_uint64_t significand; 00288 EM_uint_t significand_lo, significand_hi; 00289 EM_uint64_t low_half; 00290 EM_uint64_t high_half; 00291 EM_uint_t lsb, lsb_lo, lsb_hi; 00292 EM_uint_t round, round_lo, round_hi; 00293 EM_uint_t sticky, sticky_lo, sticky_hi; 00294 EM_uint_t I_dis, U_dis, O_dis; 00295 EM_uint_t Z_dis, D_dis, V_dis; 00296 00297 EM_fp_reg_type tmp_fp; 00298 00299 EM_int_t true_bexp, true_bexp_lo, true_bexp_hi; 00300 EM_int_t shift_cnt, shift_cnt_lo, shift_cnt_hi; 00301 EM_int_t emin; 00302 EM_int_t decr_exp, decr_exp_lo, decr_exp_hi; 00303 int ind; 00304 00305 EM_uint_t EmulationExceptionCode; 00306 00307 EM_state_type proc_state, *ps; 00308 00309 EM_uint_t SIMD_instruction; 00310 00311 // sign, exponent, and significand for the operands of a and b 00312 // in FRCPA and FRSQRTA 00313 00314 EM_uint_t sign_a; 00315 EM_int_t exponent_a; 00316 EM_uint64_t significand_a; 00317 EM_uint_t sign_b; 00318 EM_int_t exponent_b; 00319 EM_uint64_t significand_b; 00320 EM_int_t sign_c; 00321 EM_int_t exponent_c; 00322 EM_uint64_t significand_c; 00323 00324 FLOAT128_TYPE a_float128; 00325 FLOAT128_TYPE b_float128; 00326 FLOAT128_TYPE c_float128; 00327 FLOAT128_TYPE c1_float128; 00328 FLOAT128_TYPE d_float128; 00329 FLOAT128_TYPE s_float128; 00330 int I_flag; 00331 int ftz; 00332 int unnormal; 00333 int new_trap_type; 00334 00335 // local index registers 00336 int lf1 = 5; 00337 int lf2 = 2; 00338 int lf3 = 3; 00339 int lf4 = 4; 00340 00341 unsigned int rrbpr; 00342 unsigned int rrbfr; 00343 00344 00345 #ifdef DEBUG_UNIX 00346 printf ("**** DEBUG: ENTERING fp_emulate () ****\n"); 00347 #endif 00348 00349 ps = &proc_state; 00350 EM_initialize_state (ps); 00351 // do not reg any exception handlers 00352 #ifndef unix 00353 f1 = 127; // initialize f1 (for MS compiler only; not really needed) 00354 #endif 00355 SIMD_instruction = 2; 00356 ei = (EM_uint_t)0; 00357 OpCode = (EM_uint64_t)0; 00358 00359 BundleLow = pbundle->BundleLow; 00360 BundleHigh = pbundle->BundleHigh; 00361 #ifdef DEBUG_UNIX 00362 printf ("fp_emulate DEBUG: Bundle High/Low = %Lx %Lx\n", 00363 BundleHigh, BundleLow); 00364 #endif 00365 00366 ISRlow = (EM_uint_t)(*pisr); 00367 00368 // FP status reg 00369 FPSR = *pfpsr; 00370 CFM = *pifs & CONST_FORMAT(0x03fffffffff); 00371 rrbpr = (unsigned int)((CFM >> 32) & 0x3f); 00372 rrbfr = (unsigned int)((CFM >> 25) & 0x7f); 00373 #ifdef DEBUG_UNIX 00374 printf ("fp_emulate DEBUG: FPSR = %Lx\n", FPSR); 00375 printf ("fp_emulate DEBUG: CFM = %Lx\n", CFM); 00376 printf ("fp_emulate DEBUG: rrbpr = %x rrbfr = %x\n", rrbpr, rrbfr); 00377 printf ("fp_emulate DEBUG: PREDS = %Lx\n", *ppreds); 00378 printf ("fp_emulate DEBUG: ISRlow = %x\n", ISRlow); 00379 #endif 00380 00381 // copy the FPSR into AR[0] 00382 ps->state_AR[0].uint_value = FPSR; 00383 00384 #ifdef DEBUG_UNIX 00385 OpCode = (BundleLow >> 5) & CONST_FORMAT(0x01ffffffffff); 00386 printf ("DEBUG: OpCode0 = %Lx\n", OpCode); 00387 OpCode = ((BundleHigh & CONST_FORMAT(0x07fffff)) << 18) | 00388 ((BundleLow >> 46) & CONST_FORMAT(0x03ffff)); 00389 printf ("DEBUG: OpCode1 = %Lx\n", OpCode); 00390 OpCode = (BundleHigh >> 23) & CONST_FORMAT(0x01ffffffffff); 00391 printf ("DEBUG: OpCode2 = %Lx\n", OpCode); 00392 #endif 00393 00394 // excepting instruction in bundle: slot 0, 1, or 2 00395 ei = (EM_uint_t)(((*pisr) >> 41) & 0x03); 00396 // cut the faulting instruction opcode (41 bits) 00397 if (ei == 0) { // no template for this case 00398 // OpCode = (BundleLow >> 5) & CONST_FORMAT(0x01ffffffffff); 00399 #ifndef unix 00400 # if DBG 00401 DbgPrint ("fp_emulate () Internal Error: template FXX\n"); 00402 # endif 00403 #else 00404 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error: template FXX\n"); 00405 return (FP_EMUL_ERROR); 00406 #endif 00407 } else if (ei == 1) { // templates: MFI, MFB 00408 OpCode = ((BundleHigh & CONST_FORMAT(0x07fffff)) << 18) | 00409 ((BundleLow >> 46) & CONST_FORMAT(0x03ffff)); 00410 #ifdef DEBUG_UNIX 00411 printf ("DEBUG: ei = 1 OpCode = %Lx\n", OpCode); 00412 #endif 00413 } else if (ei == 2) { // templates: MMF 00414 OpCode = (BundleHigh >> 23) & CONST_FORMAT(0x01ffffffffff); 00415 #ifdef DEBUG_UNIX 00416 printf ("DEBUG: ei = 2 OpCode = %Lx\n", OpCode); 00417 #endif 00418 } else { 00419 #ifndef unix 00420 # if DBG 00421 DbgPrint ("fp_emulate () Internal Error: instruction slot 3 is invalid\n"); 00422 # endif 00423 #else 00424 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error: \ 00425 instruction slot 3 is not valid\n"); 00426 return (FP_EMUL_ERROR); 00427 #endif 00428 } 00429 00430 // decode the instruction opcode; assume fp_emulate () is only called 00431 // for FP instructions that caused an FP fault or trap 00432 00433 // sf and qp have the same offset, for all the FP instructions 00434 sf = (EM_opcode_sf_type)((OpCode >> 34) & CONST_FORMAT(0x000000000003)); 00435 qp = (EM_uint_t)(OpCode & CONST_FORMAT(0x00000000003F)); 00436 if (qp >= 16) qp = 16 + (rrbpr + qp - 16) % 48; 00437 00438 // read predicate reg qp 00439 ps->state_PR[qp] = (EM_boolean_t)(((*ppreds) >> qp) & 0x01); 00440 00441 if (ps->state_PR[qp] == 0) { 00442 #ifdef DEBUG_UNIX 00443 printf ("fp_emulate DEBUG: QUALIFYING PREDICATE %d IS 0\n", qp); 00444 #endif 00445 #ifndef unix 00446 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00447 qualifying predicate PR[%2.2d] = 0\n", qp); 00448 #else 00449 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00450 qualifying predicate PR[%2.2d] = 0\n", qp); 00451 return (FP_EMUL_ERROR); 00452 #endif 00453 } 00454 00455 I_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00456 ((FPSR >> 5) & 0x01); 00457 U_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00458 ((FPSR >> 4) & 0x01); 00459 O_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00460 ((FPSR >> 3) & 0x01); 00461 Z_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00462 ((FPSR >> 2) & 0x01); 00463 D_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00464 ((FPSR >> 1) & 0x01); 00465 V_dis = sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 00466 (FPSR & 0x01); 00467 00468 00469 if ((trap_type == FPFLT) && 00470 (ISRlow & 0x0088)) { // if this is a SWA fault 00471 00472 // this will occur only for unnormal inputs for Merced, or for 00473 // architecturally mandated conditions for divide and square root 00474 // reciprocal approximations 00475 00476 // decode the rest of the instruction 00477 if ((OpCode & F1_MIN_MASK) == F1_PATTERN) { 00478 // F1 instruction 00479 00480 // extract f4, f3, f2, and f1 00481 f4 = (EM_uint_t)((OpCode >> 27) & CONST_FORMAT(0x00000000007F)); 00482 if (f4 >= 32) f4 = 32 + (rrbfr + f4 - 32) % 96; 00483 f3 = (EM_uint_t)((OpCode >> 20) & CONST_FORMAT(0x00000000007F)); 00484 if (f3 >= 32) f3 = 32 + (rrbfr + f3 - 32) % 96; 00485 f2 = (EM_uint_t)((OpCode >> 13) & CONST_FORMAT(0x00000000007F)); 00486 if (f2 >= 32) f2 = 32 + (rrbfr + f2 - 32) % 96; 00487 f1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 00488 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 00489 00490 #ifdef DEBUG_UNIX 00491 printf ("DEBUG BEF. F1 SWA FAULT: f1 f2 f3 f4 = %x %x %x %x\n", f1, f2, f3, f4); 00492 #endif 00493 00494 // get source floating-point reg values 00495 ps->state_FR[lf2] = FP128ToFPReg (get_fp_register (f2, fp_state)); 00496 ps->state_FR[lf3] = FP128ToFPReg (get_fp_register (f3, fp_state)); 00497 ps->state_FR[lf4] = FP128ToFPReg (get_fp_register (f4, fp_state)); 00498 00499 #ifdef DEBUG_UNIX 00500 printf ("DEBUG BEFORE F1 SWA FAULT: ps->state_FR[lf2] = %x %x %Lx\n", 00501 ps->state_FR[lf2].sign, ps->state_FR[lf2].exponent, ps->state_FR[lf2].significand); 00502 printf ("DEBUG BEFORE F1 SWA FAULT: ps->state_FR[lf3] = %x %x %Lx\n", 00503 ps->state_FR[lf3].sign, ps->state_FR[lf3].exponent, ps->state_FR[lf3].significand); 00504 printf ("DEBUG BEFORE F1 SWA FAULT: ps->state_FR[lf4] = %x %x %Lx\n", 00505 ps->state_FR[lf4].sign, ps->state_FR[lf4].exponent, ps->state_FR[lf4].significand); 00506 #endif 00507 00508 switch (OpCode & F1_MASK) { 00509 00510 case FMA_PATTERN: 00511 SIMD_instruction = 0; 00512 fma (ps, pc_sf, sf, qp, lf1, lf3, lf4, lf2); 00513 break; 00514 case FMA_S_PATTERN: 00515 SIMD_instruction = 0; 00516 fma (ps, pc_s, sf, qp, lf1, lf3, lf4, lf2); 00517 break; 00518 case FMA_D_PATTERN: 00519 SIMD_instruction = 0; 00520 fma (ps, pc_d, sf, qp, lf1, lf3, lf4, lf2); 00521 break; 00522 case FPMA_PATTERN: 00523 SIMD_instruction = 1; 00524 fpma (ps, sf, qp, lf1, lf3, lf4, lf2); 00525 break; 00526 00527 case FMS_PATTERN: 00528 SIMD_instruction = 0; 00529 fms (ps, pc_sf, sf, qp, lf1, lf3, lf4, lf2); 00530 break; 00531 case FMS_S_PATTERN: 00532 SIMD_instruction = 0; 00533 fms (ps, pc_s, sf, qp, lf1, lf3, lf4, lf2); 00534 break; 00535 case FMS_D_PATTERN: 00536 SIMD_instruction = 0; 00537 fms (ps, pc_d, sf, qp, lf1, lf3, lf4, lf2); 00538 break; 00539 case FPMS_PATTERN: 00540 SIMD_instruction = 1; 00541 fpms (ps, sf, qp, lf1, lf3, lf4, lf2); 00542 break; 00543 00544 case FNMA_PATTERN: 00545 SIMD_instruction = 0; 00546 fnma (ps, pc_sf, sf, qp, lf1, lf3, lf4, lf2); 00547 break; 00548 case FNMA_S_PATTERN: 00549 SIMD_instruction = 0; 00550 fnma (ps, pc_s, sf, qp, lf1, lf3, lf4, lf2); 00551 break; 00552 case FNMA_D_PATTERN: 00553 SIMD_instruction = 0; 00554 fnma (ps, pc_d, sf, qp, lf1, lf3, lf4, lf2); 00555 break; 00556 case FPNMA_PATTERN: 00557 SIMD_instruction = 1; 00558 fpnma (ps, sf, qp, lf1, lf3, lf4, lf2); 00559 break; 00560 default: 00561 // unrecognized instruction type 00562 #ifndef unix 00563 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00564 instruction opcode %8x %8x not recognized\n", OpCode); 00565 #else 00566 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00567 instruction opcode %Lx not recognized\n", OpCode); 00568 return (FP_EMUL_ERROR); 00569 #endif 00570 00571 } 00572 00573 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 00574 00575 // successful emulation 00576 // set the destination floating-point reg value 00577 #ifdef DEBUG_UNIX 00578 printf ("DEBUG AFTER F1 SWA FAULT: ps->state_FR[lf1] = %x %x %Lx\n", 00579 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 00580 ps->state_FR[lf1].significand); 00581 #endif 00582 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 00583 if (f1 < 32) 00584 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 00585 else 00586 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 00587 00588 *pfpsr = ps->state_AR[0].uint_value; 00589 return (TRUE); 00590 00591 00592 } else if ((OpCode & F4_MIN_MASK) == F4_PATTERN) { 00593 // F4 instruction 00594 00595 // extract p2, f3, f2, and p1 00596 p2 = (EM_uint_t)((OpCode >> 27) & CONST_FORMAT(0x00000000003f)); 00597 if (p2 >= 16) p2 = 16 + (rrbpr + p2 - 16) % 48; 00598 f3 = (EM_uint_t)((OpCode >> 20) & CONST_FORMAT(0x00000000007F)); 00599 if (f3 >= 32) f3 = 32 + (rrbfr + f3 - 32) % 96; 00600 f2 = (EM_uint_t)((OpCode >> 13) & CONST_FORMAT(0x00000000007F)); 00601 if (f2 >= 32) f2 = 32 + (rrbfr + f2 - 32) % 96; 00602 p1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000003F)); 00603 if (p1 >= 16) p1 = 16 + (rrbpr + p1 - 16) % 48; 00604 00605 // get source floating-point reg values 00606 ps->state_FR[lf2] = FP128ToFPReg (get_fp_register (f2, fp_state)); 00607 ps->state_FR[lf3] = FP128ToFPReg (get_fp_register (f3, fp_state)); 00608 #ifdef DEBUG_UNIX 00609 printf ("DEBUG BEFORE F4 SWA FAULT: ps->state_FR[lf2] = %x %x %Lx\n", 00610 ps->state_FR[lf2].sign, ps->state_FR[lf2].exponent, ps->state_FR[lf2].significand); 00611 printf ("DEBUG BEFORE F4 SWA FAULT: ps->state_FR[lf3] = %x %x %Lx\n", 00612 ps->state_FR[lf3].sign, ps->state_FR[lf3].exponent, ps->state_FR[lf3].significand); 00613 #endif 00614 00615 switch (OpCode & F4_MASK) { 00616 00617 case FCMP_EQ_PATTERN: 00618 SIMD_instruction = 0; 00619 fcmp_eq (ps, ctype_none, sf, qp, p1, p2, lf2, lf3); 00620 break; 00621 case FCMP_LT_PATTERN: 00622 SIMD_instruction = 0; 00623 fcmp_lt (ps, ctype_none, sf, qp, p1, p2, lf2, lf3); 00624 break; 00625 case FCMP_LE_PATTERN: 00626 SIMD_instruction = 0; 00627 fcmp_le (ps, ctype_none, sf, qp, p1, p2, lf2, lf3); 00628 break; 00629 case FCMP_UNORD_PATTERN: 00630 SIMD_instruction = 0; 00631 fcmp_unord (ps, ctype_none, sf, qp, p1, p2, lf2, lf3); 00632 break; 00633 00634 case FCMP_EQ_UNC_PATTERN: 00635 SIMD_instruction = 0; 00636 fcmp_eq (ps, fctypeUNC, sf, qp, p1, p2, lf2, lf3); 00637 break; 00638 case FCMP_LT_UNC_PATTERN: 00639 SIMD_instruction = 0; 00640 fcmp_lt (ps, fctypeUNC, sf, qp, p1, p2, lf2, lf3); 00641 break; 00642 case FCMP_LE_UNC_PATTERN: 00643 SIMD_instruction = 0; 00644 fcmp_le (ps, fctypeUNC, sf, qp, p1, p2, lf2, lf3); 00645 break; 00646 case FCMP_UNORD_UNC_PATTERN: 00647 SIMD_instruction = 0; 00648 fcmp_unord (ps, fctypeUNC, sf, qp, p1, p2, lf2, lf3); 00649 break; 00650 default: 00651 // unrecognized instruction type 00652 #ifndef unix 00653 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00654 instruction opcode %8x %8x not recognized\n", OpCode); 00655 #else 00656 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00657 instruction opcode %Lx not recognized\n", OpCode); 00658 return (FP_EMUL_ERROR); 00659 #endif 00660 00661 } 00662 00663 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 00664 00665 // successful emulation 00666 // set the destination predicate reg values 00667 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p1)); 00668 *ppreds |= (((EM_uint64_t)(ps->state_PR[p1] & 0x01)) << (EM_uint_t)p1); 00669 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 00670 *ppreds |= (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 00671 *pfpsr = ps->state_AR[0].uint_value; 00672 #ifdef DEBUG_UNIX 00673 printf ("DEBUG AFTER F4 SWA FAULT: *ppreds = %Lx\n", (*ppreds)); 00674 #endif 00675 return (TRUE); 00676 00677 } else if ((OpCode & F6_MIN_MASK) == F6_PATTERN) { 00678 // F6 instruction 00679 switch (OpCode & F6_MASK) { 00680 00681 case FRCPA_PATTERN: 00682 SIMD_instruction = 0; 00683 break; 00684 case FPRCPA_PATTERN: 00685 SIMD_instruction = 1; 00686 break; 00687 default: 00688 // unrecognized instruction type 00689 #ifndef unix 00690 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00691 instruction opcode %8x %8x not recognized\n", OpCode); 00692 #else 00693 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 00694 instruction opcode %Lx not recognized\n", OpCode); 00695 return (FP_EMUL_ERROR); 00696 #endif 00697 } 00698 00699 // extract the ftz bit 00700 ftz = (int)((FPSR >> 6) & 0x01); 00701 00702 // extract the rounding mode 00703 rc = (EM_sf_rc_type)((FPSR >> (6 + 4 + 13 * (EM_uint_t)sf)) & 0x03); 00704 00705 // extract p2, f3, f2, and f1 00706 p2 = (EM_uint_t)((OpCode >> 27) & CONST_FORMAT(0x00000000003f)); 00707 if (p2 >= 16) p2 = 16 + (rrbpr + p2 - 16) % 48; 00708 f3 = (EM_uint_t)((OpCode >> 20) & CONST_FORMAT(0x00000000007F)); 00709 if (f3 >= 32) f3 = 32 + (rrbfr + f3 - 32) % 96; 00710 f2 = (EM_uint_t)((OpCode >> 13) & CONST_FORMAT(0x00000000007F)); 00711 if (f2 >= 32) f2 = 32 + (rrbfr + f2 - 32) % 96; 00712 f1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 00713 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 00714 00715 // get source floating-point reg values 00716 ps->state_FR[lf2] = FP128ToFPReg (get_fp_register (f2, fp_state)); 00717 ps->state_FR[lf3] = FP128ToFPReg (get_fp_register (f3, fp_state)); 00718 #ifdef DEBUG_UNIX 00719 printf ("DEBUG BEFORE F6 SWA FAULT: ps->state_FR[lf2] = %x %x %Lx\n", 00720 ps->state_FR[lf2].sign, ps->state_FR[lf2].exponent, ps->state_FR[lf2].significand); 00721 printf ("DEBUG BEFORE F6 SWA FAULT: ps->state_FR[lf3] = %x %x %Lx\n", 00722 ps->state_FR[lf3].sign, ps->state_FR[lf3].exponent, ps->state_FR[lf3].significand); 00723 #endif 00724 00725 switch (OpCode & F6_MASK) { 00726 00727 case FRCPA_PATTERN: 00728 00729 // extract sign, exponent, and significand of a 00730 sign_a = (EM_uint_t)ps->state_FR[lf2].sign; 00731 exponent_a = (EM_int_t)ps->state_FR[lf2].exponent; 00732 significand_a = ps->state_FR[lf2].significand; 00733 00734 // extract sign, exponent, and significand of b 00735 // note that b cannot be 0 00736 sign_b = (EM_uint_t)ps->state_FR[lf3].sign; 00737 exponent_b = (EM_int_t)ps->state_FR[lf3].exponent; 00738 significand_b = ps->state_FR[lf3].significand; 00739 00740 // if any of a or b is zero or pseudo-zero, return the result 00741 if (significand_a == 0 || significand_b == 0) { 00742 00743 frcpa (ps, sf, qp, lf1, p2, lf2, lf3); 00744 00745 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 00746 00747 *pfpsr = ps->state_AR[0].uint_value; 00748 00749 // set the destination floating-point and predicate reg values 00750 #ifdef DEBUG_UNIX 00751 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b ZERO/PSEUDO-ZERO: ps->state_FR[lf1] = %x %x %Lx\n", 00752 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 00753 ps->state_FR[lf1].significand); 00754 #endif 00755 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 00756 if (f1 < 32) 00757 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 00758 else 00759 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 00760 // ps->state_PR[p2] = 0 for a or b zero or pseudo-zero 00761 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 00762 #ifdef DEBUG_UNIX 00763 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b ZERO/PSEUDO-ZERO: p2 = %x\n", p2); 00764 #endif 00765 #ifdef DEBUG_UNIX 00766 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b ZERO/PSEUDO-ZERO: *ppreds = %Lx\n", 00767 *ppreds); 00768 #endif 00769 return (TRUE); 00770 00771 } 00772 00773 // if any of a or b is infinity, return the result 00774 if (exponent_a == 0x1ffff && 00775 significand_a == CONST_FORMAT(0x8000000000000000) || 00776 exponent_b == 0x1ffff && 00777 significand_b == CONST_FORMAT(0x8000000000000000)) { 00778 00779 frcpa (ps, sf, qp, lf1, p2, lf2, lf3); 00780 00781 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 00782 // will never happen 00783 00784 *pfpsr = ps->state_AR[0].uint_value; 00785 00786 // set the destination floating-point and predicate reg values 00787 #ifdef DEBUG_UNIX 00788 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b INFINITY: ps->state_FR[lf1] = %x %x %Lx\n", 00789 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 00790 ps->state_FR[lf1].significand); 00791 #endif 00792 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 00793 if (f1 < 32) 00794 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 00795 else 00796 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 00797 00798 // ps->state_PR[p2] = 0; clear the output predicate for a or b inf 00799 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 00800 #ifdef DEBUG_UNIX 00801 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b INFINITY: p2 = %x\n", p2); 00802 #endif 00803 #ifdef DEBUG_UNIX 00804 printf ("DEBUG AFTER F6 SWA FAULT FOR a OR b INFINITY: *ppreds = %Lx\n", 00805 *ppreds); 00806 #endif 00807 return (TRUE); 00808 00809 } 00810 00811 // a and b are not [pseudo]0, and are not special (a and be are 00812 // normal or unnormal [denormal] floating-point numbers) 00813 00814 if (exponent_a == 0) exponent_a = 0xc001; 00815 // this covers double-extended real [pseudo-]denormals 00816 // un-bias the exponent of a 00817 exponent_a = exponent_a - 0xffff; 00818 00819 if (exponent_b == 0) exponent_b = 0xc001; 00820 // this covers double-extended real [pseudo-]denormals 00821 // un-bias the exponent of b 00822 exponent_b = exponent_b - 0xffff; 00823 00824 unnormal = 0; 00825 // check whether a is unnormal; will set D in FPSR.sfx if true 00826 // and denormal exceptions are disabled 00827 if (!(significand_a & CONST_FORMAT(0x8000000000000000))) { 00828 unnormal = 1; 00829 } 00830 // check whether b is unnormal; will set D in FPSR.sfx if true 00831 // and denormal exceptions are disabled 00832 if (!(significand_b & CONST_FORMAT(0x8000000000000000))) { 00833 unnormal = 1; 00834 } 00835 #ifdef DEBUG_UNIX 00836 if (unnormal) printf ("DEBUG F6 FRCPA SWA FAULT: unnormal = 1\n"); 00837 #endif 00838 00839 if (unnormal && !D_dis) { 00840 ISRlow = 0x0002; // denormal bit set 00841 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 00842 return (FALSE); // will raise D fault 00843 } 00844 00845 // normalize a (even if exponent_a becomes less than e_min) 00846 while (!(significand_a & CONST_FORMAT(0x8000000000000000))) { 00847 significand_a = significand_a << 1; 00848 exponent_a--; 00849 } 00850 00851 // normalize b (even if exponent_b becomes less than e_min) 00852 while (!(significand_b & CONST_FORMAT(0x8000000000000000))) { 00853 significand_b = significand_b << 1; 00854 exponent_b--; 00855 } 00856 00857 // Case (I) and Case (II) 00858 // |a/b| > MAXFP ==> might have O or I traps 00859 00860 if ((exponent_b <= exponent_a - FP_REG_EMAX - 2) || 00861 (exponent_b == exponent_a - FP_REG_EMAX - 1) && 00862 (significand_a >= significand_b)) { 00863 00864 #ifdef DEBUG_UNIX 00865 printf ("DEBUG: BEGIN F6 SWA FAULT CASE (I) - (II)\n"); 00866 #endif 00867 00868 // scale a to a' and b to b', such that c' = a'/ b' will be 00869 // normal 00870 00871 // set the scaled (possibly normalized) value of a' (sign ok) 00872 ps->state_FR[lf2].exponent = (EM_uint_t)(0xffff); 00873 ps->state_FR[lf2].significand = significand_a; 00874 00875 // set the scaled (possibly normalized) value of b' (sign ok) 00876 ps->state_FR[lf3].exponent = (EM_uint_t)(0xffff); 00877 ps->state_FR[lf3].significand = significand_b; 00878 00879 // convert a' and b' to FLOAT128 00880 a_float128 = FPRegToFP128 (ps->state_FR[lf2]); 00881 b_float128 = FPRegToFP128 (ps->state_FR[lf3]); 00882 00883 // invoke the divide algorithm to calculate c' = a' / b'; 00884 // the algorithm uses sf0 with user settings, and sf1 with 00885 // rn, 64-bits, wre, traps disabled; 00886 // copy FPSR.sfx with clear flags to FPSR1.sf0; rn,64,wre in sf1 00887 FPSR1 = (EM_uint64_t)((FPSR >> ((EM_uint_t)sf * 13)) & 0x01fc0) 00888 | 0x000000000270003f; // set sf0,sf1 and disable fp exceptions 00889 thmF (&FPSR1, &a_float128, &b_float128, &c_float128); 00890 I_flag = FPSR1 & 0x40000 ? 1 : 0; 00891 00892 if (O_dis && (I_dis || !I_flag)) { 00893 00894 // overflow exceptions are disabled and (inexact exceptions 00895 // are disabled or the result is exact) => return the 00896 // IEEE mandated result 00897 00898 if (sign_a ^ sign_b) { // opposite signs 00899 if (rc == rc_rn || rc == rc_rm) { 00900 // -Inf 00901 ps->state_FR[lf1].sign = 1; 00902 ps->state_FR[lf1].exponent = 0x1ffff; 00903 ps->state_FR[lf1].significand = 00904 CONST_FORMAT(0x8000000000000000); 00905 } else { // if (rc == rc_rp || rc == rc_rz) 00906 // -MAX_FP_REG_VAL 00907 ps->state_FR[lf1].sign = 1; 00908 ps->state_FR[lf1].exponent = 0x1fffe; 00909 ps->state_FR[lf1].significand = 00910 CONST_FORMAT(0xffffffffffffffff); 00911 } 00912 } else { // same sign 00913 if (rc == rc_rn || rc == rc_rp) { 00914 // Inf 00915 ps->state_FR[lf1].sign = 0; 00916 ps->state_FR[lf1].exponent = 0x1ffff; 00917 ps->state_FR[lf1].significand = 00918 CONST_FORMAT(0x8000000000000000); 00919 } else { // if (rc == rc_rm || rc == rc_rz) 00920 // MAX_FP_REG_VAL 00921 ps->state_FR[lf1].sign = 0; 00922 ps->state_FR[lf1].exponent = 0x1fffe; 00923 ps->state_FR[lf1].significand = 00924 CONST_FORMAT(0xffffffffffffffff); 00925 } 00926 } 00927 00928 // set D in FPSR.sfx if any of a and b was unnormal 00929 if (unnormal) { 00930 // set D = 1 in *pfpsr 00931 *pfpsr = *pfpsr | 00932 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 00933 } 00934 00935 // set I = 1 and O = 1 in *pfpsr 00936 // set O = 1 00937 *pfpsr = *pfpsr | 00938 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 10)); 00939 // set I = 1 00940 *pfpsr = *pfpsr | 00941 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 00942 // set the destination floating-point and predicate reg values 00943 #ifdef DEBUG_UNIX 00944 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 1: ps->state_FR[lf1] = %x %x %Lx\n", 00945 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 00946 ps->state_FR[lf1].significand); 00947 #endif 00948 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 00949 if (f1 < 32) 00950 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 00951 else 00952 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 00953 00954 // ps->state_PR[p2] = 0; clear the output predicate 00955 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 00956 #ifdef DEBUG_UNIX 00957 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 1 a: *ppreds = %Lx\n", 00958 *ppreds); 00959 #endif 00960 return (TRUE); 00961 00962 } else if (!O_dis) { 00963 00964 // overflow exceptions are enabled => compute the result, and 00965 // propagate an overflow exception (deliver the result with 00966 // the exponent mod 2^17 00967 00968 // convert c' (normal fp#) from FLOAT128 to EM_fp_reg_type 00969 ps->state_FR[lf1] = FP128ToFPReg (c_float128); 00970 // scale c' to c and take the mod 2^17 exponent 00971 exponent_c = (EM_uint_t)ps->state_FR[lf1].exponent + 00972 exponent_a - exponent_b; 00973 ISRlow = 0x0801; // O = 1 00974 ps->state_FR[lf1].exponent = exponent_c & 0x1ffff; 00975 00976 // determine fpa, and set the values of I and fpa in ISRlow 00977 // if (I_flag == 0) fpa = 0 00978 if (I_flag == 1) { 00979 00980 // calculate d' = |b'| * |c'| - |a'| to determine fpa 00981 c_float128.hiFlt64 = c_float128.hiFlt64 & 00982 CONST_FORMAT(0x000000000001ffff); // take |c'| 00983 b_float128.hiFlt64 = b_float128.hiFlt64 & 00984 CONST_FORMAT(0x000000000001ffff); // take |b'| 00985 a_float128.hiFlt64 = a_float128.hiFlt64 & 00986 CONST_FORMAT(0x000000000001ffff); // take |a'| 00987 00988 FPSR1 = CONST_FORMAT(0x00000000000003bf); // rn,64,wre=1,dis 00989 run_fms (&FPSR1, &d_float128, &b_float128, &c_float128, 00990 &a_float128); // d' = |b'| * |c'| - |a'| 00991 00992 if (d_float128.hiFlt64 & CONST_FORMAT(0x020000)) { 00993 // if d' < 0, I = 1 and fpa = 0 00994 ISRlow = ISRlow | 0x2000; 00995 } else { 00996 // if d' > 0, I = 1 and fpa = 1 00997 ISRlow = ISRlow | 0x6000; 00998 } 00999 01000 } 01001 01002 // set the destination floating-point and predicate reg values 01003 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01004 01005 #ifdef DEBUG_UNIX 01006 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 2: ps->state_FR[lf1] = %x %x %Lx\n", 01007 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01008 ps->state_FR[lf1].significand); 01009 #endif 01010 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01011 if (f1 < 32) 01012 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01013 else 01014 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01015 01016 // ps->state_PR[p2] = 0; clear the output predicate 01017 // update (*ppreds) [as if O disabled] 01018 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01019 #ifdef DEBUG_UNIX 01020 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 2 a: *ppreds = %Lx\n", *ppreds); 01021 #endif 01022 // update *pfpsr 01023 // set D in FPSR.sfx if any of a and b was unnormal 01024 if (unnormal) { 01025 // set D = 1 in *pfpsr 01026 *pfpsr = *pfpsr | 01027 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01028 } 01029 // set O = 1 01030 *pfpsr = *pfpsr | 01031 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 10)); 01032 // set I = 1 if I_flag = 1 01033 if (I_flag) *pfpsr = *pfpsr | 01034 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01035 01036 // caller will advance instruction pointer 01037 return (FALSE | FAULT_TO_TRAP); // will raise O trap 01038 01039 } else { // if (!I_dis && I_flag) 01040 01041 // overflow exceptions are disabled, but the inexact 01042 // exceptions are enabled and the result is inexact => 01043 // provide the IEEE mandated result, and 01044 // propagate an inexact exception 01045 01046 if (sign_a ^ sign_b) { // opposite signs 01047 if (rc == rc_rn || rc == rc_rm) { 01048 // -Inf 01049 ps->state_FR[lf1].sign = 1; 01050 ps->state_FR[lf1].exponent = 0x1ffff; 01051 ps->state_FR[lf1].significand = 01052 CONST_FORMAT(0x8000000000000000); 01053 fpa = 1; 01054 } else { // if (rc == rc_rp || rc == rc_rz) 01055 // -MAX_FP_REG_VAL 01056 ps->state_FR[lf1].sign = 1; 01057 ps->state_FR[lf1].exponent = 0x1fffe; 01058 ps->state_FR[lf1].significand = 01059 CONST_FORMAT(0xffffffffffffffff); 01060 fpa = 0; 01061 } 01062 } else { // same sign 01063 if (rc == rc_rn || rc == rc_rp) { 01064 // Inf 01065 ps->state_FR[lf1].sign = 0; 01066 ps->state_FR[lf1].exponent = 0x1ffff; 01067 ps->state_FR[lf1].significand = 01068 CONST_FORMAT(0x8000000000000000); 01069 fpa = 1; 01070 } else { // if (rc == rc_rm || rc == rc_rz) 01071 // MAX_FP_REG_VAL 01072 ps->state_FR[lf1].sign = 0; 01073 ps->state_FR[lf1].exponent = 0x1fffe; 01074 ps->state_FR[lf1].significand = 01075 CONST_FORMAT(0xffffffffffffffff); 01076 fpa = 0; 01077 } 01078 } 01079 01080 ISRlow = 0x2001 | (fpa == 1 ? 0x4000 : 0x0000); // I = 1 and fpa 01081 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01082 01083 // set the destination floating-point and predicate reg values 01084 #ifdef DEBUG_UNIX 01085 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 3: ps->state_FR[lf1] = %x %x %Lx\n", 01086 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01087 ps->state_FR[lf1].significand); 01088 #endif 01089 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01090 if (f1 < 32) 01091 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01092 else 01093 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01094 01095 // ps->state_PR[p2] = 0; clear the output predicate 01096 // update *ppreds [as if O disabled] 01097 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01098 #ifdef DEBUG_UNIX 01099 printf ("DEBUG Case (I), (II) AFTER F6 SWA FAULT 3 a: *ppreds = %Lx\n", 01100 *ppreds); 01101 #endif 01102 // update *pfpsr 01103 // set D in FPSR.sfx if any of a and b was unnormal 01104 if (unnormal) { 01105 // set D = 1 in *pfpsr 01106 *pfpsr = *pfpsr | 01107 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01108 } 01109 // set I = 1 and O = 1 in *pfpsr 01110 // set O = 1 01111 *pfpsr = *pfpsr | 01112 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 10)); 01113 // set I = 1 01114 *pfpsr = *pfpsr | 01115 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01116 01117 // caller will advance instruction pointer 01118 return (FALSE | FAULT_TO_TRAP); // will raise I trap 01119 01120 } 01121 01122 // Case (III), Case (IV), Case (V), Case (VI), and Case (VII) 01123 // a/b is normal, non-zero ==> might have an I trap 01124 01125 } else if ((exponent_b == exponent_a - FP_REG_EMAX - 1) && 01126 (significand_a < significand_b) || 01127 (exponent_b == exponent_a - FP_REG_EMAX) || 01128 (exponent_a - FP_REG_EMAX + 1 <= exponent_b ) && 01129 (exponent_b <= exponent_a - FP_REG_EMIN - 2) && 01130 ((exponent_a <= FP_REG_EMIN + N64 - 1) || 01131 (exponent_b <= FP_REG_EMIN - 1) || 01132 (exponent_b >= FP_REG_EMAX - 2)) || 01133 (exponent_b == exponent_a - FP_REG_EMIN - 1) || 01134 (exponent_b == exponent_a - FP_REG_EMIN) && 01135 (significand_a >= significand_b)) { 01136 01137 #ifdef DEBUG_UNIX 01138 printf ("DEBUG: BEGIN F6 SWA FAULT CASE (III) - (VII)\n"); 01139 #endif 01140 01141 // scale a to a' and b to b', such that c' = a'/ b' will be 01142 // normal 01143 01144 // set the scaled (possibly normalized) value of a' (sign ok) 01145 ps->state_FR[lf2].exponent = (EM_uint_t)(0xffff); 01146 ps->state_FR[lf2].significand = significand_a; 01147 01148 // set the scaled (possibly normalized) value of b' (sign ok) 01149 ps->state_FR[lf3].exponent = (EM_uint_t)(0xffff); 01150 ps->state_FR[lf3].significand = significand_b; 01151 01152 // convert a' and b' to FLOAT128 01153 a_float128 = FPRegToFP128 (ps->state_FR[lf2]); 01154 b_float128 = FPRegToFP128 (ps->state_FR[lf3]); 01155 01156 // invoke the divide algorithm to calculate c' = a' / b'; 01157 // the algorithm uses sf0 with user settings, and sf1 with 01158 // rn, 64-bits, wre, traps disabled; 01159 // copy FPSR.sfx with clear flags to FPSR1.sf0; rn,64,wre in sf1 01160 FPSR1 = (EM_uint64_t)((FPSR >> ((EM_uint_t)sf * 13)) & 0x01fc0) 01161 | 0x000000000270003f; // set sf0,sf1 and disable fp exceptions 01162 thmF (&FPSR1, &a_float128, &b_float128, &c_float128); 01163 I_flag = FPSR1 & 0x40000 ? 1 : 0; 01164 01165 // set the result 01166 // convert c' (normal fp#) from FLOAT128 to EM_fp_reg_type 01167 ps->state_FR[lf1] = FP128ToFPReg (c_float128); 01168 // scale c' to c 01169 ps->state_FR[lf1].exponent = (EM_uint_t)ps->state_FR[lf1].exponent 01170 + exponent_a - exponent_b; 01171 01172 if (I_dis || !I_flag) { 01173 01174 // set D in FPSR.sfx if any of a and b was unnormal 01175 if (unnormal) { 01176 // set D = 1 in *pfpsr 01177 *pfpsr = *pfpsr | 01178 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01179 } 01180 01181 // set I in *pfpsr 01182 if (I_flag) *pfpsr = *pfpsr | 01183 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01184 // set the destination floating-point and predicate reg values 01185 #ifdef DEBUG_UNIX 01186 printf ("DEBUG Case (III) - (VII) AFTER F6 SWA FAULT 4: ps->state_FR[lf1] = %x %x %Lx\n", 01187 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01188 ps->state_FR[lf1].significand); 01189 #endif 01190 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01191 if (f1 < 32) 01192 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01193 else 01194 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01195 01196 // ps->state_PR[p2] = 0; clear the output predicate 01197 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01198 #ifdef DEBUG_UNIX 01199 printf ("DEBUG Case (III) - (VII) AFTER F6 SWA FAULT 4 a: *ppreds = %Lx\n", 01200 *ppreds); 01201 #endif 01202 return (TRUE); 01203 01204 } else { // if (!I_dis && I_flag) 01205 01206 // calculate d' = |b'| * |c'| - |a'| to determine fpa 01207 c_float128.hiFlt64 = c_float128.hiFlt64 & 01208 CONST_FORMAT(0x000000000001ffff); // take |c'| 01209 b_float128.hiFlt64 = b_float128.hiFlt64 & 01210 CONST_FORMAT(0x000000000001ffff); // take |b'| 01211 a_float128.hiFlt64 = a_float128.hiFlt64 & 01212 CONST_FORMAT(0x000000000001ffff); // take |a'| 01213 FPSR1 = CONST_FORMAT(0x00000000000003bf); // rn,64,wre=1,dis 01214 run_fms (&FPSR1, &d_float128, &b_float128, &c_float128, 01215 &a_float128); // d' = |b'| * |c'| - |a'| 01216 01217 if (d_float128.hiFlt64 & CONST_FORMAT(0x0000000000020000)) { 01218 // if d' < 0, I = 1 and fpa = 0 01219 ISRlow = 0x2001; 01220 } else { 01221 // if d' > 0, I = 1 and fpa = 1 01222 ISRlow = 0x6001; 01223 } 01224 01225 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01226 01227 // set the destination floating-point and predicate reg values 01228 #ifdef DEBUG_UNIX 01229 printf ("DEBUG Case (III) - (VII) AFTER F6 SWA FAULT 5: ps->state_FR[lf1] = %x %x %Lx\n", 01230 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01231 ps->state_FR[lf1].significand); 01232 #endif 01233 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01234 if (f1 < 32) 01235 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01236 else 01237 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01238 01239 // ps->state_PR[p2] = 0; clear the output predicate 01240 // update *ppreds 01241 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01242 #ifdef DEBUG_UNIX 01243 printf ("DEBUG Case (III) -(VII) AFTER F6 SWA FAULT 5 a: *ppreds = %Lx\n", 01244 *ppreds); 01245 #endif 01246 // update *pfpsr 01247 // set D = 1 in FPSR.sfx if any of a and b was unnormal 01248 if (unnormal) { 01249 // set D = 1 in *pfpsr 01250 *pfpsr = *pfpsr | 01251 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01252 } 01253 // set I = 1 in *pfpsr 01254 if (I_flag) *pfpsr = *pfpsr | 01255 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01256 01257 // caller will advance instruction pointer 01258 return (FALSE | FAULT_TO_TRAP); // will raise I trap 01259 01260 } 01261 01262 // Case (VIII), Case (IX), Case (X), Case (XI), and Case (XII) 01263 // 0 < |a/b| < SMALLEST NORMAL ==> might have U or I traps 01264 // As a/b cannot contain more than N-1 consecutive 1's or N-1 01265 // consecutive 0's, even if fpa = 1 is added in the first IEEE 01266 // rounding, that does not change the quotient's exponent 01267 01268 } else if ( (exponent_b == exponent_a - FP_REG_EMIN) && 01269 (significand_a < significand_b) || 01270 (exponent_b >= exponent_a - FP_REG_EMIN + 1)) { 01271 01272 #ifdef DEBUG_UNIX 01273 printf ("DEBUG: BEGIN F6 SWA FAULT CASE (VIII) - (XII)\n"); 01274 #endif 01275 // scale a to a' and b to b', such that c' = a'/ b' will be 01276 // normal 01277 01278 // set the scaled (possibly normalized) value of a' (sign ok) 01279 ps->state_FR[lf2].exponent = (EM_uint_t)(0xffff); 01280 ps->state_FR[lf2].significand = significand_a; 01281 01282 // set the scaled (possibly normalized) value of b' (sign ok) 01283 ps->state_FR[lf3].exponent = (EM_uint_t)(0xffff); 01284 ps->state_FR[lf3].significand = significand_b; 01285 01286 // convert a' and b' to FLOAT128 01287 a_float128 = FPRegToFP128 (ps->state_FR[lf2]); 01288 b_float128 = FPRegToFP128 (ps->state_FR[lf3]); 01289 01290 // invoke the divide algorithm to calculate c' = a' / b'; 01291 // the algorithm uses sf0 with user settings, and sf1 with 01292 // rn, 64-bits, wre, traps disabled; 01293 // copy FPSR.sfx with clear flags to FPSR1.sf0; rn,64,wre in sf1 01294 FPSR1 = (EM_uint64_t)((FPSR >> ((EM_uint_t)sf * 13)) & 0x01fc0) 01295 | 0x000000000270003f; // set sf0,sf1 and disable fp exceptions 01296 thmF (&FPSR1, &a_float128, &b_float128, &c_float128); 01297 I_flag = FPSR1 & 0x40000 ? 1 : 0; 01298 01299 c1_float128 = c_float128; 01300 01301 if (I_flag == 1) { 01302 // calculate d' = |b'| * |c'| - |a'| to determine fpa (used 01303 // only if a U or I exception will be raised) 01304 c1_float128.hiFlt64 = c1_float128.hiFlt64 & 01305 CONST_FORMAT(0x000000000001ffff); // take |c'| 01306 b_float128.hiFlt64 = b_float128.hiFlt64 & 01307 CONST_FORMAT(0x000000000001ffff); // take |b'| 01308 a_float128.hiFlt64 = a_float128.hiFlt64 & 01309 CONST_FORMAT(0x000000000001ffff); // take |a'| 01310 FPSR1 = CONST_FORMAT(0x00000000000003bf); // rn,64,wre=1,dis 01311 run_fms (&FPSR1, &d_float128, &b_float128, &c1_float128, 01312 &a_float128); // d' = |b'| * |c'| - |a'| 01313 if (d_float128.hiFlt64 & CONST_FORMAT(0x020000)) { 01314 // if d' < 0, I = 1 and fpa = 0 01315 fpa = 0; 01316 } else { 01317 // if d' > 0, I = 1 and fpa = 1 01318 fpa = 1; 01319 } 01320 } else { // if (I_flag == 0) fpa = 0 01321 fpa = 0; 01322 } 01323 01324 if (!U_dis) { 01325 01326 // underflow exceptions are enabled => compute the result, and 01327 // propagate an underflow exception (deliver the result with 01328 // the exponent mod 2^17 01329 01330 // convert c' (normal fp#) from FLOAT128 to EM_fp_reg_type 01331 ps->state_FR[lf1] = FP128ToFPReg (c_float128); 01332 // scale c' to c and take the mod 2^17 exponent 01333 exponent_c = (EM_uint_t)ps->state_FR[lf1].exponent + 01334 exponent_a - exponent_b; 01335 ps->state_FR[lf1].exponent = exponent_c & 0x1ffff; 01336 01337 ISRlow = 0x1001; // U = 1 01338 // set the values of I and fpa in ISRlow 01339 if (I_flag == 1) { 01340 if (fpa == 0) { 01341 ISRlow = ISRlow | 0x2000; // I = 1 01342 } else { 01343 ISRlow = ISRlow | 0x6000; // I = 1, fpa = 1 01344 } 01345 } 01346 01347 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01348 01349 // set the destination floating-point and predicate reg values 01350 #ifdef DEBUG_UNIX 01351 printf ("DEBUG Case (VIII) - (XII) AFTER F6 SWA FAULT 7: ps->state_FR[lf1] = %x %x %Lx\n", 01352 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01353 ps->state_FR[lf1].significand); 01354 #endif 01355 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01356 if (f1 < 32) 01357 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01358 else 01359 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01360 01361 // ps->state_PR[p2] = 0; clear the output predicate 01362 // update *ppreds 01363 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01364 #ifdef DEBUG_UNIX 01365 printf ("DEBUG Case (VIII) - (XII) AFTER F6 SWA FAULT 7 a: *ppreds = %Lx\n", 01366 *ppreds); 01367 #endif 01368 // update *pfpsr 01369 // set D in FPSR.sfx if any of a and b was unnormal 01370 if (unnormal) { 01371 // set D = 1 in *pfpsr 01372 *pfpsr = *pfpsr | 01373 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01374 } 01375 // set U = 1 01376 *pfpsr = *pfpsr | 01377 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 11)); 01378 // set I = 1 if I_flag = 1 01379 if (I_flag) { 01380 *pfpsr = *pfpsr | 01381 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01382 } 01383 01384 // caller will advance instruction pointer 01385 return (FALSE | FAULT_TO_TRAP); // will raise U trap 01386 01387 } else { // if (U_dis) 01388 01389 // underflow exceptions are disabled 01390 01391 // convert c' (normal fp#) from FLOAT128 to EM_fp_reg_type 01392 ps->state_FR[lf1] = FP128ToFPReg (c_float128); 01393 01394 if (ftz == 0) { 01395 01396 // denormalize 01397 sign_c = ps->state_FR[lf1].sign; 01398 exponent_c = ps->state_FR[lf1].exponent; 01399 significand_c = ps->state_FR[lf1].significand; 01400 if (fpa) significand_c = significand_c - 1; 01401 // Note: if fpa = 1, significand_c cannot be 1.0...0, 01402 // because it could not have been 1.1...1 before adding 01403 // 1 to it (cannot have N consecutive 1's in the result); 01404 // this means that significand_c - 1 above does not 01405 // require an exponent correction (it does not lose 01406 // the J-bit) 01407 true_bexp = exponent_c + exponent_a - exponent_b; 01408 // true_bexp - 0x0ffff is the true unbiased exponent after 01409 // the first IEEE rounding 01410 01411 // perform the second IEEE rounding 01412 01413 significand_size = N64; 01414 shift_cnt = FP_REG_EMIN - true_bexp + 0x0ffff; 01415 01416 if (shift_cnt <= significand_size) { 01417 // do the actual shift to denormalize the result; the 01418 // result will be a denormal, or zero 01419 round = I_flag; 01420 sticky = 0; 01421 for (ind = 0 ; ind < shift_cnt ; ind++) { 01422 sticky = round | sticky; 01423 round = (EM_uint_t)(significand_c & 0x01); 01424 significand_c = significand_c >> 1; 01425 } 01426 true_bexp = true_bexp + shift_cnt; // e_min + 0xffff 01427 } else { // all the significand bits shift out into sticky 01428 significand_c = 0; 01429 round = 0; 01430 sticky = 1; 01431 true_bexp = true_bexp + shift_cnt; // e_min + 0xffff 01432 } 01433 01434 // perform the rounding; the result is 0, denormal, or 01435 // 1.0 x 2^emin 01436 switch (rc) { 01437 case rc_rn: 01438 lsb = (EM_uint_t)(significand_c & 0x01); 01439 fpa = round & (lsb | sticky); 01440 break; 01441 case rc_rm: 01442 fpa = (sign_c == 0 ? 0 : (round | sticky)); 01443 break; 01444 case rc_rp: 01445 fpa = (sign_c == 1 ? 0 : (round | sticky)); 01446 break; 01447 case rc_rz: 01448 fpa = 0; 01449 break; 01450 default: 01451 #ifndef unix 01452 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 01453 Error: invalid rc = %d\n", rc); 01454 #else 01455 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 01456 invalid rc = %d\n", rc); 01457 return (FP_EMUL_ERROR); 01458 #endif 01459 } 01460 01461 01462 // add fpa to the significand if fpa = 1 01463 if (fpa == 1) { 01464 significand_c = significand_c + 1; 01465 } 01466 01467 if (significand_c == 0) { 01468 true_bexp = 0; // ow it is e_min 01469 } 01470 01471 exponent_c = true_bexp; 01472 01473 // determine the new value of I_flag (must be 1) 01474 I_flag = round | sticky; // not used except for check below 01475 // ps->state_FR[lf1].sign unchanged 01476 ps->state_FR[lf1].exponent = exponent_c; 01477 ps->state_FR[lf1].significand = significand_c; 01478 01479 } else { // if ftz == 1 01480 01481 fpa = 0; 01482 // ps->state_FR[lf1].sign unchanged 01483 ps->state_FR[lf1].exponent = 0; 01484 ps->state_FR[lf1].significand = 0; 01485 I_flag = 1; 01486 01487 } 01488 01489 // update *pfpsr 01490 // set D in FPSR.sfx if any of a and b was unnormal 01491 if (unnormal) { 01492 // set D = 1 in *pfpsr 01493 *pfpsr = *pfpsr | 01494 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01495 } 01496 if (I_flag) { 01497 // set U = 1 01498 *pfpsr = *pfpsr | 01499 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 11)); 01500 // set I = 1 01501 *pfpsr = *pfpsr | 01502 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01503 } 01504 01505 // set the destination floating-point and predicate reg values 01506 #ifdef DEBUG_UNIX 01507 printf ("DEBUG Case (VIII) - (XII) AFTER F6 SWA FAULT 8: ps->state_FR[lf1] = %x %x %Lx\n", 01508 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01509 ps->state_FR[lf1].significand); 01510 #endif 01511 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01512 if (f1 < 32) 01513 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01514 else 01515 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01516 01517 // ps->state_PR[p2] = 0; clear the output predicate 01518 // update *ppreds [as if O disabled] 01519 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01520 #ifdef DEBUG_UNIX 01521 printf ("DEBUG Case (VIII) - (XII) AFTER F6 SWA FAULT 8 a: *ppreds = %Lx\n", 01522 *ppreds); 01523 #endif 01524 01525 if (I_flag && !I_dis) { 01526 01527 // underflow exceptions are disabled, but the inexact 01528 // exceptions are enabled and the result is inexact => 01529 // provide the IEEE mandated result, and 01530 // propagate an inexact exception 01531 01532 ISRlow = 0x2001; // I = 1 01533 if (fpa) ISRlow = ISRlow | 0x4000; // fpa = 1 01534 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01535 01536 // caller will advance instruction pointer 01537 return (FALSE | FAULT_TO_TRAP); // will raise I trap 01538 01539 } // else no trap (tiny and inexact result) 01540 01541 return (TRUE); 01542 01543 } 01544 01545 // Case (XIII) 01546 } else { 01547 01548 #ifdef DEBUG_UNIX 01549 printf ("DEBUG: BEGIN F6 SWA FAULT CASE (XIII)\n"); 01550 #endif 01551 01552 // this must be a Merced specific SWA fault (e.g. for single, 01553 // double, or double-extended denormals) 01554 frcpa (ps, sf, qp, lf1, p2, lf2, lf3); 01555 01556 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 01557 01558 *pfpsr = ps->state_AR[0].uint_value; 01559 // set the destination floating-point and predicate reg values 01560 #ifdef DEBUG_UNIX 01561 printf ("DEBUG Case (XIII) AFTER F6 SWA FAULT 14: ps->state_FR[lf1] = %x %x %Lx\n", 01562 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01563 ps->state_FR[lf1].significand); 01564 #endif 01565 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01566 if (f1 < 32) 01567 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01568 else 01569 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01570 01571 // update ps->state_PR[p2] = 1; 01572 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01573 *ppreds |= 01574 (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 01575 #ifdef DEBUG_UNIX 01576 printf ("DEBUG Case (XIII) AFTER F6 SWA FAULT 14 a: *ppreds = %Lx\n", 01577 *ppreds); 01578 #endif 01579 return (TRUE); 01580 01581 } 01582 01583 break; 01584 01585 case FPRCPA_PATTERN: 01586 01587 // should get here only for denormal inputs 01588 fprcpa (ps, sf, qp, lf1, p2, lf2, lf3); 01589 01590 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 01591 01592 *pfpsr = ps->state_AR[0].uint_value; 01593 // set the destination floating-point and predicate 01594 // reg values (redundant, as the output predicate will be cleared) 01595 #ifdef DEBUG_UNIX 01596 printf ("DEBUG AFTER F6 FPRCPA SWA FAULT 15: ps->state_FR[lf1] = %x %x %Lx\n", 01597 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01598 ps->state_FR[lf1].significand); 01599 #endif 01600 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01601 if (f1 < 32) 01602 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01603 else 01604 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01605 01606 // update ps->state_PR[p2] = 0; clear the output predicate 01607 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01608 *ppreds |= 01609 (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 01610 #ifdef DEBUG_UNIX 01611 printf ("DEBUG AFTER F6 FPRCPA SWA FAULT 15 a: *ppreds = %Lx\n", 01612 *ppreds); 01613 #endif 01614 return (TRUE); 01615 01616 } 01617 01618 } else if ((OpCode & F7_MIN_MASK) == F7_PATTERN) { 01619 // F7 instruction 01620 01621 switch (OpCode & F7_MASK) { 01622 01623 case FRSQRTA_PATTERN: 01624 SIMD_instruction = 0; 01625 break; 01626 case FPRSQRTA_PATTERN: 01627 SIMD_instruction = 1; 01628 break; 01629 default: 01630 // unrecognized instruction type 01631 #ifndef unix 01632 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 01633 instruction opcode %8x %8x not recognized\n", OpCode); 01634 #else 01635 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 01636 instruction opcode %Lx not recognized\n", OpCode); 01637 return (FP_EMUL_ERROR); 01638 #endif 01639 } 01640 01641 // extract the rounding mode 01642 rc = (EM_sf_rc_type)((FPSR >> (6 + 4 + 13 * (EM_uint_t)sf)) & 0x03); 01643 01644 // extract p2, f3, and f1 01645 p2 = (EM_uint_t)((OpCode >> 27) & CONST_FORMAT(0x00000000003f)); 01646 if (p2 >= 16) p2 = 16 + (rrbpr + p2 - 16) % 48; 01647 #ifdef DEBUG_UNIX 01648 printf ("DEBUG F7 instruction: p2 = %x\n", p2); 01649 #endif 01650 f3 = (EM_uint_t)((OpCode >> 20) & CONST_FORMAT(0x00000000007F)); 01651 if (f3 >= 32) f3 = 32 + (rrbfr + f3 - 32) % 96; 01652 f1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 01653 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 01654 01655 // get source floating-point reg value 01656 ps->state_FR[lf3] = FP128ToFPReg (get_fp_register (f3, fp_state)); 01657 #ifdef DEBUG_UNIX 01658 printf ("DEBUG BEFORE F7 SWA FAULT: ps->state_FR[lf3] = %x %x %Lx\n", 01659 ps->state_FR[lf3].sign, ps->state_FR[lf3].exponent, ps->state_FR[lf3].significand); 01660 #endif 01661 01662 switch (OpCode & F7_MASK) { 01663 01664 case FRSQRTA_PATTERN: 01665 01666 // extract sign, exponent, and significand of a 01667 // note that a is (a non-zero positive normal, or a positive 01668 // pseudo-zero or unnormal/denormal fp#), or (a negative pseudo-zero 01669 // or non-zero unnormal/denormal fp#) 01670 sign_a = (EM_uint_t)ps->state_FR[lf3].sign; 01671 exponent_a = (EM_int_t)ps->state_FR[lf3].exponent; 01672 significand_a = ps->state_FR[lf3].significand; 01673 if (exponent_a == 0 && significand_a != 0) exponent_a = 0xc001; 01674 01675 unnormal = 0; 01676 if (!(significand_a & CONST_FORMAT(0x8000000000000000))) { 01677 unnormal = 1; 01678 } 01679 #ifdef DEBUG_UNIX 01680 if (unnormal) printf ("DEBUG F7 FRSQRTA SWA FAULT: unnormal = 1\n"); 01681 #endif 01682 01683 // raise a D trap if an unnormal, but not a non-zero negative one 01684 if (unnormal && !D_dis && !(sign_a == 1 && significand_a != 0)) { 01685 ISRlow = 0x0002; // denormal 01686 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01687 return (FALSE); // will raise D trap 01688 } 01689 01690 // if a pseudo-zero or negative non-zero, return the result 01691 if (exponent_a != 0 && significand_a == 0 || 01692 sign_a == 1 && significand_a != 0) { 01693 // a is pseudo-zero or negative non-zero 01694 01695 frsqrta (ps, sf, qp, lf1, p2, lf3); 01696 01697 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 01698 01699 *pfpsr = ps->state_AR[0].uint_value; 01700 01701 // set the destination floating-point and predicate reg values 01702 #ifdef DEBUG_UNIX 01703 printf ("DEBUG AFTER F7 SWA FAULT FOR PSEUDO-ZERO OR -DEN: ps->state_FR[lf1] = %x %x %Lx\n", 01704 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01705 ps->state_FR[lf1].significand); 01706 #endif 01707 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01708 if (f1 < 32) 01709 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01710 else 01711 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01712 01713 // ps->state_PR[p2] = 0 for zero or negative argument 01714 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01715 *ppreds |= 01716 (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 01717 #ifdef DEBUG_UNIX 01718 printf ("DEBUG AFTER F7 SWA FAULT FOR PSEUDO-ZERO OR -DEN: p2 = %x\n", p2); 01719 #endif 01720 #ifdef DEBUG_UNIX 01721 printf ("DEBUG AFTER F7 SWA FAULT FOR PSEUDO-ZERO OR -DEN: *ppreds = %Lx\n", *ppreds); 01722 #endif 01723 return (TRUE); 01724 01725 } 01726 01727 // normalize a (even if exponent_a becomes less than e_min) 01728 while (!(significand_a & CONST_FORMAT(0x8000000000000000))) { 01729 significand_a = significand_a << 1; 01730 exponent_a--; 01731 } 01732 01733 // Case (I) 01734 // sqrt (a) is normal ==> might have an I trap 01735 01736 if (exponent_a - 0xffff <= FP_REG_EMIN + N64 - 1) { 01737 01738 // scale a to a', such that s' = sqrt (a') will be normal 01739 01740 // set the scaled (and possibly normalized) value of a' (sign ok) 01741 if (exponent_a % 2 != 0) // exponent_a is biased 01742 ps->state_FR[lf3].exponent = (EM_uint_t)0xffff; 01743 else 01744 ps->state_FR[lf3].exponent = (EM_uint_t)0x10000; 01745 ps->state_FR[lf3].significand = significand_a; 01746 01747 // convert a' to FLOAT128 01748 a_float128 = FPRegToFP128 (ps->state_FR[lf3]); 01749 01750 // invoke the square root algorithm to calculate s' = sqrt (a'); 01751 // the algorithm uses sf0 with user settings, and sf1 with 01752 // rn, 64-bits, wre, traps disabled; 01753 // the current FPSR is not affected 01754 // copy FPSR.sfx with clear flags to FPSR1.sf0; rn,64,wre in sf1 01755 FPSR1 = (EM_uint64_t)((FPSR >> ((EM_uint_t)sf * 13)) & 0x01fc0) | 01756 0x000000000270003f; // set sf0,sf1 and disable fp exceptions 01757 thmL (&FPSR1, &a_float128, &s_float128); 01758 I_flag = FPSR1 & 0x40000 ? 1 : 0; 01759 01760 // convert s' (normal fp#) from FLOAT128 to EM_fp_reg_type 01761 ps->state_FR[lf1] = FP128ToFPReg (s_float128); 01762 01763 // scale s' to s 01764 if (exponent_a % 2 != 0) // exponent_a is biased 01765 ps->state_FR[lf1].exponent += ((exponent_a - 0xffff) / 2); 01766 else 01767 ps->state_FR[lf1].exponent += ((exponent_a - 0xffff - 1) / 2); 01768 01769 if (I_dis || !I_flag) { 01770 01771 // set D in FPSR.sfx if unnormal operand (and denormal exeptions 01772 // are disabled) 01773 if (unnormal) { // if (unnormal && D_dis) 01774 // set D in FPSR.sfx (set D = 1 in *pfpsr) 01775 *pfpsr = *pfpsr | 01776 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01777 } 01778 if (I_flag) { // set I in FPSR 01779 // set I = 1 in *pfpsr 01780 *pfpsr = *pfpsr | 01781 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01782 } 01783 01784 // set the destination floating-point and predicate reg values 01785 #ifdef DEBUG_UNIX 01786 printf ("DEBUG Case (I) AFTER F7 SWA FAULT 1: ps->state_FR[lf1] = %x %x %Lx\n", 01787 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01788 ps->state_FR[lf1].significand); 01789 #endif 01790 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01791 if (f1 < 32) 01792 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01793 else 01794 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01795 01796 // ps->state_PR[p2] = 0; clear the output predicate 01797 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01798 #ifdef DEBUG_UNIX 01799 printf ("DEBUG Case (I) AFTER F7 SWA FAULT 1 a: *ppreds = %Lx\n", 01800 *ppreds); 01801 #endif 01802 return (TRUE); 01803 01804 } else { // if (!I_dis && I_flag) 01805 01806 // determine fpa, and set the values of I and fpa in ISRlow 01807 // calculate d' = s' * s' - a' to determine fpa 01808 FPSR1 = CONST_FORMAT(0x00000000000003bf); 01809 run_fms (&FPSR1, &d_float128, &s_float128, &s_float128, 01810 &a_float128); // d' = s' * s' - a' 01811 01812 if (d_float128.hiFlt64 & CONST_FORMAT(0x0000000000020000)) { 01813 // if d' < 0, I = 1 and fpa = 0 01814 ISRlow = 0x2001; 01815 } else { 01816 // if d' > 0, I = 1 and fpa = 1 01817 ISRlow = 0x6001; 01818 } 01819 01820 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 01821 01822 // set the destination floating-point and predicate reg values 01823 #ifdef DEBUG_UNIX 01824 printf ("DEBUG Case (I) AFTER F7 SWA FAULT 2: ps->state_FR[lf1] = %x %x %Lx\n", 01825 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01826 ps->state_FR[lf1].significand); 01827 #endif 01828 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01829 if (f1 < 32) 01830 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01831 else 01832 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01833 01834 //update *ppreds 01835 // ps->state_PR[p2] = 0; clear the output predicate 01836 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01837 #ifdef DEBUG_UNIX 01838 printf ("DEBUG Case (I) AFTER F7 SWA FAULT 2 a: *ppreds = %Lx\n", 01839 *ppreds); 01840 #endif 01841 // set D in FPSR.sfx if unnormal operand (and denormal exeptions 01842 // are disabled) 01843 if (unnormal) { // if (unnormal && D_dis) 01844 // set D in FPSR.sfx (set D = 1 in *pfpsr) 01845 *pfpsr = *pfpsr | 01846 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 8)); 01847 } 01848 // update: *pfpsr: set I = 1 01849 *pfpsr = *pfpsr | 01850 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 01851 01852 // caller will advance instruction pointer 01853 return (FALSE | FAULT_TO_TRAP); // will raise I trap 01854 01855 } 01856 01857 // Case (II) 01858 } else { 01859 01860 frsqrta (ps, sf, qp, lf1, p2, lf3); 01861 01862 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 01863 01864 *pfpsr = ps->state_AR[0].uint_value; 01865 // set the destination floating-point and predicate reg values 01866 #ifdef DEBUG_UNIX 01867 printf ("DEBUG Case (II) AFTER F7 SWA FAULT 3: ps->state_FR[lf1] = %x %x %Lx\n", 01868 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01869 ps->state_FR[lf1].significand); 01870 #endif 01871 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01872 if (f1 < 32) 01873 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01874 else 01875 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01876 01877 // update ps->state_PR[p2] = 1 for positive arguments only 01878 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01879 *ppreds |= 01880 (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 01881 #ifdef DEBUG_UNIX 01882 printf ("DEBUG Case (II) AFTER F7 SWA FAULT 3 a: p2 = %x\n", p2); 01883 #endif 01884 #ifdef DEBUG_UNIX 01885 printf ("DEBUG Case (II) AFTER F7 SWA FAULT 3 a: *ppreds = %Lx\n", *ppreds); 01886 #endif 01887 return (TRUE); 01888 01889 } 01890 01891 break; 01892 01893 case FPRSQRTA_PATTERN: 01894 01895 // should get here only for denormal inputs 01896 fprsqrta (ps, sf, qp, lf1, p2, lf3); 01897 01898 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 01899 01900 *pfpsr = ps->state_AR[0].uint_value; 01901 // set the destination floating-point and predicate 01902 // reg values (redundant, as the output predicate will be cleared) 01903 #ifdef DEBUG_UNIX 01904 printf ("DEBUG AFTER F7 FPRSQRTA SWA FAULT 4: ps->state_FR[lf1] = %x %x %Lx\n", 01905 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 01906 ps->state_FR[lf1].significand); 01907 #endif 01908 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 01909 if (f1 < 32) 01910 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 01911 else 01912 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 01913 01914 // update ps->state_PR[p2] = 0; clear the output predicate 01915 *ppreds &= (~(((EM_uint64_t)1) << (EM_uint_t)p2)); 01916 *ppreds |= 01917 (((EM_uint64_t)(ps->state_PR[p2] & 0x01)) << (EM_uint_t)p2); 01918 #ifdef DEBUG_UNIX 01919 printf ("DEBUG AFTER F7 FPRSQRTA SWA FAULT 4 a: *ppreds = %Lx\n", *ppreds); 01920 #endif 01921 return (TRUE); 01922 01923 } 01924 01925 } else if ((OpCode & F8_MIN_MASK) == F8_PATTERN) { 01926 // F8 instruction 01927 01928 // extract f3, f2, and f1 01929 f3 = (EM_fp_reg_specifier)((OpCode >> 20) & CONST_FORMAT(0x00000000007F)); 01930 if (f3 >= 32) f3 = 32 + (rrbfr + f3 - 32) % 96; 01931 f2 = (EM_fp_reg_specifier)((OpCode >> 13) & CONST_FORMAT(0x00000000007F)); 01932 if (f2 >= 32) f2 = 32 + (rrbfr + f2 - 32) % 96; 01933 f1 = (EM_fp_reg_specifier)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 01934 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 01935 01936 // get source floating-point reg values 01937 ps->state_FR[lf2] = FP128ToFPReg (get_fp_register (f2, fp_state)); 01938 ps->state_FR[lf3] = FP128ToFPReg (get_fp_register (f3, fp_state)); 01939 #ifdef DEBUG_UNIX 01940 printf ("DEBUG BEFORE F8 SWA FAULT: ps->state_FR[lf2] = %x %x %Lx\n", 01941 ps->state_FR[lf2].sign, ps->state_FR[lf2].exponent, ps->state_FR[lf2].significand); 01942 printf ("DEBUG BEFORE F8 SWA FAULT: ps->state_FR[lf3] = %x %x %Lx\n", 01943 ps->state_FR[lf3].sign, ps->state_FR[lf3].exponent, ps->state_FR[lf3].significand); 01944 #endif 01945 01946 switch (OpCode & F8_MASK) { 01947 01948 case FMIN_PATTERN: 01949 SIMD_instruction = 0; 01950 fmin (ps, sf, qp, lf1, lf2, lf3); 01951 break; 01952 case FMAX_PATTERN: 01953 SIMD_instruction = 0; 01954 fmax (ps, sf, qp, lf1, lf2, lf3); 01955 break; 01956 case FAMIN_PATTERN: 01957 SIMD_instruction = 0; 01958 famin (ps, sf, qp, lf1, lf2, lf3); 01959 break; 01960 case FAMAX_PATTERN: 01961 SIMD_instruction = 0; 01962 famax (ps, sf, qp, lf1, lf2, lf3); 01963 break; 01964 case FPMIN_PATTERN: 01965 SIMD_instruction = 1; 01966 fpmin (ps, sf, qp, lf1, lf2, lf3); 01967 break; 01968 case FPMAX_PATTERN: 01969 SIMD_instruction = 1; 01970 fpmax (ps, sf, qp, lf1, lf2, lf3); 01971 break; 01972 case FPAMIN_PATTERN: 01973 SIMD_instruction = 1; 01974 fpamin (ps, sf, qp, lf1, lf2, lf3); 01975 break; 01976 case FPAMAX_PATTERN: 01977 SIMD_instruction = 1; 01978 fpamax (ps, sf, qp, lf1, lf2, lf3); 01979 break; 01980 01981 case FPCMP_EQ_PATTERN: 01982 SIMD_instruction = 1; 01983 fpcmp_eq (ps, sf, qp, lf1, lf2, lf3); 01984 break; 01985 case FPCMP_LT_PATTERN: 01986 SIMD_instruction = 1; 01987 fpcmp_lt (ps, sf, qp, lf1, lf2, lf3); 01988 break; 01989 case FPCMP_LE_PATTERN: 01990 SIMD_instruction = 1; 01991 fpcmp_le (ps, sf, qp, lf1, lf2, lf3); 01992 break; 01993 case FPCMP_UNORD_PATTERN: 01994 SIMD_instruction = 1; 01995 fpcmp_unord (ps, sf, qp, lf1, lf2, lf3); 01996 break; 01997 case FPCMP_NEQ_PATTERN: 01998 SIMD_instruction = 1; 01999 fpcmp_neq (ps, sf, qp, lf1, lf2, lf3); 02000 break; 02001 case FPCMP_NLT_PATTERN: 02002 SIMD_instruction = 1; 02003 fpcmp_nlt (ps, sf, qp, lf1, lf2, lf3); 02004 break; 02005 case FPCMP_NLE_PATTERN: 02006 SIMD_instruction = 1; 02007 fpcmp_nle (ps, sf, qp, lf1, lf2, lf3); 02008 break; 02009 case FPCMP_ORD_PATTERN: 02010 SIMD_instruction = 1; 02011 fpcmp_ord (ps, sf, qp, lf1, lf2, lf3); 02012 break; 02013 02014 default: 02015 // unrecognized instruction type 02016 #ifndef unix 02017 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02018 instruction opcode %8x %8x not recognized\n", OpCode); 02019 #else 02020 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02021 instruction opcode %Lx not recognized\n", OpCode); 02022 return (FP_EMUL_ERROR); 02023 #endif 02024 } 02025 02026 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 02027 02028 // successful emulation 02029 // set the destination floating-point reg value 02030 #ifdef DEBUG_UNIX 02031 printf ("DEBUG AFTER F8 SWA FAULT: ps->state_FR[lf1] = %x %x %Lx\n", 02032 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 02033 ps->state_FR[lf1].significand); 02034 #endif 02035 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 02036 if (f1 < 32) 02037 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 02038 else 02039 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 02040 02041 *pfpsr = ps->state_AR[0].uint_value; 02042 return (TRUE); 02043 02044 } else if ((OpCode & F10_MIN_MASK) == F10_PATTERN) { 02045 // F10 instruction 02046 02047 // extract f2 and f1 02048 f2 = (EM_uint_t)((OpCode >> 13) & CONST_FORMAT(0x00000000007F)); 02049 if (f2 >= 32) f2 = 32 + (rrbfr + f2 - 32) % 96; 02050 f1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 02051 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 02052 02053 // get source floating-point reg value 02054 ps->state_FR[lf2] = FP128ToFPReg (get_fp_register (f2, fp_state)); 02055 #ifdef DEBUG_UNIX 02056 printf ("DEBUG BEFORE F10 SWA FAULT: ps->state_FR[lf2] = %x %x %Lx\n", 02057 ps->state_FR[lf2].sign, ps->state_FR[lf2].exponent, ps->state_FR[lf2].significand); 02058 #endif 02059 02060 switch (OpCode & F10_MASK) { 02061 02062 case FCVT_FX_PATTERN: 02063 SIMD_instruction = 0; 02064 fcvt_fx (ps, sf, qp, lf1, lf2); 02065 break; 02066 case FCVT_FXU_PATTERN: 02067 SIMD_instruction = 0; 02068 fcvt_fxu (ps, sf, qp, lf1, lf2); 02069 break; 02070 case FCVT_FX_TRUNC_PATTERN: 02071 SIMD_instruction = 0; 02072 fcvt_fx_trunc (ps, sf, qp, lf1, lf2); 02073 break; 02074 case FCVT_FXU_TRUNC_PATTERN: 02075 SIMD_instruction = 0; 02076 fcvt_fxu_trunc (ps, sf, qp, lf1, lf2); 02077 break; 02078 case FPCVT_FX_PATTERN: 02079 SIMD_instruction = 1; 02080 fpcvt_fx (ps, sf, qp, lf1, lf2); 02081 break; 02082 case FPCVT_FXU_PATTERN: 02083 SIMD_instruction = 1; 02084 fpcvt_fxu (ps, sf, qp, lf1, lf2); 02085 break; 02086 case FPCVT_FX_TRUNC_PATTERN: 02087 SIMD_instruction = 1; 02088 fpcvt_fx_trunc (ps, sf, qp, lf1, lf2); 02089 break; 02090 case FPCVT_FXU_TRUNC_PATTERN: 02091 SIMD_instruction = 1; 02092 fpcvt_fxu_trunc (ps, sf, qp, lf1, lf2); 02093 break; 02094 default: 02095 // unrecognized instruction type 02096 #ifndef unix 02097 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02098 instruction opcode %8x %8x not recognized\n", OpCode); 02099 #else 02100 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02101 instruction opcode %Lx not recognized\n", OpCode); 02102 return (FP_EMUL_ERROR); 02103 #endif 02104 } 02105 02106 if ((ps->state_MERCED_RTL >> 16) & 0x0ffff) goto new_exception; 02107 02108 // successful emulation 02109 // set the destination floating-point reg value 02110 #ifdef DEBUG_UNIX 02111 printf ("DEBUG AFTER F10 SWA FAULT: ps->state_FR[lf1] = %x %x %Lx\n", 02112 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 02113 ps->state_FR[lf1].significand); 02114 #endif 02115 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 02116 if (f1 < 32) 02117 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 02118 else 02119 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 02120 02121 *pfpsr = ps->state_AR[0].uint_value; 02122 return (TRUE); 02123 02124 } else { 02125 02126 // unrecognized instruction type 02127 #ifndef unix 02128 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02129 instruction opcode %8x %8x not recognized\n", OpCode); 02130 #else 02131 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02132 instruction opcode %Lx not recognized\n", OpCode); 02133 return (FP_EMUL_ERROR); 02134 #endif 02135 } 02136 02137 // advance instruction pointer in the trap frame 02138 02139 return (TRUE); 02140 02141 // end 'if ((trap_type == FPFLT) && (ISRlow & 0x0088))' 02142 02143 } else if ((trap_type == FPTRAP) && swa_trap (sf, FPSR, ISRlow)) { 02144 02145 // else if this is a SWA trap 02146 02147 // this can only happen in one situation for Merced at the present time: 02148 // for a tiny result (which can occur only for an F1 instruction), when the 02149 // underflow exceptions are disabled; the IA-64 FP Emulation Library also 02150 // handles correctly the situations in which a huge result occurs, and the 02151 // overflow exceptions are disabled, or when an inexact result occurs, and 02152 // the inexact exceptions are disabled 02153 02154 // shortcut the case when the result is inexact, and the inexact exceptions 02155 // are disabled 02156 if ((ISRlow & 0x1980) == 0) return (TRUE); // nothing to do in this case 02157 02158 // Note: overflow, which can also be caused only by an F1 instruction 02159 // (but not on Merced) is included too; 02160 02161 // Note that for Merced, if a SWA trap occurs, fp_emulate () is 02162 // entered with U exceptions disabled and the U flag set in the ISR code; 02163 // the I flag can be set or not (unlike the U flag in the FPSR in the 02164 // absence of a trap, which will be set together with the I flag when 02165 // U traps are disabled [with U traps disabled, the result has to be 02166 // tiny and inexact for the U flag to be set - the I flag will be set 02167 // too]) 02168 02169 // SIMD_instruction unchanged at 2 02170 02171 // decode the rest of the instruction 02172 if ((OpCode & F1_MIN_MASK) == F1_PATTERN) { 02173 // F1 instruction 02174 02175 // extract f1 02176 f1 = (EM_uint_t)((OpCode >> 6) & CONST_FORMAT(0x00000000007F)); 02177 if (f1 >= 32) f1 = 32 + (rrbfr + f1 - 32) % 96; 02178 #ifdef DEBUG_UNIX 02179 printf ("DEBUG BEFORE F1 SWA TRAP: f1 = %x\n", f1); 02180 #endif 02181 02182 // no need to get the source floating-point reg values - they 02183 // might have changed 02184 02185 // read the destination reg f1 02186 ps->state_FR[lf1] = FP128ToFPReg (get_fp_register (f1, fp_state)); 02187 #ifdef DEBUG_UNIX 02188 printf ("DEBUG BEFORE F1 SWA TRAP: ps->state_FR[lf1] = %x %x %Lx\n", 02189 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 02190 ps->state_FR[lf1].significand); 02191 #endif 02192 02193 switch (OpCode & F1_MASK) { 02194 02195 case FMA_PATTERN: 02196 case FMS_PATTERN: 02197 case FNMA_PATTERN: 02198 opcode_pc = pc_sf; 02199 break; 02200 case FMA_S_PATTERN: 02201 case FMS_S_PATTERN: 02202 case FNMA_S_PATTERN: 02203 opcode_pc = pc_s; 02204 break; 02205 case FMA_D_PATTERN: 02206 case FMS_D_PATTERN: 02207 case FNMA_D_PATTERN: 02208 opcode_pc = pc_d; 02209 break; 02210 case FPMA_PATTERN: 02211 case FPMS_PATTERN: 02212 case FPNMA_PATTERN: 02213 opcode_pc = pc_simd; 02214 break; 02215 02216 default: 02217 // unrecognized instruction type 02218 #ifndef unix 02219 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02220 instruction opcode %8x %8x not recognized\n", OpCode); 02221 #else 02222 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02223 instruction opcode %Lx not recognized\n", OpCode); 02224 return (FP_EMUL_ERROR); 02225 #endif 02226 } 02227 02228 // SWA trap - software assistance required 02229 02230 // implement table 3-5 of EM EAS 2.1 02231 rc = (EM_sf_rc_type)((FPSR >> (6 + 4 + 13 * (EM_uint_t)sf)) & 0x03); 02232 pc = (EM_sf_pc_type)((FPSR >> (6 + 2 + 13 * (EM_uint_t)sf)) & 0x03); 02233 wre = (EM_uint_t)((FPSR >> (6 + 1 + 13 * (EM_uint_t)sf)) & 0x01); 02234 02235 // determine the precision of the result, the size of the exponent, 02236 // and emin 02237 if (opcode_pc == pc_simd || opcode_pc == pc_s && wre == 0) { 02238 significand_size = 24; 02239 emin = EMIN_08_BITS; 02240 } else if (opcode_pc == pc_d && wre == 0) { 02241 significand_size = 53; 02242 emin = EMIN_11_BITS; 02243 } else if (opcode_pc == pc_s && wre == 1) { 02244 significand_size = 24; 02245 emin = EMIN_17_BITS; 02246 } else if (opcode_pc == pc_d && wre == 1) { 02247 significand_size = 53; 02248 emin = EMIN_17_BITS; 02249 } else if (opcode_pc == pc_sf) { 02250 if (pc == sf_single && wre == 0) { 02251 significand_size = 24; 02252 emin = EMIN_15_BITS; 02253 } else if (pc == sf_double && wre == 0) { 02254 significand_size = 53; 02255 emin = EMIN_15_BITS; 02256 } else if (pc == sf_double_extended && wre == 0) { 02257 significand_size = 64; 02258 emin = EMIN_15_BITS; 02259 } else if (pc == sf_single && wre == 1) { 02260 significand_size = 24; 02261 emin = EMIN_17_BITS; 02262 } else if (pc == sf_double && wre == 1) { 02263 significand_size = 53; 02264 emin = EMIN_17_BITS; 02265 } else if (pc == sf_double_extended && wre == 1) { 02266 significand_size = 64; 02267 emin = EMIN_17_BITS; 02268 } else { 02269 #ifndef unix 02270 FP_EMULATION_ERROR0 ("fp_emulate () internal error in \ 02271 determining the computation model\n"); 02272 } 02273 } else { 02274 FP_EMULATION_ERROR0 ("fp_emulate () internal error in \ 02275 determining the computation model\n"); 02276 #else 02277 FP_EMULATION_ERROR0 ("fp_emulate () internal error in \ 02278 determining the computation model\n"); 02279 return (FP_EMUL_ERROR); 02280 } 02281 } else { 02282 FP_EMULATION_ERROR0 ("fp_emulate () internal error in \ 02283 determining the computation model\n"); 02284 return (FP_EMUL_ERROR); 02285 #endif 02286 } 02287 02288 tmp_fp = ps->state_FR[lf1]; 02289 02290 // Note: if the cause of the SWA trap is O or U, cannot get here with 02291 // zero or a denormal in FR[f1]; if U, the result in FR[f1] will have 02292 // to be denormalized 02293 if (opcode_pc != pc_simd) { // non-SIMD instruction 02294 02295 fpa = (ISRlow >> 14) & 0x01; 02296 I_exc = (ISRlow >> 13) & 0x01; // inexact = round OR sticky 02297 U_exc = (ISRlow >> 12) & 0x01; // underflow 02298 O_exc = (ISRlow >> 11) & 0x01; // overflow 02299 02300 sign = tmp_fp.sign; 02301 exponent = tmp_fp.exponent; 02302 significand = tmp_fp.significand; 02303 02304 // calculate the true biased exponent, and then the true exponent 02305 if (U_dis && U_exc) { // the result is not zero, and is normal with 02306 // unbounded exponent (tiny result) 02307 02308 decr_exp = 0; 02309 02310 // extract the number of significand bits specified by the 02311 // destination precision, and determine the significand, before 02312 // the rounding performed in hardware (1st IEEE rounding) 02313 02314 if (significand_size == 64) { 02315 if (fpa == 1) { 02316 significand = significand - 1; 02317 if (significand == CONST_FORMAT(0x07fffffffffffffff)) { 02318 significand = CONST_FORMAT(0x0ffffffffffffffff); 02319 decr_exp = 1; 02320 } 02321 } 02322 } else if (significand_size == 53) { 02323 // the 53 bits are already there, but need to be shifted right 02324 significand = significand >> 11; 02325 if (fpa == 1) { 02326 significand = significand - 1; 02327 if (significand == CONST_FORMAT(0x0fffffffffffff)) { 02328 significand = CONST_FORMAT(0x01fffffffffffff); 02329 decr_exp = 1; 02330 } 02331 } 02332 } else if (significand_size == 24) { 02333 // the 24 bits are already there, but need to be shifted right 02334 significand = significand >> 40; 02335 if (fpa == 1) { 02336 significand = significand - 1; 02337 if (significand == CONST_FORMAT(0x07fffff)) { 02338 significand = CONST_FORMAT(0x0ffffff); 02339 decr_exp = 1; 02340 } 02341 } 02342 } else { 02343 // internal error 02344 #ifndef unix 02345 FP_EMULATION_ERROR6 ("fp_emulate (): incorrect \ 02346 significand size %d for ISRlow = %4.4x and FR[%d] = %1.1x %5.5x %8.8x %8.8x\n", 02347 significand_size, ISRlow, f1, tmp_fp.sign, tmp_fp.exponent, 02348 tmp_fp.significand) 02349 #else 02350 FP_EMULATION_ERROR6 ("fp_emulate (): incorrect \ 02351 significand size %d for ISRlow = %4.4x and FR[%d] = %1.1x %5.5x %Lx\n", 02352 significand_size, ISRlow, f1, tmp_fp.sign, tmp_fp.exponent, 02353 tmp_fp.significand) 02354 return (FP_EMUL_ERROR); 02355 #endif 02356 } 02357 02358 true_bexp = ((exponent + 0x1007b) & 0x1ffff) - 0x1007b; 02359 02360 // true_bexp - 0x0ffff is the true unbiased exponent after the 02361 // first IEEE rounding; determine whether the result is tiny 02362 if (true_bexp - 0x0ffff > emin - 1) { 02363 #ifndef unix 02364 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error: non-tiny res\n"); 02365 #else 02366 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error: non-tiny res\n"); 02367 return (FP_EMUL_ERROR); 02368 #endif 02369 } 02370 02371 // adjust now true_bexp if necessary 02372 if (decr_exp) true_bexp--; 02373 02374 // perform the second IEEE rounding 02375 02376 shift_cnt = emin - true_bexp + 0x0ffff; // >= 1 02377 02378 if (shift_cnt <= significand_size) { 02379 // do the actual shift to denormalize the result; the result 02380 // will be a denormal, or zero 02381 round = I_exc; 02382 // this is indicated even for O or U if the result is inexact 02383 sticky = 0; 02384 for (ind = 0 ; ind < shift_cnt ; ind++) { 02385 sticky = round | sticky; 02386 round = (EM_uint_t)(significand & 0x01); 02387 significand = significand >> 1; 02388 } 02389 true_bexp = true_bexp + shift_cnt; // e_min + 0xffff 02390 } else { // all the significand bits shift out into sticky 02391 significand = 0; 02392 round = 0; 02393 sticky = 1; 02394 true_bexp = true_bexp + shift_cnt; // e_min + 0xffff 02395 } 02396 02397 // perform the rounding; the result is 0, denormal, or 02398 // 1.0 x 2^emin 02399 switch (rc) { 02400 case rc_rn: 02401 lsb = (EM_uint_t)(significand & 0x01); 02402 fpa = round & (lsb | sticky); 02403 break; 02404 case rc_rm: 02405 fpa = (sign == 0 ? 0 : (round | sticky)); 02406 break; 02407 case rc_rp: 02408 fpa = (sign == 1 ? 0 : (round | sticky)); 02409 break; 02410 case rc_rz: 02411 fpa = 0; 02412 break; 02413 default: 02414 #ifndef unix 02415 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02416 invalid rc = %d\n", rc) 02417 #else 02418 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02419 invalid rc = %d\n", rc) 02420 return (FP_EMUL_ERROR); 02421 #endif 02422 } 02423 02424 02425 // add fpa to the significand if fpa = 1, and fix for the case when 02426 // there is a carry-out in this addition 02427 02428 if (fpa == 1) { 02429 02430 significand = significand + 1; 02431 02432 if (significand_size == 64) { 02433 if (significand == CONST_FORMAT(0x0)) { // was 0xff....f 02434 significand = CONST_FORMAT(0x08000000000000000); 02435 true_bexp++; // e_min + 0xffff + 0x1 02436 } 02437 } else if (significand_size == 53) { 02438 if (significand == CONST_FORMAT(0x020000000000000)) { 02439 significand = CONST_FORMAT(0x010000000000000); 02440 true_bexp++; // e_min + 0xffff + 0x1 02441 } 02442 } else if (significand_size == 24) { 02443 if (significand == CONST_FORMAT(0x01000000)) { 02444 significand = CONST_FORMAT(0x0800000); 02445 true_bexp++; // e_min + 0xffff + 0x1 02446 } 02447 } else { // this case not really needed 02448 // internal error 02449 #ifndef unix 02450 FP_EMULATION_ERROR1 ( 02451 "fp_emulate (): incorrect significand size %d\n", 02452 significand_size) 02453 #else 02454 FP_EMULATION_ERROR1 ( 02455 "fp_emulate (): incorrect significand size %d\n", 02456 significand_size) 02457 return (FP_EMUL_ERROR); 02458 #endif 02459 } 02460 02461 } 02462 02463 if (significand == 0) { 02464 true_bexp = 0; // ow it is e_min or e_min + 1 02465 } 02466 02467 exponent = true_bexp; 02468 02469 // determine the new value of I_exc 02470 I_exc = round | sticky; 02471 02472 // set the new values for both the FPSR and the ISR code, but the 02473 // ISR code will only be used if an inexact exception will be 02474 // raised (for this, the final result generated by 02475 // fp_emulate () has to be inexact, and the inexact traps have 02476 // to be enabled 02477 02478 if (I_exc) { 02479 // if tiny and inexact 02480 /* set U and set I in FPSR */ 02481 *pfpsr = *pfpsr | 02482 ((EM_uint64_t)3 << (6 + (EM_uint_t)sf * 13 + 11)); 02483 if (!I_dis) { // update ISR code 02484 // clear U and set I in ISRlow - will raise an inexact trap 02485 ISRlow = (ISRlow & 0xefff) | 0x2000; 02486 // update the fpa bit in the ISR code (NOT DONE IN EM_FP82) 02487 if (fpa) 02488 ISRlow = ISRlow | 0x4000; 02489 else 02490 ISRlow = ISRlow & 0xbfff; 02491 } 02492 } 02493 02494 // shift left the significand if needed 02495 if (significand_size == 53) 02496 significand = significand << 11; // msb explicit 02497 else if (significand_size == 24) 02498 significand = significand << 40; // msb explicit 02499 02500 // if the exponent is 0xc001 and the result is unnormal, 02501 // modify the exponent to 0x0000 02502 if (exponent == 0xc001 && ((significand & 0x8000000000000000) == 0)) 02503 exponent = 0x0; 02504 02505 } else if (O_dis && O_exc) { // the result is not zero, and is normal 02506 // with unbounded exponent 02507 02508 // true_bexp not used in this case, and neither is decr_exp 02509 // true_bexp = ((exponent - 0x1007f) & 0x1ffff) + 0x1007f; 02510 02511 // determine the result, according to the rounding mode 02512 switch (rc) { 02513 case rc_rn: // +infinity or -infinity 02514 exponent = 0x01ffff; 02515 significand = CONST_FORMAT(0x8000000000000000); 02516 break; 02517 case rc_rm: // +max_fp or -infinity 02518 exponent = (sign == 0 ? 0x01fffe : 0x01ffff); 02519 significand = (sign == 0 ? CONST_FORMAT(0x0ffffffffffffffff) : 02520 CONST_FORMAT(0x8000000000000000)); 02521 break; 02522 case rc_rp: // +infinity or -max_fp 02523 exponent = (sign == 0 ? 0x01ffff : 0x01fffe); 02524 significand = (sign == 0 ? CONST_FORMAT(0x8000000000000000) : 02525 CONST_FORMAT(0x0ffffffffffffffff)); 02526 break; 02527 case rc_rz: // +max_fp or -max_fp 02528 exponent = 0x01fffe; 02529 significand = CONST_FORMAT(0x0ffffffffffffffff); 02530 break; 02531 default: 02532 #ifndef unix 02533 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error:\ 02534 invalid rc = %d for non-SIMD F1 instruction\n", rc) 02535 #else 02536 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error:\ 02537 invalid rc = %d for non-SIMD F1 instruction\n", rc) 02538 return (FP_EMUL_ERROR); 02539 #endif 02540 } 02541 02542 /* set O and set I in FPSR */ 02543 *pfpsr = *pfpsr | ((EM_uint64_t)5 << (6 + (EM_uint_t)sf * 13 + 10)); 02544 02545 if (!I_dis) { // update ISR code - will raise an inexact exception 02546 02547 if (exponent == 0x1ffff) 02548 fpa = 1; 02549 else 02550 fpa = 0; 02551 02552 /* clear O and set I in ISRlow - the result is always inexact */ 02553 ISRlow = (ISRlow & 0xf7ff) | 0x2000; 02554 // update the fpa bit in the ISR code (NOT DONE IN EM_FP82) 02555 if (fpa) 02556 ISRlow = ISRlow | 0x4000; 02557 else 02558 ISRlow = ISRlow & 0xbfff; 02559 02560 // Note that the exact result cannot be retrieved by the inexact 02561 // exception trap handler [but this will not occur on Merced] 02562 02563 } 02564 02565 } else if (I_dis && I_exc) { 02566 02567 // redundant - this case was caught before 02568 ; // nothing to do 02569 02570 } else { 02571 02572 // internal error 02573 #ifndef unix 02574 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02575 SWA trap code invoked with F1 instruction, w/o O or U set in ISR.code = %x\n", 02576 ISRlow & 0x0ffff) 02577 #else 02578 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02579 SWA trap code invoked with F1 instruction, w/o O or U set in ISR.code = %x\n", 02580 ISRlow & 0x0ffff) 02581 return (FP_EMUL_ERROR); 02582 #endif 02583 } 02584 02585 // return the result 02586 // FR[f1].sign is unchanged 02587 ps->state_FR[lf1].exponent = exponent; 02588 ps->state_FR[lf1].significand = significand; 02589 02590 // successful emulation, but might still raise another trap - O, U, or I 02591 02592 // set the destination floating-point reg value (this is a trap) 02593 // [redundant if the cause of the SWA trap was I && I_dis] 02594 #ifdef DEBUG_UNIX 02595 printf ("DEBUG AFTER F1 SWA TRAP 1: ps->state_FR[lf1] = %x %x %Lx\n", 02596 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 02597 ps->state_FR[lf1].significand); 02598 #endif 02599 #ifdef DEBUG_UNIX 02600 printf ("DEBUG AFTER F1 SWA TRAP: f1 = %x\n", f1); 02601 #endif 02602 02603 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 02604 if (f1 < 32) 02605 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 02606 else 02607 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 02608 02609 // return TRUE if no exception has to be raised 02610 if (I_dis || I_exc == 0) { 02611 02612 return (TRUE); 02613 02614 } else { 02615 // if (inexact && I traps enabled) 02616 // ISRlow might have been updated 02617 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 02618 02619 #ifdef DEBUG_UNIX 02620 printf ("DEBUG AFTER F1 SWA TRAP 1 A RET FALSE: ps->state_FR[lf1] = %x %x %Lx\n", 02621 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 02622 ps->state_FR[lf1].significand); 02623 #endif 02624 return (FALSE); // will raise I trap 02625 02626 } 02627 02628 } else { // SIMD instruction 02629 02630 // tmp_fp.exponent = 0x1003e 02631 02632 fpa_hi = (ISRlow >> 14) & 0x01; 02633 I_exc_hi = (ISRlow >> 13) & 0x01; // inexact = round OR sticky 02634 U_exc_hi = (ISRlow >> 12) & 0x01; // underflow 02635 O_exc_hi = (ISRlow >> 11) & 0x01; // overflow 02636 fpa_lo = (ISRlow >> 10) & 0x01; 02637 I_exc_lo = (ISRlow >> 9) & 0x01; // inexact = round OR sticky 02638 U_exc_lo = (ISRlow >> 8) & 0x01; // underflow 02639 O_exc_lo = (ISRlow >> 7) & 0x01; // overflow 02640 02641 if (!U_exc_lo && !U_exc_hi && !O_exc_lo && !O_exc_hi) { 02642 02643 // internal error 02644 #ifndef unix 02645 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02646 SWA trap code invoked with SIMD F1 instruction, w/o O or U set in \ 02647 ISR.code = %x\n", ISRlow & 0x0ffff) 02648 #else 02649 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 02650 SWA trap code invoked with SIMD F1 instruction, w/o O or U set in \ 02651 ISR.code = %x\n", ISRlow & 0x0ffff) 02652 return (FP_EMUL_ERROR); 02653 #endif 02654 02655 } 02656 02657 sign_lo = (EM_uint_t)((tmp_fp.significand >> 31) & 0x01); 02658 exponent_lo = (EM_uint_t)((tmp_fp.significand >> 23) & 0x0ff); 02659 significand_lo = (EM_uint_t)((tmp_fp.significand) & 0x07fffff); 02660 // never get here with a 0 or an unnormal result if a disabled 02661 // underflow or overflow occurred in the low half; if the low half 02662 // has neither an underflow nor an overflow, then the assignment 02663 // below might be incorrect, but significand_lo will be corrected when 02664 // the result is returned 02665 significand_lo = significand_lo | 0x800000; 02666 02667 sign_hi = (EM_uint_t)((tmp_fp.significand >> 63) & 0x01); 02668 exponent_hi = (EM_uint_t)((tmp_fp.significand >> 55) & 0x0ff); 02669 significand_hi = (EM_uint_t)((tmp_fp.significand >> 32) & 0x07fffff); 02670 // never get here with a 0 or an unnormal result if a disabled 02671 // underflow or overflow occurred in the high half; if the high half 02672 // has neither an underflow nor an overflow, then the assignment 02673 // below might be incorrect, but significand_hi will be corrected when 02674 // the result is returned 02675 significand_hi = significand_hi | 0x800000; 02676 02677 // if underflow or overflow, calculate the true biased exponent, by 02678 // possibly adding or subtracting 2^8 02679 02680 if (U_dis && U_exc_lo) { // the result is not zero, and is normal with 02681 // unbounded exponent (but tiny) 02682 02683 decr_exp_lo = 0; 02684 // if SWA trap in the low half 02685 if (fpa_lo == 1) { 02686 significand_lo = significand_lo - 1; 02687 if (significand_lo == 0x07fffff) { 02688 significand_lo = 0x0ffffff; 02689 decr_exp_lo = 1; 02690 } 02691 } 02692 02693 true_bexp_lo = (exponent_lo == 0 ? exponent_lo : exponent_lo - 0x100); 02694 02695 // true_bexp_lo - 0x07f is the true unbiased exponent after the 02696 // first IEEE rounding; determine whether the result is tiny 02697 if (true_bexp_lo - 0x07f > emin - 1) { 02698 #ifndef unix 02699 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error:non-tiny resL\n"); 02700 #else 02701 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error:non-tiny resL\n"); 02702 return (FP_EMUL_ERROR); 02703 #endif 02704 } 02705 02706 // adjust now true_bexp_lo if necessary 02707 if (decr_exp_lo) true_bexp_lo--; 02708 02709 // perform the second IEEE rounding 02710 02711 shift_cnt_lo = emin - true_bexp_lo + 0x07f; // >= 1 02712 02713 if (shift_cnt_lo <= significand_size) { // <= 24 02714 // do the actual shift to denormalize the result; the result 02715 // will be a denormal, or zero 02716 round_lo = I_exc_lo; 02717 // this is indicated even for O or U, if the result inexact 02718 sticky_lo = 0; 02719 for (ind = 0 ; ind < shift_cnt_lo ; ind++) { 02720 sticky_lo = round_lo | sticky_lo; 02721 round_lo = significand_lo & 0x01; 02722 significand_lo = significand_lo >> 1; 02723 } 02724 true_bexp_lo = true_bexp_lo + shift_cnt_lo; // e_min + 0x7f 02725 } else { // all the significand bits shift out into sticky 02726 significand_lo = 0; 02727 round_lo = 0; 02728 sticky_lo = 1; 02729 true_bexp_lo = true_bexp_lo + shift_cnt_lo; // e_min + 0x7f 02730 } 02731 02732 // perform the rounding; the result is 0, denormal, or 02733 // 1.0 x 2^emin 02734 switch (rc) { 02735 case rc_rn: 02736 lsb_lo = significand_lo & 0x01; 02737 fpa_lo = round_lo & (lsb_lo | sticky_lo); 02738 break; 02739 case rc_rm: 02740 fpa_lo = (sign_lo == 0 ? 0 : (round_lo | sticky_lo)); 02741 break; 02742 case rc_rp: 02743 fpa_lo = (sign_lo == 1 ? 0 : (round_lo | sticky_lo)); 02744 break; 02745 case rc_rz: 02746 fpa_lo = 0; 02747 break; 02748 default: 02749 #ifndef unix 02750 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02751 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02752 #else 02753 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02754 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02755 return (FP_EMUL_ERROR); 02756 #endif 02757 } 02758 02759 02760 // add fpa_lo to the significand if fpa_lo = 1, and fix for the 02761 // case when there is a carry-out in this addition 02762 02763 if (fpa_lo == 1) { 02764 significand_lo = significand_lo + 1; 02765 02766 if (significand_lo == 0x01000000) { 02767 significand_lo = 0x0800000; 02768 true_bexp_lo++; // e_min + 0x7f + 0x001 02769 } 02770 } 02771 02772 if (significand_lo == 0) { 02773 true_bexp_lo = 0; // otherwise it is e_min or e_min + 1 02774 } 02775 02776 exponent_lo = true_bexp_lo; 02777 02778 if ((exponent_lo == 0x01) && ((significand_lo & 0x0800000) == 0)) { 02779 exponent_lo = 0x0; // result low is denormal 02780 #ifdef DEBUG_UNIX 02781 printf ("DEBUG: result low is denormal\n"); 02782 #endif 02783 } 02784 02785 // determine the new value of I_exc_lo 02786 I_exc_lo = round_lo | sticky_lo; 02787 02788 // the low half could only raise an inexact exception; if it does 02789 // not, clear U_exc_lo, I_exc_lo, and fpa_lo in ISRlow 02790 02791 if (!I_exc_lo) { 02792 02793 // if exact, then no new exception has to be raised for the low 02794 // half of the instruction; clear U_exc_lo in ISRlow (an exception 02795 // might be raised by the high half of the instruction) [clearing 02796 // I is redundant: it could not have been inexact and then become 02797 // exact after denormalization] 02798 02799 ISRlow = ISRlow & 0xfeff; 02800 // need to clean ISRlow because the other half might raise exc. 02801 02802 // FPSR in the trap frame needs no update for the low half: with 02803 // U traps disabled, the result would have to be tiny and inexact 02804 // in order to set the U flag; both U and I flags in the FPSR 02805 // stay unchanged 02806 U_exc_lo = 0; 02807 02808 // no exception raised for the low half - clear fpa_lo (an 02809 // exception might be raised by the high half of the instruction) 02810 ISRlow = ISRlow & 0xfbff; 02811 02812 } else { // if (I_exc_lo) 02813 02814 // if tiny and inexact will set U and I in FPSR, as U_exc_lo = 1 02815 if (I_dis) { // the low half will not raise an inexact exception 02816 // clear I_exc_lo and U_exc_lo in ISRlow (prepare ISRlow 02817 // because the high half might raise an exception); no exception 02818 // raised for the low half - clear also fpa_lo 02819 ISRlow = ISRlow & 0xf8ff; 02820 } else { // update ISR code - will raise an inexact trap 02821 // clear U_lo and set I_lo in ISRlow - will raise inexact trap 02822 ISRlow = (ISRlow & 0xfeff) | 0x0200; 02823 // update the fpa_lo bit in the ISR code (NOT DONE IN EM_FP82) 02824 if (fpa_lo) 02825 ISRlow = ISRlow | 0x0400; 02826 else 02827 ISRlow = ISRlow & 0xfbff; 02828 02829 } 02830 02831 } 02832 02833 } 02834 02835 if (O_dis && O_exc_lo) { // the result is not zero, and is normal with 02836 // unbounded exponent 02837 02838 // true_bexp_lo not used in this case, and neither is decr_exp_lo 02839 // true_bexp_lo = 02840 // (exponent_lo == 0xff ? exponent_lo : exponent_lo + 0x100); 02841 02842 // determine the result, according to the rounding mode 02843 switch (rc) { 02844 case rc_rn: // +infinity or -infinity 02845 exponent_lo = 0x0ff; 02846 significand_lo = 0x0800000; 02847 break; 02848 case rc_rm: // +max_fp or -infinity 02849 exponent_lo = (sign_lo == 0 ? 0x0fe : 0x0ff); 02850 significand_lo = (sign_lo == 0 ? 0x0ffffff : 0x0800000); 02851 break; 02852 case rc_rp: // +infinity or -max_fp 02853 exponent_lo = (sign_lo == 0 ? 0x0ff : 0x0fe); 02854 significand_lo = (sign_lo == 0 ? 0x0800000 : 0x0ffffff); 02855 break; 02856 case rc_rz: // +max_fp or -max_fp 02857 exponent_lo = 0x0fe; 02858 significand_lo = 0x0ffffff; 02859 break; 02860 default: 02861 #ifndef unix 02862 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02863 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02864 #else 02865 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02866 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02867 return (FP_EMUL_ERROR); 02868 #endif 02869 } 02870 02871 if (exponent_lo == 0xff) 02872 fpa_lo = 1; 02873 else 02874 fpa_lo = 0; 02875 02876 // will update FPSR 02877 O_exc_lo = 1; // must have been so already 02878 I_exc_lo = 1; 02879 02880 if (I_dis) { 02881 02882 // clear fpa_lo, I_exc_lo and O_exc_lo in ISRlow - an enabled 02883 // exception might be raised for the high half 02884 ISRlow = ISRlow & 0xf97f; 02885 02886 } else { // if (!I_dis) 02887 02888 // update ISR code - will raise an inexact exception for low half; 02889 // clear O_exc_lo and set I_exc_lo in ISRlow - the result 02890 // is always inexact 02891 ISRlow = (ISRlow & 0xff7f) | 0x0200; 02892 // update the fpa_lo bit 02893 if (fpa_lo) 02894 ISRlow = ISRlow | 0x0400; 02895 else 02896 ISRlow = ISRlow & 0xfbff; 02897 // Note that the exact result cannot be retrieved by the inexact 02898 // exception trap handler [but this will not occur on Merced] 02899 02900 } 02901 02902 } 02903 02904 if (I_dis && I_exc_lo) { 02905 02906 ; // nothing to do (keep this as a place holder) 02907 // redundant - this case was caught before 02908 02909 } 02910 02911 if (U_dis && U_exc_hi) { // the result is not zero, and is normal with 02912 // unbounded exponent (but tiny) 02913 02914 decr_exp_hi = 0; 02915 // if SWA trap in the high half 02916 if (fpa_hi == 1) { 02917 significand_hi = significand_hi - 1; 02918 if (significand_hi == 0x07fffff) { 02919 significand_hi = 0x0ffffff; 02920 decr_exp_hi = 1; 02921 } 02922 } 02923 02924 true_bexp_hi = (exponent_hi == 0 ? exponent_hi : exponent_hi - 0x100); 02925 02926 // true_bexp_hi - 0x07f is the true unbiased exponent after the 02927 // first IEEE rounding; determine whether the result is tiny 02928 if (true_bexp_hi - 0x07f > emin - 1) { 02929 #ifndef unix 02930 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error:non-tiny resH\n"); 02931 #else 02932 FP_EMULATION_ERROR0 ("fp_emulate () Internal Error:non-tiny resH\n"); 02933 return (FP_EMUL_ERROR); 02934 #endif 02935 } 02936 02937 // adjust now true_bexp_hi if necessary 02938 if (decr_exp_hi) true_bexp_hi--; 02939 02940 // perform the second IEEE rounding 02941 02942 shift_cnt_hi = emin - true_bexp_hi + 0x07f; // >= 1 02943 02944 if (shift_cnt_hi <= significand_size) { // <= 24 02945 // do the actual shift to denormalize the result; the result 02946 // will be a denormal, or zero 02947 round_hi = I_exc_hi; 02948 // this is indicated even for O or U, if the result inexact 02949 sticky_hi = 0; 02950 for (ind = 0 ; ind < shift_cnt_hi ; ind++) { 02951 sticky_hi = round_hi | sticky_hi; 02952 round_hi = significand_hi & 0x01; 02953 significand_hi = significand_hi >> 1; 02954 } 02955 true_bexp_hi = true_bexp_hi + shift_cnt_hi; // e_min + 0x7f 02956 } else { // all the significand bits shift out into sticky 02957 significand_hi = 0; 02958 round_hi = 0; 02959 sticky_hi = 1; 02960 true_bexp_hi = true_bexp_hi + shift_cnt_hi; // e_min + 0x7f 02961 } 02962 02963 // perform the rounding; the result is 0, denormal, or 02964 // 1.0 x 2^emin 02965 switch (rc) { 02966 case rc_rn: 02967 lsb_hi = significand_hi & 0x01; 02968 fpa_hi = round_hi & (lsb_hi | sticky_hi); 02969 break; 02970 case rc_rm: 02971 fpa_hi = (sign_hi == 0 ? 0 : (round_hi | sticky_hi)); 02972 break; 02973 case rc_rp: 02974 fpa_hi = (sign_hi == 1 ? 0 : (round_hi | sticky_hi)); 02975 break; 02976 case rc_rz: 02977 fpa_hi = 0; 02978 break; 02979 default: 02980 #ifndef unix 02981 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02982 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02983 #else 02984 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 02985 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 02986 return (FP_EMUL_ERROR); 02987 #endif 02988 } 02989 02990 02991 // add fpa_hi to the significand if fpa = 1, and fix for the 02992 // case when there is a carry-out in this addition 02993 02994 if (fpa_hi == 1) { 02995 significand_hi = significand_hi + 1; 02996 02997 if (significand_hi == 0x01000000) { 02998 significand_hi = 0x0800000; 02999 true_bexp_hi++; // e_min + 0x7f + 0x01 03000 } 03001 } 03002 03003 if (significand_hi == 0) { 03004 true_bexp_hi = 0; // otherwise it is e_min or e_min + 1 03005 } 03006 03007 exponent_hi = true_bexp_hi; 03008 03009 if ((exponent_hi == 0x01) && ((significand_hi & 0x0800000) == 0)) { 03010 exponent_hi = 0x0; // result high is denormal 03011 #ifdef DEBUG_UNIX 03012 printf ("DEBUG: result high is denormal\n"); 03013 #endif 03014 } 03015 03016 // determine the new value of I_exc_hi 03017 I_exc_hi = round_hi | sticky_hi; 03018 03019 // the high half could only raise an inexact exception; if it does 03020 // not, clear U_exc_hi, I_exc_hi, and fpa_hi in ISRlow 03021 03022 if (!I_exc_hi) { 03023 03024 // if exact, then no new exception has to be raised for the high 03025 // half of the instruction; clear U_exc_hi in ISRlow (an exception 03026 // might be raised by the high half of the instruction) [clearing 03027 // I is redundant: it could not have been inexact and then become 03028 // exact after denormalization] 03029 03030 ISRlow = ISRlow & 0xefff; 03031 // need to clean ISRlow because the other half might raise exc. 03032 03033 // FPSR in the trap frame needs no update for the high half: with 03034 // U traps disabled, the result would have to be tiny and inexact 03035 // in order to set the U flag; both U and I flags in the FPSR 03036 // stay unchanged 03037 U_exc_hi = 0; 03038 03039 // no exception raised for the high half - clear fpa_hi (an 03040 // exception might be raised by the low half of the instruction) 03041 ISRlow = ISRlow & 0xbfff; 03042 03043 } else { // if (I_exc_hi) 03044 03045 // if tiny and inexact will set U and I in FPSR, as U_exc_hi = 1 03046 if (I_dis) { // the high half will not raise an inexact exception 03047 // clear I_exc_hi and U_exc_hi in ISRlow (prepare ISRlow 03048 // because the low half might raise an exception); no exception 03049 // raised for the high half - clear also fpa_hi 03050 ISRlow = ISRlow & 0x8fff; 03051 } else { // update ISR code - will raise an inexact trap 03052 // clear U_hi and set I_hi in ISRlow - will raise inexact trap 03053 ISRlow = (ISRlow & 0xefff) | 0x2000; 03054 // update the fpa_hi bit in the ISR code (NOT DONE IN EM_FP82) 03055 if (fpa_hi) 03056 ISRlow = ISRlow | 0x4000; 03057 else 03058 ISRlow = ISRlow & 0xbfff; 03059 } 03060 03061 } 03062 03063 } 03064 03065 if (O_dis && O_exc_hi) { // the result is not zero, and is normal with 03066 // unbounded exponent 03067 03068 // true_bexp_hi not used in this case, and neither is decr_exp_hi 03069 // true_bexp_hi = 03070 // (exponent_hi == 0xff ? exponent_hi : exponent_hi + 0x100); 03071 03072 // determine the result, according to the rounding mode 03073 switch (rc) { 03074 case rc_rn: // +infinity or -infinity 03075 exponent_hi = 0x0ff; 03076 significand_hi = 0x0800000; 03077 break; 03078 case rc_rm: // +max_fp or -infinity 03079 exponent_hi = (sign_hi == 0 ? 0x0fe : 0x0ff); 03080 significand_hi = (sign_hi == 0 ? 0x0ffffff : 0x0800000); 03081 break; 03082 case rc_rp: // +infinity or -max_fp 03083 exponent_hi = (sign_hi == 0 ? 0x0ff : 0x0fe); 03084 significand_hi = (sign_hi == 0 ? 0x0800000 : 0x0ffffff); 03085 break; 03086 case rc_rz: // +max_fp or -max_fp 03087 exponent_hi = 0x0fe; 03088 significand_hi = 0x0ffffff; 03089 break; 03090 default: 03091 #ifndef unix 03092 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 03093 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 03094 #else 03095 FP_EMULATION_ERROR1 ("fp_emulate () Internal \ 03096 Error: invalid rc = %d for SIMD F1 instruction\n", rc) 03097 return (FP_EMUL_ERROR); 03098 #endif 03099 } 03100 03101 if (exponent_hi == 0xff) 03102 fpa_hi = 1; 03103 else 03104 fpa_hi = 0; 03105 03106 // will update FPSR 03107 O_exc_hi = 1; // must have been so already 03108 I_exc_hi = 1; 03109 03110 if (I_dis) { 03111 03112 // clear fpa_hi, I_exc_hi and O_exc_hi in ISRlow - an enabled U 03113 // exception might be raised for the low half 03114 ISRlow = ISRlow & 0x97ff; 03115 03116 } else { // if (!I_dis) 03117 03118 // update ISR code - will raise an inexact exception for high half; 03119 // clear O_exc_hi and set I_exc_hi in ISRlow - the result 03120 // is always inexact 03121 ISRlow = (ISRlow & 0xf7ff) | 0x2000; 03122 // update the fpa_hi bit 03123 if (fpa_hi) 03124 ISRlow = ISRlow | 0x4000; 03125 else 03126 ISRlow = ISRlow & 0xbfff; 03127 // Note that the exact result cannot be retrieved by the inexact 03128 // exception trap handler [but this will not occur on Merced] 03129 03130 } 03131 03132 } 03133 03134 if (I_dis && I_exc_hi) { 03135 03136 ; // nothing to do (keep this as a place holder) 03137 // redundant - this case was caught before 03138 03139 } 03140 03141 // return the result 03142 low_half = (sign_lo << 31) | (exponent_lo << 23) | 03143 significand_lo & 0x07fffff; 03144 03145 high_half = (sign_hi << 31) | (exponent_hi << 23) | 03146 significand_hi & 0x07fffff; 03147 03148 ps->state_FR[lf1].significand = high_half << 32 | low_half; 03149 03150 // Note: the exponent is 0x1003e even for a significand of 0 (ACR 131) 03151 03152 // successful emulation 03153 03154 // set the destination floating-point register value (this is a trap) 03155 #ifdef DEBUG_UNIX 03156 printf ("DEBUG AFTER F1 SWA TRAP 2: ps->state_FR[lf1] = %x %x %Lx\n", 03157 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 03158 ps->state_FR[lf1].significand); 03159 #endif 03160 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 03161 if (f1 < 32) 03162 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 03163 else 03164 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 03165 03166 // update the FPSR (but will not be able to distinguish lo from hi) 03167 if (I_exc_lo || I_exc_hi) { // set I in FPSR 03168 // I traps must be disabled at this point 03169 *pfpsr = *pfpsr | 03170 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 12)); 03171 } // else leave unchanged the sticky bit I 03172 if (U_exc_lo || U_exc_hi) { // set U in FPSR 03173 // U traps must be disabled at this point 03174 *pfpsr = *pfpsr | 03175 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 11)); 03176 } // else leave unchanged the sticky bit U 03177 if (O_exc_lo || O_exc_hi) { // set O in FPSR 03178 // O traps must be disabled at this point 03179 *pfpsr = *pfpsr | 03180 ((EM_uint64_t)1 << (6 + (EM_uint_t)sf * 13 + 10)); 03181 } // else leave unchanged the sticky bit O 03182 03183 // return TRUE if no exception has to be raised 03184 if ((I_dis || I_exc_lo == 0 && I_exc_hi == 0) && 03185 (U_dis || U_exc_lo == 0 && U_exc_hi == 0) && 03186 (O_dis || O_exc_lo == 0 && O_exc_hi == 0)) { 03187 03188 return (TRUE); 03189 03190 } else { 03191 03192 // if ((low inexact || high inexact) && I traps enabled or 03193 // (low underflow || high underflow) && U traps enabled or 03194 // (low overflow || high overflow) && O traps enabled) 03195 03196 // ISRlow might have been updated 03197 *pisr = ((*pisr) & 0xffffffffffff0000) | ISRlow; 03198 03199 return (FALSE | SIMD_INSTRUCTION); // will raise O, U, or I trap 03200 03201 } 03202 03203 } 03204 03205 } else { 03206 03207 // unrecognized instruction type 03208 #ifndef unix 03209 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03210 instruction opcode %8x %8x not valid for SWA trap\n", OpCode) 03211 #else 03212 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03213 instruction opcode %Lx not valid for SWA trap\n", OpCode) 03214 return (FP_EMUL_ERROR); 03215 #endif 03216 03217 } 03218 03219 // end 'else if (trap_type == FPTRAP && swa_trap (sf, FPSR, ISRlow))' 03220 03221 } else if (trap_type == FPFLT || trap_type == FPTRAP) { 03222 03223 #ifdef DEBUG_UNIX 03224 printf ("DEBUG: INSTRUCTION NOT EMULATED\n"); 03225 #endif 03226 03227 // if we got here, the trapping instruction was not emulated, because it 03228 // did not raise a SWA fault or trap. This includes the cases when a 03229 // V or Z fault or an U, O, or I trap occurred, and the exceptions 03230 // raised were enabled 03231 03232 // determine whether this is a non-SIMD, or a SIMD instruction 03233 03234 if ((OpCode & F1_MIN_MASK) == F1_PATTERN) { 03235 // F1 instruction 03236 03237 switch (OpCode & F1_MASK) { 03238 case FMA_PATTERN: 03239 case FMA_S_PATTERN: 03240 case FMA_D_PATTERN: 03241 case FMS_PATTERN: 03242 case FMS_S_PATTERN: 03243 case FMS_D_PATTERN: 03244 case FNMA_PATTERN: 03245 case FNMA_S_PATTERN: 03246 case FNMA_D_PATTERN: 03247 SIMD_instruction = 0; 03248 break; 03249 case FPMA_PATTERN: 03250 case FPMS_PATTERN: 03251 case FPNMA_PATTERN: 03252 SIMD_instruction = 1; 03253 break; 03254 default: 03255 // unrecognized instruction type 03256 #ifndef unix 03257 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03258 instruction opcode %8x %8x not recognized\n", OpCode) 03259 #else 03260 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03261 instruction opcode %Lx not recognized\n", OpCode) 03262 return (FP_EMUL_ERROR); 03263 #endif 03264 } 03265 03266 } else if ((OpCode & F4_MIN_MASK) == F4_PATTERN) { 03267 // F4 instruction 03268 03269 switch (OpCode & F4_MASK) { 03270 case FCMP_EQ_PATTERN: 03271 case FCMP_LT_PATTERN: 03272 case FCMP_LE_PATTERN: 03273 case FCMP_UNORD_PATTERN: 03274 case FCMP_EQ_UNC_PATTERN: 03275 case FCMP_LT_UNC_PATTERN: 03276 case FCMP_LE_UNC_PATTERN: 03277 case FCMP_UNORD_UNC_PATTERN: 03278 SIMD_instruction = 0; 03279 break; 03280 default: 03281 // unrecognized instruction type 03282 #ifndef unix 03283 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03284 instruction opcode %8x %8x not recognized\n", OpCode) 03285 #else 03286 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03287 instruction opcode %Lx not recognized\n", OpCode) 03288 return (FP_EMUL_ERROR); 03289 #endif 03290 } 03291 03292 } else if ((OpCode & F6_MIN_MASK) == F6_PATTERN) { 03293 // F6 instruction 03294 03295 switch (OpCode & F6_MASK) { 03296 case FRCPA_PATTERN: 03297 SIMD_instruction = 0; 03298 break; 03299 case FPRCPA_PATTERN: 03300 SIMD_instruction = 1; 03301 break; 03302 default: 03303 // unrecognized instruction type 03304 #ifndef unix 03305 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03306 instruction opcode %8x %8x not recognized\n", OpCode) 03307 #else 03308 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03309 instruction opcode %Lx not recognized\n", OpCode) 03310 return (FP_EMUL_ERROR); 03311 #endif 03312 } 03313 03314 } else if ((OpCode & F7_MIN_MASK) == F7_PATTERN) { 03315 // F7 instruction 03316 03317 switch (OpCode & F7_MASK) { 03318 case FRSQRTA_PATTERN: 03319 SIMD_instruction = 0; 03320 break; 03321 case FPRSQRTA_PATTERN: 03322 SIMD_instruction = 1; 03323 break; 03324 default: 03325 // unrecognized instruction type 03326 #ifndef unix 03327 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03328 instruction opcode %8x %8x not recognized\n", OpCode) 03329 #else 03330 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03331 instruction opcode %Lx not recognized\n", OpCode) 03332 return (FP_EMUL_ERROR); 03333 #endif 03334 } 03335 03336 } else if ((OpCode & F8_MIN_MASK) == F8_PATTERN) { 03337 // F8 instruction 03338 03339 switch (OpCode & F8_MASK) { 03340 case FMIN_PATTERN: 03341 case FMAX_PATTERN: 03342 case FAMIN_PATTERN: 03343 case FAMAX_PATTERN: 03344 SIMD_instruction = 0; 03345 break; 03346 case FPMIN_PATTERN: 03347 case FPMAX_PATTERN: 03348 case FPAMIN_PATTERN: 03349 case FPAMAX_PATTERN: 03350 case FPCMP_EQ_PATTERN: 03351 case FPCMP_LT_PATTERN: 03352 case FPCMP_LE_PATTERN: 03353 case FPCMP_UNORD_PATTERN: 03354 case FPCMP_NEQ_PATTERN: 03355 case FPCMP_NLT_PATTERN: 03356 case FPCMP_NLE_PATTERN: 03357 case FPCMP_ORD_PATTERN: 03358 SIMD_instruction = 1; 03359 break; 03360 default: 03361 // unrecognized instruction type 03362 #ifndef unix 03363 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03364 instruction opcode %8x %8x not recognized\n", OpCode) 03365 #else 03366 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03367 instruction opcode %Lx not recognized\n", OpCode) 03368 return (FP_EMUL_ERROR); 03369 #endif 03370 } 03371 03372 } else if ((OpCode & F10_MIN_MASK) == F10_PATTERN) { 03373 // F10 instruction 03374 03375 switch (OpCode & F10_MASK) { 03376 case FCVT_FX_PATTERN: 03377 case FCVT_FXU_PATTERN: 03378 case FCVT_FX_TRUNC_PATTERN: 03379 case FCVT_FXU_TRUNC_PATTERN: 03380 SIMD_instruction = 0; 03381 break; 03382 case FPCVT_FX_PATTERN: 03383 case FPCVT_FXU_PATTERN: 03384 case FPCVT_FX_TRUNC_PATTERN: 03385 case FPCVT_FXU_TRUNC_PATTERN: 03386 SIMD_instruction = 1; 03387 break; 03388 default: 03389 // unrecognized instruction type 03390 #ifndef unix 03391 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03392 instruction opcode %8x %8x not recognized\n", OpCode) 03393 #else 03394 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03395 instruction opcode %Lx not recognized\n", OpCode) 03396 return (FP_EMUL_ERROR); 03397 #endif 03398 } 03399 03400 } else { 03401 03402 // unrecognized instruction type 03403 #ifndef unix 03404 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03405 instruction opcode %8x %8x not recognized\n", OpCode) 03406 #else 03407 FP_EMULATION_ERROR1 ("fp_emulate () Internal Error: \ 03408 instruction opcode %Lx not recognized\n", OpCode) 03409 return (FP_EMUL_ERROR); 03410 #endif 03411 03412 } 03413 03414 #ifdef DEBUG_UNIX 03415 if ((OpCode & F1_MIN_MASK) == F1_PATTERN) { 03416 printf ("DEBUG AFTER 'NO EMULATION' RET FALSE: ps->state_FR[lf1] = %x %x %Lx\n", 03417 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 03418 ps->state_FR[lf1].significand); 03419 } 03420 #endif 03421 03422 // no emulation; ISRlow and AR[0].uint_value (FPSR) have not changed 03423 if (SIMD_instruction) 03424 return (FALSE | SIMD_INSTRUCTION); // will raise fp exception 03425 else 03426 return (FALSE); // will raise fp exception 03427 03428 } else { 03429 03430 #ifdef DEBUG_UNIX 03431 printf ("DEBUG: STATUS FLOAT ERROR\n"); 03432 #endif 03433 03434 // fp_emulate () called w/o 03435 // trap_frame->type == 0x020 || trap_fram->type == 0x021 03436 #ifndef unix 03437 FP_EMULATION_ERROR2 ("fp_emulate () Internal Error: \ 03438 fp_emulate () called w/o trap_type FPFLT or FPTRAP \ 03439 OpCode = %8x %8x, and ISR code = %x\n", OpCode, ISRlow) 03440 #else 03441 FP_EMULATION_ERROR2 ("fp_emulate () Internal Error: \ 03442 fp_emulate () called w/o trap_type FPFLT or FPTRAP \ 03443 OpCode = %Lx, and ISR code = %x\n", OpCode, ISRlow) 03444 return (FP_EMUL_ERROR); 03445 #endif 03446 03447 } 03448 03449 new_exception: 03450 03451 // can get here only if 03452 // trap_type == FPFLT, and 03453 // the emulation generated a new FP exception (e.g. if an fma with 03454 // denormal input[s] causes an underflow, and the underflow traps are 03455 // enabled; also [but not only] if an fma has a denormal input, and denormal 03456 // exceptions are enabled) 03457 03458 new_trap_type = ps->trap_type; 03459 03460 if (new_trap_type == FPFLT) { // fault 03461 03462 // this is a fault - it must be a denormal exception or an invalid 03463 // exception (the latter only for fcvt.fxu[.trunc] with negative 03464 // unnormal input) 03465 fault_ISR_code = (ps->state_MERCED_RTL >> 16) & 0x0ffff; 03466 if ((fault_ISR_code & 0x077) == 0) { // SWA fault only 03467 #ifndef unix 03468 FP_EMULATION_ERROR0 ( 03469 "fp_emulate () Internal Error: SWA fault repeated\n"); 03470 #else 03471 FP_EMULATION_ERROR0 ( 03472 "fp_emulate () Internal Error: SWA fault repeated\n"); 03473 return (FP_EMUL_ERROR); 03474 #endif 03475 } 03476 03477 #ifdef DEBUG_UNIX 03478 printf ("DEBUG fp_emulate () NEW_EXC fault_ISR_code = %x\n", fault_ISR_code); 03479 #endif 03480 03481 #ifdef DEBUG_UNIX 03482 if ((OpCode & F1_MIN_MASK) == F1_PATTERN) 03483 printf ("DEBUG NEW_EXC FAULT PART RET FALSE: ps->state_FR[lf1] = %x %x %Lx\n", 03484 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 03485 ps->state_FR[lf1].significand); 03486 #endif 03487 03488 // update the ISR code, to pass the new value of ISR.code to the 03489 // user 03490 *pisr = ((*pisr) & 0xffffffffffff0000) | fault_ISR_code; 03491 03492 // the FPSR is unchanged 03493 if (SIMD_instruction) 03494 return (FALSE | SIMD_INSTRUCTION); // will raise fp exception 03495 else 03496 return (FALSE); // will raise fp exception 03497 03498 } else if (new_trap_type == FPTRAP) { // trap 03499 03500 // this is a trap 03501 trap_ISR_code = (ps->state_MERCED_RTL >> 16) & 0x0ffff; 03502 03503 #ifdef DEBUG_UNIX 03504 printf ("DEBUG fp_emulate () NEW_EXC trap_ISR_code = %4.4x\n", trap_ISR_code); 03505 #endif 03506 03507 // set the destination floating-point reg value, as this is a trap 03508 #ifdef DEBUG_UNIX 03509 printf ("DEBUG NEW_EXC TRAP PART RET FALSE: ps->state_FR[lf1] = %x %x %Lx\n", 03510 ps->state_FR[lf1].sign, ps->state_FR[lf1].exponent, 03511 ps->state_FR[lf1].significand); 03512 printf ("DEBUG NEW_EXC TRAP PART RET FALSE: f1 f2 f3 f4 = %x %x %x %x\n", 03513 f1, f2, f3, f4); 03514 #endif 03515 set_fp_register (f1, FPRegToFP128(ps->state_FR[lf1]), fp_state); 03516 if (f1 < 32) 03517 *pipsr = *pipsr | (EM_uint64_t)0x10; // set mfl bit 03518 else 03519 *pipsr = *pipsr | (EM_uint64_t)0x20; // set mfh bit 03520 03521 // replace trap_ISR_code in the updated ISR code (nothing else changes) 03522 *pisr = ((*pisr) & 0xffffffffffff0000) | trap_ISR_code; 03523 03524 // copy ps->state_AR[0] back into the trap frame FPSR 03525 *pfpsr = ps->state_AR[0].uint_value; 03526 03527 // caller will advance instruction pointer iip 03528 if (SIMD_instruction) 03529 return (FALSE | FAULT_TO_TRAP | SIMD_INSTRUCTION); // will raise fp exc 03530 else 03531 return (FALSE | FAULT_TO_TRAP); // will raise fp exception 03532 03533 } else { 03534 #ifndef unix 03535 FP_EMULATION_ERROR1 ( 03536 "fp_emulate () Internal Error: new_trap_type = %x\n", new_trap_type) 03537 #else 03538 FP_EMULATION_ERROR1 ( 03539 "fp_emulate () Internal Error: new_trap_type = %x\n", new_trap_type) 03540 return (FP_EMUL_ERROR); 03541 #endif 03542 } 03543 03544 } 03545 03546 03547 03548 int 03549 swa_trap (EM_opcode_sf_type sf, EM_uint64_t FPSR, EM_uint_t ISRlow) 03550 03551 { 03552 03553 int IsSWA; 03554 int U_en, U_exc; 03555 int O_en, O_exc; 03556 03557 // this assumes that for a non-SIMD instruction, the "low" bits in 03558 // ISRlow are 0 03559 03560 // SWA trap for U if 03561 // bit 0 in ISR.code is set [fp trap] [redundant] 03562 // and 03563 // (bit 8 or bit 12 in ISR.code is set) [U normal, or SIMD low or high] 03564 // and 03565 // (sfx not 0 and bit 6 in FPSR.sfx set [sfx != 0 and traps disabled in 03566 // or bit 4 in FPSR set) sfx or U traps disabled] 03567 // 03568 03569 IsSWA = (ISRlow & 0x01) && (ISRlow & 0x100 | ISRlow & 0x1000) && 03570 ((EM_uint_t)sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 03571 ((FPSR >> 4) & 0x01)); 03572 03573 // SWA trap for O if 03574 // bit 0 in ISR.code is set [fp trap] [redundant] 03575 // and 03576 // (bit 7 or bit 11 in ISR.code is set) [O normal, or SIMD low or high] 03577 // and 03578 // (sfx not 0 and bit 6 in FPSR.sfx set [traps disabled in sfx] 03579 // or bit 3 in FPSR set) [O traps disabled] 03580 // 03581 03582 IsSWA = IsSWA || 03583 ((ISRlow & 0x01) && (ISRlow & 0x080 | ISRlow & 0x0800) && 03584 ((EM_uint_t)sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) || 03585 ((FPSR >> 3) & 0x01))); 03586 03587 #ifdef DEBUG_UNIX 03588 if (IsSWA && ((ISRlow & 0x1980) == 0)) 03589 printf ("DEBUG swa_trap () WARNING: IsSWA and ((ISRlow & 0x1980) == 0)\n"); 03590 #endif 03591 03592 // U_en = ((EM_uint_t)sf == 0 || td == 0) && FPSR_4 == 0 03593 U_en = ((EM_uint_t)sf == 0 || 03594 !((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01)) && 03595 !((FPSR >> 4) & 0x01); 03596 // U exc if 03597 // bit 0 in ISR.code is set [fp trap] [redundant] 03598 // and 03599 // (bit 8 or bit 12 in ISR.code is set) [U normal, or SIMD low or high] 03600 // and 03601 // U traps are enabled 03602 U_exc = (ISRlow & 0x01) && (ISRlow & 0x100 | ISRlow & 0x1000) && U_en; 03603 03604 // O_en = ((EM_uint_t)sf == 0 || td == 0) && FPSR_3 == 0 03605 O_en = ((EM_uint_t)sf == 0 || 03606 !((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01)) && 03607 !((FPSR >> 3) & 0x01); 03608 // O exc if 03609 // bit 0 in ISR.code is set [fp trap] [redundant] 03610 // and 03611 // (bit 7 or bit 11 in ISR.code is set) [O normal, or SIMD low or high] 03612 // and 03613 // O traps are enabled 03614 O_exc = (ISRlow & 0x01) && (ISRlow & 0x080 | ISRlow & 0x0800) && O_en; 03615 03616 // if no U exc and no O exc and no SWA trap due to U or O, then check I 03617 if (!U_exc && !O_exc && !IsSWA) { 03618 03619 // SWA trap for I if 03620 // bit 0 in ISR.code is set [fp trap] [redundant] 03621 // and 03622 // (bit 9 or bit 13 in ISR.code is set) [I normal, or SIMD low or high] 03623 // and 03624 // (sfx not 0 and bit 6 in FPSR.sfx set [sfx != 0 and traps disabled in 03625 // or bit 5 in FPSR set) sfx or U traps disabled] 03626 // 03627 03628 IsSWA = (ISRlow & 0x01) && (ISRlow & 0x200 | ISRlow & 0x2000) && 03629 ((EM_uint_t)sf != 0 && ((FPSR >> (6 + 6 + 13 * (EM_uint_t)sf)) & 0x01) 03630 || ((FPSR >> 5) & 0x01)); 03631 03632 } 03633 03634 return (IsSWA); 03635 03636 } 03637 03638 03639 EM_fp_reg_type 03640 FP128ToFPReg (FLOAT128_TYPE f128) 03641 03642 { 03643 03644 EM_fp_reg_type freg; 03645 char *p; 03646 EM_uint64_t *ui64; 03647 03648 03649 p = (char *)(&f128); 03650 ui64 = (EM_uint64_t *)(&f128); 03651 03652 freg.sign = (p[10] >> 1) & 0x01; 03653 freg.exponent = ((unsigned int)(p[10] & 0x01) << 16) | 03654 ((unsigned int)(p[9] & 0xff) << 8) | ((unsigned int)(p[8] & 0xff)); 03655 freg.significand = *ui64; 03656 03657 return (freg); 03658 03659 } 03660 03661 03662 03663 FLOAT128_TYPE 03664 FPRegToFP128 (EM_fp_reg_type freg) 03665 03666 { 03667 03668 FLOAT128_TYPE f128; 03669 03670 03671 f128.loFlt64 = freg.significand; 03672 03673 f128.hiFlt64 = ((long)(freg.sign) << 17) | ((long)freg.exponent); 03674 03675 return (f128); 03676 03677 }

Generated on Sat May 15 19:40:01 2004 for test by doxygen 1.3.7