From 29589a9b78f54db3c74c8e00f47121b2f8efaf6b Mon Sep 17 00:00:00 2001
From: Lu Yahan
Date: Mon, 21 Feb 2022 11:25:32 +0800
Subject: [PATCH] deps: V8: cherry-pick 77d515484864
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Original commit message:

    [riscv64] Move explicit specialization into .cc file

    Building with Gcc-10 causes error "explicit specialization in
    non-namespace scope". This change fixes it.

    Bug: v8:12649
    Change-Id: I36b2b042b336c2dfd32ba5541fdbbdb8dc8b4fd7
    Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3473997
    Reviewed-by: ji qiu
    Commit-Queue: ji qiu
    Cr-Commit-Position: refs/heads/main@{#79185}

Refs: https://github.com/v8/v8/commit/77d515484864984f721d6726610f314982ac44d2
PR-URL: https://github.com/nodejs/node/pull/42067
Refs: https://github.com/v8/v8/commit/b66334313c8bd73b253d0779f59f3e8656967043
Reviewed-By: Michaël Zasso
Reviewed-By: Jiawen Geng
Reviewed-By: Richard Lau
Reviewed-By: Colin Ihrig
Reviewed-By: Mary Marchini
Reviewed-By: Juan José Arboleda
Reviewed-By: James M Snell
Reviewed-By: Stewart X Addison
---
 .../execution/riscv64/simulator-riscv64.cc   | 1283 +++++++++++++++++
 .../src/execution/riscv64/simulator-riscv64.h |  110 ++
 2 files changed, 1393 insertions(+)

diff --git a/deps/v8/src/execution/riscv64/simulator-riscv64.cc b/deps/v8/src/execution/riscv64/simulator-riscv64.cc
index 3ec0c0e81179bb..59d5f5486226e9 100644
--- a/deps/v8/src/execution/riscv64/simulator-riscv64.cc
+++ b/deps/v8/src/execution/riscv64/simulator-riscv64.cc
@@ -59,6 +59,1289 @@
 #include "src/heap/combined-heap.h"
 #include "src/runtime/runtime-utils.h"
 #include "src/utils/ostreams.h"
+#include "src/utils/utils.h"
+
+// The following code about RVV was based from:
+// https://github.com/riscv/riscv-isa-sim
+// Copyright (c) 2010-2017, The Regents of the University of California
+// (Regents). All Rights Reserved.
+
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the Regents nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+
+// IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
+// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
+// REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+// HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+static inline bool is_aligned(const unsigned val, const unsigned pos) {
+  return pos ? (val & (pos - 1)) == 0 : true;
+}
+
+static inline bool is_overlapped(const int astart, int asize, const int bstart,
+                                 int bsize) {
+  asize = asize == 0 ? 1 : asize;
+  bsize = bsize == 0 ?
1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} +static inline bool is_overlapped_widen(const int astart, int asize, + const int bstart, int bsize) { + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + if (astart < bstart && is_overlapped(astart, asize, bstart, bsize) && + !is_overlapped(astart, asize, bstart + bsize, bsize)) { + return false; + } else { + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; + } +} + +#ifdef DEBUG +#define require_align(val, pos) \ + if (!is_aligned(val, pos)) { \ + std::cout << val << " " << pos << std::endl; \ + } \ + CHECK_EQ(is_aligned(val, pos), true) +#else +#define require_align(val, pos) CHECK_EQ(is_aligned(val, pos), true) +#endif + +// RVV +// The following code about RVV was based from: +// https://github.com/riscv/riscv-isa-sim +// Copyright (c) 2010-2017, The Regents of the University of California +// (Regents). All Rights Reserved. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the Regents nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. + +// IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, +// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF +// REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED +// HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE +// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
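// Why the hunk below exists: GCC 10 diagnoses "explicit specialization in
// non-namespace scope" when a template is explicitly specialized inside a
// class body, so the cherry-picked change defines the type_usew_t /
// type_sew_t width-to-type helpers and their specializations at file scope
// in this .cc file.  A minimal, self-contained sketch of the accepted
// pattern follows; it uses hypothetical names and is illustrative only, not
// code taken from this patch.

#include <cstdint>

// Primary template: maps an element width in bits to an integer type.
template <int kBits>
struct int_of;

// Explicit specializations at namespace (file) scope.  Writing these inside
// a class body is the form GCC 10 rejects.
template <>
struct int_of<8> {
  using type = std::int8_t;
};

template <>
struct int_of<16> {
  using type = std::int16_t;
};

static_assert(sizeof(int_of<16>::type) == 2,
              "the 16-bit specialization is selected");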
+template +struct type_usew_t; +template <> +struct type_usew_t<8> { + using type = uint8_t; +}; + +template <> +struct type_usew_t<16> { + using type = uint16_t; +}; + +template <> +struct type_usew_t<32> { + using type = uint32_t; +}; + +template <> +struct type_usew_t<64> { + using type = uint64_t; +}; + +template <> +struct type_usew_t<128> { + using type = __uint128_t; +}; +template +struct type_sew_t; + +template <> +struct type_sew_t<8> { + using type = int8_t; +}; + +template <> +struct type_sew_t<16> { + using type = int16_t; +}; + +template <> +struct type_sew_t<32> { + using type = int32_t; +}; + +template <> +struct type_sew_t<64> { + using type = int64_t; +}; + +template <> +struct type_sew_t<128> { + using type = __int128_t; +}; + +#define VV_PARAMS(x) \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_sew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VV_UPARAMS(x) \ + type_usew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_usew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VX_PARAMS(x) \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_sew_t::type rs1 = (type_sew_t::type)(get_register(rs1_reg())); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VX_UPARAMS(x) \ + type_usew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_usew_t::type rs1 = (type_usew_t::type)(get_register(rs1_reg())); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VI_PARAMS(x) \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_sew_t::type simm5 = (type_sew_t::type)(instr_.RvvSimm5()); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VI_UPARAMS(x) \ + type_usew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_usew_t::type uimm5 = (type_usew_t::type)(instr_.RvvUimm5()); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VN_PARAMS(x) \ + constexpr int half_x = x >> 1; \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_sew_t::type uimm5 = (type_sew_t::type)(instr_.RvvUimm5()); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VN_UPARAMS(x) \ + constexpr int half_x = x >> 1; \ + type_usew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_usew_t::type uimm5 = (type_usew_t::type)(instr_.RvvUimm5()); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VXI_PARAMS(x) \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), i, true); \ + type_sew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); \ + type_sew_t::type rs1 = (type_sew_t::type)(get_register(rs1_reg())); \ + type_sew_t::type simm5 = (type_sew_t::type)(instr_.RvvSimm5()); + +#define VI_XI_SLIDEDOWN_PARAMS(x, off) \ + auto& vd = Rvvelt::type>(rvv_vd_reg(), i, true); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i + off); + +#define VI_XI_SLIDEUP_PARAMS(x, offset) \ + auto& vd = Rvvelt::type>(rvv_vd_reg(), i, true); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i - offset); + +/* Vector Integer Extension */ +#define VI_VIE_PARAMS(x, scale) \ + if ((x / scale) < 8) UNREACHABLE(); \ + auto& vd = Rvvelt::type>(rvv_vd_reg(), i, true); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VI_VIE_UPARAMS(x, scale) \ + if ((x / scale) < 8) UNREACHABLE(); \ + auto& vd 
= Rvvelt::type>(rvv_vd_reg(), i, true); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define require_noover(astart, asize, bstart, bsize) \ + CHECK_EQ(!is_overlapped(astart, asize, bstart, bsize), true) +#define require_noover_widen(astart, asize, bstart, bsize) \ + CHECK_EQ(!is_overlapped_widen(astart, asize, bstart, bsize), true) + +#define RVV_VI_GENERAL_LOOP_BASE \ + for (uint64_t i = rvv_vstart(); i < rvv_vl(); i++) { +#define RVV_VI_LOOP_END \ + set_rvv_vstart(0); \ + } + +#define RVV_VI_MASK_VARS \ + const uint8_t midx = i / 64; \ + const uint8_t mpos = i % 64; + +#define RVV_VI_LOOP_MASK_SKIP(BODY) \ + RVV_VI_MASK_VARS \ + if (instr_.RvvVM() == 0) { \ + bool skip = ((Rvvelt(0, midx) >> mpos) & 0x1) == 0; \ + if (skip) { \ + continue; \ + } \ + } + +#define RVV_VI_VV_LOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VV_PARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VV_PARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VV_PARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VV_PARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VV_ULOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VV_UPARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VV_UPARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VV_UPARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VV_UPARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VX_LOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VX_PARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VX_PARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VX_PARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VX_PARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VX_ULOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VX_UPARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VX_UPARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VX_UPARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VX_UPARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VI_LOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VI_PARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VI_PARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VI_PARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VI_PARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VI_ULOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VI_UPARAMS(8); \ + BODY \ + } else if (rvv_vsew() == E16) { \ + VI_UPARAMS(16); \ + BODY \ + } else if (rvv_vsew() == E32) { \ + VI_UPARAMS(32); \ + BODY \ + } else if (rvv_vsew() == E64) { \ + VI_UPARAMS(64); \ + BODY \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +// widen operation loop + +#define VI_WIDE_CHECK_COMMON \ + CHECK_LE(rvv_vflmul(), 4); \ + CHECK_LE(rvv_vsew() * 2, kRvvELEN); \ + require_align(rvv_vd_reg(), rvv_vflmul() * 2); \ + require_vm; + +#define VI_NARROW_CHECK_COMMON \ + CHECK_LE(rvv_vflmul(), 4); \ + CHECK_LE(rvv_vsew() 
* 2, kRvvELEN); \ + require_align(rvv_vs2_reg(), rvv_vflmul() * 2); \ + require_align(rvv_vd_reg(), rvv_vflmul()); \ + require_vm; + +#define RVV_VI_CHECK_SLIDE(is_over) \ + require_align(rvv_vs2_reg(), rvv_vflmul()); \ + require_align(rvv_vd_reg(), rvv_vflmul()); \ + require_vm; \ + if (is_over) require(rvv_vd_reg() != rvv_vs2_reg()); + +#define RVV_VI_CHECK_DDS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require_align(rvv_vs2_reg(), rvv_vflmul() * 2); \ + if (is_rs) { \ + require_align(rvv_vs1_reg(), rvv_vflmul()); \ + if (rvv_vflmul() < 1) { \ + require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \ + rvv_vflmul()); \ + } else { \ + require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \ + rvv_vflmul()); \ + } \ + } + +#define RVV_VI_CHECK_DSS(is_vs1) \ + VI_WIDE_CHECK_COMMON; \ + require_align(rvv_vs2_reg(), rvv_vflmul()); \ + if (rvv_vflmul() < 1) { \ + require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs2_reg(), \ + rvv_vflmul()); \ + } else { \ + require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs2_reg(), \ + rvv_vflmul()); \ + } \ + if (is_vs1) { \ + require_align(rvv_vs1_reg(), rvv_vflmul()); \ + if (rvv_vflmul() < 1) { \ + require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \ + rvv_vflmul()); \ + } else { \ + require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \ + rvv_vflmul()); \ + } \ + } + +#define RVV_VI_CHECK_SDS(is_vs1) \ + VI_NARROW_CHECK_COMMON; \ + if (rvv_vd_reg() != rvv_vs2_reg()) \ + require_noover(rvv_vd_reg(), rvv_vflmul(), rvv_vs2_reg(), \ + rvv_vflmul() * 2); \ + if (is_vs1) require_align(rvv_vs1_reg(), rvv_vflmul()); + +#define RVV_VI_VV_LOOP_WIDEN(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VV_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VV_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VV_PARAMS(32); \ + BODY; \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VX_LOOP_WIDEN(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + if (rvv_vsew() == E8) { \ + VX_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VX_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VX_PARAMS(32); \ + BODY; \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch (rvv_vsew()) { \ + case E8: { \ + Rvvelt(rvv_vd_reg(), i, true) = \ + op1((sign##16_t)(sign##8_t)var0 op0(sign##16_t)(sign##8_t) var1) + \ + var2; \ + } break; \ + case E16: { \ + Rvvelt(rvv_vd_reg(), i, true) = \ + op1((sign##32_t)(sign##16_t)var0 op0(sign##32_t)(sign##16_t) var1) + \ + var2; \ + } break; \ + default: { \ + Rvvelt(rvv_vd_reg(), i, true) = \ + op1((sign##64_t)(sign##32_t)var0 op0(sign##64_t)(sign##32_t) var1) + \ + var2; \ + } break; \ + } + +#define VI_WIDE_WVX_OP(var0, op0, sign) \ + switch (rvv_vsew()) { \ + case E8: { \ + sign##16_t & vd_w = Rvvelt(rvv_vd_reg(), i, true); \ + sign##16_t vs2_w = Rvvelt(rvv_vs2_reg(), i); \ + vd_w = vs2_w op0(sign##16_t)(sign##8_t) var0; \ + } break; \ + case E16: { \ + sign##32_t & vd_w = Rvvelt(rvv_vd_reg(), i, true); \ + sign##32_t vs2_w = Rvvelt(rvv_vs2_reg(), i); \ + vd_w = vs2_w op0(sign##32_t)(sign##16_t) var0; \ + } break; \ + default: { \ + sign##64_t & vd_w = Rvvelt(rvv_vd_reg(), i, true); \ + sign##64_t vs2_w = Rvvelt(rvv_vs2_reg(), i); \ + vd_w = vs2_w op0(sign##64_t)(sign##32_t) var0; \ + } break; \ + } + +#define RVV_VI_VVXI_MERGE_LOOP(BODY) \ + RVV_VI_GENERAL_LOOP_BASE \ + if (rvv_vsew() == E8) { \ + VXI_PARAMS(8); \ + BODY; \ + } else if 
(rvv_vsew() == E16) { \ + VXI_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VXI_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VXI_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define VV_WITH_CARRY_PARAMS(x) \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); \ + type_sew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_sew_t::type& vd = Rvvelt::type>(rvv_vd_reg(), i, true); + +#define XI_WITH_CARRY_PARAMS(x) \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); \ + type_sew_t::type rs1 = (type_sew_t::type)(get_register(rs1_reg())); \ + type_sew_t::type simm5 = (type_sew_t::type)instr_.RvvSimm5(); \ + type_sew_t::type& vd = Rvvelt::type>(rvv_vd_reg(), i, true); + +// carry/borrow bit loop +#define RVV_VI_VV_LOOP_WITH_CARRY(BODY) \ + CHECK_NE(rvv_vd_reg(), 0); \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_MASK_VARS \ + if (rvv_vsew() == E8) { \ + VV_WITH_CARRY_PARAMS(8) \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VV_WITH_CARRY_PARAMS(16) \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VV_WITH_CARRY_PARAMS(32) \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VV_WITH_CARRY_PARAMS(64) \ + BODY; \ + } \ + RVV_VI_LOOP_END + +#define RVV_VI_XI_LOOP_WITH_CARRY(BODY) \ + CHECK_NE(rvv_vd_reg(), 0); \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_MASK_VARS \ + if (rvv_vsew() == E8) { \ + XI_WITH_CARRY_PARAMS(8) \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + XI_WITH_CARRY_PARAMS(16) \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + XI_WITH_CARRY_PARAMS(32) \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + XI_WITH_CARRY_PARAMS(64) \ + BODY; \ + } \ + RVV_VI_LOOP_END + +#define VV_CMP_PARAMS(x) \ + type_sew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VX_CMP_PARAMS(x) \ + type_sew_t::type rs1 = (type_sew_t::type)(get_register(rs1_reg())); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VI_CMP_PARAMS(x) \ + type_sew_t::type simm5 = (type_sew_t::type)instr_.RvvSimm5(); \ + type_sew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VV_UCMP_PARAMS(x) \ + type_usew_t::type vs1 = Rvvelt::type>(rvv_vs1_reg(), i); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VX_UCMP_PARAMS(x) \ + type_usew_t::type rs1 = \ + (type_sew_t::type)(get_register(rvv_vs1_reg())); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define VI_UCMP_PARAMS(x) \ + type_usew_t::type uimm5 = (type_usew_t::type)instr_.RvvUimm5(); \ + type_usew_t::type vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define float32_t float +#define float64_t double + +#define RVV_VI_LOOP_CMP_BASE \ + CHECK(rvv_vsew() >= E8 && rvv_vsew() <= E64); \ + for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); \ + uint64_t mmask = uint64_t(1) << mpos; \ + uint64_t& vdi = Rvvelt(rvv_vd_reg(), midx, true); \ + uint64_t res = 0; + +#define RVV_VI_LOOP_CMP_END \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + } \ + rvv_trace_vd(); \ + set_rvv_vstart(0); + +// comparision result to masking register +#define RVV_VI_VV_LOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == E8) { \ + VV_CMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VV_CMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VV_CMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VV_CMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VX_LOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == 
E8) { \ + VX_CMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VX_CMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VX_CMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VX_CMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VI_LOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == E8) { \ + VI_CMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VI_CMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VI_CMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VI_CMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VV_ULOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == E8) { \ + VV_UCMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VV_UCMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VV_UCMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VV_UCMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VX_ULOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == E8) { \ + VX_UCMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VX_UCMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VX_UCMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VX_UCMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VI_ULOOP_CMP(BODY) \ + RVV_VI_LOOP_CMP_BASE \ + if (rvv_vsew() == E8) { \ + VI_UCMP_PARAMS(8); \ + BODY; \ + } else if (rvv_vsew() == E16) { \ + VI_UCMP_PARAMS(16); \ + BODY; \ + } else if (rvv_vsew() == E32) { \ + VI_UCMP_PARAMS(32); \ + BODY; \ + } else if (rvv_vsew() == E64) { \ + VI_UCMP_PARAMS(64); \ + BODY; \ + } \ + RVV_VI_LOOP_CMP_END + +#define RVV_VI_VFP_LOOP_BASE \ + for (uint64_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); + +#define RVV_VI_VFP_LOOP_END \ + } \ + set_rvv_vstart(0); + +#define RVV_VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ + RVV_VI_VFP_LOOP_BASE \ + switch (rvv_vsew()) { \ + case E16: { \ + UNIMPLEMENTED(); \ + } \ + case E32: { \ + float& vd = Rvvelt(rvv_vd_reg(), i, true); \ + float fs1 = get_fpu_register_float(rs1_reg()); \ + float vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY32; \ + break; \ + } \ + case E64: { \ + double& vd = Rvvelt(rvv_vd_reg(), i, true); \ + double fs1 = get_fpu_register_double(rs1_reg()); \ + double vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY64; \ + break; \ + } \ + default: \ + UNREACHABLE(); \ + break; \ + } \ + RVV_VI_VFP_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ + RVV_VI_VFP_LOOP_BASE \ + switch (rvv_vsew()) { \ + case E16: { \ + UNIMPLEMENTED(); \ + break; \ + } \ + case E32: { \ + float& vd = Rvvelt(rvv_vd_reg(), i, true); \ + float vs1 = Rvvelt(rvv_vs1_reg(), i); \ + float vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY32; \ + break; \ + } \ + case E64: { \ + double& vd = Rvvelt(rvv_vd_reg(), i, true); \ + double vs1 = Rvvelt(rvv_vs1_reg(), i); \ + double vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY64; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + } \ + RVV_VI_VFP_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VFP_FMA(type, _f1, _f2, _a) \ + auto fn = [](type f1, type f2, type a) { return std::fma(f1, f2, a); }; \ + vd = CanonicalizeFPUOpFMA(fn, _f1, _f2, _a); + +#define RVV_VI_VFP_FMA_VV_LOOP(BODY32, BODY64) \ + RVV_VI_VFP_LOOP_BASE \ + switch (rvv_vsew()) { \ + case E16: { \ + UNIMPLEMENTED(); \ + } \ + case E32: { \ + float& vd = Rvvelt(rvv_vd_reg(), i, true); \ + float vs1 = Rvvelt(rvv_vs1_reg(), i); \ + float vs2 = 
Rvvelt(rvv_vs2_reg(), i); \ + BODY32; \ + break; \ + } \ + case E64: { \ + double& vd = Rvvelt(rvv_vd_reg(), i, true); \ + double vs1 = Rvvelt(rvv_vs1_reg(), i); \ + double vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY64; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + } \ + RVV_VI_VFP_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VFP_FMA_VF_LOOP(BODY32, BODY64) \ + RVV_VI_VFP_LOOP_BASE \ + switch (rvv_vsew()) { \ + case E16: { \ + UNIMPLEMENTED(); \ + } \ + case E32: { \ + float& vd = Rvvelt(rvv_vd_reg(), i, true); \ + float fs1 = get_fpu_register_float(rs1_reg()); \ + float vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY32; \ + break; \ + } \ + case E64: { \ + double& vd = Rvvelt(rvv_vd_reg(), i, true); \ + float fs1 = get_fpu_register_float(rs1_reg()); \ + double vs2 = Rvvelt(rvv_vs2_reg(), i); \ + BODY64; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + } \ + RVV_VI_VFP_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VFP_LOOP_CMP_BASE \ + for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); \ + uint64_t mmask = uint64_t(1) << mpos; \ + uint64_t& vdi = Rvvelt(rvv_vd_reg(), midx, true); \ + uint64_t res = 0; + +#define RVV_VI_VFP_LOOP_CMP_END \ + switch (rvv_vsew()) { \ + case E16: \ + case E32: \ + case E64: { \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + break; \ + } \ + default: \ + UNREACHABLE(); \ + break; \ + } \ + } \ + set_rvv_vstart(0); \ + rvv_trace_vd(); + +#define RVV_VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ + RVV_VI_VFP_LOOP_CMP_BASE \ + switch (rvv_vsew()) { \ + case E16: { \ + UNIMPLEMENTED(); \ + } \ + case E32: { \ + float vs2 = Rvvelt(rvv_vs2_reg(), i); \ + float vs1 = Rvvelt(rvv_vs1_reg(), i); \ + BODY32; \ + break; \ + } \ + case E64: { \ + double vs2 = Rvvelt(rvv_vs2_reg(), i); \ + double vs1 = Rvvelt(rvv_vs1_reg(), i); \ + BODY64; \ + break; \ + } \ + default: \ + UNREACHABLE(); \ + break; \ + } \ + RVV_VI_VFP_LOOP_CMP_END + +// reduction loop - signed +#define RVV_VI_LOOP_REDUCTION_BASE(x) \ + auto& vd_0_des = Rvvelt::type>(rvv_vd_reg(), 0, true); \ + auto vd_0_res = Rvvelt::type>(rvv_vs1_reg(), 0); \ + for (uint64_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define RVV_VI_LOOP_REDUCTION_END(x) \ + } \ + if (rvv_vl() > 0) { \ + vd_0_des = vd_0_res; \ + } \ + set_rvv_vstart(0); + +#define REDUCTION_LOOP(x, BODY) \ + RVV_VI_LOOP_REDUCTION_BASE(x) \ + BODY; \ + RVV_VI_LOOP_REDUCTION_END(x) + +#define RVV_VI_VV_LOOP_REDUCTION(BODY) \ + if (rvv_vsew() == E8) { \ + REDUCTION_LOOP(8, BODY) \ + } else if (rvv_vsew() == E16) { \ + REDUCTION_LOOP(16, BODY) \ + } else if (rvv_vsew() == E32) { \ + REDUCTION_LOOP(32, BODY) \ + } else if (rvv_vsew() == E64) { \ + REDUCTION_LOOP(64, BODY) \ + } \ + rvv_trace_vd(); + +#define VI_VFP_LOOP_REDUCTION_BASE(width) \ + float##width##_t vd_0 = Rvvelt(rvv_vd_reg(), 0); \ + float##width##_t vs1_0 = Rvvelt(rvv_vs1_reg(), 0); \ + vd_0 = vs1_0; \ + /*bool is_active = false;*/ \ + for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); \ + float##width##_t vs2 = Rvvelt(rvv_vs2_reg(), i); \ + /*is_active = true;*/ + +#define VI_VFP_LOOP_REDUCTION_END(x) \ + } \ + set_rvv_vstart(0); \ + if (rvv_vl() > 0) { \ + Rvvelt::type>(rvv_vd_reg(), 0, true) = vd_0; \ + } + +#define RVV_VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ + if (rvv_vsew() == E16) { \ + UNIMPLEMENTED(); \ + } else if (rvv_vsew() == E32) { \ + VI_VFP_LOOP_REDUCTION_BASE(32) \ + BODY32; \ + VI_VFP_LOOP_REDUCTION_END(32) \ 
+ } else if (rvv_vsew() == E64) { \ + VI_VFP_LOOP_REDUCTION_BASE(64) \ + BODY64; \ + VI_VFP_LOOP_REDUCTION_END(64) \ + } \ + rvv_trace_vd(); + +// reduction loop - unsgied +#define RVV_VI_ULOOP_REDUCTION_BASE(x) \ + auto& vd_0_des = Rvvelt::type>(rvv_vd_reg(), 0, true); \ + auto vd_0_res = Rvvelt::type>(rvv_vs1_reg(), 0); \ + for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); \ + auto vs2 = Rvvelt::type>(rvv_vs2_reg(), i); + +#define REDUCTION_ULOOP(x, BODY) \ + RVV_VI_ULOOP_REDUCTION_BASE(x) \ + BODY; \ + RVV_VI_LOOP_REDUCTION_END(x) + +#define RVV_VI_VV_ULOOP_REDUCTION(BODY) \ + if (rvv_vsew() == E8) { \ + REDUCTION_ULOOP(8, BODY) \ + } else if (rvv_vsew() == E16) { \ + REDUCTION_ULOOP(16, BODY) \ + } else if (rvv_vsew() == E32) { \ + REDUCTION_ULOOP(32, BODY) \ + } else if (rvv_vsew() == E64) { \ + REDUCTION_ULOOP(64, BODY) \ + } \ + rvv_trace_vd(); + +#define VI_STRIP(inx) reg_t vreg_inx = inx; + +#define VI_ELEMENT_SKIP(inx) \ + if (inx >= vl) { \ + continue; \ + } else if (inx < rvv_vstart()) { \ + continue; \ + } else { \ + RVV_VI_LOOP_MASK_SKIP(); \ + } + +#define require_vm \ + do { \ + if (instr_.RvvVM() == 0) CHECK_NE(rvv_vd_reg(), 0); \ + } while (0); + +#define VI_CHECK_STORE(elt_width, is_mask_ldst) \ + reg_t veew = is_mask_ldst ? 1 : sizeof(elt_width##_t) * 8; +// float vemul = is_mask_ldst ? 1 : ((float)veew / rvv_vsew() * Rvvvflmul); +// reg_t emul = vemul < 1 ? 1 : vemul; +// require(vemul >= 0.125 && vemul <= 8); +// require_align(rvv_rd(), vemul); +// require((nf * emul) <= (NVPR / 4) && (rvv_rd() + nf * emul) <= NVPR); + +#define VI_CHECK_LOAD(elt_width, is_mask_ldst) \ + VI_CHECK_STORE(elt_width, is_mask_ldst); \ + require_vm; + +/*vd + fn * emul*/ +#define RVV_VI_LD(stride, offset, elt_width, is_mask_ldst) \ + const reg_t nf = rvv_nf() + 1; \ + const reg_t vl = is_mask_ldst ? ((rvv_vl() + 7) / 8) : rvv_vl(); \ + const int64_t baseAddr = rs1(); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + set_rvv_vstart(i); \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + auto val = ReadMem( \ + baseAddr + (stride) + (offset) * sizeof(elt_width##_t), \ + instr_.instr()); \ + type_sew_t::type& vd = \ + Rvvelt::type>(rvv_vd_reg(), \ + vreg_inx, true); \ + vd = val; \ + } \ + } \ + set_rvv_vstart(0); \ + if (::v8::internal::FLAG_trace_sim) { \ + __int128_t value = Vregister_[rvv_vd_reg()]; \ + SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " <-- 0x%016" PRIx64, \ + *(reinterpret_cast(&value) + 1), \ + *reinterpret_cast(&value), \ + (uint64_t)(get_register(rs1_reg()))); \ + } + +#define RVV_VI_ST(stride, offset, elt_width, is_mask_ldst) \ + const reg_t nf = rvv_nf() + 1; \ + const reg_t vl = is_mask_ldst ? 
((rvv_vl() + 7) / 8) : rvv_vl(); \ + const int64_t baseAddr = rs1(); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + set_rvv_vstart(i); \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + elt_width##_t vs1 = Rvvelt::type>( \ + rvv_vs3_reg(), vreg_inx); \ + WriteMem(baseAddr + (stride) + (offset) * sizeof(elt_width##_t), vs1, \ + instr_.instr()); \ + } \ + } \ + set_rvv_vstart(0); \ + if (::v8::internal::FLAG_trace_sim) { \ + __int128_t value = Vregister_[rvv_vd_reg()]; \ + SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " --> 0x%016" PRIx64, \ + *(reinterpret_cast(&value) + 1), \ + *reinterpret_cast(&value), \ + (uint64_t)(get_register(rs1_reg()))); \ + } + +#define VI_VFP_LOOP_SCALE_BASE \ + /*require(STATE.frm < 0x5);*/ \ + for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \ + RVV_VI_LOOP_MASK_SKIP(); + +#define RVV_VI_VFP_CVT_SCALE(BODY8, BODY16, BODY32, CHECK8, CHECK16, CHECK32, \ + is_widen, eew_check) \ + if (is_widen) { \ + RVV_VI_CHECK_DSS(false); \ + } else { \ + RVV_VI_CHECK_SDS(false); \ + } \ + CHECK(eew_check); \ + switch (rvv_vsew()) { \ + case E8: { \ + CHECK8 \ + VI_VFP_LOOP_SCALE_BASE \ + BODY8 /*set_fp_exceptions*/; \ + RVV_VI_VFP_LOOP_END \ + } break; \ + case E16: { \ + CHECK16 \ + VI_VFP_LOOP_SCALE_BASE \ + BODY16 /*set_fp_exceptions*/; \ + RVV_VI_VFP_LOOP_END \ + } break; \ + case E32: { \ + CHECK32 \ + VI_VFP_LOOP_SCALE_BASE \ + BODY32 /*set_fp_exceptions*/; \ + RVV_VI_VFP_LOOP_END \ + } break; \ + default: \ + require(0); \ + break; \ + } \ + rvv_trace_vd(); + +// calculate the value of r used in rounding +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) { + uint8_t d = v8::internal::unsigned_bitextract_64(shift, shift, v); + uint8_t d1; + uint64_t D1, D2; + + if (shift == 0 || shift > 64) { + return 0; + } + + d1 = v8::internal::unsigned_bitextract_64(shift - 1, shift - 1, v); + D1 = v8::internal::unsigned_bitextract_64(shift - 1, 0, v); + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ + return d1; + } else if (vxrm == 1) { /* round-to-nearest-even */ + if (shift > 1) { + D2 = v8::internal::unsigned_bitextract_64(shift - 2, 0, v); + return d1 & ((D2 != 0) | d); + } else { + return d1 & d; + } + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ + return !d & (D1 != 0); + } + return 0; /* round-down (truncate) */ +} + +template +inline Dst signed_saturation(Src v, uint n) { + Dst smax = (Dst)(INT64_MAX >> (64 - n)); + Dst smin = (Dst)(INT64_MIN >> (64 - n)); + return (v > smax) ? smax : ((v < smin) ? smin : (Dst)v); +} + +template +inline Dst unsigned_saturation(Src v, uint n) { + Dst umax = (Dst)(UINT64_MAX >> (64 - n)); + return (v > umax) ? umax : ((v < 0) ? 
0 : (Dst)v); +} + +#define RVV_VN_CLIPU_VI_LOOP() \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VN_UPARAMS(16); \ + vd = unsigned_saturation( \ + (static_cast(vs2) >> uimm5) + \ + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 8); \ + } else if (rvv_vsew() == E16) { \ + VN_UPARAMS(32); \ + vd = unsigned_saturation( \ + (static_cast(vs2) >> uimm5) + \ + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 16); \ + } else if (rvv_vsew() == E32) { \ + VN_UPARAMS(64); \ + vd = unsigned_saturation( \ + (static_cast(vs2) >> uimm5) + \ + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 32); \ + } else if (rvv_vsew() == E64) { \ + UNREACHABLE(); \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VN_CLIP_VI_LOOP() \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E8) { \ + VN_PARAMS(16); \ + vd = signed_saturation( \ + (vs2 >> uimm5) + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 8); \ + } else if (rvv_vsew() == E16) { \ + VN_PARAMS(32); \ + vd = signed_saturation( \ + (vs2 >> uimm5) + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 16); \ + } else if (rvv_vsew() == E32) { \ + VN_PARAMS(64); \ + vd = signed_saturation( \ + (vs2 >> uimm5) + get_round(static_cast(rvv_vxrm()), vs2, uimm5), \ + 32); \ + } else if (rvv_vsew() == E64) { \ + UNREACHABLE(); \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define CHECK_EXT(div) \ + CHECK_NE(rvv_vd_reg(), rvv_vs2_reg()); \ + reg_t from = rvv_vsew() / div; \ + CHECK(from >= E8 && from <= E64); \ + CHECK_GE((float)rvv_vflmul() / div, 0.125); \ + CHECK_LE((float)rvv_vflmul() / div, 8); \ + require_align(rvv_vd_reg(), rvv_vflmul()); \ + require_align(rvv_vs2_reg(), rvv_vflmul() / div); \ + if ((rvv_vflmul() / div) < 1) { \ + require_noover(rvv_vd_reg(), rvv_vflmul(), rvv_vs2_reg(), \ + rvv_vflmul() / div); \ + } else { \ + require_noover_widen(rvv_vd_reg(), rvv_vflmul(), rvv_vs2_reg(), \ + rvv_vflmul() / div); \ + } + +#define RVV_VI_VIE_8_LOOP(signed) \ + CHECK_EXT(8) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E64) { \ + if (signed) { \ + VI_VIE_PARAMS(64, 8); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(64, 8); \ + vd = static_cast(vs2); \ + } \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VIE_4_LOOP(signed) \ + CHECK_EXT(4) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E32) { \ + if (signed) { \ + VI_VIE_PARAMS(32, 4); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(32, 4); \ + vd = static_cast(vs2); \ + } \ + } else if (rvv_vsew() == E64) { \ + if (signed) { \ + VI_VIE_PARAMS(64, 4); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(64, 4); \ + vd = static_cast(vs2); \ + } \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); + +#define RVV_VI_VIE_2_LOOP(signed) \ + CHECK_EXT(2) \ + RVV_VI_GENERAL_LOOP_BASE \ + RVV_VI_LOOP_MASK_SKIP() \ + if (rvv_vsew() == E16) { \ + if (signed) { \ + VI_VIE_PARAMS(16, 2); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(16, 2); \ + vd = static_cast(vs2); \ + } \ + } else if (rvv_vsew() == E32) { \ + if (signed) { \ + VI_VIE_PARAMS(32, 2); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(32, 2); \ + vd = static_cast(vs2); \ + } \ + } else if (rvv_vsew() == E64) { \ + if (signed) { \ + VI_VIE_PARAMS(64, 2); \ + vd = static_cast(vs2); \ + } else { \ + VI_VIE_UPARAMS(64, 
2); \ + vd = static_cast(vs2); \ + } \ + } else { \ + UNREACHABLE(); \ + } \ + RVV_VI_LOOP_END \ + rvv_trace_vd(); namespace v8 { namespace internal { diff --git a/deps/v8/src/execution/riscv64/simulator-riscv64.h b/deps/v8/src/execution/riscv64/simulator-riscv64.h index 2fa40cea4e9d15..c812e2665b39c6 100644 --- a/deps/v8/src/execution/riscv64/simulator-riscv64.h +++ b/deps/v8/src/execution/riscv64/simulator-riscv64.h @@ -550,6 +550,116 @@ class Simulator : public SimulatorBase { } } + inline void rvv_trace_vd() { + if (::v8::internal::FLAG_trace_sim) { + __int128_t value = Vregister_[rvv_vd_reg()]; + SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " (%" PRId64 ")", + *(reinterpret_cast(&value) + 1), + *reinterpret_cast(&value), icount_); + } + } + + inline void rvv_trace_vs1() { + if (::v8::internal::FLAG_trace_sim) { + PrintF("\t%s:0x%016" PRIx64 "%016" PRIx64 "\n", + v8::internal::VRegisters::Name(static_cast(rvv_vs1_reg())), + (uint64_t)(get_vregister(static_cast(rvv_vs1_reg())) >> 64), + (uint64_t)get_vregister(static_cast(rvv_vs1_reg()))); + } + } + + inline void rvv_trace_vs2() { + if (::v8::internal::FLAG_trace_sim) { + PrintF("\t%s:0x%016" PRIx64 "%016" PRIx64 "\n", + v8::internal::VRegisters::Name(static_cast(rvv_vs2_reg())), + (uint64_t)(get_vregister(static_cast(rvv_vs2_reg())) >> 64), + (uint64_t)get_vregister(static_cast(rvv_vs2_reg()))); + } + } + inline void rvv_trace_v0() { + if (::v8::internal::FLAG_trace_sim) { + PrintF("\t%s:0x%016" PRIx64 "%016" PRIx64 "\n", + v8::internal::VRegisters::Name(v0), + (uint64_t)(get_vregister(v0) >> 64), (uint64_t)get_vregister(v0)); + } + } + + inline void rvv_trace_rs1() { + if (::v8::internal::FLAG_trace_sim) { + PrintF("\t%s:0x%016" PRIx64 "\n", + v8::internal::Registers::Name(static_cast(rs1_reg())), + (uint64_t)(get_register(rs1_reg()))); + } + } + + inline void rvv_trace_status() { + if (::v8::internal::FLAG_trace_sim) { + int i = 0; + for (; i < trace_buf_.length(); i++) { + if (trace_buf_[i] == '\0') break; + } + SNPrintF(trace_buf_.SubVector(i, trace_buf_.length()), + " sew:%s lmul:%s vstart:%lu vl:%lu", rvv_sew_s(), rvv_lmul_s(), + rvv_vstart(), rvv_vl()); + } + } + + template + T& Rvvelt(reg_t vReg, uint64_t n, bool is_write = false) { + CHECK_NE(rvv_sew(), 0); + CHECK_GT((rvv_vlen() >> 3) / sizeof(T), 0); + reg_t elts_per_reg = (rvv_vlen() >> 3) / (sizeof(T)); + vReg += n / elts_per_reg; + n = n % elts_per_reg; + T* regStart = reinterpret_cast(reinterpret_cast(Vregister_) + + vReg * (rvv_vlen() >> 3)); + return regStart[n]; + } + + inline int32_t rvv_vs1_reg() { return instr_.Vs1Value(); } + inline reg_t rvv_vs1() { UNIMPLEMENTED(); } + inline int32_t rvv_vs2_reg() { return instr_.Vs2Value(); } + inline reg_t rvv_vs2() { UNIMPLEMENTED(); } + inline int32_t rvv_vd_reg() { return instr_.VdValue(); } + inline int32_t rvv_vs3_reg() { return instr_.VdValue(); } + inline reg_t rvv_vd() { UNIMPLEMENTED(); } + inline int32_t rvv_nf() { + return (instr_.InstructionBits() & kRvvNfMask) >> kRvvNfShift; + } + + inline void set_vrd() { UNIMPLEMENTED(); } + + inline void set_rvv_vtype(uint64_t value, bool trace = true) { + vtype_ = value; + } + inline void set_rvv_vl(uint64_t value, bool trace = true) { vl_ = value; } + inline void set_rvv_vstart(uint64_t value, bool trace = true) { + vstart_ = value; + } + inline void set_rvv_vxsat(uint64_t value, bool trace = true) { + vxsat_ = value; + } + inline void set_rvv_vxrm(uint64_t value, bool trace = true) { vxrm_ = value; } + inline void set_rvv_vcsr(uint64_t value, bool trace = true) { vcsr_ 
= value; } + inline void set_rvv_vlenb(uint64_t value, bool trace = true) { + vlenb_ = value; + } + + template + inline T CanonicalizeFPUOpFMA(Func fn, T dst, T src1, T src2) { + STATIC_ASSERT(std::is_floating_point::value); + auto alu_out = fn(dst, src1, src2); + // if any input or result is NaN, the result is quiet_NaN + if (std::isnan(alu_out) || std::isnan(src1) || std::isnan(src2) || + std::isnan(dst)) { + // signaling_nan sets kInvalidOperation bit + if (isSnan(alu_out) || isSnan(src1) || isSnan(src2) || isSnan(dst)) + set_fflags(kInvalidOperation); + alu_out = std::numeric_limits::quiet_NaN(); + } + return alu_out; + } + template inline T CanonicalizeFPUOp3(Func fn) { DCHECK(std::is_floating_point::value);
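  // CanonicalizeFPUOp3 presumably applies the same quiet-NaN policy as
  // CanonicalizeFPUOpFMA above: if any operand or the raw result is NaN, the
  // canonical quiet NaN is returned, and kInvalidOperation is raised in
  // fflags when a signaling NaN is involved.  A hedged usage sketch for the
  // FMA helper (hypothetical operand names, not code from this patch;
  // std::fma comes from <cmath>):
  //
  //   auto fma3 = [](double f1, double f2, double a) {
  //     return std::fma(f1, f2, a);  // f1 * f2 + a with a single rounding
  //   };
  //   // Computes fma(vd, vs1, vs2), collapsing NaN inputs to quiet NaN.
  //   vd = CanonicalizeFPUOpFMA(fma3, vd, vs1, vs2);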