| #include <stdio.h> |
| #include <string.h> |
| #include <string> |
| #include <xbyak/xbyak.h> |
| #include <xbyak/xbyak_util.h> |
| #include <cybozu/inttype.hpp> |
| #include <cybozu/test.hpp> |
| #include <algorithm> |
| |
| using namespace Xbyak; |
| |
// Checks the machine code generated for ymm-register forms of instructions that
// take a T_sae / T_rn_sae operand modifier.
// The local generator emits every instruction once, in the order written, and the
// result is compared with the reference bytes in tbl (length first, then content).
| CYBOZU_TEST_AUTO(ymm_with_sae) |
| { |
| struct Code : Xbyak::CodeGenerator { |
| Code() |
| { |
// NOTE: the order of these calls must match the order of the encodings in tbl.
// The modifier is always attached to the last register operand. Calls that end
// in ", 3" pass a trailing integer argument, which shows up as the final 0x03
// byte of the matching entries in tbl.
| vaddpd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vaddph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vaddps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vcmppd(k1, ymm2, ymm3 |T_sae, 3); |
| vcmpph(k1, ymm2, ymm3 |T_sae, 3); |
| vcmpps(k1, ymm2, ymm3 |T_sae, 3); |
| vcvtdq2ph(xmm1, ymm2 |T_rn_sae); |
| vcvtdq2ps(ymm1, ymm2 |T_rn_sae); |
| vcvtpd2dq(xmm1, ymm2 |T_rn_sae); |
| vcvtpd2ph(xmm1, ymm2 |T_rn_sae); |
| vcvtpd2ps(xmm1, ymm2 |T_rn_sae); |
| vcvtpd2qq(ymm1, ymm2 |T_rn_sae); |
| vcvtpd2udq(xmm1, ymm2 |T_rn_sae); |
| vcvtpd2uqq(ymm1, ymm2 |T_rn_sae); |
| vcvtph2dq(ymm1, xmm2 |T_rn_sae); |
| vcvtph2pd(ymm1, xmm2 |T_sae); |
| vcvtph2ps(ymm1, xmm2 |T_sae); |
| vcvtph2psx(ymm1, xmm2 |T_sae); |
| vcvtph2qq(ymm1, xmm2 |T_rn_sae); |
| vcvtph2udq(ymm1, xmm2 |T_rn_sae); |
| vcvtph2uqq(ymm1, xmm2 |T_rn_sae); |
| vcvtph2uw(ymm1, ymm2 |T_rn_sae); |
| vcvtph2w(ymm1, ymm2 |T_rn_sae); |
| vcvtps2dq(ymm1, ymm2 |T_rn_sae); |
| vcvtps2pd(ymm1, xmm2 |T_sae); |
| vcvtps2ph(xmm1, ymm2 |T_sae, 3); |
| vcvtps2phx(xmm1, ymm2 |T_rn_sae); |
| vcvtps2qq(ymm1, xmm2 |T_rn_sae); |
| vcvtps2udq(ymm1, ymm2 |T_rn_sae); |
| vcvtps2uqq(ymm1, xmm2 |T_rn_sae); |
| vcvtqq2pd(ymm1, ymm2 |T_rn_sae); |
| vcvtqq2ph(xmm1, ymm2 |T_rn_sae); |
| vcvtqq2ps(xmm1, ymm2 |T_rn_sae); |
| vcvttpd2dq(xmm1, ymm2 |T_sae); |
| vcvttpd2qq(ymm1, ymm2 |T_sae); |
| vcvttpd2udq(xmm1, ymm2 |T_sae); |
| vcvttpd2uqq(ymm1, ymm2 |T_sae); |
| vcvttph2dq(ymm1, xmm2 |T_sae); |
| vcvttph2qq(ymm1, xmm2 |T_sae); |
| vcvttph2udq(ymm1, xmm2 |T_sae); |
| vcvttph2uqq(ymm1, xmm2 |T_sae); |
| vcvttph2uw(ymm1, ymm2 |T_sae); |
| vcvttph2w(ymm1, ymm2 |T_sae); |
| vcvttps2dq(ymm1, ymm2 |T_sae); |
| vcvttps2qq(ymm1, xmm2 |T_sae); |
| vcvttps2udq(ymm1, ymm2 |T_sae); |
| vcvttps2uqq(ymm1, xmm2 |T_sae); |
| vcvtudq2ph(xmm1, ymm2 |T_rn_sae); |
| vcvtudq2ps(ymm1, ymm2 |T_rn_sae); |
| vcvtuqq2pd(ymm1, ymm2 |T_rn_sae); |
| vcvtuqq2ph(xmm1, ymm2 |T_rn_sae); |
| vcvtuqq2ps(xmm1, ymm2 |T_rn_sae); |
| vcvtuw2ph(ymm1, ymm2 |T_rn_sae); |
| vcvtw2ph(ymm1, ymm2 |T_rn_sae); |
| vdivpd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vdivph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vdivps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3); |
| vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3); |
| vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vgetexppd(ymm1, ymm2 |T_sae); |
| vgetexpph(ymm1, ymm2 |T_sae); |
| vgetexpps(ymm1, ymm2 |T_sae); |
| vgetmantpd(ymm1, ymm2 |T_sae, 3); |
| vgetmantph(ymm1, ymm2 |T_sae, 3); |
| vgetmantps(ymm1, ymm2 |T_sae, 3); |
| vmaxpd(ymm1, ymm2, ymm3 |T_sae); |
| vmaxph(ymm1, ymm2, ymm3 |T_sae); |
| vmaxps(ymm1, ymm2, ymm3 |T_sae); |
| vminpd(ymm1, ymm2, ymm3 |T_sae); |
| vminph(ymm1, ymm2, ymm3 |T_sae); |
| vminps(ymm1, ymm2, ymm3 |T_sae); |
| vmulpd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vmulph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vmulps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vrangepd(ymm1, ymm2, ymm3 |T_sae, 3); |
| vrangeps(ymm1, ymm2, ymm3 |T_sae, 3); |
| vreducepd(ymm1, ymm2 |T_sae, 3); |
| vreduceph(ymm1, ymm2 |T_sae, 3); |
| vreduceps(ymm1, ymm2 |T_sae, 3); |
| vrndscalepd(ymm1, ymm2 |T_sae, 3); |
| vrndscaleph(ymm1, ymm2 |T_sae, 3); |
| vrndscaleps(ymm1, ymm2 |T_sae, 3); |
| vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vscalefph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vscalefps(ymm1, ymm2, ymm3 |T_rn_sae); |
| vsqrtpd(ymm1, ymm2 |T_rn_sae); |
| vsqrtph(ymm1, ymm2 |T_rn_sae); |
| vsqrtps(ymm1, ymm2 |T_rn_sae); |
| vsubpd(ymm1, ymm2, ymm3 |T_rn_sae); |
| vsubph(ymm1, ymm2, ymm3 |T_rn_sae); |
| vsubps(ymm1, ymm2, ymm3 |T_rn_sae); |
| } |
// The instance c is defined together with the struct, so the constructor above
// runs (and the code is generated) at this point.
| } c; |
// Reference encodings, concatenated in emission order. Every entry starts with
// 0x62 (the EVEX prefix byte).
| const uint8_t tbl[] = { |
| 0x62, 0xf1, 0xe9, 0x18, 0x58, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x58, 0xcb, 0x62, 0xf1, 0x68, 0x18, |
| 0x58, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf3, 0x68, 0x18, 0xc2, 0xcb, 0x03, |
| 0x62, 0xf1, 0x68, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf5, 0x78, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, |
| 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0xe6, 0xca, 0x62, 0xf5, 0xf9, 0x18, 0x5a, 0xca, 0x62, |
| 0xf1, 0xf9, 0x18, 0x5a, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x79, |
| 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x78, |
| 0x18, 0x5a, 0xca, 0x62, 0xf2, 0x79, 0x18, 0x13, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x13, 0xca, 0x62, |
| 0xf5, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x79, |
| 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7d, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0x79, |
| 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x5a, 0xca, 0x62, 0xf3, 0x79, 0x18, 0x1d, 0xd1, 0x03, |
| 0x62, 0xf5, 0x79, 0x18, 0x1d, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0x78, 0x18, |
| 0x79, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x79, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0xe6, 0xca, 0x62, 0xf5, |
| 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0xe6, 0xca, |
| 0x62, 0xf1, 0xf9, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x78, 0xca, 0x62, 0xf1, 0xf9, 0x18, |
| 0x78, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7a, 0xca, 0x62, 0xf5, |
| 0x78, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7c, 0xca, |
| 0x62, 0xf5, 0x79, 0x18, 0x7c, 0xca, 0x62, 0xf1, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x79, 0x18, |
| 0x7a, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x78, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, |
| 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0x7a, 0xca, |
| 0x62, 0xf5, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0x7b, 0x18, |
| 0x7d, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0xe9, 0x18, 0x5e, 0xcb, 0x62, 0xf5, |
| 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf6, 0x6b, 0x18, 0x56, 0xcb, |
| 0x62, 0xf6, 0x6b, 0x18, 0xd6, 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf3, 0x69, |
| 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x98, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x98, 0xcb, |
| 0x62, 0xf2, 0x69, 0x18, 0x98, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa8, 0xcb, 0x62, 0xf6, 0x69, 0x18, |
| 0xa8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa8, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb8, 0xcb, 0x62, 0xf6, |
| 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0x56, 0xcb, |
| 0x62, 0xf2, 0xe9, 0x18, 0x96, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x96, 0xcb, 0x62, 0xf2, 0x69, 0x18, |
| 0x96, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, |
| 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb6, 0xcb, |
| 0x62, 0xf2, 0x69, 0x18, 0xb6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9a, 0xcb, 0x62, 0xf6, 0x69, 0x18, |
| 0x9a, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9a, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xaa, 0xcb, 0x62, 0xf6, |
| 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xba, 0xcb, |
| 0x62, 0xf6, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0xe9, 0x18, |
| 0x97, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, |
| 0xe9, 0x18, 0xa7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa7, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa7, 0xcb, |
| 0x62, 0xf2, 0xe9, 0x18, 0xb7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb7, 0xcb, 0x62, 0xf2, 0x69, 0x18, |
| 0xb7, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0xd6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9c, 0xcb, 0x62, 0xf6, |
| 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xac, 0xcb, |
| 0x62, 0xf6, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0xe9, 0x18, |
| 0xbc, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, |
| 0xe9, 0x18, 0x9e, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9e, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9e, 0xcb, |
| 0x62, 0xf2, 0xe9, 0x18, 0xae, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xae, 0xcb, 0x62, 0xf2, 0x69, 0x18, |
| 0xae, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xbe, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, |
| 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, 0xf9, 0x18, 0x42, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x42, 0xca, |
| 0x62, 0xf2, 0x79, 0x18, 0x42, 0xca, 0x62, 0xf3, 0xf9, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x78, |
| 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf1, 0xe9, 0x18, 0x5f, |
| 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0xe9, |
| 0x18, 0x5d, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5d, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5d, 0xcb, 0x62, |
| 0xf1, 0xe9, 0x18, 0x59, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x59, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x59, |
| 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x50, 0xcb, 0x03, 0x62, 0xf3, 0x69, 0x18, 0x50, 0xcb, 0x03, 0x62, |
| 0xf3, 0xf9, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x79, |
| 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0xf9, 0x18, 0x09, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x08, |
| 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x08, 0xca, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x2c, 0xcb, 0x62, |
| 0xf6, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf1, 0xf9, 0x18, 0x51, |
| 0xca, 0x62, 0xf5, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0xe9, |
| 0x18, 0x5c, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5c, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5c, 0xcb, |
| }; |
// Number of reference bytes.
| const size_t n = sizeof(tbl) / sizeof(tbl[0]); |
// Both the generated length and every generated byte must equal the reference.
| CYBOZU_TEST_EQUAL(c.getSize(), n); |
| CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); |
| } |
| |
// Checks how vmpsadbw is encoded under two setDefaultEncodingAVX10() settings:
// the first two calls are expected in VEX form (0xc4 prefix), the last two in
// EVEX form (0x62 prefix).
| CYBOZU_TEST_AUTO(vmpsadbw) |
| { |
| struct Generator : Xbyak::CodeGenerator { |
| Generator() |
| { |
// Library-default encoding selection.
| setDefaultEncodingAVX10(); |
| vmpsadbw(xm1, xm3, xm15, 3); // register source, expected as vex(avx) |
| vmpsadbw(ym1, ym3, ptr[rax+128], 3); // memory source, expected as vex(avx2) |
// Explicitly select the AVX10.2 encoding for the remaining instructions.
| setDefaultEncodingAVX10(AVX10v2Encoding); |
| vmpsadbw(ym1, ym3, ym15, 3); // register source, expected as evex(avx10.2) |
| vmpsadbw(ym1, ym3, ptr[rax+128], 3); // memory source, expected as evex(avx10.2) |
| } |
| }; |
// Constructing the generator emits the four instructions above.
| Generator c; |
// Reference bytes: one row per emitted instruction, in emission order.
| const uint8_t tbl[] = { |
| 0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03, |
| 0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03, |
| 0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03, |
| 0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03, |
| }; |
| const size_t n = sizeof(tbl) / sizeof(*tbl); |
// The generated length is checked first, then the generated bytes.
| CYBOZU_TEST_EQUAL(c.getSize(), n); |
| CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); |
| } |