按元素进行 64 位乘法(__m256i / m256i_i64),结果超过 long long 最大值时该如何处理
// Overlays one 256-bit AVX2 integer vector with its four signed 64-bit lanes,
// so that individual lanes can be read or written by index.
union sseUnion
{
    int64_t position[4]; // the four 64-bit lanes of mVec256; position[0] is the lowest lane
    __m256i mVec256;     // 256-bit integer vector: it is assigned from _mm256_* integer intrinsics
};
// vector operator * : multiply element by element
__m256i mul64_haswell_mul(__m256i a, __m256i b) {
// instruction does not exist. Split into 32-bit multiplies
__m256i bswap = _mm256_shuffle_epi32(b, 0xB1); // swap H<->L
__m256i prodlh = _mm256_mullo_epi32(a, bswap); // 32 bit L*H products
__m256i zero = _mm256_setzero_si256(); // 0
__m256i prodlh2 = _mm256_hadd_epi32(prodlh, zero); // a0Lb0H+a0Hb0L,a1Lb1H+a1Hb1L,0,0
__m256i prodlh3 = _mm256_shuffle_epi32(prodlh2, 0x73); // 0, a0Lb0H+a0Hb0L, 0, a1Lb1H+a1Hb1L
__m256i prodll = _mm256_mul_epu32(a, b); // a0Lb0L,a1Lb1L, 64 bit unsigned products
__m256i prod = _mm256_add_epi64(prodll, prodlh3); // a0Lb0L+(a0Lb0H+a0Hb0L)<<32, a1Lb1L+(a1Lb1H+a1Hb1L)<<32
return prod;
}
int main()
{
sseUnion _sseUnion;
_sseUnion.mVec256 = _mm256_set_epi64x(1000000, 1000000, 1000000, 1000000);
sseUnion a2;
a2.mVec256 = _mm256_setr_epi64x(401000000, 401000000, 401000000, 401000000);
a2.mVec256 = _mm256_add_epi64(_sseUnion.mVec256, a2.mVec256);
a2.mVec256 = mul64_haswell_mul(_sseUnion.mVec256, a2.mVec256);
a2.mVec256 = mul64_haswell_mul(_sseUnion.mVec256, a2.mVec256);
printf("%d", a2.mVec256.m256i_i64[0]);
}
a2.position[0..3]
的结果超过了 int64_t 的最大值,所以我得到了错误的值:溢出回绕后的值是 14618374452099416064。我只想在溢出时把结果改为 int64_t 的最大值(INT64_MAX),应该怎么做?
// Overlays one 256-bit AVX2 integer vector with its four signed 64-bit lanes,
// so that individual lanes can be read or written by index.
union sseUnion
{
    int64_t position[4]; // the four 64-bit lanes of mVec256; position[0] is the lowest lane
    __m256i mVec256;     // 256-bit integer vector: it is assigned from _mm256_* integer intrinsics
};
// vector operator * : multiply element by element
__m256i mul64_haswell_mul(__m256i a, __m256i b) {
// instruction does not exist. Split into 32-bit multiplies
__m256i bswap = _mm256_shuffle_epi32(b, 0xB1); // swap H<->L
__m256i prodlh = _mm256_mullo_epi32(a, bswap); // 32 bit L*H products
__m256i zero = _mm256_setzero_si256(); // 0
__m256i prodlh2 = _mm256_hadd_epi32(prodlh, zero); // a0Lb0H+a0Hb0L,a1Lb1H+a1Hb1L,0,0
__m256i prodlh3 = _mm256_shuffle_epi32(prodlh2, 0x73); // 0, a0Lb0H+a0Hb0L, 0, a1Lb1H+a1Hb1L
__m256i prodll = _mm256_mul_epu32(a, b); // a0Lb0L,a1Lb1L, 64 bit unsigned products
__m256i prod = _mm256_add_epi64(prodll, prodlh3); // a0Lb0L+(a0Lb0H+a0Hb0L)<<32, a1Lb1L+(a1Lb1H+a1Hb1L)<<32
return prod;
}
int main()
{
sseUnion _sseUnion;
_sseUnion.mVec256 = _mm256_set_epi64x(1000000, 1000000, 1000000, 1000000);
sseUnion a2;
a2.mVec256 = _mm256_setr_epi64x(401000000, 401000000, 401000000, 401000000);
a2.mVec256 = _mm256_add_epi64(_sseUnion.mVec256, a2.mVec256);
a2.mVec256 = mul64_haswell_mul(_sseUnion.mVec256, a2.mVec256);
a2.mVec256 = mul64_haswell_mul(_sseUnion.mVec256, a2.mVec256);
printf("%d", a2.mVec256.m256i_i64[0]);
}
a2.position[0..3]
The result is bigger than the int64_t maximum, so I get a wrong value: after wrapping around, its value is 14618374452099416064. I just want to clamp it to the int64_t maximum (INT64_MAX) when it overflows. What can I do?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
我以这种方式得到了正确的结果。
And I get the right result this way.