如题,下面是我自己写的 AC 代码:
#pragma GCC target("avx2")
#include <iostream>
#include <immintrin.h>
using namespace std;
using i256 = __m256i;
const int N = 1.25e5 + 5;
int n, q;
i256 a[N];
void Add (int l, int r, int x) {
for (; (l & 7) && l <= r; ++l) {
((int*)&a)[l] += x;
}
if (l > r) return;
for (; ((r & 7) != 7) && l <= r; --r) {
((int*)&a)[r] += x;
}
if (l > r) return;
l >>= 3, r >>= 3;
i256 x0 = _mm256_set1_epi32(x);
for (; l <= r; ++l) {
a[l] = _mm256_add_epi32(a[l], x0);
}
}
int Query (int l, int r, int x) {
int res = 0;
for (; (l & 7) && l <= r; ++l) {
res += ((int*)&a)[l] >= x;
}
if (l > r) return res;
for (; ((r & 7) != 7) && l <= r; --r) {
res += ((int*)&a)[r] >= x;
}
if (l > r) return res;
l >>= 3, r >>= 3;
i256 x0 = _mm256_set1_epi32(x - 1), s = _mm256_set1_epi32(0);
for (; l <= r; ++l) {
s = _mm256_add_epi32(s, _mm256_cmpgt_epi32(a[l], x0) & 0x100000001);
}
for (int i = 0; i < 8; ++i) {
// res += ((int*)&s)[i];
res += (s[i] & 0xffffffff) + (s[i] >> 32);
}
return res;
}
int main () {
cin.tie(0)->sync_with_stdio(0);
cin >> n >> q;
for (int i = 0; i < n; ++i) {
cin >> ((int*)&a)[i];
}
for (char o; q--; ) {
int l, r, x;
cin >> o >> l >> r >> x, --l, --r;
if (o == 'M') {
Add(l, r, x);
}
else if (o == 'A') {
cout << Query(l, r, x) << '\n';
}
}
return 0;
}
将 Query 函数的倒数第 4 行换成注释部分,也就是将
res += (s[i] & 0xffffffff) + (s[i] >> 32);
改成
res += ((int*)&s)[i];
后 WA 了前 8 个点,问一下为什么,我觉得是等价的啊。