#include<bits/stdc++.h>
using namespace std;
#define re register
#define rint re int
#define ll long long
#define rll re ll
#define db double
#define rdb re db
#define rch re char
#define un unsigned
#define ull un ll
#define rull re ull
#define ldb long db
#define rldb re ldb
const int N=1048576;
char buf[N],*p1,*p2;
#define getchar() (p1==p2?(p2=buf+fread(p1=buf,1,N,stdin),p1==p2?EOF:*p1++):*p1++)
int read()
{
rch ch=getchar();rint ans=0;
for(;!isdigit(ch);ch=getchar());
for(;isdigit(ch);ch=getchar())
ans=ans*10+(ch&15);
return ans;
}
const ll mod=4179340454199820289;
const int gmod=3;
const int maxn=5e5+10;
inline ll Mul(rll x,rll y){return (x*y-(ull)((ldb)x/mod*y)*mod+mod)%mod;}
inline ll Plus(rll x,rll y){return x+=y,x>=mod?x-mod:x;}
inline ll Minus(rll x,rll y){return x-=y,x<0?x+mod:x;}
inline ll mypow(rll x,rll y)
{
rll ans=1;
for(;y;y>>=1,x=Mul(x,x))
if(y&1) ans=Mul(ans,x);
return ans;
}
inline int mypow(rint x,rint y,rint mod)
{
rint ans=1;
for(;y;y>>=1,x=1ll*x*x%mod)
if(y&1) ans=1ll*ans*x%mod;
return ans;
}
ll W[N];int R[N];
void ntt(rll a[],rint lim,rint type)
{
for(rint i=0;i<lim;i++) if(R[i]>i) swap(a[i],a[R[i]]);
for(rint d=1;d<lim;d<<=1)
for(rint i=0;i<lim;i+=d<<1)
for(rint j=i;j<i+d;j++)
{
rll tmp=Mul(a[j+d],W[j+d-i]);
a[j+d]=Minus(a[j],tmp);
a[j]=Plus(a[j],tmp);
}
if(~type) return;
reverse(a+1,a+lim);
rll inv=mypow(lim,mod-2);
for(rint i=0;i<lim;i++) a[i]=Mul(a[i],inv);
}
vector<int> vec;
void get_vec(rint n)
{
rint tmp=n;
vec.clear();
rint sq=sqrt(n+0.5);
for(rint i=2;i<=sq;i++)
if(n%i==0)
{
while(n%i==0) n/=i;
vec.push_back(tmp/i);
}
if(n>1) vec.push_back(tmp/n);
}
int check(rint n,rint p)
{
for(rint i=0;i<vec.size();i++)
if(mypow(n,vec[i],p)==1) return 0;
return 1;
}
int get_rt(rint p)
{
rint phi=p-1;
get_vec(phi);
for(rint i=2;i<p;i++)
if(mypow(i,phi,p)==1&&check(i,p))
return i;
return 1;
}
int a[maxn];
int b[maxn];
int w[maxn];
ll A[N],B[N];
int M;
void Blue(rint a[],rint n,rint lim,rint type)
{
for(rint i=0;i<n;i++) A[i]=1ll*a[i]*w[n-1ll*i*(i-1)/2%n]%M;
for(rint i=0;i<n*2;i++) B[i]=w[1ll*(n*2-i-1)*(n*2-i-2)/2%n];
memset(A+n,0,lim-n<<3),memset(B+n*2,0,lim-n*2<<3);
ntt(A,lim,1),ntt(B,lim,1);
for(rint i=0;i<lim;i++) A[i]=Mul(A[i],B[i]);
ntt(A,lim,-1);
for(rint i=0;i<n;i++) a[i]=A[n*2-i-1]%M*w[n-1ll*i*(i-1)/2%n]%M;
if(~type) return;
reverse(a+1,a+n);
rint inv=mypow(n,M-2,M);
for(rint i=0;i<n;i++) a[i]=1ll*a[i]*inv%M;
}
int main()
{
rint n=read(),C=read()%n;M=n+1;
for(rint i=0;i<n;i++) a[i]=read()%M;
for(rint i=0;i<n;i++) b[i]=read()%M;
rint lim=1;while(lim<n*2) lim<<=1;
for(rint i=0;i<lim;i++) R[i]=R[i>>1]>>1|(i&1?lim>>1:0);
W[lim/2]=1,W[lim/2+1]=mypow(gmod,(mod-1)/lim);
for(rint i=lim/2+2;i<lim;i++) W[i]=Mul(W[i-1],W[lim/2+1]);
for(rint i=lim/2-1;i;i--) W[i]=W[i<<1];
w[0]=w[n]=1,w[1]=get_rt(M);
for(rint i=2;i<n;i++) w[i]=1ll*w[i-1]*w[1]%M;
Blue(a,n,lim,1),Blue(b,n,lim,1);
for(rint i=0;i<n;i++) a[i]=1ll*a[i]*mypow(b[i],C,M)%M;
Blue(a,n,lim,-1);
for(rint i=0;i<n;i++) printf("%d\n",a[i]);
return 0;
}
优化:
1.因为这题值域很小,最多5e53所以可以用1e18的模数+快速乘代替mtt,这里我用的是29∗257+1
2.显然这题的长度可以不用开到n∗3,n∗2即可,因为后n个循环卷积到前面对结果没有影响
3.ntt单位根的处理(这就是你ntt板子的事了)
希望大家不要抢我最优解
说句闲话,学习Bluestein's Algotithm的最好办法就是...
A了这道题
祝你们好运