This is the first time I have written an NTT (number-theoretic transform). NTT is essentially the modular-arithmetic version of the FFT; see http://blog.csdn.net/acdreamers/article/details/39026505 for a very clear explanation.
The coefficients (the powers of the root of unity) can be precomputed, which reduces the running time considerably, but to keep the template short I still compute them inside the transform.
Enough preamble; on to the problem. The simplest DP is dp[i][j]: the number of ways to choose the first i elements so that their product mod m equals j. Multiplication cannot be used as a transition directly, so we find a primitive root g of m and replace each number by the exponent x for which g^x is congruent to it mod m; multiplying numbers then becomes adding exponents, and the transition can be carried out directly. The complexity of this transition is O(n*m^2). The m^2 part can be viewed as a polynomial multiplication, which NTT reduces to O(m log m). As for n, all n transitions are identical, so the result can be obtained with fast exponentiation. With that, the problem is solved.
#include <iostream>
#include <cstring>
#include <algorithm>
#include <cstdio>
#include <cmath>
using namespace std;
// Capacity of every global work array in this file (input set, tables, NTT buffers).
#define maxn 200000
// Modulus of all NTT arithmetic: 1004535809 = 479 * 2^21 + 1.
// NTT() derives its roots of unity from powers of 3 modulo this value.
#define mod 1004535809
// Shorthand for long long (not used by the code visible here).
#define ll long long
// Modular exponentiation by repeated squaring.
//   a - base
//   b - exponent; the loop consumes it bit by bit from the least significant end
//   c - modulus
// Returns a^b reduced modulo c. When b == 0 the result is 1 without any
// reduction (so it is 1 even for c == 1).
// Each product is widened to 64 bits before taking the remainder so that the
// multiplication of two ints cannot overflow.
int quickpow(int a,int b,int c)
{
    int result=1;
    while(b)
    {
        // Fold the current power of a into the result when this bit is set.
        if(b&1) result=(int)(1LL*result*a%c);
        // Square the base for the next bit.
        a=(int)(1LL*a*a%c);
        b>>=1;
    }
    return result;
}
// Problem input and shared transform state (globals, so zero-initialised).
int n;        // first input value; solve() passes it to quickpow2 as the exponent
int p;        // second input value; modulus used for the primitive root and tables
int x;        // third input value; solve() prints aa[pos[x]]
int m;        // fourth input value; number of entries read into T
int nn;       // transform length: the power of two init() picks, greater than mm
int invn;     // inverse of nn modulo mod, applied at the end of the inverse NTT
int R[maxn];  // bit-reversal permutation of the indices 0..nn-1
int root;     // value returned by Find_Root(p)
// In-place number-theoretic transform of a[0..nn-1] modulo mod.
//   a - buffer of nn values
//   f - direction: 1 for the forward transform, -1 for the inverse
//       (the inverse additionally multiplies every entry by invn)
// Uses the globals nn, R and invn prepared by init().
void NTT(int *a,int f)
{
    // Reorder the input into bit-reversed order; each pair is swapped once.
    for(int i=1;i<nn;++i)
    {
        const int r=R[i];
        if(i<r) std::swap(a[i],a[r]);
    }

    // Butterfly passes: `half` is the half-length of the blocks being merged,
    // `level` is the matching power of two (block length == 1 << level).
    for(int half=1,level=1;half<nn;half<<=1,++level)
    {
        const int step=(mod-1)/(1<<level);
        // Forward uses 3^step; the other direction uses 3^(mod-1-step).
        const int wn=quickpow(3,f==1 ? step : mod-1-step,mod);
        const int span=half<<1;
        for(int base=0;base<nn;base+=span)
        {
            int w=1;
            for(int k=0;k<half;++k)
            {
                const int lo=base+k;
                const int hi=lo+half;
                const int u=a[lo];
                const int v=(int)(1LL*w*a[hi]%mod);
                a[lo]=(int)(1LL*(u+v)%mod);
                // Add mod before the final reduction so the result is non-negative.
                a[hi]=(int)((1LL*(u-v)%mod+mod)%mod);
                w=(int)(1LL*w*wn%mod);
            }
        }
    }

    if(f!=-1) return;
    // Inverse transform: scale by the inverse of the transform length.
    for(int i=0;i<nn;++i) a[i]=(int)(1LL*a[i]*invn%mod);
}
// Input set and polynomial work buffers (globals, so zero-initialised).
int T[maxn];    // input set elements, stored at indices 1..m by init()
int mm;         // 2*p-2, the bound init() uses to choose nn
int aa[maxn];   // accumulator of quickpow2(); solve() reads the answer from it
int bb[maxn];   // bb[e] = number of non-zero input elements v with pos[v] == e
int cc[maxn];   // scratch copy of mul()'s first operand
int dd[maxn];   // scratch copy of mul()'s second operand
int pos[maxn];  // pos[root^i mod p] = i, filled by init()
// Primitive-root test: returns true when x^((p-1)/q) != 1 (mod p) for every
// prime factor q of p-1, i.e. when the multiplicative order of x is p-1.
//   x - candidate; assumed to satisfy 1 <= x < p
//   p - modulus; assumed to be prime (the test is only meaningful then)
//
// p-1 is factorised by trial division. The prime factor left over once all
// factors up to the square root have been divided out must be tested as well:
// a loop bounded only by i*i <= p never tests a prime factor above sqrt(p), and
// would for instance accept x = 6 for p = 7 although 6^2 == 1 (mod 7).
inline bool judge(int x, int p)
{
    int rest = p - 1;  // part of p-1 that has not been factorised yet
    for (int q = 2; 1LL * q * q <= rest; q++)
    {
        if (rest % q != 0) continue;
        // q is a prime factor of p-1 (all smaller primes were divided out).
        if (quickpow(x, (p - 1) / q, p) == 1) return 0;
        while (rest % q == 0) rest /= q;
    }
    // Whatever remains is a single prime factor larger than the square root.
    if (rest > 1 && quickpow(x, (p - 1) / rest, p) == 1) return 0;
    return 1;
}
// Returns the smallest candidate >= 2 that judge() accepts for modulus p,
// or 1 when p == 2. Candidates are tried in increasing order, and the search
// has no upper bound of its own.
inline int Find_Root(int p)
{
    if (p == 2) return 1;
    int candidate = 2;
    while (!judge(candidate, p)) ++candidate;
    return candidate;
}
// num[i] = root^i mod p for 0 <= i < p-1; filled by init() alongside pos[].
int num[maxn];
// Reads the input and prepares every table used later.
// Input order: n, p, x, m, followed by the m elements stored in T[1..m].
// Afterwards:
//   nn, R, invn - transform length, bit-reversal table and inverse of nn mod mod
//   root        - result of Find_Root(p)
//   num, pos    - num[i] = root^i mod p and its inverse mapping pos[num[i]] = i
void init()
{
    scanf("%d%d%d%d",&n,&p,&x,&m);
    for(int i=1;i<=m;i++) scanf("%d",&T[i]);

    // Pick the transform length: the first power of two strictly above 2p-2.
    mm=2*p-2;
    int bits=0;
    nn=1;
    while(nn<=mm)
    {
        nn<<=1;
        ++bits;
    }

    // R[i] is i with its lowest `bits` bits reversed, built from R[i>>1].
    for(int i=1;i<nn;i++)
        R[i]=(R[i>>1]>>1)|((i&1)<<(bits-1));

    // Inverse of nn via exponent mod-2.
    invn=quickpow(nn,mod-2,mod);

    // Tabulate the powers of the root and, inversely, the exponent of each power.
    root=Find_Root(p);
    for(int e=0;e<p-1;e++)
    {
        if(e==0) num[e]=1;
        else num[e]=num[e-1]*root%p;
        pos[num[e]]=e;
    }
}
// Multiplies two polynomials of length nn and folds the product cyclically
// with period p-1.
//   ret  - receives the result; may be the same buffer as num1 and/or num2,
//          because both operands are copied into scratch space first
//   num1 - first operand
//   num2 - second operand
// Uses the global scratch buffers cc and dd and the transform state set up by init().
void mul(int *ret,int *num1,int *num2)
{
    // Work on copies so the operands survive and ret may alias them.
    std::copy(num1,num1+nn,cc);
    std::copy(num2,num2+nn,dd);

    NTT(cc,1);
    NTT(dd,1);
    // Pointwise product in the transformed domain, then transform back.
    for(int i=0;i<nn;i++)
        ret[i]=(int)(1LL*cc[i]*dd[i]%mod);
    NTT(ret,-1);

    // Fold every coefficient at index >= p-1 onto index - (p-1). Walking from
    // the top down means a folded target that is itself >= p-1 is handled later.
    const int period=p-1;
    for(int hi=nn-1;hi>=period;--hi)
    {
        const int lo=hi-period;
        ret[lo]=(int)(1LL*(ret[lo]+ret[hi])%mod);
        ret[hi]=0;
    }
}
// Raises the polynomial a to the b-th power under mul() by binary exponentiation.
//   a - base polynomial; overwritten with its repeated squares
//   b - exponent
// The result is accumulated in the global aa. Only aa[0] is set here, so the
// other entries of aa must already be zero on entry.
void quickpow2(int *a,int b)
{
    aa[0]=1;  // start from the constant polynomial 1
    while(b)
    {
        // Multiply the current power of a into the accumulator for set bits.
        if(b&1) mul(aa,a,aa);
        // Square the base for the next bit.
        mul(a,a,a);
        b>>=1;
    }
}
// Builds the count polynomial from the input set, raises it to the n-th power
// and prints the coefficient at index pos[x].
void solve()
{
    // Zero entries are skipped; every other element v is counted at index pos[v].
    for(int i=1;i<=m;i++)
    {
        const int v=T[i];
        if(v!=0) ++bb[pos[v]];
    }

    quickpow2(bb,n);  // leaves the result in aa
    printf("%d\n",aa[pos[x]]);
}
// Entry point: read the input and build the tables, then compute and print the answer.
int main()
{
// Reads n, p, x, m and the set T; prepares the NTT and primitive-root tables.
init();
// Runs the polynomial exponentiation and prints one line with the result.
solve();
return 0;
}