codeforces最慢的点跑了3904ms
https://codeforces.com/contest/321/submission/298673572
#pragma GCC optimize(2)
#include<bits/stdc++.h>
using namespace std;
const int N=4010,M=810;
int a[N][N];
long long sum[N][N];
long long w[N][N];
long long f[N][M];
int g[N][M];
int n,k;
int read(){
char ch=getchar();
while(!isdigit(ch)) ch=getchar();
return ch^48;
}
int main(){
scanf("%d%d",&n,&k);
for(int i=1;i<=n;i++){
for(int j=1;j<=n;j++) a[i][j]=read();
}
for(int j=1;j<=n;j++){
for(int i=1;i<=n;i++) sum[i][j]=sum[i-1][j]+a[i][j];
}
for(int i=1;i<=n;i++){
w[i][i]=0;
for(int j=i+1;j<=n;j++){
w[i][j]=w[i][j-1]+sum[j][j]-sum[i-1][j];
}
}
memset(f,0x3f,sizeof f);
f[0][0]=0;
for(int j=1;j<=k;j++){
g[n+1][j]=n;
for(int i=n;i>=1;i--){
for(int k=g[i][j-1];k<=g[i+1][j];k++){
if(f[k][j-1]+w[k+1][i]<f[i][j]){
f[i][j]=f[k][j-1]+w[k+1][i];
g[i][j]=k;
}
}
}
}
printf("%lld\n",f[n][k]);
return 0;
}