#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
#include<queue>
#include<cmath>
#include<map>
#include<stack>
#include<set>
#include<bitset>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> pii;
#define ls (rt<<1)
#define rs (rt<<1|1)
#define mid (l+r>>1)
#define pb(x) push_back(x)
#define cls(x, val) memset(x, val, sizeof(x))
#define fi first
#define se second
#define mp(x, y) make_pair(x, y)
#define lowbit(x) (x&(-x))
#define inc(i, l, r) for(int i=l; i<=r; i++)
#define dec(i, r, l) for(int i=r; i>=l; i--)
const int inf = 0x3f3f3f3f;
const int maxn = 4000+10;
const double pi = acos(-1.0);
const double eps = 1e-7;
const int mod = 1e9+7;
int readint()
{
int x=0,f=1;char ch=getchar();
while(ch<'0'||ch>'9'){if(ch=='-')f=-1;ch=getchar();}
while(ch>='0'&&ch<='9'){x=x*10+ch-'0';ch=getchar();}
return x*f;
}
ll readll(){
ll x=0,f=1;char ch=getchar();
while(ch<'0'||ch>'9'){if(ch=='-')f=-1;ch=getchar();}
while(ch>='0'&&ch<='9'){x=x*10+ch-'0';ch=getchar();}
return x*f;
}
int n, k;
int unhappy[maxn][maxn];
int cost[maxn][maxn];
int pre_sum[maxn];
int dp[maxn][maxn];
void pre(){
inc(i, 1, n){
cls(pre_sum, 0);
inc(j, 1, n){
cost[i][j] = cost[i-1][j]+pre_sum[j-1]+unhappy[i][j];
pre_sum[j] = pre_sum[j-1]+unhappy[i][j];
}
}
}
int cal(int i, int j){
return cost[j][j]+cost[i-1][i-1]-cost[i-1][j]-cost[j][i-1];
}
int decision[maxn][maxn];
void solve(){
cls(dp, 0x3f);
inc(i, 1, n) dp[1][i] = cal(1, i), decision[1][i] = 1;
inc(i, 2, k){
decision[i][n+1] = n;
for(int j=n; j>0; j--){
for(int l=decision[i-1][j]; l<=decision[i][j+1]; l++){
if(dp[i-1][l]+cal(l+1, j)<dp[i][j]){
dp[i][j] = dp[i-1][l] + cal(l+1, j);
decision[i][j] = l;
}
}
}
}
printf("%d\n", dp[k][n]>>1);
}
int main()
{
n=readint(), k=readint();
inc(i ,1, n) inc(j, 1, n) unhappy[i][j] = readint();
pre();
solve();
return 0;
}