I used O(N * K^2 * P) but gives tle. My code:
#include <bits/stdc++.h>
using namespace std;
#define fo(i, n) for (int i = 0; i < (int)(n); ++i)
#define fr(i, n) for (int i = 1; i <= (int)(n); ++i)
#define ro(i, a, b) for (int i = (a); i <= (int)(b); ++i)
const int MOD = (int)1e9 + 7;
const int maxn = 1e2 + 5;
const int maxm = 3e2 + 5;
int N, K, P, dp[maxn][maxm][maxn];
int main() {
// ios::sync_with_stdio(false), cin.tie(0);
memset(dp, 0, sizeof dp);
fr(i, 100) // iterate on N
fr(j, 300) // iterate on K
fo(k, 101){ // iterate on P
int &ans = dp[i][j][k];
if(i == 1)ans = (k ? 0 : j);
else{
ro(l, 1, j){
// put l at ith place and <l at places behind.
if(l > 1)ans = (ans + dp[i - 1][l - 1][k - 1]) % MOD;
// put l at (i - 1)th place and any of 1..l at ith place.
ans = (ans + 1ll * (dp[i - 1][l][k] - dp[i - 1][l - 1][k]) * l) % MOD;
}
}
}
int t; scanf("%d", &t);
while(t--){
scanf("%d%d%d", &N, &K, &P);
printf("%d\n", dp[N][K][P]);
}
}