This is just a solution for this problem and a code share for this comment. Binary lifting is much simpler to implement but at that time I was still exploring the DFS and its similar variants.
Here, 2-pass dfs strongly connected components algorithm is done to allow processing the queries in particular order to allow for $$$O(n + m)$$$ where $$$n$$$ is the number of nodes and $$$m$$$ the number of queries, this is a bit better than $$$O((n + m)\log k )$$$ but recursive implementation is computationally expensive (Python solution in 3.12 will pass due to efficient cframes usage but currently CSES uses older versions).
#include <bits/stdc++.h>
using namespace std;
#define N 200001
vector<bool> V;
vector<int> G[N], T[N], L;
vector<vector<int>> S;
vector<pair<int, int>> Q[N];
int A[N];
void t(int x, vector<int> g[], vector<int> &r) {
V[x]= 0;
for(auto u: g[x]) {
if(V[u]) t(u, g, r);
}
r.push_back(x);
}
void solve(int I, vector<pair<int, int>> &cs) {
V[I]= 0;
vector<int> s= S[I];
if(s.size() > 1 || G[s[0]][0] == s[0]) {
for(int i= 0; i < s.size(); i++)
for(auto [q, k]: Q[s[i]]) { A[q]= s[(i + k) % s.size()]; }
} else {
int x= s[0];
for(auto [q, k]: Q[x]) {
if(k == 0) A[q]= x;
else {
int p= min(k, (int)cs.size());
auto [c, i]= cs[cs.size() - p];
k-= p;
A[q]= S[c][(i + k) % S[c].size()];
}
}
}
for(int j= 0; j < s.size(); j++) {
int x= s[j];
cs.emplace_back(I, j);
for(auto u: T[x]) {
if(V[L[u]]) solve(L[u], cs);
}
cs.pop_back();
}
}
int main() {
ios::sync_with_stdio(0), cin.tie(0);
int n, q;
cin >> n >> q;
for(int i= 0; i < n; ++i) {
int b;
cin >> b;
G[i].push_back(b - 1);
T[b - 1].push_back(i);
}
vector<int> O;
V.assign(n, 1);
for(int i= 0; i < n; i++) {
if(V[i]) t(i, G, O);
}
reverse(O.begin(), O.end());
V.assign(n, 1);
L.assign(n, 0);
for(auto i: O) {
if(V[i]) {
t(i, T, S.emplace_back());
for(auto x: S.back()) { L[x]= S.size() - 1; }
}
}
for(int i= 0; i < q; i++) {
int x, k;
cin >> x >> k;
Q[x - 1].emplace_back(i, k);
}
V.assign(n, 1);
for(int i= S.size() - 1; i >= 0; i--) {
if(V[i]) {
vector<pair<int, int>> cs;
solve(i, cs);
}
}
for(int i= 0; i < q; i++) { cout << A[i] + 1 << '\n'; }
}