I've seen CF tutorials for many other sections of CSES but didn't see one for strings, so I thought of writing one. Constructive criticism (or just regular criticism) is always welcome!
Note that there are many ways to do a problem. For instance, linear time string searching can be done with hashing, KMP, Z-Algorithm, Aho-Corasick Automaton, etc. . I used the way that was most intuitive for me, but there might exist other ways that have shorter code.
1. Word Combinations
This problem reduces to knapsack if there exists a way to quickly match multiple strings. Precomputing the hashes is too slow ($$$\mathcal{O}(nk)$$$), so a faster method is needed. Aho-Corasick automaton suffices.
#include <bits/stdc++.h>
using namespace std;
#define PB push_back
const int MOD = 1e9 + 7;
string S;
int K, I = 1, DP[5005];
vector<int> adj[500005];
struct node {
int fail, ch[26] = {};
vector<int> lens;
string s = "";
} T[500005];
void insert(string s) {
int x = 1;
for (int i = 0; i < s.size(); i++) {
if (T[x].ch[s[i] - 'a'] == 0)
T[x].ch[s[i] - 'a'] = ++I;
T[T[x].ch[s[i] - 'a']].s = T[x].s + s[i];
x = T[x].ch[s[i] - 'a'];
}
T[x].lens.PB(s.size());
}
// initializes lens
void dfs(int u) {
T[u].lens.insert(T[u].lens.end(), T[T[u].fail].lens.begin(), T[T[u].fail].lens.end());
for (int v : adj[u])
dfs(v);
}
void build() {
queue<int> Q;
int x = 1;
T[1].fail = 1;
for (int i = 0; i < 26; i++) {
if (T[x].ch[i])
T[T[x].ch[i]].fail = x, Q.push(T[x].ch[i]);
else
T[x].ch[i] = 1;
}
while (!Q.empty()) {
x = Q.front(); Q.pop();
for (int i = 0; i < 26; i++) {
if (T[x].ch[i])
T[T[x].ch[i]].fail = T[T[x].fail].ch[i], Q.push(T[x].ch[i]);
else
T[x].ch[i] = T[T[x].fail].ch[i];
}
}
for (int i = 2; i <= I; i++)
adj[T[i].fail].PB(i);
dfs(1);
}
int run(string s) {
DP[0] = 1;
for (int i = 1, x = 1; i < s.size(); i++) {
x = T[x].ch[s[i] - 'a'];
for (int l : T[x].lens)
if (l <= i)
DP[i] = (DP[i] + DP[i - l]) % MOD;
}
return DP[s.size() - 1];
}
int main() {
cin >> S >> K;
for (int i = 0; i < K; i++) {
string s; cin >> s;
insert(s);
}
build();
cout << run(" " + S) << '\n';
}
2. String Matching
There are many ways to solve this problem, including but not limited to: Knuth-Morris-Pratt Algorithm, Z-Algorithm, Rabin-Karp Algorithm, and Suffix Tree/Automaton.
The code below uses Z-Algorithm.
#include <bits/stdc++.h>
using namespace std;
string T, P, S;
int Z[2000005];
int main() {
ios_base::sync_with_stdio(0); cin.tie(0);
cin >> T >> P;
S = P + "$" + T;
int n = (int)S.size();
for (int i = 1, l = 0, r = 0; i < n; i++) {
if (i <= r)
Z[i] = min(Z[i - l], r - i + 1);
while (i + Z[i] < n && S[Z[i]] == S[i + Z[i]])
Z[i]++;
if (i + Z[i] - 1 > r)
l = i, r = i + Z[i] - 1;
}
int ans = 0;
for (int i = P.size() + 1; i < S.size(); i++)
if (Z[i] == P.size())
ans++;
cout << ans << '\n';
}
3. Finding Borders
This problem can be done with hashing. We will hash the entire string, and for each prefix we check if the corresponding suffix has the same hash value
#include <bits/stdc++.h>
using namespace std;
const ll R = 9973, MOD = 99999989;
string S;
ll H[1000005], P = 1;
int main() {
cin >> S;
for (int i = 0; i < S.size(); i++)
H[i] = ((i == 0 ? 0 : H[i - 1]) * R + (ll)S[i]) % MOD;
for (int k = 1; k < (int)S.size(); k++) {
P = (P * R) % MOD;
ll suf = (H[S.size() - 1] - (P * H[S.size() - k - 1]) % MOD + MOD) % MOD;
if (H[k - 1] == suf)
cout << k << ' ';
}
cout << '\n';
}
4. Finding Periods
We can use a modified KMP/Z-Algorithm to calculate the prefix/z-function. Then it suffices for each index $$$i \in [0, n)$$$ to check if $$$i + Z_i \geq n$$$.
#include <bits/stdc++.h>
using namespace std;
string S;
int Z[1000005];
int main() {
cin >> S;
int n = (int)S.size();
for (int i = 1, l = 0, r = 0; i < n; i++) {
if (i <= r)
Z[i] = min(Z[i - l], r - i + 1);
while (i + Z[i] < n && S[Z[i]] == S[i + Z[i]])
Z[i]++;
if (i + Z[i] - 1 > r)
l = i, r = i + Z[i] - 1;
}
for (int i = 0; i < n; i++)
if (i + Z[i] >= n)
cout << i << ' ';
cout << n << '\n';
}
5. Minimal Rotation
Multiple solution methods can be found here. Interestingly enough, $$$\mathcal{O}(n \log n)$$$ suffix array seems to TLE, but I have not tried a linear suffix array method yet.
def least_rotation(S: str) -> int:
"""Booth's algorithm."""
S += S # Concatenate string to it self to avoid modular arithmetic
f = [-1] * len(S) # Failure function
k = 0 # Least rotation of string found so far
for j in range(1, len(S)):
sj = S[j]
i = f[j - k - 1]
while i != -1 and sj != S[k + i + 1]:
if sj < S[k + i + 1]:
k = j - i - 1
i = f[i]
if sj != S[k + i + 1]: # if sj != S[k+i+1], then i == -1
if sj < S[k]: # k+i+1 = k
k = j
f[j - k] = -1
else:
f[j - k] = i + 1
return k
S = input("")
i = least_rotation(S)
print(S[i:] + S[:i])
6. Longest Palindrome
This is a textbook application of Manacher's Algorithm.
Interestingly enough, a shorter code can be attained by simply inserting a special character between two adjacent indicies (so "baacba" would become "b#a#a#c#b#a") then running the odd case of Manacher's Algorithm.
#include <bits/stdc++.h>
using namespace std;
string S;
int d1[1000005], d2[1000005];
int main() {
cin >> S;
int n = S.size();
for (int i = 0, l = 0, r = -1; i < n; i++) {
int k = (i > r) ? 1 : min(d1[l + r - i], r - i + 1);
while (0 <= i - k && i + k < n && S[i - k] == S[i + k]) {
k++;
}
d1[i] = k--;
if (i + k > r) {
l = i - k;
r = i + k;
}
}
for (int i = 0, l = 0, r = -1; i < n; i++) {
int k = (i > r) ? 0 : min(d2[l + r - i + 1], r - i + 1);
while (0 <= i - k - 1 && i + k < n && S[i - k - 1] == S[i + k]) {
k++;
}
d2[i] = k--;
if (i + k > r) {
l = i - k - 1;
r = i + k ;
}
}
int ans = 0, ind = -1;
for (int i = 0; i < n; i++) {
if (d1[i] * 2 - 1 > ans)
ans = d1[i] * 2 - 1, ind = i;
if (d2[i] * 2 > ans)
ans = d2[i] * 2, ind = i;
}
if (ans % 2 == 1)
cout << S.substr(ind - ans / 2, ans);
else
cout << S.substr(ind - ans / 2, ans);
}
7. Required Substring
8. Palindrome Queries
9. Finding Patterns
10. Counting Patterns
11. Pattern Positions
12. Distinct Substring
13. Repeating Substring
14. String Functions
15. Substring Order I
Using suffix automata, the solution becomes quite straightforward. An in-depth look can be found here.
~~~~~
~~~~~
16. Substring Order II
17. Substring Distribution
Let us build a suffix automata over the string $$$S$$$. The length of a path from the initial state $$$s_0$$$ corresponds to the length of a string. By definition, all substrings of the initial string are uniquely encoded. It suffices to count the number of different ways to get to any given node for all nodes in the suffix automaton.
We can take advantage of the fact that each node corresponds to exactly one set of substrings, and that each substring in a node is a suffix of another (except for the largest substring). Thus, for each node, there exists some $$$l, r$$$ such that it is always possible to reach it in $$$d \in [l, r]$$$ steps and impossible if $$$d \notin [l, r]$$$. Because $$$r$$$ has already been conveniently computed when initializing the suffix automaton, we simply have to compute $$$l$$$ for each node, which is just the shortest distance, It suffices to perform a BFS on the suffix automaton
#include <bits/stdc++.h>
using namespace std;
struct SuffixAuto {
struct State {
int len, link;
int next[26];
State(int _len = 0, int _link = -1) : len(_len), link(_link) {
memset(next, -1, sizeof(next));
}
};
vector<State> states;
int last;
SuffixAuto() {}
SuffixAuto(const string &S) {
init(S);
}
inline int state(int len = 0, int link = -1) {
states.emplace_back(len, link);
return states.size() - 1;
}
void init(const string &S) {
states.reserve(2 * S.size());
last = state();
for (char c : S)
extend(c);
}
void extend(char _c) {
int c = _c - 'a'; // change for different alphabet
int cur = state(states[last].len + 1), P = last;
while (P != -1 && states[P].next[c] == -1) {
states[P].next[c] = cur;
P = states[P].link;
}
if (P == -1)
states[cur].link = 0;
else {
int Q = states[P].next[c];
if (states[P].len + 1 == states[Q].len)
states[cur].link = Q;
else {
int C = state(states[P].len + 1, states[Q].link);
copy(states[Q].next, states[Q].next + 26, states[C].next);
while (P != -1 && states[P].next[c] == Q) {
states[P].next[c] = C;
P = states[P].link;
}
states[Q].link = states[cur].link = C;
}
}
last = cur;
}
void debug(int u = 0, string s = "") {
cout << "state " << u << " = " << s << " ->";
for (int i = 0; i < 26; i++)
if (states[u].next[i] != -1)
cout << ' ' << states[u].next[i];
cout << '\n';
for (int i = 0; i < 26; i++)
if (states[u].next[i] != -1)
debug(states[u].next[i], s + (char)(i + 'a'));
}
};
string S;
SuffixAuto sa;
int lb[200005], ans[100005];
int main() {
cin >> S;
sa.init(S);
queue<int> q;
memset(lb, -1, sizeof(lb));
q.push(0);
lb[0] = 0;
while (!q.empty()) {
int u = q.front(); q.pop();
int d = lb[u];
for (int i = 0; i < 26; i++) {
int v = sa.states[u].next[i];
if (v != -1 && lb[v] == -1) {
lb[v] = d + 1;
q.push(v);
}
}
}
for (int i = 1; i < sa.states.size(); i++)
ans[lb[i]]++, ans[sa.states[i].len + 1]--;
ans[0] = 0;
for (int i = 1; i <= S.size(); i++) {
ans[i] += ans[i - 1];
cout << ans[i] << ' ';
}
cout << '\n';
}