Idea: BledDest
Tutorial
Tutorial is loading...
Solution (BledDest)
#include<bits/stdc++.h>
using namespace std;
int main()
{
int t;
cin >> t;
for(int i = 0; i < t; i++)
{
string s1, s2;
cin >> s1 >> s2;
s1 += s2;
cout << set<char>(s1.begin(), s1.end()).size() - 1 << endl;
}
}
Idea: BledDest
Tutorial
Tutorial is loading...
Solution (awoo)
for _ in range(int(input())):
n, m, sx, sy, d = map(int, input().split())
if min(sx - 1, m - sy) <= d and min(n - sx, sy - 1) <= d:
print(-1)
else:
print(n + m - 2)
1721C - Min-Max Array Transformation
Idea: BledDest
Tutorial
Tutorial is loading...
Solution (adedalic)
fun main() {
repeat(readLine()!!.toInt()) {
val n = readLine()!!.toInt()
val a = readLine()!!.split(' ').map { it.toInt() }
val b = readLine()!!.split(' ').map { it.toInt() }
val indices = (0 until n).sortedBy { a[it] }
val mn = Array(n) { 0 }
val mx = Array(n) { 0 }
var lst = n
for (i in n - 1 downTo 0) {
val pos = indices[i]
mn[pos] = lowerBound(b, a[pos])
mx[pos] = lst - 1;
if (i == mn[pos])
lst = i
mn[pos] = b[mn[pos]] - a[pos]
mx[pos] = b[mx[pos]] - a[pos]
}
println(mn.joinToString(" "))
println(mx.joinToString(" "))
}
}
fun lowerBound(a: List<Int>, v: Int): Int {
var l = -1; var r = a.size
while (r - l > 1) {
val m = (l + r) / 2
if (a[m] < v)
l = m
else
r = m
}
return r
}
Idea: BledDest
Tutorial
Tutorial is loading...
Solution (Neon)
#include <bits/stdc++.h>
using namespace std;
int main() {
ios::sync_with_stdio(false); cin.tie(0);
int t;
cin >> t;
while (t--) {
int n;
cin >> n;
vector<int> a(n), b(n);
for (int& x : a) cin >> x;
for (int& x : b) cin >> x;
auto check = [&](int ans) {
map<int, int> cnt;
for (int x : a) ++cnt[x & ans];
for (int x : b) --cnt[~x & ans];
bool ok = true;
for (auto it : cnt) ok &= it.second == 0;
return ok;
};
int ans = 0;
for (int bit = 29; bit >= 0; --bit)
if (check(ans | (1 << bit)))
ans |= 1 << bit;
cout << ans << '\n';
}
}
1721E - Prefix Function Queries
Idea: BledDest
Tutorial
Tutorial is loading...
Solution (awoo)
#include <bits/stdc++.h>
#define forn(i, n) for (int i = 0; i < int(n); i++)
using namespace std;
const int AL = 26;
vector<int> prefix_function(const string &s){
int n = s.size();
vector<int> p(n);
for (int i = 1; i < n; ++i){
int k = p[i - 1];
while (k > 0 && s[i] != s[k])
k = p[k - 1];
k += (s[i] == s[k]);
p[i] = k;
}
return p;
}
int main() {
cin.tie(0);
iostream::sync_with_stdio(false);
string s;
cin >> s;
int n = s.size();
auto p = prefix_function(s);
vector<vector<int>> A(n, vector<int>(AL));
forn(i, n) forn(j, AL){
if (i > 0 && j != s[i] - 'a')
A[i][j] = A[p[i - 1]][j];
else
A[i][j] = i + (j == s[i] - 'a');
}
int q;
cin >> q;
vector<vector<int>> ans(q);
forn(z, q){
string t;
cin >> t;
int m = t.size();
s += t;
for (int i = n; i < n + m; ++i){
A.push_back(vector<int>(AL));
forn(j, AL){
if (j != s[i] - 'a')
A[i][j] = A[p[i - 1]][j];
else
A[i][j] = i + (j == s[i] - 'a');
}
p.push_back(A[p[i - 1]][s[i] - 'a']);
ans[z].push_back(p[i]);
}
forn(_, m){
p.pop_back();
s.pop_back();
A.pop_back();
}
}
for (auto &it : ans){
for (int x : it)
cout << x << ' ';
cout << '\n';
}
return 0;
}
Idea: BledDest
Tutorial
Tutorial is loading...
Solution (BledDest)
#include<bits/stdc++.h>
using namespace std;
const int N = 400043;
const int INF = int(1e9);
struct edge
{
int y, c, f;
edge() {};
edge(int y, int c, int f) : y(y), c(c), f(f) {};
};
vector<edge> e;
vector<int> g[N];
int S, T, V;
int d[N], lst[N];
int n1, n2, m, q;
map<pair<int, int>, int> es;
void add_edge(int x, int y, int c)
{
g[x].push_back(e.size());
e.push_back(edge(y, c, 0));
g[y].push_back(e.size());
e.push_back(edge(x, 0, 0));
}
int rem(int x)
{
return e[x].c - e[x].f;
}
bool bfs()
{
for (int i = 0; i < V; i++)
d[i] = INF;
d[S] = 0;
queue<int> q;
q.push(S);
while (!q.empty())
{
int k = q.front();
q.pop();
for (auto y : g[k])
{
if (rem(y) == 0)
continue;
int to = e[y].y;
if (d[to] > d[k] + 1)
{
q.push(to);
d[to] = d[k] + 1;
}
}
}
return d[T] < INF;
}
int dfs(int x, int mx)
{
if (x == T || mx == 0)
return mx;
int sum = 0;
for (; lst[x] < g[x].size(); lst[x]++)
{
int num = g[x][lst[x]];
int r = rem(num);
if (r == 0)
continue;
int to = e[num].y;
if (d[to] != d[x] + 1)
continue;
int pushed = dfs(to, min(r, mx));
if (pushed > 0)
{
e[num].f += pushed;
e[num ^ 1].f -= pushed;
sum += pushed;
mx -= pushed;
if (mx == 0)
return sum;
}
}
return sum;
}
int Dinic()
{
int ans = 0;
while (bfs())
{
for (int i = 0; i < V; i++)
lst[i] = 0;
int f = 0;
do
{
f = dfs(S, INF);
ans += f;
} while (f > 0);
}
return ans;
}
int main()
{
scanf("%d %d %d %d", &n1, &n2, &m, &q);
S = n1 + n2;
T = S + 1;
V = T + 1;
for (int i = 0; i < m; i++)
{
int x, y;
scanf("%d %d", &x, &y);
--x;
--y;
es[make_pair(x, y)] = i + 1;
add_edge(x, n1 + y, 1);
}
for (int i = 0; i < n1; i++)
add_edge(S, i, 1);
for (int i = 0; i < n2; i++)
add_edge(i + n1, T, 1);
Dinic();
bfs();
vector<int> vertex_cover;
map<int, int> index;
set<int> matched;
long long sum = 0;
for (int i = 0; i < n1; i++)
if (d[i] == INF)
{
vertex_cover.push_back(i + 1);
for (auto ei : g[i])
if (e[ei].f == 1)
{
int idx = es[make_pair(i, e[ei].y - n1)];
index[i + 1] = idx;
sum += idx;
matched.insert(idx);
}
}
for (int i = 0; i < n2; i++)
if (d[i + n1] != INF)
{
vertex_cover.push_back(-(i + 1));
for (auto ei : g[i + n1])
if (e[ei].f == -1)
{
int idx = es[make_pair(e[ei].y, i)];
index[-(i + 1)] = idx;
sum += idx;
matched.insert(idx);
}
}
for (int i = 0; i < q; i++)
{
int s;
scanf("%d", &s);
if (s == 1)
{
puts("1");
int v = vertex_cover.back();
vertex_cover.pop_back();
printf("%d\n", v);
sum -= index[v];
matched.erase(index[v]);
printf("%lld\n", sum);
}
else
{
printf("%d\n", (int)matched.size());
for(auto x : matched) printf("%d ", x);
puts("");
}
fflush(stdout);
}
}
Nice.
Definitely not intended, but simply storing the answers for all queries you processed worked for E too
(Feel free to hack it)
Submission
This same solution gives MLE for me though
Brute force can AC problem E because of the too small $$$|t|$$$. AC Submission.
That's too bad. I think problem E is really a bad problem.
How should they avoid that? Does $$$| t| \le 15$$$ for example help? Since then your hash wouldn't work, right? I'm not very familiar with hashes so I'm curious.
Hack the hash is one of the way to hack this solution. When $$$|t|$$$ is small, the hash can't be hacked.
Another way is let $$$s=$$$
aa...aa
, and then generate some different string $$$t$$$ with prefixaa...aa
. If $$$|t|$$$ is large enough, this solution will get TLE.Sorry for my bad English :(
My generator code is like below:
I think $$$|t| \le 100$$$ is large enough.
If $$$s = $$$
aaa...aaa
, then this is $$$O(26 \cdot |t| \cdot |s|)$$$, where $$$26 \cdot |t|$$$ are the times you have to compute the prefix function and $$$|s|$$$ is from the bad case of that $$$s$$$, right?Yes!
You can see my generator code above. :)
I do exactly the same but used trie instead of hash.
You can also sort the queries and start from the LCP.
How to show that this is fast enough? I know that this order does minimize the total number of operations that the KMP algorithms will make.
Someone please explain what does tilde "~" mean in C++
It's a negation. Google "negation in bitwise operator"
Am i the only guy who tried to solve D using Divide and Conquer approach with lengthy code.
My Divide and Conquer solution does not work since it has a flaw. Anyone who has the right implementation for D using Divide and Conquer
I tried writing it with a strange msb binary radix sort. I failed during the contest, but the approach should work in O(log max * n). Basically what you do is:
Sort both subarrays according to ith msb.(in O(n) with a quicksort-like method)
Reverse the second subarray.
If the ith msb is in the solution split the array where the ith bit changes (this means each subarray looks like this if you watch only the ith msb:
Subarray of a: 0..01..1
Subarray of b: 1..10..0)
Now you can repeat the sort, check, maybe split until last bit.
This is really bad for a contest, but maybe it can be nice to implement (didn't manage to debug it yet)
edit : it works 170011092
Here is problem D solution using trie
169864432
Seeing who came up with the ideas, I came up with this conclusion about this round.
Can anyone please explain the (Ai & mask) ^ (Bi & mask) = mask in problem D ?
You want to pair $$$A_i$$$ with a $$$B_i$$$ such that $$$(A_i ⊕ B_i)$$$ & $$$mask = mask$$$, and the left side is equal to $$$(A_i$$$ & $$$mask) ⊕ (B_i$$$ & $$$mask)$$$ (distributivity, similar to $$$(a+b).c = a.c + b.c$$$)
My approach to Problem D.
Let’s call an optimal pair the pair ($$$a_i$$$, $$$b_i$$$) after reordering $$$b$$$ to get the answer.
Initially the answer is $$$0$$$. At first we will try to make the highest bit in the answer into $$$1$$$ (I think this doesn’t need a proof). If we want the $$$i-th$$$ bit in the answer to be $$$1$$$, we need for every element of $$$c$$$ the $$$i-th$$$ bit to be $$$1$$$ and for this in every optimal pair we need $$$(a_i$$$ $$$\And$$$ $$$(1$$$ $$$\ll$$$ $$$i))$$$ $$$\oplus$$$ $$$(b_i$$$ $$$\And$$$ $$$(1$$$ $$$\ll$$$ $$$i))$$$ $$$= (1 \ll i)$$$. In other words, $$$($$$the $$$i-th$$$ bit of $$$a_i)$$$ $$$\oplus$$$ $$$($$$the $$$i-th$$$ bit of $$$b_i)$$$ $$$= 1$$$.
We’ll keep a vector $$$va$$$ of pairs of vectors. In each pair of $$$va$$$ there are elements of $$$a$$$ in the first and the elements of $$$b$$$ in the second element, such that the second elements of the optimal pairs, in which first elements are the elements of the first vector, will be the elements of the second vector (i.e. the elements of first and second vectors will construct an optimal pairs). Initially $$$va$$$ will contain only ($$$a$$$, $$$b$$$).
The algorithm is following : for every bit from $$$29-th$$$ to $$$0-th$$$ (let’s say $$$i-th$$$), we will do this: go threw the $$$va$$$, and for every pair of vectors, divide the first into $$$a0$$$ and $$$a1$$$, and the second into $$$b0$$$ and $$$b1$$$. In $$$a0$$$ there are all elements of the first vector, the $$$i-th$$$ bit of which is $$$0$$$, and in $$$a1$$$, the elements, the $$$i-th$$$ bit of which is $$$1$$$. $$$b0$$$ and $$$b1$$$ are constructed with the same way. Now there are two cases:
Time Complexity is $$$O(30 * n)$$$.
My code
My idea is not very different from the idea in the editorial, but my implementation is another.
My approach is similar, but it uses partitioning instead of a vector of pairs of vectors.
Our goal, as before, is to prioritize the highest bits to be $$$1$$$. We can know that it is possible to make a certain bit number $$$j$$$ 1 in our result by checking whether the number of values of $$$a_i$$$ such that the $$$j-th$$$ bit is $$$1$$$ is the same as the number of $$$b_i$$$ such that the $$$j-th$$$ bit is $$$0$$$.
This allows us to match $$$b_i$$$ with a $$$j-th$$$ bit of $$$0$$$ with $$$a_i$$$ with the $$$j-th$$$ bit of 1 and vice versa. We'll move both such values to the very front of their respective arrays using Hoare's Partition Scheme. For future less significant bits, we can use the same algorithm to rematch values within their partitions to preserve the higher bits we have already used. If we discover that any individual partition for a certain value of $$$j$$$ cannot be matched, we advance to the next value of $$$j$$$.
Now all we need to do is to keep track of the order of the partitions as we make them, which can be done with a set.
Code
Total Complexity: $$$O(N*log(N)+30*N)$$$.
Awesome !! Tsovak I had the same idea but couldn't find the right data structure for it. Your implementation clarified it completely.
Tsovak Can you check my implementation once? i am getting TLE although I tried to code the same idea as yours 170072227
You forgot to check if $$$a0$$$ and $$$b1$$$ are empty or not when adding it to $$$temp$$$. Same with $$$a1$$$ and $$$b0$$$.
Can I ask why we have to check whether a0 and b1 are empty or not when adding it to temp. I thought it won't affect the time complexity. Thanks!!!
D can be done with trie as well, however it is much harder to implement.
The partitioning and matching in D can easily be maintained through simple built-in sorting, without requiring any additional maps or data structures.
To check if the most significant bit can be 1 in the answer, we can simply sort $$$a$$$ and reverse-sort $$$b$$$, and then check if this bit position is different between $$$a_i$$$ and $$$b_i$$$ for each index $$$i$$$. If yes, then we set this bit to 1 in the answer, and check the next position. The sorted order already extends naturally to the next bit (values in which the highest bit position are equal will be sorted based on the next highest bit position), so checking this next bit position like this will automatically account for the partitioning of the earlier bit positions.
The only issue is when the check fails for some bit position, i.e., there is some bit position where some $$$a_i$$$ is the same as the corresponding $$$b_i$$$. This means we can't have 1 in this bit position for the answer, and also that we should not allow this bit position to restrict the sorted ordering for later bit positions. To deal with this, we can simply set this bit position to 0 for ALL values in $$$a$$$ and $$$b$$$, and then sort $$$a$$$ and reverse-sort $$$b$$$ again (where the rearrangement will NOT modify the ordering within higher bit positions).
My Submission: 169955540
Such an elegant implementation...
Here is my approach for solving D. May someone explain why it does not work?
Start with the MSB. Check if the number of elements with that bit set (1) in A is equal to those in B with that bit not set (0). If they aren't, proceed to the next bit. If they are equal, rearrange A and B such that the elements in A with that bit set precede those with that bit not set. Also rearrange B such that the elements in B with that bit unset precede those with that bit set. This way, that bit would look similar to this in A and B:
And hence after XORing the corresponding elements the result is n elements having that bit set. Now that A and B are partitioned into two sections, solve each section separately for the next bit.
It should work, in theory. But how exactly are you maintaining these partitions?
When you mentioned solving each section separately for the next bit, note that the check needs to succeed for all partitions corresponding to the current level. So we first check the entire A and B together until we find a bit position that works (first successful position), causing each of A and B to be partitioned into 2 sections each. So for the next bit position, we check each section separately. If we eventually find a second successful position where the check succeeds for both sections, then we again rearrange the values (in a manner that does not disturb the values of the bits from the first successful position), and each section is further partitioned into 2 sections each, resulting in 4 sections for each of A and B. For the third successful position, we need the check to succeed for all four sections, and again rearrange in a manner that doesn't disturb the values from the first and second successful positions, and so on. Note that the partitions are only fixed for successful bit positions, and not for unsuccessful bit positions (where the check fails).
If you're confident that your implementation fulfills these conditions all the way until the end, then it should work.
Probably, problem E 1721E - Запросы про префикс функцию can be solved with sorting of all queries in lexicographical order and processing of them strongly in this order. We need to revert changes only to largest common prefix of given query and previous one. My AC submission: 170039416
And why this solution is fast enough?
Can someone find out why on earth this code got RE? thks in advance.
Edit: solved. (found that sort got an infinite loop. :( )
(Copy-pasted):
Here's an interesting solution I found for E, not sure if it's new. Implementation here, featuring enough spaghetti to feed a family of five for a week.
We do some precomputation first. Use a
map<int, int> m
, where values are hashes of a substring of $$$S$$$, and the corresponding value is the maximal number $$$i$$$ such that the length $$$i$$$ prefix and suffix of $$$S$$$ are the same. To calculate this, we iterate $$$i$$$ independently of the substring, and check if the prefix and suffix have the same hashes in $$$O(1)$$$ time. If so, we add updatem
on substrings $$$S[i+1..i+1+j]$$$ (technically their hash values). As we'll see later, we only care about $$$j \leq 10$$$, so this takes $$$O(|S|\log |S|)$$$ time which is fast enough (we can use anunordered_map
orgp_hash_table
as well but time is not an issue). Note that if we double-check our hash equality by directly comparing the substrings, we TLE on TC 36 which is all a's, since comparing strings/generating substrings is linear in the length of the string so our program runs in $$$O(|s|^2)$$$ in that case.We consider every $$$|S|+i$$$ more or less independently (I will use uppercase letters). Let $$$T_i=T[1..i]$$$ denote the length $$$i$$$ prefix of $$$T$$$. Suppose we have some prefix string $$$x$$$ of $$$S+T_i$$$ that's also a suffix string. There are three possibilities:
(1): Both the prefix and suffix strings lie in both $$$S$$$ and $$$T_i$$$. In this case, we can break $$$x$$$ up into $$$x_1,x_2,x_3$$$ (in that order) defined as follows: $$$x_1$$$ is the portion of the suffix string lying in $$$S$$$, $$$x_3$$$ is the portion of the prefix string lying in $$$T_i$$$, and $$$x_2$$$ is the remainder of $$$x$$$. To check if any $$$x$$$ of this type work, we iterate over $$$|x_3|:=j$$$, checking if the length-$$$j$$$ prefix and suffix of $$$T_i$$$ are the same. If so, then $$$x$$$ works if and only if the rightmost occurrence of $$$x_2$$$ in $$$S$$$ is as far right as it can be (i.e. occupies starting position $$$|S|-|x_2|$$$). In this case, the length-$$$|x_1|$$$ suffix of $$$S$$$ is also $$$x_1$$$, so we can use our precomputed
m
. There are at most $$$10$$$ values for $$$|x_3|$$$, so iterating over them is fast.(2): The prefix string lies entirely in $$$S$$$, but the suffix string does not lie entirely in $$$T_i$$$. This is more or less a simpler version of (1). We split $$$x$$$ into $$$x_1,x_2$$$ such that $$$x_2$$$ is the portion of the suffix string lying in $$$T_i$$$, and $$$x_1$$$ is the remainder. Since $$$x_2$$$ must be $$$T_i$$$ by definition, $$$T_i$$$ must appear in $$$S$$$ as a substring. Further, $$$x_1$$$ must also be the suffix string of $$$S$$$ when this happens. We can use
m
for this as well. Note that there's only one possible value for $$$x_2$$$.(3): The suffix string lies entirely in $$$T_i$$$. If $$$|S|\geq i$$$, then that means the prefix string lies entirely in $$$S$$$. There's at most $$$10$$$ values for $$$x$$$, so we can iterate $$$j$$$ from $$$1$$$ to $$$i$$$ (inclusive) and check if $$$S[1..j]=T_i[i-j+1...i]$$$. Otherwise, we have to join $$$S$$$ and $$$T_i$$$, since the prefix string could partially lie in $$$T_i$$$ as well, but the actual check is the same. Since this case only occurs when $$$|S|\leq 10$$$, joining $$$S$$$ and $$$T_i$$$ is quick (if you join $$$S$$$ and $$$T_i$$$ no matter what, you should TLE). Either way, you iterate through at most $$$10$$$ possible values for $$$x$$$. Ignore the comment at the top lol
To find the final answer, we iterate through all $$$O(1)$$$ cases and update our maximum value each time. The final time complexity should be $$$O((|s|+q)\log |s|)$$$ which works if you're not being stupid.
hello the hazard
Why does F have to be online? I mean, is there any other way to solve it if one can do it offline?
I guess it is because the author wants to prevent solutions that run bipartite matching on every type $$$2$$$ query.
Could someone explain why rank2 tute7627's submission works? The idea is using kmp to optimize mp. I see there are several this kind of submissions so I wonder if the time complexity of this approach is correct and we can use it as a model solution.
Can anyone uphack my solution to D?
I want to share my ultra elegant solution for 1721D - Максимальное И. Look: 169910079
In Solution of D, why (&ans) is used instead of (ans^(x&ans))? I know they are apparently the same, but how can this formula come up in mind? If it is something obvious , please tell me the trick. BledDest
I wasn't the one implementing the model solution, but I guess that it was done for the shorter code. In fact,
x & ans
represents the mask of the number thatx
should be matched with (which should be done for one of the arrays nevertheless), and for the array $$$b$$$, the value of~x & ans
is exactly the same asans ^ (x & ans)
you were mentioning. So, since exactly one of the arrays was analyzed without flipping the bits, elements that have the same value in their corresponding bit operations should be matched. If you useans ^ (x & ans)
for both arrays, you should match the valuev
from the first array with the valuev ^ ans
from the second array.Why does my D TLEed on #15? 169888012
Can someone please have a look at my submission 170673948? The basic idea is that the longest prefix of s+t matching some suffix of s+t will either have length (1) >|t| or (2) <=|t| . For the >|t| part, some (proper)suffix of s will match some suffix of s i.e the z value(wrt the z algorithm) of the index i from which the suffix starts will be n-i (0-indexing assumed). For the <=|t| part we can just brute force to obtain the answer in O(|t|^2) (to obtain prefix function values of s+t for its last |t| characters). So I first precomputed the z values of string s and then wherever z[i] was n-i, I considered all substrings, {(i+1,i+1),(i+1,i+2)...(i+max(|t|),max(n-1,i+10)) and update the 'score' values of each of these substrings. (score value of a string s1 being the prefix function value of s+s1 at the last index, considering only the case when the prefix length is larger than |s1|.) This can be done in O(max(|t|)) per i using a trie structure. Now while processing the queries, I first computed the prefix functions for case (2) as listed above using brute force in O(|t|^2) and for case(1), I just used the score values calculated for all the prefixes of t and updated the prefix function array (by taking max of answers obtained in both cases for each index). This can also in total be done in O(|t|^2) using the trie structure mentioned above. Hence the total complexity of the solution must be O(n.max(|t|)+q.(max(|t|)^2) ~ 2e7 operations which should comfortably pass in 2 seconds, however it TLEs. So can someone find some operation in my algorithm or its implementation which might be the bottleneck time complexitywise?
P.S- n is |s|.
Problem D was very helpful for me to learn bitmasking
can anyone stress test my submission 175562002 of D or tell me where i am wrong?
Take a look at Ticket 16280 from CF Stress for a counter example.
thank you very much!
My approach for E:
First, look up all prefixs of s which equals to the suffix of same length. They can be find by calculating the prefix function of s in O(|s|). Then, for a certain prefix s[0...j-1], we store the result of s[j], s[j...j+1], s[j...j+2], ... , s[j...j+9] into a HashMap (where I used key of type int64, because |t|<=10 and number of smallcase letters is 26, which is smaller than (1<<5)=32, so we could use 5 bits for a letter and 50 bits for encoding a string t, which would not cause any collision, and the process can be done by bit operations otherwise may cause TLE). Also we should not let the value of the same key being smaller in the process.
Then for any query of t, we look the value of t[0...i], if we find the value, it will be outputed directly. Otherwise the value must be <=10, then we can find it by brute force.
The time complexity of preparation is O(|s|*|t|), and for each query is O(|t|^3). Total time complexity is O(|s|*|t|+q*|t|^3), which is at most about 10^8, fits the time limit.