Hello Codeforces ,today I want to introduce a technique that can solve some problems related to distinct values queries on a subtree .The only way I know to solve array distinct values queries with updates is using MO's algorithm .The approach I will introduce can solve the subtree version of this problem online with updates in $O((N+Q)log^2(N))$.So let's get started.↵
↵
**Prerequisites:**↵
↵
1.Heavy light decomposition (it's enough to know how it works and how to implement it).↵
↵
2.Lowest common ancestor.↵
↵
3.Euler tours (not necessary for this specific problem but I will use it to prove some fact).↵
↵
**Problem Statement:**↵
↵
You are given a tree rooted at vertex 1. You have to perform two types of queries:↵
↵
1 $i val$ : update the value of node $i$ to $val$.↵
↵
2 $u$ : output the sum of distinct values in the subtree of node $u$.↵
↵
**Solution:**↵
↵
Whenever you encounter such a strange problem try to make a shift in perspective .What is the first thing you thought when you have seen this problem? Euler tour? Persistent Segment trees? Isn't it similar to the array distinct values queries?↵
But wait ,I just said it's hard to solve this problem. Didn't I?↵
↵
Can we change the problem into path queries problem?↵
↵
Wait what are the nodes that are affected by an update?↵
↵
I turns out that the affected nodes are all in a range on the ancestry path of the updated node .Don't it seem like a standard HLD problem?↵
↵
Not yet .Here comes the idea .Consider our new problem:↵
↵
We want to update the answers in a range on the ancestry path of a node .So ,where does this range end?↵
If we can figure out how to do this then the problem becomes a standard path update and queries problem that can be easily solved using HLD .Now, here is the fun part.↵
↵
Isn't it obvious the endpoint is the first node on the ancestry path that doesn't have an occurrence of $val$?↵
↵
How can we find such a node? well this is the same as finding the first node on the path that have an occurrence then going down by one edge.↵
↵
It turns out that this node is the first node that have the closest occurrence of $val$ in its subtree.↵
But what does it mean for a node to be be the closest?↵
↵
Well ,let's consider this node to be the $lca$ of the updated node and the closet node since the $lca$ is the first node on the ancestry path from the updated node to the root .Now the problem boils down to finding the maximum depth $lca$ among all possible $lca$ s ,but how can we do this fast?↵
↵
Well, if we maintain a map of sets that stores for each number the $time in$ s in DFS order for the nodes with a value equal to that number then the node we are searching for is the node with maximum depth among $lca($updated node ,the first node with time in bigger than the $time in$ of the updated node $)$ and↵
$lca($updated node ,the first node with time in smaller than the $time in$ of the updated node $)$ .But why is this true in general?↵
↵
**Proof of the above fact:**↵
↵
Let's consider the case of (the first node with time in bigger than the time in of the updated node(let's call this node $u$)) then the second case will go through an identical reasoning.↵
↵
Consider taking a node with a bigger time in than $u$ (let's call it $v$).↵
↵
Let $x$ be the first node on the ancestry path then:↵
↵
If a node $d$ is the closest node to $i$ then $d$ must be in the subtree of $x$.↵
Because $in[i]<in[u]<in[v]$ if $v$ was in the subtree of $x$ and $i$ was in the subtree of $x$ then also $u$ is in the subtree of $x$ .Thus taking $u$ is at least as optimal as taking node v.↵
↵
**Note** that there is a corner case when there is at least one occurrence of $val$ in the subtree of $i$ we don't update anything.↵
↵
**Also** note that when we update a node we should remove the old value and add the new value but this is can be done in the same way the adding process is done thus I leave it as an exercise to the reader to check this.↵
↵
**A recap for what we did:**↵
↵
You could reflect on how we transformed a subtree queries problem into a path queries problem which turns out to be much easier if we think that way. Then we used a fact that facilitates using the bruteforce approach by finding optimal values to try .↵
↵
**Implementation:**↵
↵
There are many implementation details that I skipped above but if you are curious here is my code:↵
↵
https://codeforces.net/gym/521454/submission/258943402<spoiler summary="Code">↵
↵
```c++↵
↵
#include <bits/stdc++.h>↵
↵
using namespace std;↵
#define endl '\n'↵
#define ll long long↵
#define mid l+(r-l)/2↵
#define all(x) (x).begin(), (x).end()↵
#define F first↵
#define S second↵
vector<int>adj[100005];↵
ll a[100005];↵
ll ans[100005];↵
int sp[21][100005];↵
map<ll,set<pair<int,int>>>m;↵
set<ll> dfsc(int u,int v)↵
{↵
set<ll>s;↵
s.insert(a[u]);↵
ll anss=a[u];↵
for(auto x:adj[u])↵
{↵
if(x!=v)↵
{↵
set<ll>y=dfsc(x,u);↵
if(y.size()>s.size())↵
{↵
swap(s,y);↵
anss=ans[x];↵
}↵
for(auto z:y)↵
{↵
if(s.find(z)==s.end())↵
{↵
s.insert(z);↵
anss+=z;↵
}↵
}↵
}↵
}↵
ans[u]=anss;↵
return s;↵
}↵
int sz[100005];↵
int nx[100005];↵
int dis[100005];↵
int in[100005];↵
int out[100005];↵
int tt=0;↵
void init(int u,int v)↵
{↵
in[u]=++tt;↵
sp[0][u]=v;↵
m[a[u]].insert({in[u],u});↵
sz[u]=1;↵
int m=0;↵
int j=0;↵
nx[u]=u;↵
for(auto x:adj[u])↵
{↵
if(x!=v)↵
{↵
dis[x]=dis[u]+1;↵
init(x,u);↵
sz[u]+=sz[x];↵
if(sz[x]>m)↵
{↵
m=sz[x];↵
j=x;↵
}↵
}↵
}↵
nx[u]=j;↵
out[u]=tt;↵
}↵
int top[100005];↵
int chain[100005];↵
int chsz[100005];↵
int id[100005];↵
ll val[100005];↵
int k=0;↵
int c=1;↵
int n;↵
void hld(int u,int v)↵
{↵
chain[u]=c;↵
chsz[c]++;↵
id[u]=++k;↵
val[k]=ans[u];↵
if(!top[c])↵
{↵
top[c]=u;↵
}↵
if(nx[u]!=u&&nx[u]!=0)↵
{↵
hld(nx[u],u);↵
c++;↵
}↵
for(auto z:adj[u])↵
{↵
if(z!=v&&z!=nx[u])↵
{↵
hld(z,u);↵
c++;↵
}↵
}↵
}↵
ll tree[400005];↵
ll lz[800005];↵
void propagate(int node,int l,int r)↵
{↵
ll z=r-l+1;↵
tree[node]+=z*lz[node];↵
lz[2*node]+=lz[node];↵
lz[2*node+1]+=lz[node];↵
lz[node]=0;↵
}↵
void build(int node,int l,int r)↵
{↵
if(l==r)↵
{↵
tree[node]=val[l];↵
return;↵
}↵
build(2*node,l,mid);↵
build(2*node+1,mid+1,r);↵
tree[node]=tree[2*node]+tree[2*node+1];↵
}↵
void update(int node,int l,int r,int s,int e,ll z)↵
{↵
propagate(node,l,r);↵
if(l>e||r<s)↵
return;↵
if(l>=s&&r<=e)↵
{↵
lz[node]+=z;↵
propagate(node,l,r);↵
return;↵
}↵
update(2*node,l,mid,s,e,z);↵
update(2*node+1,mid+1,r,s,e,z);↵
tree[node]=tree[2*node]+tree[2*node+1];↵
}↵
ll query(int node,int l,int r,int i)↵
{↵
propagate(node,l,r);↵
if(i<l||i>r)↵
return 0;↵
if(l==r)↵
return tree[node];↵
return query(2*node,l,mid,i)+query(2*node+1,mid+1,r,i);↵
}↵
ll ask(int node)↵
{↵
return query(1,1,n,id[node]);↵
}↵
void upd(int u,int v,ll vx)↵
{↵
while(chain[u]!=chain[v])↵
{↵
if(dis[top[chain[u]]]<dis[top[chain[v]]])↵
{↵
swap(u,v);↵
}↵
update(1,1,n,id[top[chain[u]]],id[u],vx);↵
u=sp[0][top[chain[u]]];↵
}↵
if(dis[u]>dis[v])swap(u,v);↵
update(1,1,n,id[u],id[v],vx);↵
}↵
int lca(int u,int v)↵
{↵
if(dis[u]<dis[v])↵
swap(u,v);↵
int y=dis[u]-dis[v];↵
for(int j=20;j>=0;j--)↵
{↵
if(y&(1<<j))↵
{↵
y-=(1<<j);↵
u=sp[j][u];↵
}↵
}↵
if(u==v)↵
return u;↵
for(int i=20;i>=0;i--)↵
{↵
int x=sp[i][u];↵
int d=sp[i][v];↵
if(x!=d)↵
{↵
u=x;↵
v=d;↵
}↵
}↵
return sp[0][u];↵
}↵
int lift(int u,int d)↵
{↵
for(int i=20;i>=0;i--)↵
{↵
if(d&(1<<i))↵
{↵
d-=(1<<i);↵
u=sp[i][u];↵
}↵
}↵
return u;↵
}↵
int main()↵
{↵
ios_base::sync_with_stdio(0);↵
cin.tie(0);↵
cout.tie(0);↵
int q;↵
cin>>n>>q;↵
for(int i=1;i<=n-1;i++)↵
{↵
int x,y;↵
cin>>x>>y;↵
adj[x].push_back(y);↵
adj[y].push_back(x);↵
}↵
for(int i=1;i<=n;i++)↵
{↵
cin>>a[i];↵
m[a[i]].insert({0,0});↵
}↵
dfsc(1,0);↵
init(1,0);↵
hld(1,0);↵
build(1,1,n);↵
for(int j=1;j<=20;j++)↵
{↵
for(int i=1;i<=n;i++)↵
{↵
sp[j][i]=sp[j-1][sp[j-1][i]];↵
}↵
}↵
dis[0]=-1;↵
while(q--)↵
{↵
int t;↵
cin>>t;↵
if(t==1)↵
{↵
int i;↵
cin>>i;↵
ll v;↵
cin>>v;↵
bool w=0;↵
auto u=m[a[i]].find({in[i],i});↵
int x1=0,x2=0;↵
u--;↵
if(u!=m[a[i]].begin())↵
{↵
x1=lca(i,(*u).S);↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
}↵
u++;↵
u++;↵
if(u!=m[a[i]].end())↵
{↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
x2=lca(i,(*u).S);↵
}↵
u--;↵
if(dis[x1]<dis[x2])↵
swap(x1,x2);↵
if(!w)↵
{↵
x1=lift(i,dis[i]-dis[x1]-1);↵
upd(i,x1,-a[i]);}↵
m[a[i]].erase(u);↵
a[i]=v;↵
m[a[i]].insert({0,0});↵
m[a[i]].insert({in[i],i});↵
x1=0;↵
x2=0;↵
w=0;↵
u=m[a[i]].find({in[i],i});↵
u--;↵
if(u!=m[a[i]].begin())↵
{↵
x1=lca(i,(*u).S);↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
}↵
u++;↵
u++;↵
if(u!=m[a[i]].end())↵
{↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
x2=lca(i,(*u).S);↵
}↵
u--;↵
if(dis[x1]<dis[x2])↵
swap(x1,x2);↵
if(!w)↵
{↵
x1=lift(i,dis[i]-dis[x1]-1);↵
upd(i,x1,a[i]);}↵
}↵
else{↵
int x;↵
cin>>x;↵
cout<<ask(x)<<endl;↵
}↵
}↵
return 0;↵
}↵
```↵
</spoiler>↵
↵
I couldn't find any link for a problem that uses this technique so I made one .↵
↵
link: [problem:521454A]↵
↵
https://codeforces.net/contestInvitation/02c14ae6e9e06b8f0c6a3ce7eb957a1f3ba2f865↵
↵
Another problem suggested by [user:asdasdqwer,2024-05-01] :https://dmoj.ca/problem/dmopc20c1p5↵
↵
You can also optimize heavy light decomposition see https://codeforces.net/blog/entry/104997.↵
↵
Thank you for reading my blog.↵
↵
I spent a lot of time preparing this so I hope you loved it.↵
↵
**P.S. This is my first educational blog .Sorry for long text :) .**
↵
**Prerequisites:**↵
↵
1.Heavy light decomposition (it's enough to know how it works and how to implement it).↵
↵
2.Lowest common ancestor.↵
↵
3.Euler tours (not necessary for this specific problem but I will use it to prove some fact).↵
↵
**Problem Statement:**↵
↵
You are given a tree rooted at vertex 1. You have to perform two types of queries:↵
↵
1 $i val$ : update the value of node $i$ to $val$.↵
↵
2 $u$ : output the sum of distinct values in the subtree of node $u$.↵
↵
**Solution:**↵
↵
Whenever you encounter such a strange problem try to make a shift in perspective .What is the first thing you thought when you have seen this problem? Euler tour? Persistent Segment trees? Isn't it similar to the array distinct values queries?↵
But wait ,I just said it's hard to solve this problem. Didn't I?↵
↵
Can we change the problem into path queries problem?↵
↵
Wait what are the nodes that are affected by an update?↵
↵
I turns out that the affected nodes are all in a range on the ancestry path of the updated node .Don't it seem like a standard HLD problem?↵
↵
Not yet .Here comes the idea .Consider our new problem:↵
↵
We want to update the answers in a range on the ancestry path of a node .So ,where does this range end?↵
If we can figure out how to do this then the problem becomes a standard path update and queries problem that can be easily solved using HLD .Now, here is the fun part.↵
↵
Isn't it obvious the endpoint is the first node on the ancestry path that doesn't have an occurrence of $val$?↵
↵
How can we find such a node? well this is the same as finding the first node on the path that have an occurrence then going down by one edge.↵
↵
It turns out that this node is the first node that have the closest occurrence of $val$ in its subtree.↵
But what does it mean for a node to be be the closest?↵
↵
Well ,let's consider this node to be the $lca$ of the updated node and the closet node since the $lca$ is the first node on the ancestry path from the updated node to the root .Now the problem boils down to finding the maximum depth $lca$ among all possible $lca$ s ,but how can we do this fast?↵
↵
Well, if we maintain a map of sets that stores for each number the $time in$ s in DFS order for the nodes with a value equal to that number then the node we are searching for is the node with maximum depth among $lca($updated node ,the first node with time in bigger than the $time in$ of the updated node $)$ and↵
$lca($updated node ,the first node with time in smaller than the $time in$ of the updated node $)$ .But why is this true in general?↵
↵
**Proof of the above fact:**↵
↵
Let's consider the case of (the first node with time in bigger than the time in of the updated node(let's call this node $u$)) then the second case will go through an identical reasoning.↵
↵
Consider taking a node with a bigger time in than $u$ (let's call it $v$).↵
↵
Let $x$ be the first node on the ancestry path then:↵
↵
If a node $d$ is the closest node to $i$ then $d$ must be in the subtree of $x$.↵
Because $in[i]<in[u]<in[v]$ if $v$ was in the subtree of $x$ and $i$ was in the subtree of $x$ then also $u$ is in the subtree of $x$ .Thus taking $u$ is at least as optimal as taking node v.↵
↵
**Note** that there is a corner case when there is at least one occurrence of $val$ in the subtree of $i$ we don't update anything.↵
↵
**Also** note that when we update a node we should remove the old value and add the new value but this is can be done in the same way the adding process is done thus I leave it as an exercise to the reader to check this.↵
↵
**A recap for what we did:**↵
↵
You could reflect on how we transformed a subtree queries problem into a path queries problem which turns out to be much easier if we think that way. Then we used a fact that facilitates using the bruteforce approach by finding optimal values to try .↵
↵
**Implementation:**↵
↵
There are many implementation details that I skipped above but if you are curious here is my code:↵
↵
↵
```c++↵
↵
#include <bits/stdc++.h>↵
↵
using namespace std;↵
#define endl '\n'↵
#define ll long long↵
#define mid l+(r-l)/2↵
#define all(x) (x).begin(), (x).end()↵
#define F first↵
#define S second↵
vector<int>adj[100005];↵
ll a[100005];↵
ll ans[100005];↵
int sp[21][100005];↵
map<ll,set<pair<int,int>>>m;↵
set<ll> dfsc(int u,int v)↵
{↵
set<ll>s;↵
s.insert(a[u]);↵
ll anss=a[u];↵
for(auto x:adj[u])↵
{↵
if(x!=v)↵
{↵
set<ll>y=dfsc(x,u);↵
if(y.size()>s.size())↵
{↵
swap(s,y);↵
anss=ans[x];↵
}↵
for(auto z:y)↵
{↵
if(s.find(z)==s.end())↵
{↵
s.insert(z);↵
anss+=z;↵
}↵
}↵
}↵
}↵
ans[u]=anss;↵
return s;↵
}↵
int sz[100005];↵
int nx[100005];↵
int dis[100005];↵
int in[100005];↵
int out[100005];↵
int tt=0;↵
void init(int u,int v)↵
{↵
in[u]=++tt;↵
sp[0][u]=v;↵
m[a[u]].insert({in[u],u});↵
sz[u]=1;↵
int m=0;↵
int j=0;↵
nx[u]=u;↵
for(auto x:adj[u])↵
{↵
if(x!=v)↵
{↵
dis[x]=dis[u]+1;↵
init(x,u);↵
sz[u]+=sz[x];↵
if(sz[x]>m)↵
{↵
m=sz[x];↵
j=x;↵
}↵
}↵
}↵
nx[u]=j;↵
out[u]=tt;↵
}↵
int top[100005];↵
int chain[100005];↵
int chsz[100005];↵
int id[100005];↵
ll val[100005];↵
int k=0;↵
int c=1;↵
int n;↵
void hld(int u,int v)↵
{↵
chain[u]=c;↵
chsz[c]++;↵
id[u]=++k;↵
val[k]=ans[u];↵
if(!top[c])↵
{↵
top[c]=u;↵
}↵
if(nx[u]!=u&&nx[u]!=0)↵
{↵
hld(nx[u],u);↵
c++;↵
}↵
for(auto z:adj[u])↵
{↵
if(z!=v&&z!=nx[u])↵
{↵
hld(z,u);↵
c++;↵
}↵
}↵
}↵
ll tree[400005];↵
ll lz[800005];↵
void propagate(int node,int l,int r)↵
{↵
ll z=r-l+1;↵
tree[node]+=z*lz[node];↵
lz[2*node]+=lz[node];↵
lz[2*node+1]+=lz[node];↵
lz[node]=0;↵
}↵
void build(int node,int l,int r)↵
{↵
if(l==r)↵
{↵
tree[node]=val[l];↵
return;↵
}↵
build(2*node,l,mid);↵
build(2*node+1,mid+1,r);↵
tree[node]=tree[2*node]+tree[2*node+1];↵
}↵
void update(int node,int l,int r,int s,int e,ll z)↵
{↵
propagate(node,l,r);↵
if(l>e||r<s)↵
return;↵
if(l>=s&&r<=e)↵
{↵
lz[node]+=z;↵
propagate(node,l,r);↵
return;↵
}↵
update(2*node,l,mid,s,e,z);↵
update(2*node+1,mid+1,r,s,e,z);↵
tree[node]=tree[2*node]+tree[2*node+1];↵
}↵
ll query(int node,int l,int r,int i)↵
{↵
propagate(node,l,r);↵
if(i<l||i>r)↵
return 0;↵
if(l==r)↵
return tree[node];↵
return query(2*node,l,mid,i)+query(2*node+1,mid+1,r,i);↵
}↵
ll ask(int node)↵
{↵
return query(1,1,n,id[node]);↵
}↵
void upd(int u,int v,ll vx)↵
{↵
while(chain[u]!=chain[v])↵
{↵
if(dis[top[chain[u]]]<dis[top[chain[v]]])↵
{↵
swap(u,v);↵
}↵
update(1,1,n,id[top[chain[u]]],id[u],vx);↵
u=sp[0][top[chain[u]]];↵
}↵
if(dis[u]>dis[v])swap(u,v);↵
update(1,1,n,id[u],id[v],vx);↵
}↵
int lca(int u,int v)↵
{↵
if(dis[u]<dis[v])↵
swap(u,v);↵
int y=dis[u]-dis[v];↵
for(int j=20;j>=0;j--)↵
{↵
if(y&(1<<j))↵
{↵
y-=(1<<j);↵
u=sp[j][u];↵
}↵
}↵
if(u==v)↵
return u;↵
for(int i=20;i>=0;i--)↵
{↵
int x=sp[i][u];↵
int d=sp[i][v];↵
if(x!=d)↵
{↵
u=x;↵
v=d;↵
}↵
}↵
return sp[0][u];↵
}↵
int lift(int u,int d)↵
{↵
for(int i=20;i>=0;i--)↵
{↵
if(d&(1<<i))↵
{↵
d-=(1<<i);↵
u=sp[i][u];↵
}↵
}↵
return u;↵
}↵
int main()↵
{↵
ios_base::sync_with_stdio(0);↵
cin.tie(0);↵
cout.tie(0);↵
int q;↵
cin>>n>>q;↵
for(int i=1;i<=n-1;i++)↵
{↵
int x,y;↵
cin>>x>>y;↵
adj[x].push_back(y);↵
adj[y].push_back(x);↵
}↵
for(int i=1;i<=n;i++)↵
{↵
cin>>a[i];↵
m[a[i]].insert({0,0});↵
}↵
dfsc(1,0);↵
init(1,0);↵
hld(1,0);↵
build(1,1,n);↵
for(int j=1;j<=20;j++)↵
{↵
for(int i=1;i<=n;i++)↵
{↵
sp[j][i]=sp[j-1][sp[j-1][i]];↵
}↵
}↵
dis[0]=-1;↵
while(q--)↵
{↵
int t;↵
cin>>t;↵
if(t==1)↵
{↵
int i;↵
cin>>i;↵
ll v;↵
cin>>v;↵
bool w=0;↵
auto u=m[a[i]].find({in[i],i});↵
int x1=0,x2=0;↵
u--;↵
if(u!=m[a[i]].begin())↵
{↵
x1=lca(i,(*u).S);↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
}↵
u++;↵
u++;↵
if(u!=m[a[i]].end())↵
{↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
x2=lca(i,(*u).S);↵
}↵
u--;↵
if(dis[x1]<dis[x2])↵
swap(x1,x2);↵
if(!w)↵
{↵
x1=lift(i,dis[i]-dis[x1]-1);↵
upd(i,x1,-a[i]);}↵
m[a[i]].erase(u);↵
a[i]=v;↵
m[a[i]].insert({0,0});↵
m[a[i]].insert({in[i],i});↵
x1=0;↵
x2=0;↵
w=0;↵
u=m[a[i]].find({in[i],i});↵
u--;↵
if(u!=m[a[i]].begin())↵
{↵
x1=lca(i,(*u).S);↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
}↵
u++;↵
u++;↵
if(u!=m[a[i]].end())↵
{↵
if((*u).F>in[i]&&out[i]>=(*u).F)↵
w=1;↵
x2=lca(i,(*u).S);↵
}↵
u--;↵
if(dis[x1]<dis[x2])↵
swap(x1,x2);↵
if(!w)↵
{↵
x1=lift(i,dis[i]-dis[x1]-1);↵
upd(i,x1,a[i]);}↵
}↵
else{↵
int x;↵
cin>>x;↵
cout<<ask(x)<<endl;↵
}↵
}↵
return 0;↵
}↵
```↵
</spoiler>↵
↵
I couldn't find any link for a problem that uses this technique so I made one .↵
↵
link: [problem:521454A]↵
↵
https://codeforces.net/contestInvitation/02c14ae6e9e06b8f0c6a3ce7eb957a1f3ba2f865↵
↵
Another problem suggested by [user:asdasdqwer,2024-05-01] :https://dmoj.ca/problem/dmopc20c1p5↵
↵
You can also optimize heavy light decomposition see https://codeforces.net/blog/entry/104997.↵
↵
Thank you for reading my blog.↵
↵
I spent a lot of time preparing this so I hope you loved it.↵
↵
**P.S. This is my first educational blog .Sorry for long text :) .**