Recently i did Leetcode [Longest Common Subsequence](https://leetcode.com/problems/longest-common-subsequence/description/) problem. I used the basic dp approach and solved it in 18 ms using 12 mb space. After that i decided to check the fastest solutions for the problem and got this code↵
↵
<spoiler summary="Fastest LCS code">↵
↵
~~~~~↵
class Solution {↵
public:↵
int longestCommonSubsequence(string X, string Y) {↵
if ( X.size() < Y.size() ) swap(X,Y) ;↵
int m = X.size() , n = Y.size();↵
if (m == 0 || n == 0) return 0;↵
int w = (m + 31) >> 5;↵
std::uint32_t S[256][w];↵
std::memset(S, 0, sizeof(std::uint32_t) * 256 * w);↵
std::int32_t set = 1;↵
for (int i = 0, j = 0; i < m; ++i) {↵
S[X[i]][j] |= set;↵
set <<= 1;↵
if (!set) set++,j++;↵
}↵
std::uint32_t L[w];↵
std::memset(L, 0, sizeof(std::uint32_t) * w);↵
for (int i = 0; i < n; ++i) {↵
std::uint32_t b1 = 1;↵
std::uint32_t b2 = 0;↵
for (int j = 0; j < w; ++j) {↵
std::uint32_t U = L[j] | S[Y[i]][j];↵
std::uint32_t c = L[j] >> 31;↵
std::uint32_t V = U - (L[j] << 1 | b1+b2);↵
b1 = c;↵
b2 = (V > U);↵
L[j] = U & (~V);↵
}↵
} ↵
int res = 0;↵
for (int i = 0; i < w; ++i)↵
for (; L[i]; ++res)↵
L[i] &= L[i] - 1;↵
return res;↵
}↵
};↵
~~~~~↵
↵
↵
</spoiler>↵
↵
I submitted it and it ran in 4 ms using 7.5 mb space. I tried to deconstruct the algorithm and understand how it was working. Imodificleaned the code a bit and got it to run in 3 ms, but iI still could not understand it.↵
↵
<spoiler summary="My optimized code">↵
↵
~~~~~↵
#include <cstring>↵
↵
typedef unsigned int uint;↵
↵
class Solution {↵
public:↵
int lcs(string X, string Y){↵
if ( X.size() < Y.size() ) swap(X,Y) ;↵
int m = X.size() , n = Y.size();↵
if (m == 0 || n == 0) return 0;↵
int w = (m + 31) >> 5;↵
uint S[256][w];↵
memset(S, 0, sizeof(S));↵
uint set = 1;↵
for (int i = 0, j = 0; i < m; ++i) {↵
S[X[i]][j] |= set;↵
set <<= 1;↵
if (!set) set++,j++;↵
}↵
uint L[w];↵
memset(L, 0, sizeof(L));↵
for (int i = 0; i < n; ++i) {↵
uint b1 = 1;↵
uint b2 = 0;↵
for (int j = 0; j < w; ++j) {↵
uint U = L[j] | S[Y[i]][j];↵
uint V = U - (L[j] << 1 | b1+b2);↵
b1 = L[j] >> 31;↵
b2 = (V > U);↵
L[j] = U & (~V);↵
}↵
} ↵
int res = 0;↵
for (int i = 0; i < w; ++i)↵
res += __builtin_popcount(L[i]);↵
return res;↵
}↵
↵
int longestCommonSubsequence(string text1, string text2) {↵
return lcs(text1, text2);↵
}↵
};↵
~~~~~↵
↵
↵
</spoiler>↵
↵
I could not find any relevant blogs explaining it. Can anyone help me understand how this works or link any blogs. Thanks.↵
↵
<spoiler summary="Fastest LCS code">↵
↵
~~~~~↵
class Solution {↵
public:↵
int longestCommonSubsequence(string X, string Y) {↵
if ( X.size() < Y.size() ) swap(X,Y) ;↵
int m = X.size() , n = Y.size();↵
if (m == 0 || n == 0) return 0;↵
int w = (m + 31) >> 5;↵
std::uint32_t S[256][w];↵
std::memset(S, 0, sizeof(std::uint32_t) * 256 * w);↵
std::int32_t set = 1;↵
for (int i = 0, j = 0; i < m; ++i) {↵
S[X[i]][j] |= set;↵
set <<= 1;↵
if (!set) set++,j++;↵
}↵
std::uint32_t L[w];↵
std::memset(L, 0, sizeof(std::uint32_t) * w);↵
for (int i = 0; i < n; ++i) {↵
std::uint32_t b1 = 1;↵
std::uint32_t b2 = 0;↵
for (int j = 0; j < w; ++j) {↵
std::uint32_t U = L[j] | S[Y[i]][j];↵
std::uint32_t c = L[j] >> 31;↵
std::uint32_t V = U - (L[j] << 1 | b1+b2);↵
b1 = c;↵
b2 = (V > U);↵
L[j] = U & (~V);↵
}↵
} ↵
int res = 0;↵
for (int i = 0; i < w; ++i)↵
for (; L[i]; ++res)↵
L[i] &= L[i] - 1;↵
return res;↵
}↵
};↵
~~~~~↵
↵
↵
</spoiler>↵
↵
I submitted it and it ran in 4 ms using 7.5 mb space. I tried to deconstruct the algorithm and understand how it was working. I
↵
<spoiler summary="My optimized code">↵
↵
~~~~~↵
#include <cstring>↵
↵
typedef unsigned int uint;↵
↵
class Solution {↵
public:↵
int lcs(string X, string Y){↵
if ( X.size() < Y.size() ) swap(X,Y) ;↵
int m = X.size() , n = Y.size();↵
if (m == 0 || n == 0) return 0;↵
int w = (m + 31) >> 5;↵
uint S[256][w];↵
memset(S, 0, sizeof(S));↵
uint set = 1;↵
for (int i = 0, j = 0; i < m; ++i) {↵
S[X[i]][j] |= set;↵
set <<= 1;↵
if (!set) set++,j++;↵
}↵
uint L[w];↵
memset(L, 0, sizeof(L));↵
for (int i = 0; i < n; ++i) {↵
uint b1 = 1;↵
uint b2 = 0;↵
for (int j = 0; j < w; ++j) {↵
uint U = L[j] | S[Y[i]][j];↵
uint V = U - (L[j] << 1 | b1+b2);↵
b1 = L[j] >> 31;↵
b2 = (V > U);↵
L[j] = U & (~V);↵
}↵
} ↵
int res = 0;↵
for (int i = 0; i < w; ++i)↵
res += __builtin_popcount(L[i]);↵
return res;↵
}↵
↵
int longestCommonSubsequence(string text1, string text2) {↵
return lcs(text1, text2);↵
}↵
};↵
~~~~~↵
↵
↵
</spoiler>↵
↵
I could not find any relevant blogs explaining it. Can anyone help me understand how this works or link any blogs. Thanks.↵