article.php/c13137
'RESULTS' table:
REF_STRING - TEXT 250
TEST_STRING - TEXT 250
MATCH_VALU - SINGLE FIXED 2 DECIMAL PLACES
(I also have three additional fields but they are not used in this
query - id, fid, good_match)
From the query:
INSERT INTO RESULTS ( REF_STRING, TEST_STRING, MATCH_VALU )
SELECT REF_LIST.REF_STRING, TEST_LIST.TEST_STRING, LCS([REF_STRING],
[TEST_STRING]) AS Expr1
FROM REF_LIST, TEST_LIST
WHERE (((LCS([REF_STRING],[TEST_STRING]))>0.79));
---
Public Function LCS(String1 As String, String2 As String, Optional AlgInUse) As Single
'* *************** Longest Common Subsequence ***************************
'* Scores the similarity of two strings from the length of their longest
'* common (not necessarily contiguous) subsequence of characters.
'*
'* Parameters:
'*   String1, String2 - the strings to compare. Comparison is per UTF-16
'*                      code unit and is always case-sensitive (binary).
'*   AlgInUse         - optional. When "Dmph" or "SSLCS" the raw
'*                      subsequence length is returned; for any other
'*                      value (or when omitted / Null) the normalised
'*                      score is returned.
'*
'* Returns:
'*   Raw mode        - the LCS length.
'*   Normalised mode - LCS length / ((Len(String1) + Len(String2)) / 2),
'*                     rounded half-up to 2 decimal places (0 .. 1).
'*   0 when either string is empty or nothing is in common.
'*
'* LCS is symmetric: LCS(a, b) = LCS(b, a).
'************************************************************************
    Dim len1 As Long, len2 As Long
    Dim i As Long, j As Long
    Dim cur As Long, prev As Long
    Dim code1() As Integer, code2() As Integer
    Dim c() As Long
    Dim lcsLen As Long
    Dim mode As String
    Dim ratio As Double

    LCS = 0

    ' Use the real character counts. (UBound of a zero-based array is
    ' length - 1, which would drop the last character of each string.)
    len1 = Len(String1)
    len2 = Len(String2)

    ' Nothing can be in common with an empty string; bail out before any
    ' array is dimensioned or any division is attempted.
    If len1 = 0 Or len2 = 0 Then Exit Function

    ' Work on a local copy so the caller's argument is never modified.
    If Not IsMissing(AlgInUse) Then
        If Not IsNull(AlgInUse) Then mode = CStr(AlgInUse)
    End If

    ' Pre-extract the character codes once so the inner loop compares
    ' plain integers instead of calling Mid$ len1 * len2 times.
    ReDim code1(1 To len1)
    ReDim code2(1 To len2)
    For i = 1 To len1
        code1(i) = AscW(Mid$(String1, i, 1))
    Next i
    For j = 1 To len2
        code2(j) = AscW(Mid$(String2, j, 1))
    Next j

    ' Classic LCS dynamic programme. Only the previous row is ever read,
    ' so two rows are kept and alternated via (i And 1). Column 0 of both
    ' rows is never written and therefore stays 0 (the empty-prefix case).
    ReDim c(0 To 1, 0 To len2)
    For i = 1 To len1
        cur = i And 1
        prev = 1 - cur
        For j = 1 To len2
            If code1(i) = code2(j) Then
                c(cur, j) = c(prev, j - 1) + 1      ' extend diagonal match
            ElseIf c(prev, j) >= c(cur, j - 1) Then
                c(cur, j) = c(prev, j)              ' carry from above
            Else
                c(cur, j) = c(cur, j - 1)           ' carry from the left
            End If
        Next j
    Next i
    lcsLen = c(len1 And 1, len2)

    If lcsLen > 0 Then
        If mode = "Dmph" Or mode = "SSLCS" Then
            LCS = lcsLen                            ' raw subsequence length
        Else
            ratio = lcsLen / ((len1 + len2) / 2)
            ' Round half-up to 2 decimals numerically. Going through
            ' Format(..., "#.##") yields "." for values that round to 0,
            ' which CSng cannot convert.
            LCS = CSng(Int(ratio * 100 + 0.5) / 100)
        End If
    End If
End Function