You can have it no problem,
if you could just send me the Metaphome Algoritm in return, thanks:
'**************************************
' Name: SoundEx - Daitch-Mokotoff algorithm, 6 character result
' Description:
' Encodes an alphabetic name to a six character Daitch-Mokotoff code
' following the Daitch-Mokotoff (D-M) rules available at the sites
' listed in the source code. The D-M algorithm resolves some
' deficiencies that occur in the older Miracode/Soundex system (also
' known as the "Russell"/NARA system - used by the US Census Bureau).
' The benefits include: 1) Six meaningful letter sounds (versus four
' so that Peters is different from Peterson). 2) The initial letter
' is also sound encoded. 3) More sound variations (10 basic codes
' versus seven and double code sounds). 4) Improves sound matching
' for Jewish, Slavic, and Germanic names.
'
' By: Greg Julius, Copyright 2000,
Gr*********@cyconsult.com
' The author would appreciate getting bug reports/fixes and
' any improvements made to this code.
'
' Permission to use and modify is given, please give the author
' credit and pass along the modifications.
'
' Usage: strSoundExResult = SoundEx(strStringToEncode)
' Inputs: An aphabetic string, usually representing a person/place
name
' Returns: Six digit D-M code string
'
' Requires: Code runs in Visual Basic Module
'
' Side Effects: None known.
'
' Searching on the internet finds these sites. Variously they explain
' some history on the D-M Sound Encoding, How D-M coding works, show
the
' D-M sound table, and provide some examples.
'
http://www.everton.com/oe3-10/soundex.htm
'
http://www.jewishgen.org/infofiles/soundex.txt
'
http://www.avotaynu.com/soundex.html
'
http://www.gcis.net/cjhs/aguideto.htm
'
' The following web-based D-M calculators to test SoundEx results,
' Some errors in the samples were found on two of the above sites.
'
http://jgsr.net/database/DM6.cgi (only D-M soundex)
'
http://www.jewishgen.org/jos (both D-M and NARA/Russell)
'
' Reference:
' Steuart, Bradley W. The Soundex Daitch-Mokotoff Reference Guide. 2 v.
' Precision Indexing, 1994. Provides Soundex codes to over 125,000
surnames.
Option Explicit
Private Type DM_Structure
DM_String As String
DM_Matchlen As Integer
DM_Start As String
DM_Vowel As String
DM_Other As String
End Type
Private DM_Table As Collection
Private DM_TableLoaded As Boolean ' false until initialized.
'
Public Function SOUNDEX(strToEncode As String) As String
' Usage: SoundEx = SoundEx(strToEncode)
' Purpose: Return six character D-M encoding of input name
' Inputs:
' strToEncode - An aphabetic string, usually representing a
person/place name
' Returns (Function):
' SoundEx - Six digit D-M code string
Dim intEncodeStrLen As Integer
Dim intDMArray() As Integer
Dim strToEncodelen As Integer
Dim i As Integer
Dim strEncodedString As String
Dim strLastCode As String
Dim DM_Map As DM_Structure
Call LoadDMTable ' Load the DM_Table - only loaded once
' Clean the incoming name. Upper case, nothing but letters
strToEncode = RemoveNotChars(Trim(UCase(strToEncode)),
"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
intEncodeStrLen = Len(strToEncode)
strEncodedString = ""
strLastCode = ""
'Potentially search the whole string for meaningful sounds. Stop
after 6 are found
For i = 1 To intEncodeStrLen
If Len(strEncodedString) >= 6 Then
SOUNDEX = Left(strEncodedString, 6)
Exit Function
End If
'Lookup in the DM_Table
Call FindDMMatch(i, strToEncode, DM_Map)
If DM_Map.DM_Matchlen = -1 Then
'Should not happen if table is complete and the input is clean
MsgBox "No Match found via DM lookup", vbOKOnly, "SoundEx
Error"
SOUNDEX = "000000"
Exit Function
End If
'Depending upon where the found sound is, encode from the DM_Map
If i = 1 Then
'Start of string, use the start value if valid
Call AddToEncodedString(strEncodedString, strLastCode,
DM_Map.DM_Start)
ElseIf DM_Map.DM_Matchlen = 2 And DM_Map.DM_Other = "-1" _
And InStr("AEIOUJY", Mid(strToEncode, i +
DM_Map.DM_Matchlen, 1)) <0 Then
'A vowel pair preceeding another vowel, use the vowel value if
valid
Call AddToEncodedString(strEncodedString, strLastCode,
DM_Map.DM_Vowel)
ElseIf DM_Map.DM_String = "H" _
And InStr("AEIOUJY", Mid(strToEncode, i +
DM_Map.DM_Matchlen, 1)) <0 Then
'An H preceeding another vowel, use the vowel value if valid
Call AddToEncodedString(strEncodedString, strLastCode,
DM_Map.DM_Vowel)
Else
'Use all other case value
Call AddToEncodedString(strEncodedString, strLastCode,
DM_Map.DM_Other)
End If
i = i + DM_Map.DM_Matchlen - 1 ' adjust indexed based upon
matched string length
Next i
SOUNDEX = Left(strEncodedString & "000000", 6) ' Ensure the string
is at least 6 long
End Function
Private Sub AddToEncodedString(strToAddTo As String, strLastCode As
String, strToAdd As String)
' Usage: Called by SoundEx Function
' Purpose: Append sound to encoded string if rules permit
' Inputs:
' strToAdd - Encoded sound value from DM_Map
' Returns (Modified Parameters):
' strToAddTo - Passed string containing sounds encoded so far
' strLastCode - Passed string containing last sound passed to this
routine
If strToAdd = strLastCode Then ' Drop duplicate sounds
Exit Sub
End If
strLastCode = strToAdd
If strToAdd = "-1" Then ' Value from table means ignore sound
Exit Sub
End If
strToAddTo = strToAddTo & strToAdd ' Append new sound
End Sub
Private Sub LoadDMTable()
' Usage: Called by SoundEx Function
' Purpose: Generate DM_Table values
' Inputs: None - values generated by routine
' Returns (Module level):
' DM_Table - Collection of items. The key of which is the
sound to encode
' and the item data is the DM_Map structure
values
' DM_TableLoaded - Boolean value to flag if the DM_Table has been
loaded.
If DM_TableLoaded = True Then
Exit Sub ' Already loaded
End If
Set DM_Table = New Collection
'Load each of the D-M sounds and their rules to the DM_Table
LoadDMElements ("AI,AJ,AY;0;1;-1")
LoadDMElements ("AU;0;7;-1")
LoadDMElements ("A;0;-1;-1")
LoadDMElements ("B;7;7;7")
LoadDMElements ("CHS;5;54;54")
LoadDMElements ("CH;5;5;5")
LoadDMElements ("CK;5;5;5")
LoadDMElements ("CZ,CS,CSZ,CZS;4;4;4")
LoadDMElements ("C;4;4;4")
LoadDMElements ("DRZ,DRS;4;4;4")
LoadDMElements ("DS,DSH,DSZ;4;4;4")
LoadDMElements ("DZ,DZH,DZS;4;4;4")
LoadDMElements ("D,DT;3;3;3")
LoadDMElements ("EI,EJ,EY;0;1;-1")
LoadDMElements ("EU;1;1;-1")
LoadDMElements ("E;0;-1;-1")
LoadDMElements ("FB;7;7;7")
LoadDMElements ("F;7;7;7")
LoadDMElements ("G;5;5;5")
LoadDMElements ("H;5;5;-1")
LoadDMElements ("IA,IE,IO,IU;1;-1;-1")
LoadDMElements ("I;0;-1;-1")
LoadDMElements ("J;1;-1;-1")
LoadDMElements ("KS;5;54;54")
LoadDMElements ("KH;5;5;5")
LoadDMElements ("K;5;5;5")
LoadDMElements ("L;8;8;8")
LoadDMElements ("MN;-1;66;66")
LoadDMElements ("M;6;6;6")
LoadDMElements ("NM;-1;66;66") ' not a duplicate! look carefully NM
vs MN
LoadDMElements ("N;6;6;6")
LoadDMElements ("OI,OJ,OY;0;1;-1")
LoadDMElements ("O;0;-1;-1")
LoadDMElements ("P,PF,PH;7;7;7")
LoadDMElements ("Q;5;5;5")
LoadDMElements ("RZ,RS;4;4;4")
LoadDMElements ("R;9;9;9")
LoadDMElements ("SCHTSCH,SCHTSH,SCHTCH;2;4;4")
LoadDMElements ("SCH;4;4;4")
LoadDMElements ("SHTCH,SHCH,SHTSH;2;4;4")
LoadDMElements ("SHT,SCHT,SCHD;2;43;43")
LoadDMElements ("SH;4;4;4")
LoadDMElements ("STCH,STSCH,SC;2;4;4")
LoadDMElements ("STRZ,STRS,STSH;2;4;4")
LoadDMElements ("ST;2;43;43")
LoadDMElements ("SZCZ,SZCS;2;4;4")
LoadDMElements ("SZT,SHD,SZD,SD;2;43;43")
LoadDMElements ("SZ;4;4;4")
LoadDMElements ("S;4;4;4")
LoadDMElements ("TCH,TTCH,TTSCH;4;4;4")
LoadDMElements ("TH;3;3;3")
LoadDMElements ("TRZ,TRS;4;4;4")
LoadDMElements ("TSCH,TSH;4;4;4")
LoadDMElements ("TS,TTS,TTSZ,TC;4;4;4")
LoadDMElements ("TZ,TTZ,TZS,TSZ;4;4;4")
LoadDMElements ("T;3;3;3")
LoadDMElements ("UI,UJ,UY;0;1;-1")
LoadDMElements ("U,UE;0;-1;-1")
LoadDMElements ("V;7;7;7")
LoadDMElements ("W;7;7;7")
LoadDMElements ("X;5;54;54")
LoadDMElements ("Y;1;-1;-1")
LoadDMElements ("ZDZ,ZDZH,ZHDZH;2;4;4")
LoadDMElements ("ZD,ZHD;2;43;43")
LoadDMElements ("ZH,ZS,ZSCH,ZSH;4;4;4")
LoadDMElements ("Z;4;4;4")
DM_TableLoaded = True ' Flag to not load again
End Sub
Private Sub LoadDMElements(strLoadString As String)
' Usage: Called by LoadDMTable Subroutine
' Purpose: Parse and Add DM_Table items for passed D-M sound
' Inputs:
' strLoadString
' Returns (Module level):
' DM_Table - Collection of items. The key of which is the
sound to encode
' and the item data is the DM_Map structure
values
Dim strItemPart As String
Dim strItemKey As String
Dim strKeyParts As String
Dim intPosition As Integer
'Separate the passed sound into its two parts
intPosition = InStr(1, strLoadString, ";")
If intPosition = 0 Then
MsgBox "invalid parameter to LoadDMElements: " & strLoadString
Exit Sub
End If
strKeyParts = Left(strLoadString, intPosition - 1) & ","
strItemPart = Mid(strLoadString, intPosition + 1)
'Add the Item Part (sound values) for each letter combination
Do While True
intPosition = InStr(1, strKeyParts, ",")
If intPosition = 0 Then
Exit Sub
End If
strItemKey = Left(strKeyParts, intPosition - 1)
strKeyParts = Mid(strKeyParts, intPosition + 1)
DM_Table.Add strItemPart, strItemKey
Loop ' Do While True
End Sub
Private Sub FindDMMatch(intStartMatchPos As Integer, strToTest As
String, dmLocalDM As DM_Structure)
' Usage: Called by SoundEx Function
' Purpose: Find largest matching DM_Table entry at the indicated
position
' of the input name string
' Populate the passed DM_Structure with data from the
DM_Table
' Inputs:
' DM_Table - Module level table of letter combinations and
sound values
' intStartMatchPos - Place in the passed string to start looking
for letter combinations
' strToTest - String passed containing the name to encode
' Returns (Modified Parameters):
' dmLocalDM - Structure to contain data from the DM_Table
Dim strMatchString As String
Dim i As Integer
Dim strItemData As String
Dim intPosition As Integer
For i = Min(7, Len(strToTest) - intStartMatchPos + 1) To 1 Step -1
strMatchString = Mid(strToTest, intStartMatchPos, i)
strItemData = ""
On Error Resume Next ' trap error that happens when item does
not match
strItemData = DM_Table.Item(strMatchString)
On Error GoTo 0 ' turn off error handling
If strItemData <"" Then ' Parse into DM Map structure
dmLocalDM.DM_String = strMatchString
dmLocalDM.DM_Matchlen = i
intPosition = InStr(1, strItemData, ";")
dmLocalDM.DM_Start = Left(strItemData, intPosition - 1)
strItemData = Mid(strItemData, intPosition + 1)
intPosition = InStr(1, strItemData, ";")
dmLocalDM.DM_Vowel = Left(strItemData, intPosition - 1)
strItemData = Mid(strItemData, intPosition + 1)
dmLocalDM.DM_Other = strItemData
Exit Sub ' DMMatched, so return
End If
Next i
dmLocalDM.DM_Matchlen = -1 ' Should not happen if table is well
formed!!
MsgBox "String not found in DM table." & vbCr & _
"String: '" & strToTest & "'" & vbCr & _
"at position: " & intStartMatchPos, vbOKOnly, "FindDMMatch
Error"
End Sub
Public Function Min(lNumber1 As Long, lNumber2 As Long) As Long
' Usage: Min = Min(lNumber1, lNumber2)
' Purpose: Return minimum of two input numbers
' Inputs:
' lNumber1, lNumber2 - Arbitrary numbers to compare
' Returns (Function):
' Min - Smaller of the two arbitrary numbers passed
If lNumber1 < lNumber2 Then
Min = lNumber1
Else
Min = lNumber2
End If
End Function
Public Function RemoveNotChars(strToCleanUp As String, strCharsToKeep
As String, Optional varStartPosition) As String
' Usage: RemoveNotChars = RemoveNotChars(strToCleanUp,
strCharsToKeep)
' Purpose: Remove characters in passed string that are not in the
keep string
' Inputs:
' strToCleanUp - String to clean up
' strCharsToKeep - String identifying all characters to keep
' Returns (Function):
' RemoveNotChars - String with all 'not keepable' characters
removed
Dim strBuildString As String
Dim strTestChar As String
Dim intStartPos As Integer
Dim i As Integer
RemoveNotChars = ""
' If no string to clean up, or keep string is empty, then an empty
string is returned
If Len(strToCleanUp) = 0 Or Len(strCharsToKeep) = 0 Then
Exit Function
End If
' Initialize return string in light of starting position
If Not IsMissing(varStartPosition) Then
If Not IsNumeric(varStartPosition) Or varStartPosition <= 0 Then
MsgBox "StartPosition must be numeric, greater than zero",
vbOKOnly, "RemoveChars Error"
Exit Function
End If
intStartPos = varStartPosition
strBuildString = Left(strToCleanUp, varStartPosition - 1)
Else
intStartPos = 1
strBuildString = ""
End If
For i = intStartPos To Len(strToCleanUp)
strTestChar = Mid(strToCleanUp, i, 1)
If InStr(strCharsToKeep, strTestChar) <0 Then
strBuildString = strBuildString & strTestChar ' add onto end
End If
Next i
RemoveNotChars = strBuildString
End Function
Public Function SOUNDEXARAB(Surname As String) As String
Dim Result As String, c As String * 1
Dim Location As Integer
Surname = UCase(Surname)
' remove Çá from the word
'************************************************* ***
If Left(Surname, 2) = "Çá" Then
Surname = Mid(Surname, 3)
End If
'************************************************* ***
' get the code for each character in the word
'************************************************* ***
Result = ""
For Location = 1 To Len(Surname)
Result = Result & Category(Mid(Surname, Location, 1))
Next Location
'************************************************* ***
'Remove the repeated character
'************************************************* ***
Location = 1
Do While Location < Len(Result)
If Mid(Result, Location, 1) = Mid(Result, Location + 1, 1)
Then
Result = Left(Result, Location) & Mid(Result, Location
+ 2)
Else
Location = Location + 1
End If
Loop
'************************************************* ***
'
'************************************************* ***
If Category(Left(Result, 1)) = Mid(Result, 2, 1) Then
Result = Left(Result, 1) & Mid(Result, 3)
End If
'************************************************* ***
'remove the unkown characeter
'************************************************* ****
For Location = 1 To Len(Result)
If Mid(Result, Location, 1) = "/" Then
Result = Left(Result, Location - 1) & Mid(Result,
Location + 1)
End If
Next
'************************************************* ****
'get the first 4 haracters
'************************************************* ****
Select Case Len(Result)
Case 4
SOUNDEXARAB = Result
Case Is < 4
SOUNDEXARAB = Result & String(4 - Len(Result), "0")
Case Is 4
SOUNDEXARAB = Left(Result, 4)
End Select
'************************************************* ****
End Function
Private Function Category(c) As String
Select Case True
Case c Like "[åÍ]"
Category = "1"
Case c Like "[ËÓÕ]"
Category = "2"
Case c Like "[ßÞ]"
Category = "3"
Case c Like "[ÐÒ]"
Category = "4"
Case c Like "[ÚÁ]"
Category = "5"
Case c Like "[ÖÙØ]"
Category = "6"
Case c Like "[áäÑ]"
Category = "7"
Case c Like "[ÛÎ]"
Category = "8"
Case c Like "[Ô]"
Category = "9"
Case c Like "[ÊÏÌ]"
Category = "A"
Case c Like "[í]"
Category = "B"
Case c Like "[ãÈ]"
Category = "C"
Case c Like "[Ý]"
Category = "D"
Case Else
Category = ""
End Select
End Function
Marco
Mike Gramelspacher schreef:
In article <11**********************@75g2000cwc.googlegroups. com>,
vo***********@gmail.com says...
So would I better use another soundex algo then?
Marco
It probably depends which algorithm works best for the names you need to
encode. I think I read that Soundex was patented around 1918 and was
used for the U.S. Census. The microfilms I saw were indexed by Soundex,
so obviously you need to use the same algorithm to encode the name for
which you are searching as was used to encode the microfilms. Soundex
is not the best for all names. Others have been developed over the
years. I also have code for the Metaphone Algorithm.
Regarding my question for obtaining the Deitch-Mokotoff code, you
nevered answered. Where can a person download or buy an implementation
of Deitch-Mokotoff?
Mike Gramelspacher