Hi,
I've proto-typed a solution to match customer records from varying sources. There may be up to 1million records in the customers table so I need to progress this into a set-based solution for efficiency (if that’s possible). Can any one clear the haze for me?
The select SQL below is just one of 5 queries that will run and to try and match the customer records. The idea is that when certain records are matched they will be inserted into the match table with a common id/BureauID that links them together. As new customer records arrive in the database they may match with an existing record that already has a BureauID so I need to examine the match records to extract the ID if it exists.
I had initially proto-typed a set-based query using ROW_NUMBER to assign a number to those records that satisfied the criteria* - with the idea of moving down the resultset and inserting into the match table on each break in the number, but the introduction of synonyms on Surname, Firstname and SecondName threw out the PARTITION BY clause….and now I'm stuck.
Any ideas greatly appreciated.
* Matching criteria
Surname (equal or synonym) AND
Firstname (equal or synonym) AND
SecondName (equal or synonym or Initial) AND
DateOfBirth (minimum 6 out of 8 - looking for transposed digits) AND
Gender (equal) AND
Streetname (equal to first 6 characters) OR
PostCode (equal)
SELECT TOP (100) PERCENT t.SubscriberID, t.CustomerID, t.Surname, t.FirstName, t.SecondName, t.Initial, t.SecondName_RestOf, t.DateOfBirth, t.GenderID,
t.PostCode, t.Street6, dbo.tblMatch.BureauID
FROM dbo.tblCustomer AS t LEFT OUTER JOIN
dbo.tblMatch ON t.CustomerID = dbo.tblMatch.CustomerID AND t.SubscriberID = dbo.tblMatch.SubscriberID
WHERE (t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_4
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_5
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_RestOf = '') AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_RestOf = '') AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_RestOf = '') AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_RestOf = '') AND (dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_RestOf = t.SecondName_RestOf) AND
(dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surname AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Forename AS NameX_Forename_2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_RestOf = t.SecondName_RestOf) AND
(dbo.fnCheckDOB(t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode)
CREATE TABLE [dbo].[tblCustomer](
[SubscriberID] [nvarchar](4) NOT NULL,
[CustomerID] [int] IDENTITY(1,1) NOT NULL,
[Surname] [varchar](35) NOT NULL,
[FirstName] [varchar](35) NOT NULL,
[SecondName] [varchar](35) NULL,
[SecondName_RestOf] [varchar](35) NULL,
[Initial] [char](1) NULL,
[GenderID] [char](1) NOT NULL,
[DateOfBirth] [int] NOT NULL,
[Employer] [nvarchar](50) NULL,
[Occupation] [nvarchar](50) NULL,
[Street6] [varchar](6) NULL,
[PostCode] [int] NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[tblMatch](
[BureauID] [bigint] NOT NULL,
[SubscriberID] [int] NOT NULL,
[CustomerID] [int] NOT NULL,
[Date] [datetime] NOT NULL,
[UserID] [nvarchar](10) NOT NULL,
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Surname](
[Name] [varchar](50) NOT NULL,
[NameMatch] [varchar](50) NOT NULL,
[Score] [int] NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Forename](
[Name] [varchar](50) NULL,
[NameMatch] [varchar](50) NULL,
[Score] [int] NULL
) ON [PRIMARY]