Example output
Here's a robust implementation of the function in VB.NET:
Imports System.Text.RegularExpressions
Module AddressComparison

    ''' <summary>
    ''' Scores how similar two postal addresses are, ignoring case, punctuation,
    ''' word order and common street-type abbreviations.
    ''' </summary>
    ''' <param name="address1">First address to compare.</param>
    ''' <param name="address2">Second address to compare.</param>
    ''' <param name="additionalAbbreviations">
    ''' Optional extra abbreviation-to-expansion pairs. They are merged into the
    ''' built-in table; an entry whose key equals a built-in key (ignoring case)
    ''' replaces the built-in expansion. Entries with a blank key or a
    ''' <c>Nothing</c> value are ignored.
    ''' </param>
    ''' <returns>
    ''' The size of the intersection of the two word sets divided by the size of
    ''' their union, as a percentage rounded to two decimals. Returns 0.0 when
    ''' either address is Nothing, blank, or contains no letters or digits.
    ''' </returns>
    Function CompareAddresses(address1 As String, address2 As String, Optional additionalAbbreviations As Dictionary(Of String, String) = Nothing) As Double
        If String.IsNullOrWhiteSpace(address1) OrElse String.IsNullOrWhiteSpace(address2) Then
            Return 0.0
        End If

        ' Default abbreviation dictionary (keys compared case-insensitively).
        Dim abbreviations As New Dictionary(Of String, String)(StringComparer.OrdinalIgnoreCase) From {
            {"ST", "Street"},
            {"BLVD", "Boulevard"},
            {"CT", "Court"},
            {"AV", "Avenue"},
            {"RD", "Road"},
            {"DR", "Drive"}
        }

        ' Merge caller-supplied abbreviations; same-key entries override the defaults.
        If additionalAbbreviations IsNot Nothing Then
            For Each kvp In additionalAbbreviations
                ' A blank key would match at every word boundary and a Nothing
                ' value cannot be expanded, so such entries are skipped.
                If String.IsNullOrWhiteSpace(kvp.Key) OrElse kvp.Value Is Nothing Then
                    Continue For
                End If
                abbreviations(kvp.Key) = kvp.Value
            Next
        End If

        ' Normalize, then reduce each address to its set of distinct words.
        Dim words1 As HashSet(Of String) = Tokenize(NormalizeAddress(address1, abbreviations))
        Dim words2 As HashSet(Of String) = Tokenize(NormalizeAddress(address2, abbreviations))

        ' An address made only of punctuation has no words left to compare;
        ' treating it as a match would report 100% for unrelated inputs.
        If words1.Count = 0 OrElse words2.Count = 0 Then
            Return 0.0
        End If

        Dim intersectionCount As Integer = words1.Intersect(words2).Count()
        Dim unionCount As Integer = words1.Union(words2).Count()

        ' "/" on Integers yields a Double in VB.NET, so no truncation occurs here.
        Dim matchPercentage As Double = If(unionCount = 0, 0.0, (intersectionCount / unionCount) * 100.0)
        Return Math.Round(matchPercentage, 2)
    End Function

    ''' <summary>
    ''' Strips every character that is not an ASCII letter, digit or whitespace,
    ''' lowercases the result, expands whole-word abbreviations and collapses
    ''' runs of whitespace to single spaces.
    ''' </summary>
    ''' <param name="address">Raw address text.</param>
    ''' <param name="abbreviations">Abbreviation-to-expansion table to apply.</param>
    ''' <returns>The normalized address; may be empty.</returns>
    Private Function NormalizeAddress(address As String, abbreviations As Dictionary(Of String, String)) As String
        ' Remove special characters.
        Dim cleanedAddress As String = Regex.Replace(address, "[^a-zA-Z0-9\s]", "")

        ' Culture-invariant lowercasing: ToLower() under e.g. a Turkish culture
        ' maps "I" to a dotless i, which would break word matching.
        cleanedAddress = cleanedAddress.ToLowerInvariant()

        ' Replace abbreviations that appear as whole words.
        For Each kvp In abbreviations
            If String.IsNullOrWhiteSpace(kvp.Key) OrElse kvp.Value Is Nothing Then
                Continue For
            End If

            Dim expansion As String = kvp.Value.ToLowerInvariant()
            Dim pattern As String = $"\b{Regex.Escape(kvp.Key.ToLowerInvariant())}\b"

            ' A MatchEvaluator inserts the expansion literally, so "$" in a
            ' custom value is not interpreted as a regex substitution token.
            cleanedAddress = Regex.Replace(cleanedAddress, pattern, Function(m As Match) expansion, RegexOptions.IgnoreCase)
        Next

        ' Trim extra spaces.
        cleanedAddress = Regex.Replace(cleanedAddress, "\s+", " ").Trim()
        Return cleanedAddress
    End Function

    ''' <summary>
    ''' Splits a normalized address on spaces into a case-insensitive set of
    ''' words, dropping empty entries so an empty address yields an empty set.
    ''' </summary>
    Private Function Tokenize(normalizedAddress As String) As HashSet(Of String)
        Dim parts As String() = normalizedAddress.Split(New Char() {" "c}, StringSplitOptions.RemoveEmptyEntries)
        Return New HashSet(Of String)(parts, StringComparer.OrdinalIgnoreCase)
    End Function

End Module
Explanation:
1. Normalization:
Special characters are removed using a regular expression.
The string is converted to lowercase to ensure case-insensitive comparison.
Common abbreviations are replaced with their full forms using the abbreviations dictionary.
2. Custom Abbreviations:
An optional parameter allows users to provide additional abbreviations. These are merged with the default abbreviations.
3. Word Comparison:
Both addresses are split into words, and order is ignored by using HashSet. The intersection and union of words determine the matching percentage.
4. Edge Cases:
Returns 0.0 when either address is Nothing, empty, or whitespace-only.
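Supports additional abbreviations: they are merged into the defaults, and a custom entry whose key matches a default (ignoring case) replaces that default's expansion.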
5. Percentage Match:
Uses the formula (intersection / union) * 100 to calculate the matching percentage.
Usage Example:
Sub Main()
    ' Caller-supplied abbreviation passed alongside the built-in table.
    Dim extraAbbreviations As New Dictionary(Of String, String)
    extraAbbreviations.Add("LN", "Lane")

    ' Two spellings of the same address: different word order, abbreviated
    ' street type and trailing punctuation.
    Dim firstAddress As String = "123 Main St."
    Dim secondAddress As String = "Main Street 123"

    Dim matchPercentage As Double = CompareAddresses(firstAddress, secondAddress, extraAbbreviations)
    Console.WriteLine($"Match Percentage: {matchPercentage}%")
End Sub
Output:
Match Percentage: 100%