SQL
Parsing names is an age-old process of taking a full name (i.e. “Mr. John C. Smith, Jr.”) and extracting the first name, last name, middle name, etc.
The following SQL Server function handles 99% of the situations, including prefixes, suffixes and hyphenated names. It will get tripped up with two first names, but that is extremely difficult to trap for.
To call this function, use use the following syntax:
select dbo.PARSE_NAME_UDF(full_name,’F') as FirstName, dbo.PARSE_NAME_UDF(full_name,’L') as LastName from myTable
create function [dbo].[PARSE_NAME_UDF](@NameString varchar(100), @NameFormat varchar(20))
returns varchar(100) as
begin–PARSE_NAME_UDF decodes a NameString into its component parts and returns it in a requested format.
–@NameString is the raw value to be parsed.
–@NameFormat is a string that defines the output format. Each letter in the string represents
–a component of the name in the order that it is to be returned.
– [H] = Full honorific
– [h] = Abbreviated honorific
– [F] = First name
– [f] = First initial
– [M] = Middle name
– [m] = Middle initial
– [L] = Last name
– [l] = Last initial
– [S] = Full suffix
– [s] = Abbreviated suffix
– [.] = Period
– [,] = Comma
– [ ] = Space
–Test variables
– declare @NameString varchar(50)
– declare @NameFormat varchar(20)
– set @NameFormat = ‘F M L S’
– set @NameString = ‘Melvin Carter, Jr’Declare @Honorific varchar(20)
Declare @FirstName varchar(20)
Declare @MiddleName varchar(30)
Declare @LastName varchar(30)
Declare @Suffix varchar(20)
Declare @TempString varchar(100)
Declare @TempString2 varchar(100)
Declare @IgnorePeriod char(1)–Prepare the string
–Make sure each period is followed by a space character.
set @NameString = rtrim(ltrim(replace(@NameString, ‘.’, ‘. ‘)))–Remove disallowed characters
declare @PatternString varchar(50)
set @PatternString = ‘%[^a-z ,-]%’
while patindex(@PatternString, @NameString) > 0 set @NameString = stuff(@NameString, patindex(@PatternString, @NameString), 1, ‘ ‘)–Remove telephone ext
set @NameString = ltrim(rtrim(replace(‘ ‘ + @NameString + ‘ ‘, ‘ EXT ‘, ‘ ‘)))–Eliminate double-spaces.
while charindex(‘ ‘, @NameString) > 0 set @NameString = replace(@NameString, ‘ ‘, ‘ ‘)–Eliminate periods
while charindex(‘.’, @NameString) > 0 set @NameString = replace(@NameString, ‘.’, ”)–Remove spaces around hyphenated names
set @NameString = replace(replace(@NameString, ‘- ‘, ‘-’), ‘ -’, ‘-’)–Remove commas before suffixes
set @NameString = replace(@NameString, ‘, Jr’, ‘ Jr’)
set @NameString = replace(@NameString, ‘, Sr’, ‘ Sr’)
set @NameString = replace(@NameString, ‘, II’, ‘ II’)
set @NameString = replace(@NameString, ‘, III’, ‘ III’)–Temporarily join multi-word surnames
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ Del ‘, ‘ Del~’))
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ Van ‘, ‘ Van~’))
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ Von ‘, ‘ Von~’))
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ Mc ‘, ‘ Mc~’))
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ Mac ‘, ‘ Mac~’))
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ La ‘, ‘ La~’)) –Must be checked before “De”, to handle “De La [Surname]“s.
set @NameString = ltrim(replace(‘ ‘ + @NameString, ‘ De ‘, ‘ De~’))–If the lastname is listed first, strip it off.
set @TempString = rtrim(left(@NameString, charindex(‘ ‘, @NameString)))
–Below logic now handled by joining multi-word surnames above.
–if @TempString in (‘VAN’, ‘VON’, ‘MC’, ‘Mac’, ‘DE’) set @TempString = rtrim(left(@NameString, charindex(‘ ‘, @NameString, len(@TempString)+2)))–Search for suffixes trailing the LastName
set @TempString2 = ltrim(right(@NameString, len(@NameString) – len(@TempString)))
set @TempString2 = rtrim(left(@TempString2, charindex(‘ ‘, @TempString2)))if right(@TempString2, 1) = ‘,’
begin
set @Suffix = left(@TempString2, len(@TempString2)-1)
set @LastName = left(@TempString, len(@TempString))
end
if right(@TempString, 1) = ‘,’ set @LastName = left(@TempString, len(@TempString)-1)
if len(@LastName) > 0 set @NameString = ltrim(right(@NameString, len(@NameString) – len(@TempString)))
if len(@Suffix) > 0 set @NameString = ltrim(right(@NameString, len(@NameString) – len(@TempString2)))–Get rid of any remaining commas
while charindex(‘,’, @NameString) > 0 set @NameString = replace(@NameString, ‘,’, ”)
–Get Honorific and strip it out of the string
set @TempString = rtrim(left(@NameString, charindex(‘ ‘, @NameString + ‘ ‘)))
if @TempString in (
‘Admiral’, ‘Adm’,
‘Captain’, ‘Cpt’, ‘Capt’,
‘Commander’, ‘Cmd’,
‘Corporal’, ‘Cpl’,
‘Doctor’, ‘Dr’,
‘Father’, ‘Fr’,
‘General’, ‘Gen’,
‘Governor’, ‘Gov’,
‘Honorable’, ‘Hon’,
‘Lieutenant’, ‘Lt’,
‘Madam’, ‘Mdm’,
‘Madame’, ‘Mme’,
‘Mademoiselle’, ‘Mlle’,
‘Major’, ‘Maj’,
‘Miss’, ‘Ms’,
‘Mr’,
‘Mrs’,
‘President’, ‘Pres’,
‘Private’, ‘Pvt’,
‘Professor’, ‘Prof’,
‘Rabbi’,
‘Reverend’, ‘Rev’,
‘Senior’, ‘Sr’,
‘Seniora’, ‘Sra’,
‘Seniorita’, ‘Srta’,
‘Sergeant’, ‘Sgt’,
‘Sir’,
‘Sister’) set @Honorific = @TempString
if len(@Honorific) > 0 set @NameString = ltrim(right(@NameString, len(@NameString) – len(@TempString)))
–Get Suffix and strip it out of the string
if @Suffix is null
begin
set @TempString = ltrim(right(@NameString, charindex(‘ ‘, Reverse(@NameString) + ‘ ‘)))
if @TempString in (
‘Attorney’, ‘Att’, ‘Atty’,
‘BA’,
‘BS’,
‘CPA’,
‘DDS’,
‘DVM’,
‘Esquire’, ‘Esq’,
‘II’,
‘III’,
‘IV’,
‘Junior’, ‘Jr’,
‘MBA’,
‘MD’,
‘OD’,
‘PHD’,
‘Senior’, ‘Sr’) set @Suffix = @TempString
if len(@Suffix) > 0 set @NameString = rtrim(left(@NameString, len(@NameString) – len(@TempString)))
endif @LastName is null
begin
–Get LastName and strip it out of the string
set @LastName = ltrim(right(@NameString, charindex(‘ ‘, Reverse(@NameString) + ‘ ‘)))
set @NameString = rtrim(left(@NameString, len(@NameString) – len(@LastName)))
–Below logic now handled by joining multi-word surnames above.
/* –Check to see if the last name has two parts
set @TempString = ltrim(right(@NameString, charindex(‘ ‘, Reverse(@NameString) + ‘ ‘)))
if @TempString in (‘VAN’, ‘VON’, ‘MC’, ‘Mac’, ‘DE’)
begin
set @LastName = @TempString + ‘ ‘ + @LastName
set @NameString = rtrim(left(@NameString, len(@NameString) – len(@TempString)))
end
*/
end
–Get FirstName and strip it out of the string
set @FirstName = rtrim(left(@NameString, charindex(‘ ‘, @NameString + ‘ ‘)))
set @NameString = ltrim(right(@NameString, len(@NameString) – len(@FirstName)))
–Anything remaining is MiddleName
set @MiddleName = @NameString
–Create the output string
set @TempString = ”
while len(@NameFormat) > 0
begin
if @IgnorePeriod = ‘F’ or left(@NameFormat, 1) <> ‘.’
begin
set @IgnorePeriod = ‘F’
set @TempString = @TempString +
case ascii(left(@NameFormat, 1))
when ’32′ then case right(@TempString, 1)
when ‘ ‘ then ”
else ‘ ‘
end
when ’44′ then case right(@TempString, 1)
when ‘ ‘ then ”
else ‘,’
end
when ’46′ then case right(@TempString, 1)
when ‘ ‘ then ”
else ‘.’
end
when ’70′ then isnull(@FirstName, ”)
when ’72′ then case @Honorific
when ‘Adm’ then ‘Admiral’
when ‘Capt’ then ‘Captain’
when ‘Cmd’ then ‘Commander’
when ‘Cpl’ then ‘Corporal’
when ‘Cpt’ then ‘Captain’
when ‘Dr’ then ‘Doctor’
when ‘Fr’ then ‘Father’
when ‘Gen’ then ‘General’
when ‘Gov’ then ‘Governor’
when ‘Hon’ then ‘Honorable’
when ‘Lt’ then ‘Lieutenant’
when ‘Maj’ then ‘Major’
when ‘Mdm’ then ‘Madam’
when ‘Mlle’ then ‘Mademoiselle’
when ‘Mme’ then ‘Madame’
when ‘Ms’ then ‘Miss’
when ‘Pres’ then ‘President’
when ‘Prof’ then ‘Professor’
when ‘Pvt’ then ‘Private’
when ‘Sr’ then ‘Senior’
when ‘Sra’ then ‘Seniora’
when ‘Srta’ then ‘Seniorita’
when ‘Rev’ then ‘Reverend’
when ‘Sgt’ then ‘Sergeant’
else isnull(@Honorific, ”)
end
when ’76′ then isnull(@LastName, ”)
when ’77′ then isnull(@MiddleName, ”)
when ’83′ then case @Suffix
when ‘Att’ then ‘Attorney’
when ‘Atty’ then ‘Attorney’
when ‘Esq’ then ‘Esquire’
when ‘Jr’ then ‘Junior’
when ‘Sr’ then ‘Senior’
else isnull(@Suffix, ”)
end
when ’102′ then isnull(left(@FirstName, 1), ”)
when ’104′ then case @Honorific
when ‘Admiral’ then ‘Adm’
when ‘Captain’ then ‘Capt’
when ‘Commander’ then ‘Cmd’
when ‘Corporal’ then ‘Cpl’
when ‘Doctor’ then ‘Dr’
when ‘Father’ then ‘Fr’
when ‘General’ then ‘Gen’
when ‘Governor’ then ‘Gov’
when ‘Honorable’ then ‘Hon’
when ‘Lieutenant’ then ‘Lt’
when ‘Madam’ then ‘Mdm’
when ‘Madame’ then ‘Mme’
when ‘Mademoiselle’ then ‘Mlle’
when ‘Major’ then ‘Maj’
when ‘Miss’ then ‘Ms’
when ‘President’ then ‘Pres’
when ‘Private’ then ‘Pvt’
when ‘Professor’ then ‘Prof’
when ‘Reverend’ then ‘Rev’
when ‘Senior’ then ‘Sr’
when ‘Seniora’ then ‘Sra’
when ‘Seniorita’ then ‘Srta’
when ‘Sergeant’ then ‘Sgt’
else isnull(@Honorific, ”)
end
when ’108′ then isnull(left(@LastName, 1), ”)
when ’109′ then isnull(left(@MiddleName, 1), ”)
when ’115′ then case @Suffix
when ‘Attorney’ then ‘Atty’
when ‘Esquire’ then ‘Esq’
when ‘Junior’ then ‘Jr’
when ‘Senior’ then ‘Sr’
else isnull(@Suffix, ”)
end
else ”
end
–The following honorifics and suffixes have no further abbreviations, and so should not be followed by a period:
if ((ascii(left(@NameFormat, 1)) = 72 and @Honorific in (‘Rabbi’, ‘Sister’))
or (ascii(left(@NameFormat, 1)) = 115 and @Suffix in (‘BA’, ‘BS’, ‘DDS’, ‘DVM’, ‘II’, ‘III’, ‘IV’, ‘V’, ‘MBA’, ‘MD’, ‘PHD’)))
set @IgnorePeriod = ‘T’
end
set @NameFormat = right(@NameFormat, len(@NameFormat) – 1)
end
–select replace(@TempString, ‘~’, ‘ ‘)
Return replace(@TempString, ‘~’, ‘ ‘)
end
Cursors are frowned upon, but I have found times when performance of update query sets are too slow. I believe this happens when the recovery model is full.
The T-SQL below creates a cursor on a table and then steps through each record and updates several fields. This ended up taking about 4 minutes on an 18k record database, which is small, but would take 20 minutes just to update 200 records in the same table with an update query.
ALTER procedure [dbo].[sp_updateDemoData] as
declare @mallId nchar(6)
declare @RowNum int
declare @showme intset @showme = 0
declare malllist cursor for
select mallcode from mall_exp_trend_2010 where pop_20 > 0
OPEN malllist
FETCH NEXT FROM malllist
INTO @mallId
set @RowNum = 0
WHILE @@FETCH_STATUS = 0
BEGIN
set @RowNum = @RowNum + 1
set @showme = @showme + 1Update e set
e.NUMBER_OF_HOUSEHOLDS_10 = ns.HH_10, e.NUMBER_OF_HOUSEHOLDS_20 = ns.HH_20, e.NUMBER_OF_HOUSEHOLDS_5 = ns.HH_5,
e.AVREGE_HOUSEHOLD_INC_10 = ns.INC_10, e.AVREGE_HOUSEHOLD_INC_20 = ns.INC_20, e.AVREGE_HOUSEHOLD_INC_5 = ns.INC_5,
e.MEDIAN_AGE_10 = ns.AGE_10, e.MEDIAN_AGE_20 = ns.AGE_20, e.MEDIAN_AGE_5 = ns.AGE_5,
e.POPULATION_10 = ns.POP_10, e.POPULATION_20 = ns.POP_20, POPULATION_5 = ns.POP_5
from mallsysNetT1.dbo._mall e, mall_exp_trend_2010 ns
where ns.MALLCODE = e.MALLCODE and e.MALLCODE = @mallId;if @showme = 100
BEGIN
print ‘processed records: ‘ + cast(@RowNum as char(6)) — + ‘ ‘ + @mallId
set @showme = 0
ENDFETCH NEXT FROM malllist
INTO @mallId
END
CLOSE malllist
DEALLOCATE malllist
It would be nice for SQL Server to copy rows like Excel. It is a pain to include all the fields. Below is dynamic SQL to copy the entire row based on the auto id number:
[code]
declare @sql varchar(8000)
DECLARE @TableName varchar(100)
SELECT @tableName = 'tblMyTable' --put urTableName Here
SELECT @SQL = COALESCE(@sql+',','')+ COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tableName AND COLUMNPROPERTY(OBJECT_ID(@tableName),COLUMN_NAME, 'IsIdentity') = 0
EXEC( 'INSERT INTO '+@tableName+' SELECT '+@SQL+' FROM '+@TableName +' WHERE auto_id = theIDNum') -- put auto id fieldname and id num
[/code]
Floats field types have their perils. It is the field type that often defaults on an import from Excel, where the field type has to be guessed.
If you try to change the field type to VarChar you get something like “4.00309e+006″
You can first convert from float to bigint and then to VarChar, or create a new VarChar field and use the following SQL. This is for SQL Server.
update tblMyTable set VarCharField = cast(CONVERT(bigint, floatField) as varchar(10))



