Simple "Changed Rows" SSIS Conditional Split Expression Generator
When building a dimension load data flow for SSIS ETL, I want to detect changed rows using a Conditional Split Transformation.
To demonstrate, I built a generic package:
The Lookup outer joins to the destination table and returns the surrogate key. If this returns a NULL, the record is new. But what if the row is there and there's no good way to tell it's been modified? You have to compare each and every column in the source and destination rows.
I find it painful to type all that out, so I have created this script which works well if:
- The column names in the source match the column names in the destination.
- A consistent aliasing convention is used for the Destination columns (here, I have aliased all my destination columns by adding the prefix "DW_" to each column name).
I just pop it into SSMS, execute it, copy the results to the clipboard...
... and then paste them into the Changed Rows condition of my Conditional Split transformation:
Voila! With some modification, it could do even more.
Here's the script:
declare @SourceServer varchar(255)
declare @DestinationServer varchar(255)
declare @SourceDB varchar(255)
declare @DestinationDB varchar(255)
declare @SourceSchema varchar(255)
declare @DestinationSchema varchar(255)
declare @SourceTable varchar(255)
declare @DestinationTable varchar(255)
declare @Sql varchar(8000)
declare @Output varchar(8000)
declare @DestinationPrefix varchar(25)
set @SourceServer=''
set @DestinationServer=''
set @SourceDB = 'Stage'
set @DestinationDB = 'DataWarehouse'
set @SourceSchema = 'Stg'
set @DestinationSchema = 'DW'
set @SourceTable = 'Table1'
set @DestinationTable = 'Dimension1'
set @DestinationPrefix = 'DW_'
set @Sql = 'USE tempdb;
if exists(select *
from sys.objects
where name
like ''##tmpSourceFields%'')
drop table ##tmpSourceFields;
use ' + @SourceDB +
';select c.name
into ##tmpSourceFields
from sys.columns c
inner join sys.tables t on t.object_id
= c.object_id
inner join sys.schemas s on s.schema_id
= t.schema_id
where t.name = ''' + @SourceTable +
''' and s.name = ''' + @SourceSchema
+ ''';'
--print @Sql
Exec(@Sql);
set @Sql = '';
set @Sql = 'USE tempdb;
if exists(select *
from sys.objects
where name
like ''##tmpDestinationFields%'')
drop table ##tmpDestinationFields;
use ' + @DestinationDB +
';select c.name
into ##tmpDestinationFields
from sys.columns c
inner join sys.tables t on t.object_id
= c.object_id
inner join sys.schemas s on s.schema_id
= t.schema_id
where t.name = ''' + @DestinationTable
+
''' and s.name = ''' +
@DestinationSchema + ''';'
--print @Sql
Exec(@Sql);
set @Output = ''
select @Output = @Output +
'(' + s.name + '!=' + @DestinationPrefix + d.name + ') || '
from ##tmpSourceFields s
inner join ##tmpDestinationFields d on
d.name = s.name;
set @Output = @Output + 'False';
select @Output; |
:{> Andy
Technorati Tags: SSIS Expression Changed Rows T-SQL script