create table jobs (
job_id varchar2(100),
job_title varchar2(35),
min_salary number(6,0),
max_salary number(6,0)
)
Table created.
create table jobs_stage (
job_id varchar2(100),
job_title varchar2(35),
min_salary number(6,0),
max_salary number(6,0)
)
Table created.
create table employees as
select * from hr.employees
where job_id <> 'SA_MAN'
Table created.
begin
for i in 1 .. 3 loop
insert /*+ append */into employees
select * from employees;
commit;
end loop;
insert into jobs values ( 'AD_PRES','President', 20000, 40000 );
insert into jobs values ( 'IT_PROG', 'Programmer', 4000, 10000 );
insert into jobs values ( 'AC_MGR', 'Accounting Manager', 8200, 16000);
insert into jobs values ( 'FI_ACCOUNT', 'Accountant', 4200, 9000);
insert into jobs values ( 'SA_MAN', 'Sales Manager', 10000, 20000);
insert into jobs_stage values ( 'AD_PRES','President', 20000, 40000 );
insert into jobs_stage values ( 'IT_PROG', 'Programmer', 4000, 15000 );
insert into jobs_stage values ( 'AC_MGR', 'Accounting Manager', 8200, 16000);
insert into jobs_stage values ( 'SA_MAN', 'Sales Manager', 10000, 20000);
insert into jobs_stage values ( 'SA_REP', 'Sales Representative', 6000, 12000);
commit;
end;
Statement processed.
Get the differences with set operations
select * from (
select * from jobs
minus
select * from jobs_stage
) union all (
select * from jobs_stage
minus
select * from jobs
)
JOB_ID | JOB_TITLE | MIN_SALARY | MAX_SALARY | FI_ACCOUNT | Accountant | 4200 | 9000 | IT_PROG | Programmer | 4000 | 10000 | IT_PROG | Programmer | 4000 | 15000 | SA_REP | Sales Representative | 6000 | 12000 |
---|
Natural full join
with t1 as (
select 't1' as t1, t.* from jobs t
), t2 as (
select 't2' as t2, t.* from jobs_stage t
)
select *
from t1 t1
natural full join t2 t2
/* Filter to show only the differences */
where t1 is null or t2 is null
JOB_ID | JOB_TITLE | MIN_SALARY | MAX_SALARY | T1 | T2 | IT_PROG | Programmer | 4000 | 15000 | - | t2 | SA_REP | Sales Representative | 6000 | 12000 | - | t2 | IT_PROG | Programmer | 4000 | 10000 | t1 | - | FI_ACCOUNT | Accountant | 4200 | 9000 | t1 | - |
---|
JSON full join
with t1_json as (
select 't1' t, job_id, json_object (*) jdoc_t1 from jobs
), t2_json as (
select 't2' t, job_id, json_object (*) jdoc_t2 from jobs_stage
)
select
coalesce ( m.job_id, s.job_id ) job_id,
coalesce ( m.t, s.t ) src,
coalesce ( m.jdoc_t1, s.jdoc_t2 ) jdoc
from t1_json m
full join t2_json s
/* include PK in join for performance */
on m.job_id = s.job_id
and json_equal ( jdoc_t1, jdoc_t2 )
/* return the unmatched rows from each table */
where m.job_id is null or s.job_id is null
order by job_id
JOB_ID | SRC | JDOC | FI_ACCOUNT | t1 | {"JOB_ID":"FI_ACCOUNT","JOB_TITLE":"Accountant","MIN_SALARY":4200,"MAX_SALARY":9000} | IT_PROG | t1 | {"JOB_ID":"IT_PROG","JOB_TITLE":"Programmer","MIN_SALARY":4000,"MAX_SALARY":10000} | IT_PROG | t2 | {"JOB_ID":"IT_PROG","JOB_TITLE":"Programmer","MIN_SALARY":4000,"MAX_SALARY":15000} | SA_REP | t2 | {"JOB_ID":"SA_REP","JOB_TITLE":"Sales Representative","MIN_SALARY":6000,"MAX_SALARY":12000} |
---|
Group by row comparison
with rws as (
select t.*, 1 t1, 0 t2 from jobs t
union all
select t.*, 0 t1, 1 t2 from jobs_stage t
)
select job_id, job_title, min_salary, max_salary,
sum ( t1 ), sum ( t2 )
from rws
group by job_id, job_title, min_salary, max_salary
/* Filter to only show mismatches */
having sum ( t1 ) <> sum ( t2 )
order by job_id
JOB_ID | JOB_TITLE | MIN_SALARY | MAX_SALARY | SUM(T1) | SUM(T2) | FI_ACCOUNT | Accountant | 4200 | 9000 | 1 | 0 | IT_PROG | Programmer | 4000 | 10000 | 1 | 0 | IT_PROG | Programmer | 4000 | 15000 | 0 | 1 | SA_REP | Sales Representative | 6000 | 12000 | 0 | 1 |
---|
Row comparison table macro
create or replace function compare_tables (
t1 dbms_tf.table_t, t2 dbms_tf.table_t, comparison_columns dbms_tf.columns_t
) return clob sql_macro as
stmt clob;
column_list clob;
begin
for col in 1 .. comparison_columns.count loop
column_list := column_list || comparison_columns ( col ) || ',';
end loop;
column_list := rtrim ( column_list, ',' );
stmt := q'!
select ##COLUMNS##,
case
when sum ( t1 ) > sum ( t2 ) then 't1'
else 't2'
end as source_table
from (
select t1.*, 1 t1, 0 t2 from t1
union all
select t2.*, 0 t1, 1 t2 from t2
)
group by ##COLUMNS##
having sum ( t1 ) <> sum ( t2 ) !';
stmt := replace ( stmt, '##COLUMNS##', column_list );
return stmt;
end compare_tables;
Function created.
Using the SQL macro
select * from compare_tables (
jobs, jobs_stage,
columns ( job_id, job_title, min_salary, max_salary )
)
JOB_ID | JOB_TITLE | MIN_SALARY | MAX_SALARY | SOURCE_TABLE | FI_ACCOUNT | Accountant | 4200 | 9000 | t1 | SA_REP | Sales Representative | 6000 | 12000 | t2 | IT_PROG | Programmer | 4000 | 15000 | t2 | IT_PROG | Programmer | 4000 | 10000 | t1 |
---|
create or replace view original_query as
select j.*,
( select min ( min_salary ) from jobs ) min_min,
( select max ( max_salary ) from jobs ) max_max,
( select avg ( max_salary - min_salary ) from jobs ) avg_range,
( select count (*) from employees e where e.job_id = j.job_id ) emp#
from jobs j
View created.
create or replace view new_query as
select j.*,
min ( min_salary ) over () min_min,
max ( max_salary ) over () max_max,
avg ( max_salary - min_salary ) over () avg_range,
count (*) emp#
from jobs j
left join employees e
on j.job_id = e.job_id
group by j.job_id, j.job_title, j.min_salary, j.max_salary
View created.
Comparing queries
select * from compare_tables (
original_query, new_query,
columns ( job_id, min_min, max_max, avg_range, emp# )
)
JOB_ID | MIN_MIN | MAX_MAX | AVG_RANGE | EMP# | SOURCE_TABLE | SA_MAN | 4000 | 40000 | 9720 | 1 | t2 | SA_MAN | 4000 | 40000 | 9720 | 0 | t1 |
---|
Column comaprison
with t1_cols as (
select * from (
select job_id, job_title,
to_char ( min_salary ) min_salary,
to_char ( max_salary ) max_salary
from jobs
)
unpivot (
/* omit PK */
val for col in ( job_title, min_salary, max_salary )
)
), t2_cols as (
select * from (
select job_id, job_title,
to_char ( min_salary ) min_salary,
to_char ( max_salary ) max_salary
from jobs_stage
)
unpivot (
/* omit PK */
val for col in ( job_title, min_salary, max_salary )
)
)
select * from compare_tables (
t1_cols, t2_cols,
columns ( job_id, col, val )
)
order by job_id
JOB_ID | COL | VAL | SOURCE_TABLE | FI_ACCOUNT | JOB_TITLE | Accountant | t1 | FI_ACCOUNT | MAX_SALARY | 9000 | t1 | FI_ACCOUNT | MIN_SALARY | 4200 | t1 | IT_PROG | MAX_SALARY | 10000 | t1 | IT_PROG | MAX_SALARY | 15000 | t2 | SA_REP | JOB_TITLE | Sales Representative | t2 | SA_REP | MAX_SALARY | 12000 | t2 | SA_REP | MIN_SALARY | 6000 | t2 |
---|
Load data for performance tests
begin
for i in 1 .. 13 loop
insert into jobs select lpad ( i, 2, '0' ) || job_id, job_title, min_salary, max_salary + 100 from jobs;
insert into jobs_stage select lpad ( i, 2, '0' ) || job_id, job_title, min_salary, max_salary from jobs_stage;
commit;
end loop;
end;
Statement processed.
select count(*) from jobs
COUNT(*) | 40960 |
---|
select count(*) from jobs_stage
COUNT(*) | 40960 |
---|
alter table jobs add primary key ( job_id )
Table altered.
Performance comparison - tables
declare
iterations pls_integer := 5;
start_time pls_integer;
procedure format_time ( operation varchar2, run_time integer ) as
begin
dbms_output.put_line ( operation || to_char ( ( run_time / 100 ), 'FM990.00' ) || ' seconds ' );
end;
begin
for i in 1 .. iterations loop
start_time := dbms_utility.get_time();
for rws in (
select * from (
select * from jobs
minus
select * from jobs_stage
) union all (
select * from jobs_stage
minus
select * from jobs
)
) loop
null;
end loop;
format_time ( 'Set operations = ', dbms_utility.get_time() - start_time );
start_time := dbms_utility.get_time();
for rws in (
with rws as (
select t.*, 1 t1, 0 t2 from jobs t
union all
select t.*, 0 t1, 1 t2 from jobs_stage t
)
select job_id, job_title, min_salary, max_salary,
sum ( t1 ), sum ( t2 )
from rws
group by job_id, job_title, min_salary, max_salary
/* Filter to only show mismatches */
having sum ( t1 ) <> sum ( t2 )
) loop
null;
end loop;
format_time ( 'Group by = ', dbms_utility.get_time() - start_time );
start_time := dbms_utility.get_time();
for rws in (
with t1 as (
select 't1' as t1, t.* from jobs t
), t2 as (
select 't2' as t2, t.* from jobs_stage t
)
select *
from t1 t1
natural full join t2 t2
/* Filter to show only the differences */
where t1 is null or t2 is null
) loop
null;
end loop;
format_time ( 'Full join = ', dbms_utility.get_time() - start_time );
start_time := dbms_utility.get_time();
for rws in (
with t1_json as (
select 't1' t, job_id, json_object (*) jdoc_t1 from jobs
), t2_json as (
select 't2' t, job_id, json_object (*) jdoc_t2 from jobs_stage
)
select
coalesce ( m.job_id, s.job_id ) job_id,
coalesce ( m.t, s.t ) src,
coalesce ( m.jdoc_t1, s.jdoc_t2 ) jdoc
from t1_json m
full join t2_json s
/* include PK in join for performance */
on m.job_id = s.job_id
and json_equal ( jdoc_t1, jdoc_t2 )
/* return the unmatched rows from each table */
where m.job_id is null or s.job_id is null
) loop
null;
end loop;
format_time ( 'JSON join = ', dbms_utility.get_time() - start_time );
dbms_output.put_line ( '' );
dbms_output.put_line ( '******************' );
dbms_output.put_line ( '' );
end loop;
end;
Statement processed.
Set operations = 0.13 seconds
Group by = 0.08 seconds
Full join = 0.09 seconds
JSON join = 0.87 seconds
******************
Set operations = 0.12 seconds
Group by = 0.07 seconds
Full join = 0.08 seconds
JSON join = 0.63 seconds
******************
Set operations = 0.12 seconds
Group by = 0.07 seconds
Full join = 0.08 seconds
JSON join = 0.64 seconds
******************
Set operations = 0.11 seconds
Group by = 0.08 seconds
Full join = 0.08 seconds
JSON join = 0.63 seconds
******************
Set operations = 0.11 seconds
Group by = 0.08 seconds
Full join = 0.07 seconds
JSON join = 0.63 seconds
******************
Performance comparison - queries
declare
start_time pls_integer;
procedure format_time ( operation varchar2, run_time integer ) as
begin
dbms_output.put_line ( operation || to_char ( ( run_time / 100 ), 'FM990.00' ) || ' seconds ' );
end;
begin
start_time := dbms_utility.get_time();
for rws in (
select * from (
select * from original_query
minus
select * from new_query
) union all (
select * from new_query
minus
select * from original_query
)
) loop
null;
end loop;
format_time ( 'Set operations = ', dbms_utility.get_time() - start_time );
start_time := dbms_utility.get_time();
for rws in (
with rws as (
select t.*, 1 t1, 0 t2 from original_query t
union all
select t.*, 0 t1, 1 t2 from new_query t
)
select job_id, min_min, max_max, avg_range, emp#,
sum ( t1 ), sum ( t2 )
from rws
group by job_id, min_min, max_max, avg_range, emp#
/* Filter to only show mismatches */
having sum ( t1 ) <> sum ( t2 )
) loop
null;
end loop;
format_time ( 'Group by = ', dbms_utility.get_time() - start_time );
start_time := dbms_utility.get_time();
for rws in (
with t1 as (
select 't1' as t1, t.* from original_query t
), t2 as (
select 't2' as t2, t.* from new_query t
)
select *
from t1 t1
natural full join t2 t2
/* Filter to show only the differences */
where t1 is null or t2 is null
) loop
null;
end loop;
format_time ( 'Full join = ', dbms_utility.get_time() - start_time );
end;
Statement processed.
Set operations = 2.40 seconds
Group by = 1.23 seconds
Full join = 1.28 seconds
merge into jobs j
using jobs_stage s
on ( j.job_id = s.job_id )
when not matched then
insert values ( s.job_id, s.job_title, s.min_salary, s.max_salary )
when matched then
update
set j.job_title = s.job_title, j.min_salary = s.min_salary, j.max_salary = s.max_salary
Statement processed.
commit
Statement processed.
Find changes over time
with old_state as (
select * from jobs as of timestamp sysdate - interval '2' second
)
select * from compare_tables (
jobs, old_state,
columns ( job_id, job_title, min_salary, max_salary )
)
order by job_id
fetch first 10 rows only
JOB_ID | JOB_TITLE | MIN_SALARY | MAX_SALARY | SOURCE_TABLE | 01AC_MGR | Accounting Manager | 8200 | 16000 | t1 | 01AC_MGR | Accounting Manager | 8200 | 16100 | t2 | 01AD_PRES | President | 20000 | 40000 | t1 | 01AD_PRES | President | 20000 | 40100 | t2 | 01IT_PROG | Programmer | 4000 | 10100 | t2 | 01IT_PROG | Programmer | 4000 | 15000 | t1 | 01SA_MAN | Sales Manager | 10000 | 20000 | t1 | 01SA_MAN | Sales Manager | 10000 | 20100 | t2 | 01SA_REP | Sales Representative | 6000 | 12000 | t1 | 0201AC_MGR | Accounting Manager | 8200 | 16000 | t1 |
---|