drop table ta_time_series
drop table ta_time_series cascade constraints;
create table ta_time_series
create table TA_TIME_SERIES
( a_id number not null
, a_status char(1) not null
);
alter table TA_TIME_SERIES
add constraint PK_TIME_SERIES primary key (a_id)
using index
;
Table created.
insert values into ta_time_series
insert into TA_TIME_SERIES (a_id, a_status)
values ( 1, 'A' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 2, 'A' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 3, 'B' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 4, 'C' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 5, 'C' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 6, 'B' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 7, 'B' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 8, 'A' );
insert into TA_TIME_SERIES (a_id, a_status)
values ( 9, 'A' );
commit;
select * from ta_time_series order by a_id;
just grouping by a_status not good enough
select t.a_status, min(a_id), max(a_id)
from ta_time_series t
group by t.a_status
order by 2,3
;
trying with analytic functions
with q1 as
(select t.a_id
, lead(t.a_id) over (order by t.a_id) as a_id_next
, lag(t.a_status) over (order by t.a_id) as a_status_before
, decode( t.a_status
, nvl( lag(t.a_status) over (order by t.a_id), t.a_status)
,0,1
) as a_status_before_change
, t.a_status
, decode( t.a_status
, lead(t.a_status) over (order by t.a_id)
,0,1
) as a_status_next_change
, lead(t.a_status) over (order by t.a_id) as a_status_next
from ta_time_series t
order by t.a_id
)
--
select q1.a_id, q1.a_id_next
, q1.a_status_before
, q1.a_status_before_change
, sum( q1.a_status_before_change ) over
(order by q1.a_id
rows between unbounded preceding
and current row ) as sum_status_before_change
, q1.a_status
, q1.a_status_next_change
, q1.a_status_next
from q1
order by q1.a_id
;
trying with analytic functions, now aggregating
with q1 as
(select t.a_id
, lead(t.a_id) over (order by t.a_id) as a_id_next
, lag(t.a_status) over (order by t.a_id) as a_status_before
, decode( t.a_status
, nvl( lag(t.a_status) over (order by t.a_id), t.a_status)
,0,1
) as a_status_before_change
, t.a_status
, decode( t.a_status
, lead(t.a_status) over (order by t.a_id)
,0,1
) as a_status_next_change
, lead(t.a_status) over (order by t.a_id) as a_status_next
from ta_time_series t
order by t.a_id
), q2 as
(select q1.a_id, q1.a_id_next
, q1.a_status_before
, q1.a_status_before_change
, sum( q1.a_status_before_change ) over
(order by q1.a_id
rows between unbounded preceding
and current row ) as sum_status_before_change
, q1.a_status
, q1.a_status_next_change
, q1.a_status_next
from q1
order by q1.a_id
)
--
select q2.a_status, q2.sum_status_before_change
, min(q2.a_id) as min_id, max(q2.a_id) as max_id, count(*) as cnt_all
from q2
group by q2.a_status, q2.sum_status_before_change
order by min(q2.a_id)
;
using subqueries
with q1 as
( select t.*
, (select nvl(max(t1.a_id),0)
from ta_time_series t1
where t1.a_status <> t.a_status
and t1.a_id < t.a_id
) as max_id_status_before
, (select nvl(min(t2.a_id),1e9)
from ta_time_series t2
where t2.a_status <> t.a_status
and t2.a_id > t.a_id
) as min_id_status_after
from ta_time_series t
order by t.a_id
), q2 as
( select q1.*
, (select min(t3.a_id)
from ta_time_series t3
where t3.a_status = q1.a_status
and t3.a_id > q1.max_id_status_before
) as start_id_status_current
, (select max(t4.a_id)
from ta_time_series t4
where t4.a_status = q1.a_status
and t4.a_id < q1.min_id_status_after
) as end_id_status_current
from q1
order by q1.a_id
)
--
select q2.*
from q2
order by q2.a_id
;
using subqueries, aggregated
with q1 as
( select t.*
, (select nvl(max(t1.a_id),0)
from ta_time_series t1
where t1.a_status <> t.a_status
and t1.a_id < t.a_id
) as max_id_status_before
, (select nvl(min(t2.a_id),1e9)
from ta_time_series t2
where t2.a_status <> t.a_status
and t2.a_id > t.a_id
) as min_id_status_after
from ta_time_series t
order by t.a_id
), q2 as
( select q1.*
, (select min(t3.a_id)
from ta_time_series t3
where t3.a_status = q1.a_status
and t3.a_id > q1.max_id_status_before
) as start_id_status_current
, (select max(t4.a_id)
from ta_time_series t4
where t4.a_status = q1.a_status
and t4.a_id < q1.min_id_status_after
) as end_id_status_current
from q1
order by q1.a_id
)
--
select q2.start_id_status_current, q2.end_id_status_current, q2.a_status, count(*)
from q2
group by q2.start_id_status_current, q2.end_id_status_current, q2.a_status
order by q2.start_id_status_current
;