create table fw_store (
  shopid      varchar2(10) primary key,
  containers  integer
)
/
begin
   insert into fw_store values ('AALBORG'  , 4);
   insert into fw_store values ('GLOSTRUP' , 4);
   insert into fw_store values ('HADERSLEV', 3);
   commit;
end;
/
create table fw_daybudget (
  shopid      varchar2(10) references fw_store (shopid),
  budgetdate  date,
  budgetnem   number
)
/
begin
   insert into fw_daybudget values ('AALBORG'  , date '2011-12-27', 150);
   insert into fw_daybudget values ('AALBORG'  , date '2011-12-28', 200);
   insert into fw_daybudget values ('AALBORG'  , date '2011-12-29', 300);
   insert into fw_daybudget values ('AALBORG'  , date '2011-12-30', 500);
   insert into fw_daybudget values ('AALBORG'  , date '2011-12-31', 400);
   insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-27', 150);
   insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-28', 200);
   insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-29', 300);
   insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-30', 500);
   insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-31', 400);
   insert into fw_daybudget values ('HADERSLEV', date '2011-12-27', 100);
   insert into fw_daybudget values ('HADERSLEV', date '2011-12-28', 150);
   insert into fw_daybudget values ('HADERSLEV', date '2011-12-29', 200);
   insert into fw_daybudget values ('HADERSLEV', date '2011-12-30', 400);
   insert into fw_daybudget values ('HADERSLEV', date '2011-12-31', 300);
   commit;
end;
/
create table fw_hourbudget (
  hour        integer,
  percent     number
)
/
begin
   insert into fw_hourbudget values ( 9,  4);
   insert into fw_hourbudget values (10,  8);
   insert into fw_hourbudget values (11, 10);
   insert into fw_hourbudget values (12, 12);
   insert into fw_hourbudget values (13, 12);
   insert into fw_hourbudget values (14, 12);
   insert into fw_hourbudget values (15, 14);
   insert into fw_hourbudget values (16, 14);
   insert into fw_hourbudget values (17, 10);
   insert into fw_hourbudget values (18,  4);
   commit;
end;
/
create table fw_sales (
  shopid      varchar2(10) references fw_store (shopid),
  saleshour   date,
  salesnem    number
)
/
begin
   insert into fw_sales
   select shopid
        , day + numtodsinterval(hour,'hour') saleshour
        , salesnem
   from (
      select 'AALBORG' shopid, date '2011-12-27' day,  4 h9,  6 h10,  5 h11, 20 h12, 19 h13, 22 h14, 27 h15, 11 h16, 16 h17,  4 h18 from dual union all
      select 'AALBORG'  , date '2011-12-28',  7, 17, 18, 13, 27, 28, 20, 14, 10, 19 from dual union all
      select 'AALBORG'  , date '2011-12-29', 10, 14, 20, null, null, null, null, null, null, null from dual union all
      select 'GLOSTRUP' , date '2011-12-27',  1,  6,  6, 14, 17, 17, 13, 15,  7,  7 from dual union all
      select 'GLOSTRUP' , date '2011-12-28',  4, 14, 30, 35, 22, 21, 35, 34, 15, 25 from dual union all
      select 'GLOSTRUP' , date '2011-12-29',  6, 13, 50, null, null, null, null, null, null, null from dual union all
      select 'HADERSLEV', date '2011-12-27',  4,  7, 13, 15, 17, 13, 18, 19, 10,  3 from dual union all
      select 'HADERSLEV', date '2011-12-28',  8,  5, 14, 18, 20, 18, 15, 24, 12,  1 from dual union all
      select 'HADERSLEV', date '2011-12-29',  1, 19, 33, null, null, null, null, null, null, null from dual
   ) s1
   unpivot exclude nulls (
     salesnem for hour in (
       h9  as  9,
       h10 as 10,
       h11 as 11,
       h12 as 12,
       h13 as 13,
       h14 as 14,
       h15 as 15,
       h16 as 16,
       h17 as 17,
       h18 as 18
     )
   );
   commit;
end;
/
create table inventory (
   item  varchar2(10)   -- identification of the item
 , loc   varchar2(10)   -- identification of the location
 , qty   number         -- quantity present at that location
 , purch date           -- date that quantity was purchased
);
insert into inventory values('Ale' , '1-A-20', 18, DATE '2014-02-01');
insert into inventory values('Ale' , '1-A-31', 12, DATE '2014-02-05');
insert into inventory values('Ale' , '1-C-05', 18, DATE '2014-02-03');
insert into inventory values('Ale' , '2-A-02', 24, DATE '2014-02-02');
insert into inventory values('Ale' , '2-D-07',  9, DATE '2014-02-04');
insert into inventory values('Bock', '1-A-02', 18, DATE '2014-02-06');
insert into inventory values('Bock', '1-B-11',  4, DATE '2014-02-05');
insert into inventory values('Bock', '1-C-04', 12, DATE '2014-02-03');
insert into inventory values('Bock', '1-B-15',  2, DATE '2014-02-02');
insert into inventory values('Bock', '2-D-23',  1, DATE '2014-02-04');
commit;
create table orderline (
   ordno number         -- id-number of the order
 , item  varchar2(10)   -- identification of the item
 , qty   number         -- quantity ordered
);
insert into orderline values (42, 'Ale' , 24);
insert into orderline values (42, 'Bock', 18);
commit;
create table running_log ( 
  run_date       date not null,  
  time_in_s      int  not null, 
  distance_in_km int  not null 
)
;
-- A sample data set of running training logs.
begin  
  insert into running_log values (date'2020-04-01', 310, 1);  
  insert into running_log values (date'2020-04-02', 1600, 5);  
  insert into running_log values (date'2020-04-03', 3580, 11);  
  insert into running_log values (date'2020-04-06', 1550, 5);  
  insert into running_log values (date'2020-04-07', 300, 1);  
  insert into running_log values (date'2020-04-10', 280, 1);  
  insert into running_log values (date'2020-04-13', 1530, 5);  
  insert into running_log values (date'2020-04-14', 295, 1);  
  insert into running_log values (date'2020-04-15', 292, 1);  
  commit;  
end; 
/
create table emp as 
  select * from scott.emp;

To start we'll build a prediction engine, estimating when shops will run out of fireworks to sell. This will use data from tables storing the shops, the sales recorded, and the daily and hourly sales budgets:
select * from fw_store;
select * from fw_sales;
select * from fw_daybudget;
select * from fw_hourbudget;
By adding the OVER clause to SUM, you can calculate running totals. The OVER clause here has three parts: partition by, order by, and a windowing clause. This query uses them to return the cumulative sales for each shop by date:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
select sales.shopid
     , sales.salesnem
     , sales.saleshour
     , sum(sales.salesnem) over (
         partition by sales.shopid
         order by sales.saleshour
         rows between unbounded preceding and current row
       ) salesnemacc
  from fw_sales sales
order by sales.shopid, sales.saleshour;
The clauses in this SUM work as follows:
partition by sales.shopid calculates the running total for each shop separately
order by sales.saleshour sorts the rows by hour, so this gives the cumulative sales by date
rows between unbounded preceding and current row sums the sales for this row and all previous rows
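A side note: if you omit the windowing clause, Oracle defaults to range between unbounded preceding and current row, which treats rows with the same sort key as peers and includes them all in the total. SALESHOUR is unique per shop here, so this shorthand sketch gives the same results, but spelling out the rows clause makes the intent explicit:
select sales.shopid
     , sales.saleshour
     , sum(sales.salesnem) over (
         partition by sales.shopid
         order by sales.saleshour
       ) salesnemacc   -- implicit range window
  from fw_sales sales
order by sales.shopid, sales.saleshour;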
The budget figures are daily. To combine these with the hourly sales, we need to convert them to hourly figures.
This query does so by cross joining the day and hour budget tables, then multiplying the daily budget by the hourly percentage to give the expected sales for each hour:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
  select s.shopid
       , s.containers * 250 startnem
    from fw_store s
), budget as (
  select db.shopid
       , db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
       , db.budgetnem * hb.percent / 100 budgetnem
  from   fw_daybudget db
  cross  join fw_hourbudget hb
)
  select budget.shopid
       , shop.startnem
       , budget.budgethour hour
       , budget.budgetnem qtynem
  from   shop
  join   budget
  on     budget.shopid = shop.shopid;
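As a quick sanity check: the hourly percentages sum to 100, so rolling the hourly figures back up reproduces the daily budgets. A sketch to verify this:
select db.shopid
     , db.budgetdate
     , sum(db.budgetnem * hb.percent / 100) budgetnem
from   fw_daybudget db
cross  join fw_hourbudget hb
group  by db.shopid, db.budgetdate
order  by db.shopid, db.budgetdate;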
We can now combine the hourly actual and predicted sales figures. We want to use the actual figures up to the time of the last recorded sale; after this, the query should use the expected figures.
This query returns the real or projected sales figures, then computes the running total of this combined figure:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
  select s.shopid
       , s.containers * 250 startnem
  from   fw_store s
), budget as (
  select db.shopid
       , db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
       , db.budgetnem * hb.percent / 100 budgetnem
  from   fw_daybudget db
  cross  join fw_hourbudget hb
), nem as (
  select budget.shopid
       , shop.startnem
       , budget.budgethour hour
       , case
           when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
           then 'S'
           else 'B'
         end salesbudget
       , case
           when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
           then nvl(sales.salesnem,0)
           else budget.budgetnem
         end qtynem
  from   shop
  join   budget
  on     budget.shopid = shop.shopid
  left outer join fw_sales sales
  on     sales.shopid = budget.shopid
  and    sales.saleshour = budget.budgethour
)
select nem.shopid
     , nem.hour
     , nem.salesbudget
     , nem.qtynem
     , sum(nem.qtynem) over (
         partition by nem.shopid
         order by nem.hour
         rows between unbounded preceding and current row
       ) sumnem
     , greatest(nem.startnem - nvl(
         sum(nem.qtynem) over (
           partition by nem.shopid
           order by nem.hour
           rows between unbounded preceding and 1 preceding
         )
       ,0),0) stocknem
  from nem
 order by shopid, hour;
It also predicts the remaining stock at each time by subtracting the running total of hourly sales (real or predicted) from the starting stock. Notice that this calculation uses the window clause:
rows between unbounded preceding and 1 preceding
This means: include all the previous rows, but exclude the current row. We want the expected stock at the start of each hour; including the current row would return the expected level at the end of the hour.
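One detail worth noting: with 1 preceding, the window frame is empty for the first row of each partition, so the SUM returns null. That is why the query above wraps it in NVL(..., 0). A minimal sketch showing the null:
select shopid
     , saleshour
     , sum(salesnem) over (
         partition by shopid
         order by saleshour
         rows between unbounded preceding and 1 preceding
       ) sum_prv   -- null for each shop's first row
from   fw_sales
order  by shopid, saleshour;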
The previous query returned every hour and the expected stock level. To find the time stock is expected to run out, find the last hour where the remaining stock is greater than zero:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
  select s.shopid
       , s.containers * 250 startnem
    from fw_store s
), budget as (
  select db.shopid
       , db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
       , db.budgetnem * hb.percent / 100 budgetnem
    from fw_daybudget db
   cross join fw_hourbudget hb
), nem as (
  select budget.shopid
       , shop.startnem
       , budget.budgethour hour
       , case
           when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
           then 'S'
           else 'B'
         end salesbudget
       , case
           when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
           then nvl(sales.salesnem,0)
           else budget.budgetnem
         end qtynem
  from   shop
  join   budget
  on     budget.shopid = shop.shopid
  left outer join fw_sales sales
  on     sales.shopid = budget.shopid
  and    sales.saleshour = budget.budgethour
)
select shopid
     , max(hour)
     + numtodsinterval(
         max(stocknem) keep (dense_rank last order by hour)
         / max(qtynem) keep (dense_rank last order by hour)
         ,'hour'
       ) zerohour
  from (
  select nem.shopid
       , nem.hour
       , nem.salesbudget
       , nem.qtynem
       , sum(nem.qtynem) over (
           partition by nem.shopid
           order by nem.hour
           rows between unbounded preceding and current row
         ) sumnem
       , greatest(nem.startnem - nvl(
           sum(nem.qtynem) over (
             partition by nem.shopid
             order by nem.hour
             rows between unbounded preceding and 1 preceding
           )
         ,0),0) stocknem
    from nem
)
where stocknem > 0
group by shopid
order by shopid;
To estimate the exact time stock will run out, we can take the ratio of remaining stock to expected sales for the hour it's due to run out. The query does this with these expressions:
     , max(hour)
     + numtodsinterval(
         max(stocknem) keep (dense_rank last order by hour)
         / max(qtynem) keep (dense_rank last order by hour)
         ,'hour'
       ) zerohour
This uses the KEEP clause to return the stock and quantity values for the maximum HOUR. This is necessary because STOCKNEM and QTYNEM are neither in the GROUP BY nor inside an aggregate function.
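KEEP is worth remembering whenever you need a value from the first or last row of each group. A minimal sketch on the sales table, returning the quantity each shop sold in its most recent recorded hour:
select shopid
     , max(saleshour) last_hour
     , max(salesnem) keep (dense_rank last order by saleshour) last_salesnem
from   fw_sales
group  by shopid
order  by shopid;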
This technique of calculating the running total up to the previous row has many other applications.
Next we'll look at how to use SQL to find which stock locations to take inventory from to fulfil orders. This will use these tables:
select * from orderline;
select * from inventory;
The algorithm needs to list the locations stock pickers should take items from. There must be enough locations that the sum of the quantities in stock reaches the ordered quantity of each product.
In other words, the algorithm needs to keep selecting locations until the total quantity picked reaches the ordered quantity. As with the previous problem, this is possible with a running SUM.
This query gets the cumulative quantity up to and including the current row (SUM_QTY), and up to the previous row (SUM_PRV_QTY):
select o.item
     , o.qty ord_qty
     , i.loc
     , i.purch
     , i.qty loc_qty
     , sum(i.qty) over (
         partition by i.item
         order by i.purch, i.loc
         rows between unbounded preceding and current row
       ) sum_qty
     , sum(i.qty) over (
         partition by i.item
         order by i.purch, i.loc
         rows between unbounded preceding and 1 preceding
       ) sum_prv_qty
from   orderline o
join   inventory i
on     i.item  = o.item
where  o.ordno = 42
order by o.item, i.purch, i.loc;
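You can verify the running totals by hand. For 'Ale' (24 ordered), sorting by purchase date gives:

LOC     PURCH       LOC_QTY  SUM_QTY  SUM_PRV_QTY
1-A-20  2014-02-01       18       18       (null)
2-A-02  2014-02-02       24       42           18
1-C-05  2014-02-03       18       60           42
2-D-07  2014-02-04        9       69           60
1-A-31  2014-02-05       12       81           69

The order is covered partway through the second row: the previous totals null (treated as zero) and 18 are below 24, while 42 is not.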
To filter this down to the locations needed to fulfil the order, we keep all the rows where the previously picked quantity is less than the ordered quantity.
Like the previous problem, the SQL keeps adding rows to the result until the running total for the previous row reaches the ordered quantity.
The SQL below finds all the needed locations. You can implement different stock picking algorithms by changing the ORDER BY for the running total. Experiment by replacing /* TODO */ with different columns to see what effect this has on the locations chosen:
select s.*
     , least(s.loc_qty, s.ord_qty - s.sum_prv_qty) pick_qty
from (
 select o.item
      , o.qty ord_qty
      , i.loc
      , i.purch
      , i.qty loc_qty
      , nvl(sum(i.qty) over (
          partition by i.item
          order by /* TODO */
          rows between unbounded preceding and 1 preceding
        ),0) sum_prv_qty
  from  orderline o
  join  inventory i
  on    i.item  = o.item
  where o.ordno = 42
) s
where s.sum_prv_qty < s.ord_qty
order by s.item, s.purch, s.loc;

Whichever algorithm you use to select stock, it may choose a poor route for the stock picker to walk around the warehouse. This can lead to them walking back down an aisle, when it would be better to continue to the top of the aisle, then walk back down the next one.
One way to avoid this is to number the aisles needed, then walk up the odd-numbered aisles and back down the even-numbered ones. Locations in the same aisle need the same aisle number, so we want to assign these numbers!
Oracle Database has three row numbering functions: ROW_NUMBER, RANK, and DENSE_RANK. This query compares them:
with inv_locations as ( 
  select s.loc
       , to_number(substr(s.loc,1,1)) warehouse
       , substr(s.loc,3,1) aisle
       , to_number(substr(s.loc,5,2)) position
  from   inventory s
)
  select loc
       , warehouse
       , aisle
       , row_number () over (
           order by warehouse, aisle
         ) rn
       , rank () over (
           order by warehouse, aisle
         ) rk
       , dense_rank () over (
           order by warehouse, aisle
         ) dr
from   inv_locations;

To improve the routing algorithm, we want to give the locations row numbers we can sort by to alternate the route up and down the aisles. Locations in the same aisle must get the same number, and there must be no gaps in the numbering. Thus DENSE_RANK is the correct function to use here. This sorts by warehouse number, then aisle:
      , dense_rank() over (
           order by to_number(substr(s.loc,1,1))     -- warehouse
                  , substr(s.loc,3,1)                -- aisle
        ) aisle_no
To alternate ascending and descending sorts, take this rank modulo two. Return the position as-is when the result is one, and negate it when the result is zero:
      , case
           when mod(s2.aisle_no,2) = 1 then s2.position
           else                            -s2.position
        end;
The complete query for this is:
select s2.warehouse, s2.aisle, s2.aisle_no, s2.position
     , s2.loc, s2.item, s2.pick_qty
from (
  select to_number(substr(s.loc,1,1)) warehouse
      , substr(s.loc,3,1) aisle
      , dense_rank() over (
           order by to_number(substr(s.loc,1,1))     -- warehouse
                  , substr(s.loc,3,1)                -- aisle
        ) aisle_no
      , to_number(substr(s.loc,5,2)) position
      , s.loc, s.item
      , least(s.loc_qty, s.ord_qty - s.sum_prv_qty) pick_qty
  from (
    select o.item, o.qty ord_qty, i.loc, i.purch, i.qty loc_qty
         , nvl(sum(i.qty) over (
             partition by i.item
             order by i.qty, i.loc
             rows between unbounded preceding and 1 preceding
           ),0) sum_prv_qty
    from   orderline o
    join   inventory i
    on     i.item  = o.item
    where  o.ordno = 42
  ) s
  where s.sum_prv_qty < s.ord_qty
) s2
order by s2.warehouse
      , s2.aisle_no
      , case
           when mod(s2.aisle_no,2) = 1 then s2.position
           else                            -s2.position
end;

For the third problem we're searching for consecutive dates in this running log:
select * from running_log;
The goal is to split these rows into groups of consecutive dates. For each group, return the start date and number of days in it.
There's a trick you can use to do this: number the rows in date order with ROW_NUMBER, then subtract this row number from the date. Rows with consecutive dates all get the same value:
with grps as (  
  select run_date
       , row_number () over ( order by run_date ) rn 
       , run_date - row_number () over ( order by run_date ) grp  
  from   running_log  
)  
  select *
  from   grps
  order  by run_date;
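You can verify this by hand against the sample data. Subtracting the row number from each date gives:

RUN_DATE    RN  GRP
2020-04-01   1  2020-03-31
2020-04-02   2  2020-03-31
2020-04-03   3  2020-03-31
2020-04-06   4  2020-04-02
2020-04-07   5  2020-04-02
2020-04-10   6  2020-04-04
2020-04-13   7  2020-04-06
2020-04-14   8  2020-04-06
2020-04-15   9  2020-04-06

Each run of consecutive dates collapses to a single GRP value.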
You can then summarise these data by grouping on this expression, using MIN, MAX, and COUNT to find the start, end, and number of rows in each group:
with grps as (  
  select run_date, row_number () over ( order by run_date ) rn ,  
         run_date - row_number () over ( order by run_date ) grp  
  from   running_log  
)  
  select min ( run_date ) first_run, count(*) runs  
  from   grps  
  group  by grp  
  order  by first_run;
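The same grouping works with any aggregates. For example, a sketch extending the summary with the last date and total distance of each streak:
with grps as (
  select run_date, distance_in_km
       , run_date - row_number () over ( order by run_date ) grp
  from   running_log
)
  select min ( run_date ) first_run
       , max ( run_date ) last_run
       , count (*) runs
       , sum ( distance_in_km ) total_km
  from   grps
  group  by grp
  order  by first_run;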
This technique is referred to as the Tabibitosan method.
Added in Oracle Database 12c, the row pattern matching clause, MATCH_RECOGNIZE, offers another way to solve this problem.
To do this, you need to define pattern variables: criteria for rows to meet. Then create a regular expression using these variables.
To find consecutive dates, you need to look for rows where the current date equals the previous date plus one. This pattern variable does this:
consecutive as run_date = prev ( run_date ) + 1
To search for a series of consecutive rows, use this pattern:
pattern ( init consecutive* )
This matches one instance of INIT followed by any number of CONSECUTIVE.
But what's this INIT variable? It has no definition!
Undefined variables are "always true": they match any row. This enables the pattern to match the first row of each group, which can never satisfy CONSECUTIVE. For the very first row in the data set the previous RUN_DATE is null; for later group starts, the previous date is more than one day earlier.
This query returns the first date and number of rows in each group:
select *   
from   running_log   
match_recognize (  
  order by run_date  
  measures   
    first ( run_date ) as start_date,  
    count (*) as days  
  pattern ( init consecutive* )  
  define   
    consecutive as run_date = prev ( run_date ) + 1
);
By default MATCH_RECOGNIZE returns one row per match. To make it easier to see what's going on, this query adds the clause:
all rows per match
This returns all the matched rows. The MEASURES clause also adds the CLASSIFIER function, which returns the name of the pattern variable each row matched:
select *   
from   running_log   
match_recognize (  
  order by run_date  
  measures   
    classifier () as var,
    first ( run_date ) as start_date,  
    count (*) as days  
  all rows per match
  pattern ( init consecutive* )  
  define   
    consecutive as run_date = prev ( run_date ) + 1
);

To finish, we'll build an organization tree using the classic EMP table:
select empno
     , lpad(' ', (level-1)*2) || ename as ename
from   emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
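Assuming the standard SCOTT.EMP data, this returns the familiar indented tree:

EMPNO  ENAME
 7839  KING
 7566    JONES
 7788      SCOTT
 7876        ADAMS
 7902      FORD
 7369        SMITH
 7698    BLAKE
 7499      ALLEN
 7521      WARD
 7654      MARTIN
 7844      TURNER
 7900      JAMES
 7782    CLARK
 7934      MILLER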
We want to augment this by adding the total number of reports each person has, i.e. the number of nodes in the tree below each one.
You can do this with the following query:
select empno
     , lpad(' ', (level-1)*2) || ename as ename
     , (
         select count(*)
           from emp sub
          start with sub.mgr = emp.empno
          connect by sub.mgr = prior sub.empno
       ) subs
from   emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
The subquery walks the hierarchy below every row in the table, so it queries EMP once for each row. This leads to a huge amount of extra work, which you can see by getting the execution plan:
select /*+ gather_plan_statistics */empno
     , lpad(' ', (level-1)*2) || ename as ename
     , (
         select count(*)
           from emp sub
          start with sub.mgr = emp.empno
          connect by sub.mgr = prior sub.empno
       ) subs
from   emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
select * 
from   table(dbms_xplan.display_cursor(:LIVESQL_LAST_SQL_ID, format => 'ROWSTATS LAST'));
Note that the full table scan at line three of the plan happens 14 times, reading a total of 196 rows. As you add more rows to EMP, this query will scale terribly.
You can overcome the performance issues of the previous query by numbering the rows in depth-first order, then counting, for each row, how many of the following rows are deeper in the tree.
This works because with a depth-first search, all the descendants of a node appear immediately after it at a greater depth; the next row at the same depth or shallower is not a descendant. For example, with the standard SCOTT.EMP data the traversal begins KING (level 1), JONES (2), SCOTT (3), ADAMS (4), FORD (3), SMITH (4), BLAKE (2): the four rows between JONES and BLAKE are all deeper than level 2, so they are JONES' reports.
You can implement this in MATCH_RECOGNIZE with these clauses:
      pattern ( strt higher* )
   define
      higher as higher.lvl > strt.lvl
Like searching for consecutive rows, this starts with an always-true variable, then looks for zero or more rows at a greater depth.
A key difference is this clause:
after match skip to next row
After matching the pattern starting at the first row, this instructs the database to repeat the process from the second row in the data set, then the third, the fourth, and so on.
This contrasts with the default behaviour of MATCH_RECOGNIZE: after completing a match, continue the search from the row after the last matched row. Because every other row is below the root, the first match would consume the whole data set, leaving nothing left to match. You'd only get the count of subordinates for KING!
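A sketch to see this for yourself, with the skip clause left out so the default applies (using the same HIERARCHY subquery as the final query below). It returns a single row: KING with 13 subordinates:
with hierarchy as (
  select lvl, empno, ename, rownum as rn
  from (
    select level as lvl, empno, ename
    from   emp
    start  with mgr is null
    connect by mgr = prior empno
    order siblings by empno
  )
)
select ename, subs
from hierarchy
  match_recognize (
   order by rn
   measures
      strt.ename as ename
    , count(higher.lvl) as subs
   one row per match
   -- default: after match skip past last row
   pattern ( strt higher* )
   define
      higher as higher.lvl > strt.lvl
);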
All together this gives:
with hierarchy as (
  select lvl, empno, ename, rownum as rn
  from (
    select level as lvl, empno, ename
    from   emp
    start  with mgr is null
    connect by mgr = prior empno
    order siblings by empno
  )
)
select /*+ gather_plan_statistics */empno
     , lpad(' ', (lvl-1)*2) || ename as ename
     , subs
from hierarchy
  match_recognize (
   order by rn
   measures
      strt.rn as rn
    , strt.lvl as lvl
    , strt.empno as empno
    , strt.ename as ename
    , count(higher.lvl) as subs
   one row per match
   after match skip to next row
   pattern ( strt higher* )
   define
      higher as higher.lvl > strt.lvl
)
order by rn;
select * 
from   table(dbms_xplan.display_cursor(:LIVESQL_LAST_SQL_ID, format => 'ROWSTATS LAST'));
Notice that the query now reads EMP only once, for a total of 14 rows. This scales significantly better than using a scalar subquery!