create table fw_store (
shopid varchar2(10) primary key,
containers integer
)
/
begin
insert into fw_store values ('AALBORG' , 4);
insert into fw_store values ('GLOSTRUP' , 4);
insert into fw_store values ('HADERSLEV', 3);
commit;
end;
/
create table fw_daybudget (
shopid varchar2(10) references fw_store (shopid),
budgetdate date,
budgetnem number
)
/
begin
insert into fw_daybudget values ('AALBORG' , date '2011-12-27', 150);
insert into fw_daybudget values ('AALBORG' , date '2011-12-28', 200);
insert into fw_daybudget values ('AALBORG' , date '2011-12-29', 300);
insert into fw_daybudget values ('AALBORG' , date '2011-12-30', 500);
insert into fw_daybudget values ('AALBORG' , date '2011-12-31', 400);
insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-27', 150);
insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-28', 200);
insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-29', 300);
insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-30', 500);
insert into fw_daybudget values ('GLOSTRUP' , date '2011-12-31', 400);
insert into fw_daybudget values ('HADERSLEV', date '2011-12-27', 100);
insert into fw_daybudget values ('HADERSLEV', date '2011-12-28', 150);
insert into fw_daybudget values ('HADERSLEV', date '2011-12-29', 200);
insert into fw_daybudget values ('HADERSLEV', date '2011-12-30', 400);
insert into fw_daybudget values ('HADERSLEV', date '2011-12-31', 300);
commit;
end;
/
create table fw_hourbudget (
hour integer,
percent number
)
/
begin
insert into fw_hourbudget values ( 9, 4);
insert into fw_hourbudget values (10, 8);
insert into fw_hourbudget values (11, 10);
insert into fw_hourbudget values (12, 12);
insert into fw_hourbudget values (13, 12);
insert into fw_hourbudget values (14, 12);
insert into fw_hourbudget values (15, 14);
insert into fw_hourbudget values (16, 14);
insert into fw_hourbudget values (17, 10);
insert into fw_hourbudget values (18, 4);
commit;
end;
/
create table fw_sales (
shopid varchar2(10) references fw_store (shopid),
saleshour date,
salesnem number
)
/
begin
insert into fw_sales
select shopid
, day + numtodsinterval(hour,'hour') saleshour
, salesnem
from (
select 'AALBORG' shopid, date '2011-12-27' day, 4 h9, 6 h10, 5 h11, 20 h12, 19 h13, 22 h14, 27 h15, 11 h16, 16 h17, 4 h18 from dual union all
select 'AALBORG' , date '2011-12-28', 7, 17, 18, 13, 27, 28, 20, 14, 10, 19 from dual union all
select 'AALBORG' , date '2011-12-29', 10, 14, 20, null, null, null, null, null, null, null from dual union all
select 'GLOSTRUP' , date '2011-12-27', 1, 6, 6, 14, 17, 17, 13, 15, 7, 7 from dual union all
select 'GLOSTRUP' , date '2011-12-28', 4, 14, 30, 35, 22, 21, 35, 34, 15, 25 from dual union all
select 'GLOSTRUP' , date '2011-12-29', 6, 13, 50, null, null, null, null, null, null, null from dual union all
select 'HADERSLEV', date '2011-12-27', 4, 7, 13, 15, 17, 13, 18, 19, 10, 3 from dual union all
select 'HADERSLEV', date '2011-12-28', 8, 5, 14, 18, 20, 18, 15, 24, 12, 1 from dual union all
select 'HADERSLEV', date '2011-12-29', 1, 19, 33, null, null, null, null, null, null, null from dual
) s1
unpivot exclude nulls (
salesnem for hour in (
h9 as 9,
h10 as 10,
h11 as 11,
h12 as 12,
h13 as 13,
h14 as 14,
h15 as 15,
h16 as 16,
h17 as 17,
h18 as 18
)
);
commit;
end;
/
create table inventory (
item varchar2(10) -- identification of the item
, loc varchar2(10) -- identification of the location
, qty number -- quantity present at that location
, purch date -- date that quantity was purchased
);
insert into inventory values('Ale' , '1-A-20', 18, DATE '2014-02-01');
insert into inventory values('Ale' , '1-A-31', 12, DATE '2014-02-05');
insert into inventory values('Ale' , '1-C-05', 18, DATE '2014-02-03');
insert into inventory values('Ale' , '2-A-02', 24, DATE '2014-02-02');
insert into inventory values('Ale' , '2-D-07', 9, DATE '2014-02-04');
insert into inventory values('Bock', '1-A-02', 18, DATE '2014-02-06');
insert into inventory values('Bock', '1-B-11', 4, DATE '2014-02-05');
insert into inventory values('Bock', '1-C-04', 12, DATE '2014-02-03');
insert into inventory values('Bock', '1-B-15', 2, DATE '2014-02-02');
insert into inventory values('Bock', '2-D-23', 1, DATE '2014-02-04');
commit;
create table orderline (
ordno number -- id-number of the order
, item varchar2(10) -- identification of the item
, qty number -- quantity ordered
);
insert into orderline values (42, 'Ale' , 24);
insert into orderline values (42, 'Bock', 18);
commit;
create table running_log (
run_date date not null,
time_in_s int not null,
distance_in_km int not null
)
;
-- A sample data set of running training logs.
begin
insert into running_log values (date'2020-04-01', 310, 1);
insert into running_log values (date'2020-04-02', 1600, 5);
insert into running_log values (date'2020-04-03', 3580, 11);
insert into running_log values (date'2020-04-06', 1550, 5);
insert into running_log values (date'2020-04-07', 300, 1);
insert into running_log values (date'2020-04-10', 280, 1);
insert into running_log values (date'2020-04-13', 1530, 5);
insert into running_log values (date'2020-04-14', 295, 1);
insert into running_log values (date'2020-04-15', 292, 1);
commit;
end;
/
create table emp as
select * from scott.emp;
To start we'll build a prediction engine, estimating when shops will run out of fireworks to sell. This will use data from tables storing the shops, the sales recorded so far, and the daily and hourly sales budgets:
select * from fw_store;
select * from fw_sales;
select * from fw_daybudget;
select * from fw_hourbudget;
By adding the OVER clause to SUM, you can calculate running totals. The OVER clause used here has three components, explained after the query.
This returns the cumulative sales for each shop by date:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
select sales.shopid
, sales.salesnem
, sales.saleshour
, sum(sales.salesnem) over (
partition by sales.shopid
order by sales.saleshour
rows between unbounded preceding and current row
) salesnemacc
from fw_sales sales
order by sales.shopid, sales.saleshour;
The clauses in this SUM work as follows:
partition by sales.shopid calculates the running total for each shop separately (the sketch after this list shows the effect of dropping it)
order by sales.saleshour sorts the rows by hour, so this gives the cumulative sales by date
rows between unbounded preceding and current row sums the sales for this row and all previous rows
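As a quick contrast (a sketch, not part of the worked example; the alias SALESNEMACC_ALL is made up), dropping PARTITION BY gives a single running total across all shops combined:
select sales.shopid
, sales.saleshour
, sales.salesnem
, sum(sales.salesnem) over (
order by sales.saleshour, sales.shopid
rows between unbounded preceding and current row
) salesnemacc_all -- no partition by: one total over every shop
from fw_sales sales
order by sales.saleshour, sales.shopid;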
The budget figures are daily. To combine these with hourly sales, we need to convert them to hourly figures.
This query does this by cross joining the day and hour budget tables, then multiplying the daily budget by the hourly percentage to give the expected sales for each hour:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
select s.shopid
, s.containers * 250 startnem
from fw_store s
), budget as (
select db.shopid
, db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
, db.budgetnem * hb.percent / 100 budgetnem
from fw_daybudget db
cross join fw_hourbudget hb
)
select budget.shopid
, shop.startnem
, budget.budgethour hour
, budget.budgetnem qtynem
from shop
join budget
on budget.shopid = shop.shopid;
We can now combine the hourly actual and predicted sales figures. We want to include actual figures up to the time of the last recorded sale. After this, the query should use the expected figures.
This query returns the real or projected sales figures, then computes the running total of this combined figure:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
select s.shopid
, s.containers * 250 startnem
from fw_store s
), budget as (
select db.shopid
, db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
, db.budgetnem * hb.percent / 100 budgetnem
from fw_daybudget db
cross join fw_hourbudget hb
), nem as (
select budget.shopid
, shop.startnem
, budget.budgethour hour
, case
when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
then 'S'
else 'B'
end salesbudget
, case
when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
then nvl(sales.salesnem,0)
else budget.budgetnem
end qtynem
from shop
join budget
on budget.shopid = shop.shopid
left outer join fw_sales sales
on sales.shopid = budget.shopid
and sales.saleshour = budget.budgethour
)
select nem.shopid
, nem.hour
, nem.salesbudget
, nem.qtynem
, sum(nem.qtynem) over (
partition by nem.shopid
order by nem.hour
rows between unbounded preceding and current row
) sumnem
, greatest(nem.startnem - nvl(
sum(nem.qtynem) over (
partition by nem.shopid
order by nem.hour
rows between unbounded preceding and 1 preceding
)
,0),0) stocknem
from nem
order by shopid, hour;
It also predicts the remaining stock at each time by subtracting the running total of hourly sales (real or predicted) from the starting stock. Notice that this calculation uses the window clause:
rows between unbounded preceding and 1 preceding
This means: include all the previous rows, but exclude the current row. We want the expected stock at the start of the hour; including the current row would return the expected level at the end of the hour.
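To see the difference between the two frames side by side, here is a minimal sketch over the raw fw_sales data for one shop (the aliases TOTAL_END_OF_HOUR and TOTAL_START_OF_HOUR are made up):
select saleshour
, salesnem
, sum(salesnem) over (
order by saleshour
rows between unbounded preceding and current row
) total_end_of_hour -- includes the current hour's sales
, nvl(sum(salesnem) over (
order by saleshour
rows between unbounded preceding and 1 preceding
),0) total_start_of_hour -- excludes the current hour's sales
from fw_sales
where shopid = 'AALBORG'
order by saleshour;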
The stock query above returned the expected stock level for every hour. To find the time stock is expected to run out, find the last hour where the remaining stock is greater than zero:
alter session set nls_date_format='YYYY-MM-DD HH24:MI:SS';
with shop as (
select s.shopid
, s.containers * 250 startnem
from fw_store s
), budget as (
select db.shopid
, db.budgetdate + numtodsinterval(hb.hour,'hour') budgethour
, db.budgetnem * hb.percent / 100 budgetnem
from fw_daybudget db
cross join fw_hourbudget hb
), nem as (
select budget.shopid
, shop.startnem
, budget.budgethour hour
, case
when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
then 'S'
else 'B'
end salesbudget
, case
when budget.budgethour < to_date('2011-12-29 12:00:00','YYYY-MM-DD HH24:MI:SS')
then nvl(sales.salesnem,0)
else budget.budgetnem
end qtynem
from shop
join budget
on budget.shopid = shop.shopid
left outer join fw_sales sales
on sales.shopid = budget.shopid
and sales.saleshour = budget.budgethour
)
select shopid
, max(hour)
+ numtodsinterval(
max(stocknem) keep (dense_rank last order by hour)
/ max(qtynem) keep (dense_rank last order by hour)
,'hour'
) zerohour
from (
select nem.shopid
, nem.hour
, nem.salesbudget
, nem.qtynem
, sum(nem.qtynem) over (
partition by nem.shopid
order by nem.hour
rows between unbounded preceding and current row
) sumnem
, greatest(nem.startnem - nvl(
sum(nem.qtynem) over (
partition by nem.shopid
order by nem.hour
rows between unbounded preceding and 1 preceding
)
,0),0) stocknem
from nem
)
where stocknem > 0
group by shopid
order by shopid;
To estimate the exact time stock will run out, we can take the ratio of remaining stock to expected sales for the hour during which it's due to run out. The query does this with these functions:
, max(hour)
+ numtodsinterval(
max(stocknem) keep (dense_rank last order by hour)
/ max(qtynem) keep (dense_rank last order by hour)
,'hour'
) zerohour
This uses the KEEP clause to return the STOCKNEM and QTYNEM values from the row with the greatest HOUR. This is necessary because STOCKNEM and QTYNEM are neither in the GROUP BY nor wrapped in a plain aggregate.
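As a standalone illustration of KEEP, this sketch (using the fw_sales data above; the aliases LAST_HOUR and LAST_HOUR_SALES are made up) returns the sales figure recorded in each shop's latest hour:
select shopid
, max(saleshour) last_hour
, max(salesnem) keep (dense_rank last order by saleshour) last_hour_sales
from fw_sales
group by shopid;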
This technique of comparing the previous row's running total against a limit has many other applications. Next we'll look at how you can use it in stock picking algorithms.
Next we'll look at how to use SQL to find which stock location to get inventory from to fulfil orders. This will use these tables:
select * from orderline;
select * from inventory;
The algorithm needs to list the locations stock pickers should take items from. There must be enough locations that the sum of the quantities in stock across them reaches the ordered quantity of each item.
So the algorithm keeps selecting locations until the total quantity picked reaches or exceeds the ordered quantity. As with the previous problem, this is possible with a running SUM.
This query gets the cumulative quantity up to and including the current row, and the cumulative quantity up to the previous row:
select o.item
, o.qty ord_qty
, i.loc
, i.purch
, i.qty loc_qty
, sum(i.qty) over (
partition by i.item
order by i.purch, i.loc
rows between unbounded preceding and current row
) sum_qty
, sum(i.qty) over (
partition by i.item
order by i.purch, i.loc
rows between unbounded preceding and 1 preceding
) sum_prv_qty
from orderline o
join inventory i
on i.item = o.item
where o.ordno = 42
order by o.item, i.purch, i.loc;
To filter this to just the locations needed to fulfil the order, we keep all the rows where the quantity picked from the previous locations is less than the ordered quantity.
Like the previous problem, the SQL keeps adding rows to the result while the running total for the previous rows is below the ordered quantity.
This SQL finds all the needed locations. You can implement different stock picking algorithms by changing the ORDER BY for the running total. Experiment with different columns in the ORDER BY (marked with a TODO comment) to see what effect this has on the locations chosen:
select s.*
, least(s.loc_qty, s.ord_qty - s.sum_prv_qty) pick_qty
from (
select o.item
, o.qty ord_qty
, i.loc
, i.purch
, i.qty loc_qty
, nvl(sum(i.qty) over (
partition by i.item
order by i.purch, i.loc /* TODO: try other columns, e.g. i.qty */
rows between unbounded preceding and 1 preceding
),0) sum_prv_qty
from orderline o
join inventory i
on i.item = o.item
where o.ordno = 42
) s
where s.sum_prv_qty < s.ord_qty
order by s.item, s.purch, s.loc;
Whichever algorithm you use to select stock, it may give the stock picker a poor route around the warehouse. It can send them back down an aisle they've already walked, when it would be better to continue to the top of that aisle and come back down the next one.
One way to avoid this is to number the aisles needed, then walk up the odd-numbered aisles and back down the even ones. This means locations in the same aisle need the same aisle number. We want to assign these numbers!
Oracle Database has three row numbering functions: ROW_NUMBER, RANK, and DENSE_RANK. This query compares them:
with inv_locations as (
select s.loc
, to_number(substr(s.loc,1,1)) warehouse
, substr(s.loc,3,1) aisle
, to_number(substr(s.loc,5,2)) position
from inventory s
)
select loc
, warehouse
, aisle
, row_number () over (
order by warehouse, aisle
) rn
, rank () over (
order by warehouse, aisle
) rk
, dense_rank () over (
order by warehouse, aisle
) dr
from inv_locations;
To improve the routing algorithm, we want to number the locations so we can alternate the route up and down the aisles by sorting. Locations in the same aisle must get the same number, and there must be no gaps in the numbering, so DENSE_RANK is the correct function to use here. This ranks by warehouse number, then aisle:
, dense_rank() over (
order by to_number(substr(s.loc,1,1)) -- warehouse
, substr(s.loc,3,1) -- aisle
) aisle_no
To alternate ascending and descending sorts, take this rank modulo two. Return the position when it's one and negate the position when it's zero:
, case when mod(s2.aisle_no,2) = 1 then s2.position else -s2.position end;
The complete query for this is:
select s2.warehouse, s2.aisle, s2.aisle_no, s2.position
, s2.loc, s2.item, s2.pick_qty
from (
select to_number(substr(s.loc,1,1)) warehouse
, substr(s.loc,3,1) aisle
, dense_rank() over (
order by to_number(substr(s.loc,1,1)) -- warehouse
, substr(s.loc,3,1) -- aisle
) aisle_no
, to_number(substr(s.loc,5,2)) position
, s.loc, s.item
, least(s.loc_qty, s.ord_qty - s.sum_prv_qty) pick_qty
from (
select o.item, o.qty ord_qty, i.loc, i.purch, i.qty loc_qty
, nvl(sum(i.qty) over (
partition by i.item
order by i.qty, i.loc
rows between unbounded preceding and 1 preceding
),0) sum_prv_qty
from orderline o
join inventory i
on i.item = o.item
where o.ordno = 42
) s
where s.sum_prv_qty < s.ord_qty
) s2
order by s2.warehouse
, s2.aisle_no
, case
when mod(s2.aisle_no,2) = 1 then s2.position
else -s2.position
end;
For the third problem we're searching for consecutive dates in this running log:
select * from running_log;
The goal is to split these rows into groups of consecutive dates. For each group, return the start date and number of days in it.
There's a trick you can use to do this: number the rows in date order with ROW_NUMBER, then subtract that row number from the date.
After applying this method, consecutive dates all get the same value:
with grps as (
select run_date
, row_number () over ( order by run_date ) rn
, run_date - row_number () over ( order by run_date ) grp
from running_log
)
select *
from grps
order by run_date;
You can then summarise the data by grouping on this GRP expression, using MIN, MAX, and COUNT to find the start date, end date, and number of rows in each group:
with grps as (
select run_date
, row_number () over ( order by run_date ) rn
, run_date - row_number () over ( order by run_date ) grp
from running_log
)
select min ( run_date ) first_run, count(*) runs
from grps
group by grp
order by first_run;
This technique is referred to as the Tabibitosan method.
Added in Oracle Database 12c, the row pattern matching clause, MATCH_RECOGNIZE, offers another way to solve this problem.
To do this, you define pattern variables: criteria for rows to meet. You then build a regular expression using these variables.
To find consecutive dates, you need to look for rows where the current date equals the previous date plus one. This pattern variable does this:
consecutive as run_date = prev ( run_date ) + 1
To search for a series of consecutive rows, use this pattern:
pattern ( init consecutive* )
This matches one instance of INIT followed by any number of CONSECUTIVE.
But what's this INIT variable? It has no definition!
Undefined variables are "always true", so they match any row. This lets the pattern match the first row of each group of consecutive dates, for which CONSECUTIVE is false: either PREV ( RUN_DATE ) is null (the very first row) or the previous date is more than one day earlier.
This query returns the first date and number of rows in each group:
select *
from running_log
match_recognize (
order by run_date
measures
first ( run_date ) as start_date,
count (*) as days
pattern ( init consecutive* )
define
consecutive as run_date = prev ( run_date ) + 1
);
By default MATCH_RECOGNIZE returns one row per match. To make it easier to see what's going on, this query adds the clause:
all rows per match
This returns all the matched rows. In the MEASURES clause it also adds the CLASSIFIER function. This returns the name of the pattern variable this row matches:
select *
from running_log
match_recognize (
order by run_date
measures
classifier () as var,
first ( run_date ) as start_date,
count (*) as days
all rows per match
pattern ( init consecutive* )
define
consecutive as run_date = prev ( run_date ) + 1
);
To finish, we'll build an organization tree using the classic EMP table:
select empno
, lpad(' ', (level-1)*2) || ename as ename
from emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
We want to augment this by adding the total number of reports each person has, i.e. counting the number of nodes in the tree below each employee.
You can do this with the following query:
select empno
, lpad(' ', (level-1)*2) || ename as ename
, (
select count(*)
from emp sub
start with sub.mgr = emp.empno
connect by sub.mgr = prior sub.empno
) subs
from emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
The subquery calculates the hierarchy below every row in the table, so it queries EMP once per row. This leads to a huge amount of extra work. You can see this by getting its execution plan:
select /*+ gather_plan_statistics */empno
, lpad(' ', (level-1)*2) || ename as ename
, (
select count(*)
from emp sub
start with sub.mgr = emp.empno
connect by sub.mgr = prior sub.empno
) subs
from emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno;
select *
from table(dbms_xplan.display_cursor(:LIVESQL_LAST_SQL_ID, format => 'ROWSTATS LAST'));
Note that the FULL TABLE SCAN at line three happens 14 times, reading a total of 196 rows. As you add more rows to EMP, this query will scale terribly.
You can overcome the performance issues of the previous query with a different algorithm. It relies on the fact that in a depth-first search, all the descendants of a node come immediately after it and are at a greater depth (a higher LEVEL). The next row at the same depth or shallower is not a descendant.
You can implement this in MATCH_RECOGNIZE with these clauses:
pattern ( strt higher* )
define higher as higher.lvl > strt.lvl
Like searching for consecutive rows, this starts with an always-true variable, then looks for zero or more rows at a greater depth.
A key difference is this clause:
after match skip to next row
After matching the pattern starting at the first row, this instructs the database to repeat the process starting at the second row in the data set, then the third, the fourth, and so on.
This contrasts with the default behaviour of MATCH_RECOGNIZE: after completing a match, continue the search from after the last matched row. Because every other row is a descendant of the root, the default would consume all the rows in the first match and have nothing left to match, so it would only return the count of subordinates for KING!
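You can check this with the following sketch. It's the same pattern as the full solution below, but with AFTER MATCH SKIP TO NEXT ROW removed so the default applies; it returns a single row counting KING's 13 subordinates:
with hierarchy as (
select lvl, empno, ename, rownum as rn
from (
select level as lvl, empno, ename
from emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno
)
)
select empno, ename, subs
from hierarchy
match_recognize (
order by rn
measures
strt.empno as empno
, strt.ename as ename
, count(higher.lvl) as subs
one row per match
-- default: after match skip past last row
pattern ( strt higher* )
define
higher as higher.lvl > strt.lvl
);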
All together this gives:
with hierarchy as (
select lvl, empno, ename, rownum as rn
from (
select level as lvl, empno, ename
from emp
start with mgr is null
connect by mgr = prior empno
order siblings by empno
)
)
select /*+ gather_plan_statistics */empno
, lpad(' ', (lvl-1)*2) || ename as ename
, subs
from hierarchy
match_recognize (
order by rn
measures
strt.rn as rn
, strt.lvl as lvl
, strt.empno as empno
, strt.ename as ename
, count(higher.lvl) as subs
one row per match
after match skip to next row
pattern ( strt higher* )
define
higher as higher.lvl > strt.lvl
)
order by rn;
select *
from table(dbms_xplan.display_cursor(:LIVESQL_LAST_SQL_ID, format => 'ROWSTATS LAST'));
Notice that the query now reads EMP only once, 14 rows in total. This scales significantly better than using a scalar subquery!