Split into quartiles with NTILE
with rws as (
select dbms_random.value ( 0, 100 ) x
from dual connect by level <= 1000
), grps as (
select x,
ntile ( 4 ) over ( order by x ) grp
from rws
)
select grp, count (*) row#,
round ( min ( x ), 1 ) lower_bound,
round ( max ( x ), 1 ) upper_bound
from grps
group by grp order by grp
GRP | ROW# | LOWER_BOUND | UPPER_BOUND | 1 | 250 | .3 | 24.2 | 2 | 250 | 24.3 | 50.1 | 3 | 250 | 50.1 | 75.8 | 4 | 250 | 75.9 | 99.9 |
---|
Split into quartiles with NTILE - normal distribution
with rws as (
select ( 10 * dbms_random.normal ) + 50 x
from dual connect by level <= 1000
), grps as (
select x,
ntile ( 4 ) over ( order by x ) grp
from rws
)
select grp, count(*),
round ( min ( x ), 1 ),
round ( max ( x ), 1 )
from grps
group by grp order by grp
GRP | COUNT(*) | ROUND(MIN(X),1) | ROUND(MAX(X),1) | 1 | 250 | 23.2 | 43.4 | 2 | 250 | 43.5 | 50.2 | 3 | 250 | 50.2 | 56.7 | 4 | 250 | 56.8 | 85 |
---|
Split into four bins with WIDTH_BUCKET
with rws as (
select dbms_random.value ( 0, 100 ) x
from dual connect by level <= 1000
), grps as (
select x,
-- intervals [0, 25), [25, 50), [50, 75), [75, 100)
width_bucket ( x, 0, 100, 4 ) bucket
from rws
)
select bucket, count(*),
round ( min ( x ), 1 ),
round ( max ( x ), 1 )
from grps
group by bucket order by bucket
BUCKET | COUNT(*) | ROUND(MIN(X),1) | ROUND(MAX(X),1) | 1 | 249 | 0 | 24.9 | 2 | 251 | 25 | 50 | 3 | 234 | 50.5 | 74.8 | 4 | 266 | 75 | 100 |
---|
Split into four bins with WIDTH_BUCKET - normal distribution
with rws as (
select ( 10 * dbms_random.normal ) + 50 x
from dual connect by level <= 1000
), grps as (
select x,
width_bucket ( x, 0, 100, 4 ) bucket
from rws
)
select bucket, count(*),
round ( min ( x ), 1 ),
round ( max ( x ), 1 )
from grps
group by bucket order by bucket
BUCKET | COUNT(*) | ROUND(MIN(X),1) | ROUND(MAX(X),1) | 1 | 3 | 9.8 | 21.5 | 2 | 490 | 25.6 | 50 | 3 | 502 | 50 | 74.4 | 4 | 5 | 75.3 | 86 |
---|
Split into four bins with WIDTH_BUCKET with dynamic bounds
with rws as (
select ( 10 * dbms_random.normal ) + 50 x
from dual connect by level <= 1000
), grps as (
select x, width_bucket (
x,
min ( x ) over (),
max ( x ) over () + 1,
4
) bucket
from rws
)
select bucket, count(*),
round ( min ( x ), 1 ),
round ( max ( x ), 1 ),
round ( max ( x ) - min ( x ), 1 ) bucket_range
from grps
group by bucket order by bucket
BUCKET | COUNT(*) | ROUND(MIN(X),1) | ROUND(MAX(X),1) | BUCKET_RANGE | 1 | 79 | 21 | 36.5 | 15.6 | 2 | 504 | 36.8 | 52.2 | 15.4 | 3 | 397 | 52.2 | 67.7 | 15.5 | 4 | 20 | 68.2 | 82.4 | 14.3 |
---|
Split into four bins with WIDTH_BUCKET - character data
with rws as (
select initcap ( dbms_random.string ( 'u', 10 ) ) x
from dual connect by level <= 1000
), ranks as (
select x, substr ( x, 1, 1 ) letter,
dense_rank () over ( order by substr ( x, 1, 1 ) ) dr
from rws
), grps as (
select x, letter, width_bucket (
dr, 1, 27, 4
) grp
from ranks
)
select min ( letter ), max ( letter ),
count ( distinct letter ) value#,
count (*) row#,
min ( x ), max ( x )
from grps
group by grp order by grp
MIN(LETTER) | MAX(LETTER) | VALUE# | ROW# | MIN(X) | MAX(X) | A | G | 7 | 264 | Abbkaisqob | Gzthagisaj | H | M | 6 | 249 | Haotkacnux | Mzqrivbrqz | N | T | 7 | 276 | Nauugxwzqt | Tzjwgcpztq | U | Z | 6 | 211 | Udnwtrnsgn | Zxxyobitlq |
---|
Split into four bins with WIDTH_BUCKET - complete strings
with rws as (
select dense_rank () over ( order by job_id ) dr, e.*
from hr.employees e
), grps as (
select r.*,
width_bucket ( dr, 1, max ( dr ) over () + 1, 4 ) bucket
from rws r
)
select bucket,
count ( distinct job_id ) job_count,
count(*) row_count,
listagg ( distinct job_id, ',' )
within group ( order by job_id ) jobs
from grps
group by bucket
BUCKET | JOB_COUNT | ROW_COUNT | JOBS | 1 | 5 | 6 | AC_ACCOUNT,AC_MGR,AD_ASST,AD_PRES,AD_VP | 2 | 5 | 13 | FI_ACCOUNT,FI_MGR,HR_REP,IT_PROG,MK_MAN | 3 | 5 | 13 | MK_REP,PR_REP,PU_CLERK,PU_MAN,SA_MAN | 4 | 4 | 75 | SA_REP,SH_CLERK,ST_CLERK,ST_MAN |
---|
Split into four bins with WIDTH_BUCKET - missing letters
with rws as (
select first_name x
from hr.employees
), ranks as (
select x, substr ( x, 1, 1 ) letter,
dense_rank () over ( order by substr ( x, 1, 1 ) ) dr
from rws
), grps as (
select x, letter, width_bucket (
dr, 1, 27, 4
) grp
from ranks
)
select min ( letter ), max ( letter ),
count ( distinct letter ) value#,
count (*) row#,
min ( x ), max ( x )
from grps
group by grp order by grp
MIN(LETTER) | MAX(LETTER) | VALUE# | ROW# | MIN(X) | MAX(X) | A | H | 7 | 34 | Adam | Hermann | I | N | 6 | 41 | Irene | Neena | O | W | 7 | 32 | Oliver | Winston |
---|
Split letters into four groups with WIDTH_BUCKET
with rws as (
select first_name x
from hr.employees
), ranks as (
select x, substr ( x, 1, 1 ) letter,
ascii ( substr ( x, 1, 1 ) ) - 64 dr
from rws
), grps as (
select x, letter, width_bucket (
dr, 1, 27, 4
) grp
from ranks
)
select min ( letter ), max ( letter ),
count ( distinct letter ) value#,
count (*) row#,
min ( x ), max ( x )
from grps
group by grp order by grp
MIN(LETTER) | MAX(LETTER) | VALUE# | ROW# | MIN(X) | MAX(X) | A | G | 6 | 31 | Adam | Guy | H | M | 6 | 40 | Harrison | Mozhe | N | T | 6 | 31 | Nancy | Trenna | V | W | 2 | 5 | Valli | Winston |
---|
Split into groups with five rows each
with rws as (
select dbms_random.value ( 0, 100 ) x, level id, 5 n
from dual
connect by level <= dbms_random.value ( 100, 1000 )
), grps as (
select x, n,
row_number () over ( order by id ) rn
from rws
)
select ceil ( rn / n ), count(*),
round ( min ( x ) ), round ( max ( x ) )
from grps
group by ceil ( rn / n ) order by ceil ( rn / n )
CEIL(RN/N) | COUNT(*) | ROUND(MIN(X)) | ROUND(MAX(X)) | 1 | 5 | 13 | 75 | 2 | 5 | 29 | 88 | 3 | 5 | 5 | 85 | 4 | 5 | 25 | 84 | 5 | 5 | 20 | 93 | 6 | 5 | 7 | 87 | 7 | 5 | 4 | 91 | 8 | 5 | 0 | 58 | 9 | 5 | 21 | 93 | 10 | 5 | 34 | 100 | 11 | 5 | 4 | 96 | 12 | 5 | 1 | 92 | 13 | 5 | 16 | 62 | 14 | 5 | 1 | 69 | 15 | 5 | 9 | 86 | 16 | 5 | 18 | 81 | 17 | 5 | 32 | 86 | 18 | 5 | 30 | 83 | 19 | 5 | 47 | 90 | 20 | 5 | 9 | 82 | 21 | 4 | 5 | 92 |
---|
Split rows into fixed intervals
with rws as (
select dbms_random.value ( 0, 100 ) x,
5 interval_size
from dual connect by level <= 1000
), grps as (
select floor ( x / interval_size ) * interval_size x,
interval_size
from rws
)
select x, x + interval_size,
count(*)
from grps
group by x, interval_size
order by x
X | X+INTERVAL_SIZE | COUNT(*) | 0 | 5 | 60 | 5 | 10 | 56 | 10 | 15 | 49 | 15 | 20 | 42 | 20 | 25 | 58 | 25 | 30 | 47 | 30 | 35 | 64 | 35 | 40 | 48 | 40 | 45 | 44 | 45 | 50 | 50 | 50 | 55 | 42 | 55 | 60 | 58 | 60 | 65 | 49 | 65 | 70 | 59 | 70 | 75 | 48 | 75 | 80 | 50 | 80 | 85 | 49 | 85 | 90 | 34 | 90 | 95 | 48 | 95 | 100 | 45 |
---|
alter session set nls_date_format = ' DD-MON HH24:MI '
Statement processed.
alter session set nls_timestamp_format = ' DD-MON HH24:MI '
Statement processed.
Split into five minute intervals
with rws as (
select timestamp'2021-09-01 00:00:00' +
numtodsinterval ( dbms_random.value ( 0, 120 ), 'minute' ) x,
5 interval_mins
from dual connect by level <= 1000
), grps as (
select x, interval_mins, trunc ( x ) dt,
floor (
-- convert to minutes past midnight
( extract ( hour from x ) + 1 ) * extract ( minute from x )
/ interval_mins
) * interval_mins mins
from rws
)
select dt + mins/1440 st, dt + ( mins + interval_mins ) / 1440 en,
count (*)
from grps
group by dt + mins/1440, dt + ( mins + interval_mins ) / 1440 order by dt + mins/1440
ST | EN | COUNT(*) | 01-SEP 00:00 | 01-SEP 00:05 | 71 | 01-SEP 00:05 | 01-SEP 00:10 | 67 | 01-SEP 00:10 | 01-SEP 00:15 | 55 | 01-SEP 00:15 | 01-SEP 00:20 | 48 | 01-SEP 00:20 | 01-SEP 00:25 | 65 | 01-SEP 00:25 | 01-SEP 00:30 | 58 | 01-SEP 00:30 | 01-SEP 00:35 | 79 | 01-SEP 00:35 | 01-SEP 00:40 | 51 | 01-SEP 00:40 | 01-SEP 00:45 | 64 | 01-SEP 00:45 | 01-SEP 00:50 | 67 | 01-SEP 00:50 | 01-SEP 00:55 | 63 | 01-SEP 00:55 | 01-SEP 01:00 | 60 | 01-SEP 01:00 | 01-SEP 01:05 | 24 | 01-SEP 01:05 | 01-SEP 01:10 | 20 | 01-SEP 01:10 | 01-SEP 01:15 | 28 | 01-SEP 01:15 | 01-SEP 01:20 | 17 | 01-SEP 01:20 | 01-SEP 01:25 | 30 | 01-SEP 01:25 | 01-SEP 01:30 | 14 | 01-SEP 01:30 | 01-SEP 01:35 | 22 | 01-SEP 01:35 | 01-SEP 01:40 | 15 | 01-SEP 01:40 | 01-SEP 01:45 | 29 | 01-SEP 01:45 | 01-SEP 01:50 | 21 | 01-SEP 01:50 | 01-SEP 01:55 | 17 | 01-SEP 01:55 | 01-SEP 02:00 | 15 |
---|
Split into five minute intervals - sparse data
with rws as (
select timestamp'2021-09-01 00:00:00' +
numtodsinterval ( dbms_random.value ( 0, 120 ), 'minute' ) x,
5 interval_mins
from dual connect by level <= 10
), grps as (
select x, interval_mins, trunc ( x ) dt,
floor (
-- convert to minutes past midnight
( extract ( hour from x ) + 1 ) * extract ( minute from x )
/ interval_mins
) * interval_mins mins
from rws
)
select dt + mins/1440 st, dt + ( mins + interval_mins ) / 1440 en,
count (*)
from grps
group by dt + mins/1440, dt + ( mins + interval_mins ) / 1440 order by dt + mins/1440
ST | EN | COUNT(*) | 01-SEP 00:10 | 01-SEP 00:15 | 2 | 01-SEP 00:15 | 01-SEP 00:20 | 1 | 01-SEP 00:35 | 01-SEP 00:40 | 2 | 01-SEP 00:40 | 01-SEP 00:45 | 2 | 01-SEP 00:45 | 01-SEP 00:50 | 1 | 01-SEP 00:50 | 01-SEP 00:55 | 1 | 01-SEP 00:55 | 01-SEP 01:00 | 1 |
---|
Split into five minute intervals - sparse data
with intervals as (
select 5 interval_size,
timestamp'2021-09-01 00:00:00' lower_bound,
120 total_minutes
from dual
), grps as (
select lower_bound + numtodsinterval ( ( level - 1 ) * interval_size, 'minute' ) mn_x,
lower_bound + numtodsinterval ( level * interval_size, 'minute' ) mx_x,
interval_size
from intervals
connect by level <= (
-- unknown data size; create a range for all possible values
select max ( ceil ( total_minutes / interval_size ) )
from intervals
)
), rws as (
select lower_bound +
numtodsinterval ( dbms_random.value ( 0, 120 ), 'minute' ) x
from intervals connect by level <= 25
)
select mn_x, mx_x,
count ( x ) rws_per_group
from grps
left join rws
on x >= mn_x
and x < mx_x
group by mn_x, mx_x order by mn_x
MN_X | MX_X | RWS_PER_GROUP | 01-SEP 00:00 | 01-SEP 00:05 | 1 | 01-SEP 00:05 | 01-SEP 00:10 | 1 | 01-SEP 00:10 | 01-SEP 00:15 | 2 | 01-SEP 00:15 | 01-SEP 00:20 | 1 | 01-SEP 00:20 | 01-SEP 00:25 | 0 | 01-SEP 00:25 | 01-SEP 00:30 | 0 | 01-SEP 00:30 | 01-SEP 00:35 | 0 | 01-SEP 00:35 | 01-SEP 00:40 | 3 | 01-SEP 00:40 | 01-SEP 00:45 | 1 | 01-SEP 00:45 | 01-SEP 00:50 | 1 | 01-SEP 00:50 | 01-SEP 00:55 | 3 | 01-SEP 00:55 | 01-SEP 01:00 | 0 | 01-SEP 01:00 | 01-SEP 01:05 | 0 | 01-SEP 01:05 | 01-SEP 01:10 | 0 | 01-SEP 01:10 | 01-SEP 01:15 | 0 | 01-SEP 01:15 | 01-SEP 01:20 | 1 | 01-SEP 01:20 | 01-SEP 01:25 | 1 | 01-SEP 01:25 | 01-SEP 01:30 | 3 | 01-SEP 01:30 | 01-SEP 01:35 | 0 | 01-SEP 01:35 | 01-SEP 01:40 | 0 | 01-SEP 01:40 | 01-SEP 01:45 | 1 | 01-SEP 01:45 | 01-SEP 01:50 | 0 | 01-SEP 01:50 | 01-SEP 01:55 | 1 | 01-SEP 01:55 | 01-SEP 02:00 | 2 |
---|
Split into ranges of five - sparse data
with intervals as (
select 5 interval_size, 0 lower_bound, 100 upper_bound
from dual
), grps as (
select ( level - 1 ) * interval_size mn_x,
level * interval_size mx_x,
interval_size
from intervals
connect by level <= (
-- unknown data size; create a range for all possible values
select max ( ceil ( upper_bound / interval_size ) ) from intervals
)
), rws as (
select dbms_random.value ( lower_bound, upper_bound ) x
from intervals connect by level <= 25
)
select mn_x, mx_x,
count ( x ) rws_per_group
from grps
left join rws
on x >= mn_x
and x < mx_x
group by mn_x, mx_x order by mn_x
MN_X | MX_X | RWS_PER_GROUP | 0 | 5 | 2 | 5 | 10 | 2 | 10 | 15 | 0 | 15 | 20 | 0 | 20 | 25 | 0 | 25 | 30 | 1 | 30 | 35 | 1 | 35 | 40 | 5 | 40 | 45 | 0 | 45 | 50 | 1 | 50 | 55 | 2 | 55 | 60 | 1 | 60 | 65 | 1 | 65 | 70 | 0 | 70 | 75 | 0 | 75 | 80 | 3 | 80 | 85 | 2 | 85 | 90 | 4 | 90 | 95 | 1 | 95 | 100 | 1 |
---|