CREATE TABLE sales_fact AS
SELECT
month_id,
category_id,
state_province_id,
units,
sales
FROM av.sales_fact;
ALTER TABLE sales_fact ADD percent_margin number;
UPDATE sales_fact SET percent_margin = .21 WHERE category_id = -530;
UPDATE sales_fact SET percent_margin = .29 WHERE category_id = -534;
UPDATE sales_fact SET percent_margin = .31 WHERE category_id = -533;
UPDATE sales_fact SET percent_margin = .18 WHERE category_id = -535;
UPDATE sales_fact SET percent_margin = .07 WHERE category_id = -529;
UPDATE sales_fact SET percent_margin = .36 WHERE category_id = -532;
UPDATE sales_fact SET percent_margin = .15 WHERE category_id = -531;
UPDATE sales_fact SET percent_margin = .12 WHERE category_id = -536;
CREATE OR REPLACE ATTRIBUTE DIMENSION time_attr_dim
USING av.time_dim
ATTRIBUTES
(year_id
CLASSIFICATION caption VALUE 'YEAR_ID'
CLASSIFICATION description VALUE 'YEAR ID',
year_name
CLASSIFICATION caption VALUE 'YEAR_NAME'
CLASSIFICATION description VALUE 'Year',
year_end_date
CLASSIFICATION caption VALUE 'YEAR_END_DATE'
CLASSIFICATION description VALUE 'Year End Date',
quarter_id
CLASSIFICATION caption VALUE 'QUARTER_ID'
CLASSIFICATION description VALUE 'QUARTER ID',
quarter_name
CLASSIFICATION caption VALUE 'QUARTER_NAME'
CLASSIFICATION description VALUE 'Quarter',
quarter_end_date
CLASSIFICATION caption VALUE 'QUARTER_END_DATE'
CLASSIFICATION description VALUE 'Quarter End Date',
month_id
CLASSIFICATION caption VALUE 'MONTH_ID'
CLASSIFICATION description VALUE 'MONTH ID',
month_name
CLASSIFICATION caption VALUE 'MONTH_NAME'
CLASSIFICATION description VALUE 'Month',
month_long_name
CLASSIFICATION caption VALUE 'MONTH_LONG_NAME'
CLASSIFICATION description VALUE 'Month Long Name',
month_end_date
CLASSIFICATION caption VALUE 'MONTH_END_DATE'
CLASSIFICATION description VALUE 'Month End Date')
LEVEL month
CLASSIFICATION caption VALUE 'MONTH'
CLASSIFICATION description VALUE 'Month'
KEY month_id
MEMBER NAME month_name
MEMBER CAPTION month_name
MEMBER DESCRIPTION month_long_name
ORDER BY month_end_date
DETERMINES (month_end_date,
quarter_id)
LEVEL quarter
CLASSIFICATION caption VALUE 'QUARTER'
CLASSIFICATION description VALUE 'Quarter'
KEY quarter_id
MEMBER NAME quarter_name
MEMBER CAPTION quarter_name
MEMBER DESCRIPTION quarter_name
ORDER BY quarter_end_date
DETERMINES (quarter_end_date,
year_id)
LEVEL year
CLASSIFICATION caption VALUE 'YEAR'
CLASSIFICATION description VALUE 'Year'
KEY year_id
MEMBER NAME year_name
MEMBER CAPTION year_name
MEMBER DESCRIPTION year_name
ORDER BY year_end_date
DETERMINES (year_end_date)
ALL MEMBER NAME 'ALL TIMES';
CREATE OR REPLACE HIERARCHY time_hier
CLASSIFICATION caption VALUE 'CALENDAR'
CLASSIFICATION description VALUE 'CALENDAR'
USING time_attr_dim
(month CHILD OF
quarter CHILD OF
year);
CREATE OR REPLACE ATTRIBUTE DIMENSION product_attr_dim
USING av.product_dim
ATTRIBUTES
(department_id
CLASSIFICATION caption VALUE 'DEPARTMENT_ID'
CLASSIFICATION description VALUE 'DEPARTMENT ID',
department_name
CLASSIFICATION caption VALUE 'DEPARTMENT_NAME'
CLASSIFICATION description VALUE 'Department',
category_id
CLASSIFICATION caption VALUE 'CATEGORY_ID'
CLASSIFICATION description VALUE 'CATEGORY ID',
category_name
CLASSIFICATION caption VALUE 'CATEGORY_NAME'
CLASSIFICATION description VALUE 'Category')
LEVEL DEPARTMENT
CLASSIFICATION caption VALUE 'DEPARTMENT'
CLASSIFICATION description VALUE 'Department'
KEY department_id
MEMBER NAME department_name
MEMBER CAPTION department_name
ORDER BY department_name
LEVEL CATEGORY
CLASSIFICATION caption VALUE 'CATEGORY'
CLASSIFICATION description VALUE 'Category'
KEY category_id
MEMBER NAME category_name
MEMBER CAPTION category_name
ORDER BY category_name
DETERMINES(department_id)
ALL MEMBER NAME 'ALL PRODUCTS';
CREATE OR REPLACE HIERARCHY product_hier
CLASSIFICATION caption VALUE 'PRODUCT'
CLASSIFICATION description VALUE 'Product'
USING product_attr_dim
(CATEGORY
CHILD OF department);
CREATE OR REPLACE ATTRIBUTE DIMENSION geography_attr_dim
USING av.geography_dim
ATTRIBUTES
(region_id
CLASSIFICATION caption VALUE 'REGION_ID'
CLASSIFICATION description VALUE 'REGION ID',
region_name
CLASSIFICATION caption VALUE 'REGION_NAME'
CLASSIFICATION description VALUE 'Region',
country_id
CLASSIFICATION caption VALUE 'COUNTRY_ID'
CLASSIFICATION description VALUE 'COUNTRY ID',
country_name
CLASSIFICATION caption VALUE 'COUNTRY_NAME'
CLASSIFICATION description VALUE 'Country',
state_province_id
CLASSIFICATION caption VALUE 'STATE_PROVINCE_ID'
CLASSIFICATION description VALUE 'STATE-PROVINCE ID',
state_province_name
CLASSIFICATION caption VALUE 'STATE_PROVINCE_NAME'
CLASSIFICATION description VALUE 'State-Province')
LEVEL REGION
CLASSIFICATION caption VALUE 'REGION'
CLASSIFICATION description VALUE 'Region'
KEY region_id
MEMBER NAME region_name
MEMBER CAPTION region_name
ORDER BY region_name
LEVEL COUNTRY
CLASSIFICATION caption VALUE 'COUNTRY'
CLASSIFICATION description VALUE 'Country'
KEY country_id
MEMBER NAME country_name
MEMBER CAPTION country_name
ORDER BY country_name
DETERMINES(region_id)
LEVEL STATE_PROVINCE
CLASSIFICATION caption VALUE 'STATE_PROVINCE'
CLASSIFICATION description VALUE 'State-Province'
KEY state_province_id
MEMBER NAME state_province_name
MEMBER CAPTION state_province_name
ORDER BY state_province_name
DETERMINES(country_id)
ALL MEMBER NAME 'ALL CUSTOMERS';
CREATE OR REPLACE HIERARCHY geography_hier
CLASSIFICATION caption VALUE 'GEOGRAPHY'
CLASSIFICATION description VALUE 'Geography'
USING geography_attr_dim
(state_province
CHILD OF country
CHILD OF region);
A simple average calculates the center of a data set. For example, the simple average of 1,2,3,4,5 is 2.5. A simple average is meaningful when each data point counts equally in the average. In many cases each data point does not county equally, for example when certain data points have a higher population or a greater number of units sold. In that case, a weighted average is more appropriate.
Consider a case where there are a certain number of students in a school between the ages of 5 and 10.
A simple average of ages would be calculated as:
(5 + 6 + 7+ 8 + 9 + 10) / 6 = 7.5
Because there are more older students than younger students, a simple average might be misleading. An average of age weighted by the number of students might be more appropriate. The weighted average would be calculated as the sum of the products of age and number of students, divided by the number of students. In this example:
((5 * 100) + (6 * 200) + (7 * 300) + (8 * 500) + (9 * 800) + (10 * 1200)) / (100 + 200 + 300 + 500 + 800 + 1200)
(500 + 1200 + 2100 + 4000 + 7200 + 12000 ) / 3100
27000 / 3100
Weighted Average = 8.7 years.
This tutorial uses data in the AV schema, which contains a star schema with sales data varying by time, product and geography. A copy of the fact table was created in the current schema by the setup script of this tutorial. This copy of the SALES_FACT table includes a new column PERCENT_MARGIN, which is the profit margin on for that particular sale.
SELECT * FROM sales_fact WHERE rownum <= 10;
The setup script also created three attribute dimensions and three hierarchies.
SELECT * FROM user_hier_levels ORDER BY hier_name, order_num;
By looking at a small slice of the fact table it can be seen that a simple average of PERCENT_MARGIN is not very meaningful. Category -532 has a high margin at 36% but relatively few sales at 675 units. Category -529 has a low margin at 7% and the most sales at 7,336 units. Category -532 should have a greater weighting in the average margin calculation.
SELECT * FROM sales_fact WHERE rownum <= 50 AND month_id = 'Apr-15' AND state_province_id = 'TEXAS_US';
The following query returns simple average of PERCENT_MARGIN. This might be very misleading.
SELECT AVG(percent_margin) FROM sales_fact WHERE rownum <= 50 AND month_id = 'Apr-15' AND state_province_id = 'TEXAS_US';
It is probably more meaningful to weight the average of PERCENT_MARGIN using the number of units sold. The weighted average is calculated as the sum of (UNITS * PERCENT_MARGIN) divided by the sum of UNITS.
SELECT
SUM(units * percent_margin) / SUM(units)
FROM
sales_fact
WHERE
rownum <= 50
AND month_id = 'Apr-15'
AND state_province_id = 'TEXAS_US';
Using this formula, categories that have a higher number of units sold are given a higher weight than categories with a lower number of units sold.
The following query shows that there are varying numbers of units sold for each product category in South America.>
SELECT
t.year_name,
p.category_name,
g.region_name,
SUM(f.units) AS units
FROM
av.time_dim t,
av.product_dim p,
av.geography_dim g,
sales_fact f
WHERE
t.month_id = f.month_id
AND p.category_id = f.category_id
AND g.state_province_id = f.state_province_id
AND t.year_name = 'CY2015'
AND g.region_name = 'South America'
GROUP BY
t.year_name,
p.category_name,
g.region_name;
The next query calculates the simple and weighted average of percent margin. Note that in this query, PERCENT_MARGIN * UNITS is first calculated for each row and then summed.
SELECT
t.year_name,
g.region_name,
SUM(f.units) AS units,
SUM(f.sales) AS sales,
ROUND(AVG(f.percent_margin),3) AS simple_average_percent_margin,
ROUND(SUM(f.percent_margin * f.units) / SUM(f.units),3) AS units_weighted_average_percent_margin
FROM
av.time_dim t,
av.geography_dim g,
sales_fact f
WHERE
t.month_id = f.month_id
AND g.state_province_id = f.state_province_id
AND t.year_name = 'CY2015'
AND g.region_name = 'South America'
GROUP BY
t.year_name,
g.region_name;
To calculate the weighted average in the analytic view, the same calculation steps are required as with the SELECT from tables.
To do this:
Add PERCENT_MARGIN_WEIGHT to the SALES_FACT table:
ALTER TABLE sales_fact ADD percent_margin_weight AS (units * percent_margin);
Query the fact table to view the new column:
SELECT *
FROM sales_fact
WHERE
rownum <= 50
AND month_id = 'Apr-15'
AND state_province_id = 'TEXAS_US';
Create the analytic view (note the weighting factor and weighted average measures):
CREATE OR REPLACE ANALYTIC VIEW sales_av
CLASSIFICATION caption VALUE 'Sales AV'
CLASSIFICATION description VALUE 'Sales Analytic View'
CLASSIFICATION created_by VALUE 'George Jones'
USING sales_fact
DIMENSION BY
(time_attr_dim
KEY month_id REFERENCES month_id
HIERARCHIES (
time_hier DEFAULT),
product_attr_dim
KEY category_id REFERENCES category_id
HIERARCHIES (
product_hier DEFAULT),
geography_attr_dim
KEY state_province_id
REFERENCES state_province_id
HIERARCHIES (
geography_hier DEFAULT)
)
MEASURES
(sales FACT sales AGGREGATE BY SUM,
units FACT units AGGREGATE BY SUM,
-- Simple average.
avg_percent_margin FACT percent_margin AGGREGATE BY AVG,
-- Weighting factor.
percent_margin_weight FACT percent_margin_weight AGGREGATE BY SUM,
-- Weighted average.
weighted_average_percent_margin AS (percent_margin_weight / units)
)
DEFAULT MEASURE SALES;
Query the analytic view:
SELECT
time_hier.member_name AS time,
geography_hier.member_name AS geography,
sales,
units,
ROUND(avg_percent_margin,3) AS average_price_per_unit,
ROUND(weighted_average_percent_margin,3) AS weighted_average_percent_margin
FROM sales_av
HIERARCHIES (
time_hier,
geography_hier)
WHERE
time_hier.level_name = 'YEAR'
AND geography_hier.level_name = 'REGION'
AND time_hier.member_name = 'CY2015'
AND geography_hier.member_name = 'South America'
ORDER BY
time_hier.member_name,
geography_hier.member_name;
In the SALES_FACT table PERCENT_MARGIN is dense relative to UNITS. That is, for every row where there is UNITS data there is also PERCENT_MARGIN data. Many times it will be the case that one of the measure will be NULL. These cases require addition handling.
Consider the case where a row exists with a value for UNITS and there are NULL values for PERCENT_MARGIN. In this case the sum of UNITS cannot be used at the denominator to the weighted average calculation because weight of those rows is unknown.
Run the following query and note that for every value of UNITS there is a value for PERCENT_MARGIN.
SELECT *
FROM
sales_fact
WHERE
state_province_id like '%_MX'
AND month_id = 'Dec-15'
ORDER BY
state_province_id,
category_id;
Run the following query to select UNITS, PERCENT_MARGIN_WEIGHT and WEIGHTED_AVERAGE_PERCENT_MARGIN from the tables.
SELECT
f.month_id,
g.country_id,
SUM(f.units),
SUM(f.percent_margin_weight) AS percent_margin_weight,
ROUND(SUM(f.percent_margin_weight) / SUM(f.units),3) AS weighted_average_percent_margin
FROM
av.geography_dim g,
sales_fact f
WHERE
g.state_province_id = f.state_province_id
AND g.country_id = 'MX'
AND f.month_id = 'Dec-15'
GROUP BY
f.month_id,
g.country_id;
The weighted average percent margin is 12316.52 percent_margin_weight / 95778 units = .129.
Next, set PERCENT_MARGIN = NULL for Baja California.
UPDATE sales_fact
SET percent_margin = null
WHERE
month_id = 'Dec-15'
AND state_province_id = 'BAJA_CALIFORNIA_MX';
COMMIT;
View the updated data.
SELECT *
FROM sales_fact
WHERE
state_province_id LIKE '%_MX'
AND month_id = 'Dec-15'
ORDER BY
state_province_id,
category_id;
Note that PERCENT_MARGIN_WEIGHT is also NULL.
Run the following query to select UNITS, PERCENT_MARGIN_WEIGHT and WEIGHTED_AVERAGE_PERCENT_MARGIN again.
SELECT
f.month_id,
g.country_id,
SUM(f.units),
SUM(f.percent_margin_weight) AS percent_margin_weight,
ROUND(SUM(f.percent_margin_weight) / SUM(f.units),3) AS weighted_average_percent_margin
FROM
av.geography_dim g,
sales_fact f
WHERE
g.state_province_id = f.state_province_id
AND g.country_id = 'MX'
AND f.month_id = 'Dec-15'
GROUP BY
f.month_id,
g.country_id;
Note that the WEIGHTED_AVERAGE_PERCENT_MARGIN is .118 rather than .129. This is because the sum of PERCENT_MARGIN weight has decreased while the sum of UNITS has remained the same.
Because the weight of rows with NULL values is unknown those rows should not be considered in the weighted average calculation. To account for this, create a new column in the fact table that returns NULL when PERCENT_MARGIN is NULL.
ALTER TABLE sales_fact
ADD percent_margin_units
AS (CASE
WHEN percent_margin IS NULL THEN NULL
ELSE units
END);
View the data.
SELECT * FROM sales_fact WHERE state_province_id like '%_MX' and month_id = 'Dec-15' ORDER BY state_province_id, category_id;
Run the following query to view UNITS, PERCENT_MARGIN_UNITS, PERCENT_MARGIN_WEIGHT and WEIGHTED_AVERAGE_PERCENT_MARGIN again.
SELECT
f.month_id,
g.country_id,
SUM(f.units),
SUM(f.percent_margin_units),
SUM(f.percent_margin_weight) AS percent_margin_weight,
ROUND(SUM(f.percent_margin_weight) / SUM(f.percent_margin_units),3) AS weighted_average_percent_margin
FROM
av.geography_dim g,
sales_fact f
WHERE
g.state_province_id = f.state_province_id
AND g.country_id = 'MX'
AND f.month_id = 'Dec-15'
GROUP BY
f.month_id,
g.country_id;
The rows where PERCENT_MARGIN are NULL are not longer considered in the calculation and the weighted average percent margin is .129.
To apply this the analytic view add the PERCENT_MARGIN_UNITS measure and use it in the denominator of the WEIGHTED_AVERAGE_PERCENT_MARGIN measure.
CREATE OR REPLACE ANALYTIC VIEW sales_av
CLASSIFICATION caption VALUE 'Sales AV'
CLASSIFICATION description VALUE 'Sales Analytic View'
CLASSIFICATION created_by VALUE 'George Jones'
USING sales_fact
DIMENSION BY
(time_attr_dim
KEY month_id REFERENCES month_id
HIERARCHIES (
time_hier DEFAULT),
product_attr_dim
KEY category_id REFERENCES category_id
HIERARCHIES (
product_hier DEFAULT),
geography_attr_dim
KEY state_province_id
REFERENCES state_province_id
HIERARCHIES (
geography_hier DEFAULT)
)
MEASURES
(sales FACT sales AGGREGATE BY SUM,
units FACT units AGGREGATE BY SUM,
-- Weighted Average Units.
percent_margin_units FACT percent_margin_units AGGREGATE BY SUM,
-- Simple average.
avg_percent_margin FACT percent_margin AGGREGATE BY AVG,
-- Weighting factor.
percent_margin_weight FACT percent_margin_weight AGGREGATE BY SUM,
-- Weighted average.
weighted_average_percent_margin AS (percent_margin_weight / percent_margin_units)
)
DEFAULT MEASURE SALES;
Run the following query to select UNITS, PERCENT_MARGIN_UNITS, PERCENT_MARGIN_WEIGHT and WEIGHTED_AVERAGE_PERCENT_MARGIN from the analytic view.
SELECT
time_hier.member_name AS time,
geography_hier.member_name AS geography,
units,
percent_margin_units,
percent_margin_weight,
ROUND(weighted_average_percent_margin,3) AS weighted_average_percent_margin
FROM sales_av
HIERARCHIES (
time_hier,
geography_hier)
WHERE
time_hier.level_name = 'MONTH'
AND geography_hier.level_name = 'COUNTRY'
AND time_hier.member_name = 'Dec-15'
AND geography_hier.member_name = 'Mexico'
ORDER BY
time_hier.member_name,
geography_hier.member_name;