前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Data Vault初探(十) —— 星型模型向Data Vault模型转化

Data Vault初探(十) —— 星型模型向Data Vault模型转化

作者头像
用户1148526
发布2022-05-07 11:32:12
5950
发布2022-05-07 11:32:12
举报
文章被收录于专栏:Hadoop数据仓库

源数据库模型(3NF)如下图:

星型模型如下图:

Data Vault模型如下图:

说明: 星型模型(star schema)的事实表采取了完全规范化的第三范式(3NF)模型,而维表采取了第二范式的设计模型。有时也会把维表的设计规范化,就成了所谓的雪花模型(snowflake schema)。 星型模型向Data Vault模型转化: 星型模趔的主要构成部分是维表与事实表,在转化为DataVault模型时自然涉及到维表与事实表的分别转化,使之映射为DataVault模型的Hub、Link、Satellite组件。星型模型与DataVault模型表的对应如下表所示。

转换SQL脚本如下:

代码语言:javascript
复制
use dv;

-- 装载客户中心表
insert hub_customer (customer_id,record_source) 
select distinct customer_id,'customer_dim' from customer_dim;

-- 装载客户附属表
insert into sat_customer
(
	hub_customer_id,
	load_dts,
	load_end_dts,
	record_source,
	customer_name,
	city_name,
	province_name,
	cust_post_code,
	cust_address,
	ship_post_code,
	ship_address
)
select t1.hub_customer_id,
	   t2.effective_date,
	   t2.expiry_date,
	   'hub_customer,customer_dim',
	   t2.customer_name,
	   t2.city_name,
	   t2.province_name,
	   t2.cust_post_code,
	   t2.cust_address,
	   t2.ship_post_code,
	   t2.ship_address
  from hub_customer t1,customer_dim t2 
 where t1.customer_id=t2.customer_id;
 
-- 装载产品分类中心表
insert hub_product_catagory (product_catagory_id,record_source) 
select distinct product_category_id,'product_dim' from product_dim;

-- 装载产品中心表
insert hub_product (product_id,record_source) 
select distinct product_id,'product_dim' from product_dim;

-- 装载产品分类_产品链接表
insert into link_product_catagory (hub_product_id,hub_product_catagory_id,record_source)
select distinct t1.hub_product_id,t2.hub_product_catagory_id,'hub_product,product_dim,hub_product_catagory'
  from hub_product t1,hub_product_catagory t2,product_dim t3
 where t1.product_id = t3.product_id and t2.product_catagory_id = t3.product_category_id;

-- 装载产品分类附属表
insert into sat_product_catagory 
(hub_product_catagory_id,
load_dts,
load_end_dts,
record_source,
product_catagory_name)
select 
    t1.hub_product_catagory_id,
    t2.start_date,
    t2.end_date,
    'hub_product_catagory,product_dim' record_source,
    t2.cname
from
    hub_product_catagory t1,
    (select 
        t1.cid cid,
            t1.cname cname,
            t1.start_date start_date,
            t2.end_date end_date
    from
        (select distinct
        if(@cid = t1.cid and @cname = t1.cname, @start_date, @start_date:=t1.start_date) as start_date,
            @cid:=t1.cid cid,
            @cname:=t1.cname cname
    from
        ((select 
        product_category_id cid,
            product_category_name cname,
            effective_date start_date
    from
        product_dim
    order by cid , start_date) t1, (select @cid:=0, @cname:='', @start_date:='0000-00-00') t2)) t1, (select distinct
        if(@cid = t1.cid and @cname <> t1.cname, t1.end_date, '2200-01-01') as end_date,
            @cid:=t1.cid cid,
            @cname:=t1.cname cname
    from
        ((select 
        product_category_id cid,
            product_category_name cname,
            expiry_date end_date
    from
        product_dim
    order by cid , end_date desc) t1, (select @cid:=0, @cname:='', @end_date:='0000-0000-00') t2)) t2
    where
        t1.cid = t2.cid and t1.cname = t2.cname) t2
where
    t1.product_catagory_id = t2.cid;

-- 装载产品附属表
insert into sat_product
(
hub_product_id,
load_dts,
load_end_dts,
record_source,
product_name,
unit_price
)
select t1.hub_product_id,t2.start_date,t2.end_date,'hub_product,product_dim' record_source,
t2.pname pname,t2.unit_price
from hub_product t1,
(
select 
    t1.pid pid,
    t1.pname pname,
    t1.unit_price,
    t1.start_date start_date,
    t2.end_date end_date
from
    (select distinct
        if(@pid = t1.pid and @pname = t1.pname
                and @unit_price = t1.unit_price, @start_date, @start_date:=t1.start_date) as start_date,
            @pid:=t1.pid pid,
            @pname:=t1.pname pname,
            @unit_price:=t1.unit_price unit_price
    from
        ((select 
        product_id pid,
            product_name pname,
            unit_price,
            effective_date start_date
    from
        product_dim
    order by pid , start_date) t1, (select 
        @pid:=0,
            @pname:='',
            @unit_price:=0,
            @start_date:='0000-00-00'
    ) t2)) t1,
    (select distinct
        if(@pid = t1.pid
                and (@pname <> t1.pname
                OR @unit_price <> t1.unit_price), t1.end_date, '2200-01-01') as end_date,
            @pid:=t1.pid pid,
            @pname:=t1.pname pname,
            @unit_price:=t1.unit_price unit_price
    from
        ((select 
        product_id pid,
            product_name pname,
            unit_price,
            expiry_date end_date
    from
        product_dim
    order by pid , end_date desc) t1, (select 
        @pid:=0,
            @pname:='',
            @unit_price:=0,
            @end_date:='0000-0000-00'
    ) t2)) t2
where
    t1.pid = t2.pid and t1.pname = t2.pname
        and t1.unit_price = t2.unit_price) t2 where t1.product_id=t2.pid;

-- 装载销售订单中心表
insert into hub_sales_order (sales_order_id,record_source) 
select distinct sales_order_id,'sales_order_dim' from sales_order_dim;

-- 装载销售订单附属表
insert into sat_sales_order
(
hub_sales_order_id,
load_dts,
load_end_dts,
record_source,
order_time,
entry_time,
amount,
allocate_time,
packing_time,
ship_time,
receive_time)
select 
t1.hub_sales_order_id,
t2.effective_date,
t2.expiry_date,
'hub_sales_order,sales_order_dim',
t2.order_time,
t2.entry_time,
t2.amount,
t2.allocate_time,
t2.packing_time,
t2.ship_time,
t2.receive_time
from hub_sales_order t1,sales_order_dim t2
where t1.sales_order_id = t2.sales_order_id;

-- 装载订单产品链接表
insert into link_order_product
(
hub_sales_order_id,
hub_product_id,
record_source)
select t1.hub_sales_order_id, t2.hub_product_id,
'hub_sales_order,hub_product,sales_order_dim,product_dim,sales_order_fact'
from 
hub_sales_order t1,
hub_product t2,
(select t1.sales_order_id sales_order_id, t2.product_id product_id
from sales_order_dim t1, product_dim t2, sales_order_fact t3 
where t1.sales_order_sk = t3.sales_order_sk and t2.product_sk = t3.product_sk) t3
where t1.sales_order_id = t3.sales_order_id and t2.product_id = t3.product_id;

-- 装载订单客户链接表
insert into link_order_customer
(
hub_sales_order_id,
hub_customer_id,
record_source)
select t1.hub_sales_order_id, t2.hub_customer_id,
'hub_sales_order,hub_customer,sales_order_dim,customer_dim,sales_order_fact'
from 
hub_sales_order t1,
hub_customer t2,
(select t1.sales_order_id sales_order_id, t2.customer_id customer_id
from sales_order_dim t1, customer_dim t2, sales_order_fact t3 
where t1.sales_order_sk = t3.sales_order_sk and t2.customer_sk = t3.customer_sk) t3
where t1.sales_order_id = t3.sales_order_id and t2.customer_id = t3.customer_id;

-- 装载订单产品附属表
insert into sat_order_product
(
link_order_product_id,
load_dts,
load_end_dts,
record_source,
unit_price,
quantity
)
select t1.link_order_product_id,t2.effective_date,t2.expiry_date,
'link_order_product,product_dim,sales_order_dim,sales_order_fact,hub_sales_order,hub_product',
t2.unit_price,
t4.quantity
from 
link_order_product t1, 
product_dim t2, 
sales_order_dim t3, 
sales_order_fact t4,
hub_sales_order t5,
hub_product t6
where t1.hub_sales_order_id = t5.hub_sales_order_id
  and t1.hub_product_id = t6.hub_product_id
  and t4.sales_order_sk = t3.sales_order_sk
  and t4.product_sk = t2.product_sk
  and t5.sales_order_id = t3.sales_order_id
  and t6.product_id = t2.product_id;
  
COMMIT;
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2015-12-15,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档