teradata中使用不存在的性能调整

发布于 2025-01-24 05:31:59 字数 7291 浏览 0 评论 0原文

IE_VT是一个挥发性表,使用唯一的主索引(store_number,upc,pos_dept,pos_dept_number)创建,

现在我们的代码试图通过从各个表中选择几个列来插入VT中。

实际查询:

INSERT INTO IE_VT 
       (STORE_NUMBER,
        UPC,
        SKU_REGIONAL,
        REGION,
        ITEM_LONG_DESCRIPTION,
        POS_DESCRIPTION,
        POS_DEPT,
        POS_DEPT_NUMBER,
        ITEM_SIZE,
        ITEM_SIZE_NUMBER,
        ITEM_UOM,
        BRAND,
        REG_HIER_REF,
        ITEM_STATUS,
        CREATE_DATE,
        COMMODITY_CODE,
        NAT_UPC,
        START_DATE,
        END_DATE,
        LAST_UPDATE)
SELECT  ir.STORE_NUMBER,  
        ir.UPC,                                               
        ir.SKU_REGIONAL,                                      
        ir.REGION,                                            
        ir.ITEM_LONG_DESCRIPTION,                             
        ir.POS_DESCRIPTION, 
        coalesce(otr.POS_DEPT,'00000'),                         -- Set to default POS_DEPT if not found in ORG_TEAM_REF                                    
        a.DEPARTMENT,                                           -- Invalid POS_DEPT, not in hierarchy.  Will be null if not valid integer.                    
               
        ir.ITEM_SIZE,                                         
        ir.ITEM_SIZE_NUMBER,                                  
        ir.ITEM_UOM,                                          
        ir.BRAND,                                             
        ir.REG_HIER_REF,                                      
        ir.ITEM_STATUS,                                                        
                   
        ir.CREATE_DATE,
        ir.COMMODITY_CODE,
        ir.NAT_UPC,
        ir.START_DATE,                                   
        COALESCE(ir.END_DATE, CAST('2999-12-31' AS DATE)) END_DATE,
        ir.LAST_UPDATE
 FROM   
       (
               SELECT distinct  x.STORE_NUMBER, 
                                x.UPC, 
                                x.DEPARTMENT
               FROM             EW.PRD_SCS x
               LEFT OUTER JOIN  EW.ITRM_R y            -- Invalid POS_DEPT_NUMBER in PRD_SALES_COST_SUMMARY.
                   ON  x.STORE_NUMBER =    y.STORE_NUMBER       -- But still valid item per STORE_NUMBER / UPC in IR.
                   AND x.UPC =             y.UPC
                   and x.DEPARTMENT =      y.POS_DEPT_NUMBER 
                   and y.PDN_END_DATE is null
               WHERE y.ITEM_REGION_ID IS NULL
       ) A        
join    EDW.STORE B  
            on  a.STORE_NUMBER = b.STORE_NUMBER
            and b.END_DATE is null  
join    EW.ITRM_R ir                                   -- Item is valid for STORE.
            on  b.REGION =          ir.REGION
            and a.STORE_NUMBER =    ir.STORE_NUMBER
            and a.UPC =             ir.UPC
            and ir.END_DATE is null
LEFT OUTER JOIN                  
        EDW.ORG_TEAM_REF  otr                                -- With possibly still valid POS_DEPT_NUMBER per OTR.
            on  b.REGION =          otr.REGION
            and a.STORE_NUMBER  =   otr.STORE_NUMBER
            and a.DEPARTMENT =      otr.POS_DEPT_NUMBER
            and otr.END_DATE is null -- 2678712 records
--------            
where  (a.STORE_NUMBER, a.UPC, trim(leading '0' from coalesce(otr.POS_DEPT,'00000')), a.department)
            not in  (select STORE_NUMBER,                       -- LOJ to IE_VT is much slower.
                            UPC, 
                            trim(leading '0' from POS_DEPT),
                            POS_DEPT_NUMBER
                     from   IE_VT); -- 2678700 records

我们的实际查询运行良好,直到上面的5行标记为--------仅此之后,我们的查询消耗了500k CPU秒,并且运行左右大约10分钟,并为我们提供了最终的最终集合2678700记录。

在那里,我尝试替换在不存在的地方的地方,是更改的查询。


调查查询:

INSERT INTO IE_VT1 
       (STORE_NUMBER,
        UPC,
        SKU_REGIONAL,
        REGION,
        ITEM_LONG_DESCRIPTION,
        POS_DESCRIPTION,
        POS_DEPT,
        POS_DEPT_NUMBER,
        ITEM_SIZE,
        ITEM_SIZE_NUMBER,
        ITEM_UOM,
        BRAND,
        REG_HIER_REF,
        ITEM_STATUS,
        CREATE_DATE,
        COMMODITY_CODE,
        NAT_UPC,
        START_DATE,
        END_DATE,
        LAST_UPDATE)
SELECT  ir.STORE_NUMBER, 
        ir.UPC,                  
        ir.SKU_REGIONAL,                                      
        ir.REGION,                                            
        ir.ITEM_LONG_DESCRIPTION,                             
        ir.POS_DESCRIPTION, 
        coalesce(otr.POS_DEPT,'00000'),                         -- Set to default POS_DEPT if not found in ORG_TEAM_REF                                    
        a.DEPARTMENT,                                           -- Invalid POS_DEPT, not in hierarchy.  Will be null if not valid integer.                    
               
        ir.ITEM_SIZE,                                         
        ir.ITEM_SIZE_NUMBER,                                  
        ir.ITEM_UOM,                                          
        ir.BRAND,                                             
        ir.REG_HIER_REF,                                      
        ir.ITEM_STATUS,                                                        
                   
        ir.CREATE_DATE,
        ir.COMMODITY_CODE,
        ir.NAT_UPC,
        ir.START_DATE,                                   
        COALESCE(ir.END_DATE, CAST('2999-12-31' AS DATE)) END_DATE,
        ir.LAST_UPDATE
 FROM   
       (
               SELECT distinct  x.STORE_NUMBER, 
                                x.UPC, 
                                x.DEPARTMENT
               FROM             EW.PRD_SCS x
               LEFT OUTER JOIN  EW.ITRM_R y            -- Invalid POS_DEPT_NUMBER in PRD_SALES_COST_SUMMARY.
                   ON  x.STORE_NUMBER =    y.STORE_NUMBER       -- But still valid item per STORE_NUMBER / UPC in IR.
                   AND x.UPC =             y.UPC
                   and x.DEPARTMENT =      y.POS_DEPT_NUMBER 
                   and y.PDN_END_DATE is null
               WHERE y.ITEM_REGION_ID IS NULL
       ) A        
join    EDW.STORE B  
            on  a.STORE_NUMBER = b.STORE_NUMBER
            and b.END_DATE is null  
join    EW.ITRM_R ir                                   -- Item is valid for STORE.
            on  b.REGION =          ir.REGION
            and a.STORE_NUMBER =    ir.STORE_NUMBER
            and a.UPC =             ir.UPC
            and ir.END_DATE is null
LEFT OUTER JOIN                  
        EDW.ORG_TEAM_REF  otr                                -- With possibly still valid POS_DEPT_NUMBER per OTR.
            on  b.REGION =          otr.REGION
            and a.STORE_NUMBER  =   otr.STORE_NUMBER
            and a.DEPARTMENT =      otr.POS_DEPT_NUMBER
            and otr.END_DATE is null  --2678712 records
-----------
where not exists
(
select store_number,upc,trim(leading '0' from POS_DEPT) as POS_DEPT,pos_dept_number from IE_VT1 vt
where a.STORE_NUMBER =  vt.store_number
and a.upc = vt.upc
and trim(leading '0' from coalesce(otr.POS_DEPT,'00000')) =  vt.POS_DEPT
and a.department =vt.pos_dept_number);  --2678712 22 Secs  

上述调查查询仅采用25K CPU SEC,但是即使在不存在的位置之后,最终计数也与“ 2678712记录”条款不存在之前的数字相同。

有人可以在此查询中如何实施如何实施吗?

IE_VT is a volatile table that is created with UNIQUE PRIMARY INDEX ( STORE_NUMBER, UPC, POS_DEPT, POS_DEPT_NUMBER )

Now our code tries to insert into the VT by selecting few columns from various tables.

ACTUAL QUERY:

INSERT INTO IE_VT 
       (STORE_NUMBER,
        UPC,
        SKU_REGIONAL,
        REGION,
        ITEM_LONG_DESCRIPTION,
        POS_DESCRIPTION,
        POS_DEPT,
        POS_DEPT_NUMBER,
        ITEM_SIZE,
        ITEM_SIZE_NUMBER,
        ITEM_UOM,
        BRAND,
        REG_HIER_REF,
        ITEM_STATUS,
        CREATE_DATE,
        COMMODITY_CODE,
        NAT_UPC,
        START_DATE,
        END_DATE,
        LAST_UPDATE)
SELECT  ir.STORE_NUMBER,  
        ir.UPC,                                               
        ir.SKU_REGIONAL,                                      
        ir.REGION,                                            
        ir.ITEM_LONG_DESCRIPTION,                             
        ir.POS_DESCRIPTION, 
        coalesce(otr.POS_DEPT,'00000'),                         -- Set to default POS_DEPT if not found in ORG_TEAM_REF                                    
        a.DEPARTMENT,                                           -- Invalid POS_DEPT, not in hierarchy.  Will be null if not valid integer.                    
               
        ir.ITEM_SIZE,                                         
        ir.ITEM_SIZE_NUMBER,                                  
        ir.ITEM_UOM,                                          
        ir.BRAND,                                             
        ir.REG_HIER_REF,                                      
        ir.ITEM_STATUS,                                                        
                   
        ir.CREATE_DATE,
        ir.COMMODITY_CODE,
        ir.NAT_UPC,
        ir.START_DATE,                                   
        COALESCE(ir.END_DATE, CAST('2999-12-31' AS DATE)) END_DATE,
        ir.LAST_UPDATE
 FROM   
       (
               SELECT distinct  x.STORE_NUMBER, 
                                x.UPC, 
                                x.DEPARTMENT
               FROM             EW.PRD_SCS x
               LEFT OUTER JOIN  EW.ITRM_R y            -- Invalid POS_DEPT_NUMBER in PRD_SALES_COST_SUMMARY.
                   ON  x.STORE_NUMBER =    y.STORE_NUMBER       -- But still valid item per STORE_NUMBER / UPC in IR.
                   AND x.UPC =             y.UPC
                   and x.DEPARTMENT =      y.POS_DEPT_NUMBER 
                   and y.PDN_END_DATE is null
               WHERE y.ITEM_REGION_ID IS NULL
       ) A        
join    EDW.STORE B  
            on  a.STORE_NUMBER = b.STORE_NUMBER
            and b.END_DATE is null  
join    EW.ITRM_R ir                                   -- Item is valid for STORE.
            on  b.REGION =          ir.REGION
            and a.STORE_NUMBER =    ir.STORE_NUMBER
            and a.UPC =             ir.UPC
            and ir.END_DATE is null
LEFT OUTER JOIN                  
        EDW.ORG_TEAM_REF  otr                                -- With possibly still valid POS_DEPT_NUMBER per OTR.
            on  b.REGION =          otr.REGION
            and a.STORE_NUMBER  =   otr.STORE_NUMBER
            and a.DEPARTMENT =      otr.POS_DEPT_NUMBER
            and otr.END_DATE is null -- 2678712 records
--------            
where  (a.STORE_NUMBER, a.UPC, trim(leading '0' from coalesce(otr.POS_DEPT,'00000')), a.department)
            not in  (select STORE_NUMBER,                       -- LOJ to IE_VT is much slower.
                            UPC, 
                            trim(leading '0' from POS_DEPT),
                            POS_DEPT_NUMBER
                     from   IE_VT); -- 2678700 records

Our Actual query is working well and pretty fast until the 5 lines above where marked with -------- ONly after that our query is consuming 500K CPU seconds and is running for around 10 mins and gives us the final set of 2678700 records.

There by I tried replacing the where clause with where not exists and below is the altered query.


TUNED QUERY:

INSERT INTO IE_VT1 
       (STORE_NUMBER,
        UPC,
        SKU_REGIONAL,
        REGION,
        ITEM_LONG_DESCRIPTION,
        POS_DESCRIPTION,
        POS_DEPT,
        POS_DEPT_NUMBER,
        ITEM_SIZE,
        ITEM_SIZE_NUMBER,
        ITEM_UOM,
        BRAND,
        REG_HIER_REF,
        ITEM_STATUS,
        CREATE_DATE,
        COMMODITY_CODE,
        NAT_UPC,
        START_DATE,
        END_DATE,
        LAST_UPDATE)
SELECT  ir.STORE_NUMBER, 
        ir.UPC,                  
        ir.SKU_REGIONAL,                                      
        ir.REGION,                                            
        ir.ITEM_LONG_DESCRIPTION,                             
        ir.POS_DESCRIPTION, 
        coalesce(otr.POS_DEPT,'00000'),                         -- Set to default POS_DEPT if not found in ORG_TEAM_REF                                    
        a.DEPARTMENT,                                           -- Invalid POS_DEPT, not in hierarchy.  Will be null if not valid integer.                    
               
        ir.ITEM_SIZE,                                         
        ir.ITEM_SIZE_NUMBER,                                  
        ir.ITEM_UOM,                                          
        ir.BRAND,                                             
        ir.REG_HIER_REF,                                      
        ir.ITEM_STATUS,                                                        
                   
        ir.CREATE_DATE,
        ir.COMMODITY_CODE,
        ir.NAT_UPC,
        ir.START_DATE,                                   
        COALESCE(ir.END_DATE, CAST('2999-12-31' AS DATE)) END_DATE,
        ir.LAST_UPDATE
 FROM   
       (
               SELECT distinct  x.STORE_NUMBER, 
                                x.UPC, 
                                x.DEPARTMENT
               FROM             EW.PRD_SCS x
               LEFT OUTER JOIN  EW.ITRM_R y            -- Invalid POS_DEPT_NUMBER in PRD_SALES_COST_SUMMARY.
                   ON  x.STORE_NUMBER =    y.STORE_NUMBER       -- But still valid item per STORE_NUMBER / UPC in IR.
                   AND x.UPC =             y.UPC
                   and x.DEPARTMENT =      y.POS_DEPT_NUMBER 
                   and y.PDN_END_DATE is null
               WHERE y.ITEM_REGION_ID IS NULL
       ) A        
join    EDW.STORE B  
            on  a.STORE_NUMBER = b.STORE_NUMBER
            and b.END_DATE is null  
join    EW.ITRM_R ir                                   -- Item is valid for STORE.
            on  b.REGION =          ir.REGION
            and a.STORE_NUMBER =    ir.STORE_NUMBER
            and a.UPC =             ir.UPC
            and ir.END_DATE is null
LEFT OUTER JOIN                  
        EDW.ORG_TEAM_REF  otr                                -- With possibly still valid POS_DEPT_NUMBER per OTR.
            on  b.REGION =          otr.REGION
            and a.STORE_NUMBER  =   otr.STORE_NUMBER
            and a.DEPARTMENT =      otr.POS_DEPT_NUMBER
            and otr.END_DATE is null  --2678712 records
-----------
where not exists
(
select store_number,upc,trim(leading '0' from POS_DEPT) as POS_DEPT,pos_dept_number from IE_VT1 vt
where a.STORE_NUMBER =  vt.store_number
and a.upc = vt.upc
and trim(leading '0' from coalesce(otr.POS_DEPT,'00000')) =  vt.POS_DEPT
and a.department =vt.pos_dept_number);  --2678712 22 Secs  

The above tuned query is taking only 25K CPU secs, but even after the where not exists clause, the final count is same as what is before the Where not Exists clause "2678712 records".

Can someone please help me here on how to implement where no exists in this query?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文