更新一对多关系的重复项。

发布于 2024-10-15 20:56:06 字数 9110 浏览 3 评论 0原文

这不是您的标准“如何查找重复项”问题,我知道如何查找重复项,请参见下文。这个问题是如何更新也包含具有匹配记录的子项目的所述记录?

好吧,我将为您提供整个场景,以便您可以解决这个问题。

由于严重的系统故障,可能会插入重复的记录。

查找后面的重复项并将父commission_import_commission_junction标记为“is_processed = True”可以解决此问题。

复杂的是,commission_import_commission_junction 及其子commission_import_commission_junction_line_items 在要比较的列上必须相同。

这些表是:(

commission_import_commission_junction
 - id
 - created_date
 - some columns that are checked for duplication
 - some columns that are not checked for duplication
commission_import_commission_junction_line_items
 - id
 - some columns that are checked for duplication
 - some columns that are not checked for duplication

对于完整的表规范,请查看最底部代码块中的 CREATE TABLE 语句。)

仅在父表上标记重复项的查询commission_import_commission_junction:

UPDATE commission_import_commission_junction cicj
SET is_processed = TRUE
FROM (
    SELECT MIN(created_date) AS first_date, member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
    FROM commission_import_commission_junction inner_imports
    JOIN commission_import_commission_junction_line_items inner_items ON inner_items.commission_import_commission_junction_id = inner_imports.commission_import_commission_junction_id
    GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
    HAVING (COUNT(*) > 1)
) AS dups
WHERE
-- MAIN TABLE COLUMNN LIST
(cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
IS NOT DISTINCT FROM
-- OTHER TABLE COLUMN LIST
(dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
AND cicj.created_date <> dups.first_date
AND cicj.is_processed = FALSE;

在某处以某种方式我需要检查 line_items 是否也是重复的。

下面的代码是设置数据库,记住这是 postgres 特定的。

-- "commission_import_build" is a record that keeps information about the process of collecting the commission information. Duplicate commission_import_commission_junction records will not exist with the same commission_import_build_id
-- "commission_import_commission_junction" is a record description commission information from a customers purchase.
-- "commission_import_commission_junction_line_items" are records describing items in that purchase.


DROP TABLE IF EXISTS commission_import_commission_junction_line_items;
DROP TABLE IF EXISTS commission_import_commission_junction;
DROP TABLE IF EXISTS commission_import_builds;

CREATE TABLE commission_import_builds
(
  commission_import_build_id serial NOT NULL,
  build_date timestamp with time zone NOT NULL,
  CONSTRAINT pkey_commission_import_build_id PRIMARY KEY (commission_import_build_id),
  CONSTRAINT commission_import_builds_build_date_key UNIQUE (build_date)
);
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (1, '2011-01-01');
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (2, '2011-01-02');
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (3, '2011-01-03');

CREATE TABLE commission_import_commission_junction
(
  commission_import_commission_junction_id serial NOT NULL,
  member_id integer,
  site_id integer,
  action_status character varying NOT NULL,
  action_type character varying NOT NULL,
  ad_id bigint,
  commission_id bigint NOT NULL,
  country character varying,
  event_date timestamp with time zone NOT NULL,
  locking_date timestamp with time zone,
  order_id character varying NOT NULL,
  original boolean,
  original_action_id bigint NOT NULL,
  posting_date timestamp with time zone NOT NULL,
  website_id bigint NOT NULL,
  advertiser_name character varying,
  commission_amount numeric(19,2) NOT NULL,
  sale_amount numeric(19,2) NOT NULL,
  aggregator_affiliate_id integer NOT NULL,
  is_processed boolean NOT NULL DEFAULT false,
  created_date timestamp with time zone NOT NULL DEFAULT now(),
  member_transaction_id integer,
  commission_import_build_id integer NOT NULL,
  CONSTRAINT pkey_commission_import_commission_junction_commission_import_co PRIMARY KEY (commission_import_commission_junction_id),
  CONSTRAINT fk_commission_import_commission_junction_commission_import_buil FOREIGN KEY (commission_import_build_id)
      REFERENCES commission_import_builds (commission_import_build_id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION
);
CREATE INDEX idx_commission_import_commission_junction_is_processed
  ON commission_import_commission_junction
  USING btree
  (is_processed);
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (1, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 1, '2011-02-05');
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (2, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 2, '2011-02-06');
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (3, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 3, '2011-02-07');
SELECT * FROM commission_import_commission_junction;

CREATE TABLE commission_import_commission_junction_line_items
(
  commission_import_commission_junction_line_item_id serial NOT NULL,
  commission_import_commission_junction_id integer NOT NULL,
  sku character varying,
  quantity integer,
  posting_date timestamp with time zone,
  sale_amount numeric(19,2),
  discount numeric(19,2),
  CONSTRAINT pkey_commission_import_commission_junction_link_items_commissio PRIMARY KEY (commission_import_commission_junction_line_item_id),
  CONSTRAINT fkey_commission_import_commission_junction_line_items_commissio FOREIGN KEY (commission_import_commission_junction_id)
      REFERENCES commission_import_commission_junction (commission_import_commission_junction_id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION
);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test3', 3, 32.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test3', 3, 32.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test3', 3, 32.50);

This isn't your standard "how do I find duplicates" question, I know how to do find duplicates, see below. This question is how do I update said records that also have child items with matching records?

Alright, I'm going to give you whole scenario so that you can work with this problem.

Duplicate records could be inserted as a result of critical system failure.

Finding later duplicates and marking the parent commission_import_commission_junction "is_processed = True" solves this problem.

The complication is that the commission_import_commission_junction and its children commission_import_commission_junction_line_items must be identical on the columns to compare.

the tables are:

commission_import_commission_junction
 - id
 - created_date
 - some columns that are checked for duplication
 - some columns that are not checked for duplication
commission_import_commission_junction_line_items
 - id
 - some columns that are checked for duplication
 - some columns that are not checked for duplication

(For the full table spec, check out the CREATE TABLE statements in the bottom-most block of code.)

The query to mark duplicates on just the parent table commission_import_commission_junction:

UPDATE commission_import_commission_junction cicj
SET is_processed = TRUE
FROM (
    SELECT MIN(created_date) AS first_date, member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
    FROM commission_import_commission_junction inner_imports
    JOIN commission_import_commission_junction_line_items inner_items ON inner_items.commission_import_commission_junction_id = inner_imports.commission_import_commission_junction_id
    GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
    HAVING (COUNT(*) > 1)
) AS dups
WHERE
-- MAIN TABLE COLUMNN LIST
(cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
IS NOT DISTINCT FROM
-- OTHER TABLE COLUMN LIST
(dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
AND cicj.created_date <> dups.first_date
AND cicj.is_processed = FALSE;

Somewhere and somehow I need to check that the line_items are also duplicates.

THE CODE BELOW IS TO SETUP THE DATABASE, remember this is postgres specific.

-- "commission_import_build" is a record that keeps information about the process of collecting the commission information. Duplicate commission_import_commission_junction records will not exist with the same commission_import_build_id
-- "commission_import_commission_junction" is a record description commission information from a customers purchase.
-- "commission_import_commission_junction_line_items" are records describing items in that purchase.


DROP TABLE IF EXISTS commission_import_commission_junction_line_items;
DROP TABLE IF EXISTS commission_import_commission_junction;
DROP TABLE IF EXISTS commission_import_builds;

CREATE TABLE commission_import_builds
(
  commission_import_build_id serial NOT NULL,
  build_date timestamp with time zone NOT NULL,
  CONSTRAINT pkey_commission_import_build_id PRIMARY KEY (commission_import_build_id),
  CONSTRAINT commission_import_builds_build_date_key UNIQUE (build_date)
);
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (1, '2011-01-01');
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (2, '2011-01-02');
INSERT INTO commission_import_builds (commission_import_build_id, build_date) VALUES (3, '2011-01-03');

CREATE TABLE commission_import_commission_junction
(
  commission_import_commission_junction_id serial NOT NULL,
  member_id integer,
  site_id integer,
  action_status character varying NOT NULL,
  action_type character varying NOT NULL,
  ad_id bigint,
  commission_id bigint NOT NULL,
  country character varying,
  event_date timestamp with time zone NOT NULL,
  locking_date timestamp with time zone,
  order_id character varying NOT NULL,
  original boolean,
  original_action_id bigint NOT NULL,
  posting_date timestamp with time zone NOT NULL,
  website_id bigint NOT NULL,
  advertiser_name character varying,
  commission_amount numeric(19,2) NOT NULL,
  sale_amount numeric(19,2) NOT NULL,
  aggregator_affiliate_id integer NOT NULL,
  is_processed boolean NOT NULL DEFAULT false,
  created_date timestamp with time zone NOT NULL DEFAULT now(),
  member_transaction_id integer,
  commission_import_build_id integer NOT NULL,
  CONSTRAINT pkey_commission_import_commission_junction_commission_import_co PRIMARY KEY (commission_import_commission_junction_id),
  CONSTRAINT fk_commission_import_commission_junction_commission_import_buil FOREIGN KEY (commission_import_build_id)
      REFERENCES commission_import_builds (commission_import_build_id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION
);
CREATE INDEX idx_commission_import_commission_junction_is_processed
  ON commission_import_commission_junction
  USING btree
  (is_processed);
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (1, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 1, '2011-02-05');
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (2, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 2, '2011-02-06');
INSERT INTO commission_import_commission_junction (commission_import_commission_junction_id, action_status, action_type, commission_id, event_date, order_id, original_action_id, posting_date, website_id, commission_amount, sale_amount, aggregator_affiliate_id, commission_import_build_id, created_date) VALUES
  (3, 'new', 'sale', 1234, '2011-02-04 14:39:52.989499-07', 'test-order', 1234567, '2011-02-04 14:39:52.989499-07', 123, 12.35, 123.45, 9876, 3, '2011-02-07');
SELECT * FROM commission_import_commission_junction;

CREATE TABLE commission_import_commission_junction_line_items
(
  commission_import_commission_junction_line_item_id serial NOT NULL,
  commission_import_commission_junction_id integer NOT NULL,
  sku character varying,
  quantity integer,
  posting_date timestamp with time zone,
  sale_amount numeric(19,2),
  discount numeric(19,2),
  CONSTRAINT pkey_commission_import_commission_junction_link_items_commissio PRIMARY KEY (commission_import_commission_junction_line_item_id),
  CONSTRAINT fkey_commission_import_commission_junction_line_items_commissio FOREIGN KEY (commission_import_commission_junction_id)
      REFERENCES commission_import_commission_junction (commission_import_commission_junction_id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION
);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (1, 'test3', 3, 32.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (2, 'test3', 3, 32.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test1', 3, 23.45);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test2', 3, 67.50);
INSERT INTO commission_import_commission_junction_line_items (commission_import_commission_junction_id, sku, quantity, sale_amount) VALUES (3, 'test3', 3, 32.50);

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2

云醉月微眠 2024-10-22 20:56:06

提醒我直接营销邮件列表中的重复消除

无论表的详细信息如何,父子重复消除算法都会遵循以下步骤:

1) 将重复项放入与旧密钥与新密钥相匹配的列表中(临时表)

2) 更新子表中的外键

3)从父表中删除重复项

我很欣赏您帖子中的细节,但我将通过一些示例表/列名称使其保持简单且易于阅读:

-- step 1, get the list
-- Warning: t-sql syntax, adjust for Postgres
--          if it doesn't like placement of "into..." clause
select keep.primaryKey as keepKey
     , dupe.primaryKey as dupeKey
  into #DupeList
  from (
        select min(primaryKey) as primaryKey
             , dupeCriteria1
             , dupeCriteria2
          FROM theTable
         group by dupeCriteria1,dupeCritera2
        having count(*) > 1
       ) keep
  JOIN theTable dupe
    ON keep.dupeCriteria1 = dupe.dupeCriteria1
   AND keep.dupeCriteria2 = dupe.dupeCriteria2
   AND keep.primaryKey   <> dupe.primaryKey

一旦有了,请更新子表中的外键:

update childTable
   set foreignKey = #temp1.keepKey
  from #temp1
 where foreignKey = #temp1.dupeKey

然后删除父表中的所有内容:

delete from parentTable
 where primaryKey in (select dupeKey from #temp1)

Reminds me of duplicate elimination in direct marketing mailing lists

Regardless of the details of your tables, a parent-child dupe elimination algorithm follows these steps:

1) Get duplicates into a list that matches old key to new key (temp table)

2) Update the foreign key in the child table

3) Delete the dupes from the parent

I admire the detail in your post, but I'm going to keep it simple and easier to read with some example table/column names:

-- step 1, get the list
-- Warning: t-sql syntax, adjust for Postgres
--          if it doesn't like placement of "into..." clause
select keep.primaryKey as keepKey
     , dupe.primaryKey as dupeKey
  into #DupeList
  from (
        select min(primaryKey) as primaryKey
             , dupeCriteria1
             , dupeCriteria2
          FROM theTable
         group by dupeCriteria1,dupeCritera2
        having count(*) > 1
       ) keep
  JOIN theTable dupe
    ON keep.dupeCriteria1 = dupe.dupeCriteria1
   AND keep.dupeCriteria2 = dupe.dupeCriteria2
   AND keep.primaryKey   <> dupe.primaryKey

Once you have that, update the foreign key in the child table:

update childTable
   set foreignKey = #temp1.keepKey
  from #temp1
 where foreignKey = #temp1.dupeKey

Then just delete everything out of the parent table:

delete from parentTable
 where primaryKey in (select dupeKey from #temp1)
舟遥客 2024-10-22 20:56:06
CREATE FUNCTION removeCommissionImportCommissionJunctionDuplicates() RETURNS INT AS $BODY$ DECLARE duplicate RECORD; DECLARE parent RECORD; DECLARE children commission_import_commission_junction_line_items[]; DECLARE duplicate_children commission_import_commission_junction_line_items[]; DECLARE duplicate_child_count INT; DECLARE child commission_import_commission_junction_line_items; DECLARE duplicate_child commission_import_commission_junction_line_items; DECLARE num_updates INT; BEGIN
    SELECT * FROM (SELECT 0) AS value INTO num_updates;
    FOR duplicate IN
        SELECT cicj.*, dups.first_date
        FROM commission_import_commission_junction cicj
        JOIN (SELECT MIN(created_date) AS first_date, member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            FROM commission_import_commission_junction inner_imports
            GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            HAVING (COUNT(*) > 1)) AS dups
        ON (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
        IS NOT DISTINCT FROM
        (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
        WHERE cicj.created_date != dups.first_date
        AND cicj.is_processed = FALSE
    LOOP
        --RAISE NOTICE 'Looping';
        -- We need to collect the parent and children of the original record.
        -- Get the parent of the original
        SELECT * 
        FROM commission_import_commission_junction cicj 
        WHERE (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
        IS NOT DISTINCT FROM
        (duplicate.member_id, duplicate.site_id, duplicate.action_status, duplicate.action_type, duplicate.ad_id, duplicate.commission_id, duplicate.country, duplicate.event_date, duplicate.locking_date, duplicate.order_id, duplicate.original, duplicate.original_action_id, duplicate.posting_date, duplicate.website_id, duplicate.advertiser_name, duplicate.commission_amount, duplicate.sale_amount, duplicate.aggregator_affiliate_id)
        AND cicj.created_date = duplicate.first_date
        INTO parent;
        -- Get the children of the original
        children := ARRAY(
            SELECT cicjli
            FROM commission_import_commission_junction_line_items cicjli
            WHERE cicjli.commission_import_commission_junction_id
= parent.commission_import_commission_junction_id);
        --RAISE NOTICE 'parent: %', parent;
        --RAISE NOTICE 'children: %', children;
        -- Now get the duplicates children
        duplicate_children := ARRAY(
            SELECT cicjli
            FROM commission_import_commission_junction_line_items cicjli
            WHERE cicjli.commission_import_commission_junction_id
= duplicate.commission_import_commission_junction_id);
        --RAISE NOTICE 'duplicate_children: %', duplicate_children;
        -- Next, compare the children of the duplicate to the children of the original parent.
        -- First compare size
        IF array_upper(children, 1) = array_upper(duplicate_children, 1) THEN
            --RAISE NOTICE 'Same number of children in duplicate as in parent';
            -- Now compare each set
            SELECT * FROM (SELECT 0) AS value INTO duplicate_child_count;
            FOR child_index IN array_lower(children, 1) .. array_upper(children, 1) LOOP
                child := children[child_index];
                FOR duplicate_child_index IN array_lower(duplicate_children, 1) .. array_upper(duplicate_children, 1) LOOP
                    duplicate_child := duplicate_children[duplicate_child_index];
                    IF (child.sku, child.quantity, child.posting_date, child.sale_amount, child.discount) IS NOT DISTINCT FROM (duplicate_child.sku, duplicate_child.quantity, duplicate_child.posting_date, duplicate_child.sale_amount, duplicate_child.discount) THEN
                        SELECT * FROM (SELECT duplicate_child_count + 1) AS value INTO duplicate_child_count;
                        EXIT;
                    END IF;
                END LOOP;
            END LOOP;
            --RAISE NOTICE 'Duplicate Child Count: %', duplicate_child_count;
            -- If we have the same number of duplicates as there are records
            IF duplicate_child_count = array_upper(duplicate_children, 1) THEN
                -- Update the duplicate record as processed.
                --RAISE NOTICE 'Marking duplicate % as is_processed', duplicate;
                UPDATE commission_import_commission_junction cicj SET is_processed = TRUE WHERE cicj.commission_import_commission_junction_id
= duplicate.commission_import_commission_junction_id;
                SELECT * FROM (SELECT num_updates + 1) AS value INTO num_updates;
            END IF;
        END IF;
    END LOOP;
    --RAISE NOTICE 'Updates: %', num_updates;
    RETURN num_updates; END; $BODY$ LANGUAGE plpgsql;
CREATE FUNCTION removeCommissionImportCommissionJunctionDuplicates() RETURNS INT AS $BODY$ DECLARE duplicate RECORD; DECLARE parent RECORD; DECLARE children commission_import_commission_junction_line_items[]; DECLARE duplicate_children commission_import_commission_junction_line_items[]; DECLARE duplicate_child_count INT; DECLARE child commission_import_commission_junction_line_items; DECLARE duplicate_child commission_import_commission_junction_line_items; DECLARE num_updates INT; BEGIN
    SELECT * FROM (SELECT 0) AS value INTO num_updates;
    FOR duplicate IN
        SELECT cicj.*, dups.first_date
        FROM commission_import_commission_junction cicj
        JOIN (SELECT MIN(created_date) AS first_date, member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            FROM commission_import_commission_junction inner_imports
            GROUP BY member_id, site_id, action_status, action_type, ad_id, commission_id, country, event_date, locking_date, order_id, original, original_action_id, posting_date, website_id, advertiser_name, commission_amount, sale_amount, aggregator_affiliate_id
            HAVING (COUNT(*) > 1)) AS dups
        ON (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
        IS NOT DISTINCT FROM
        (dups.member_id, dups.site_id, dups.action_status, dups.action_type, dups.ad_id, dups.commission_id, dups.country, dups.event_date, dups.locking_date, dups.order_id, dups.original, dups.original_action_id, dups.posting_date, dups.website_id, dups.advertiser_name, dups.commission_amount, dups.sale_amount, dups.aggregator_affiliate_id)
        WHERE cicj.created_date != dups.first_date
        AND cicj.is_processed = FALSE
    LOOP
        --RAISE NOTICE 'Looping';
        -- We need to collect the parent and children of the original record.
        -- Get the parent of the original
        SELECT * 
        FROM commission_import_commission_junction cicj 
        WHERE (cicj.member_id, cicj.site_id, cicj.action_status, cicj.action_type, cicj.ad_id, cicj.commission_id, cicj.country, cicj.event_date, cicj.locking_date, cicj.order_id, cicj.original, cicj.original_action_id, cicj.posting_date, cicj.website_id, cicj.advertiser_name, cicj.commission_amount, cicj.sale_amount, cicj.aggregator_affiliate_id)
        IS NOT DISTINCT FROM
        (duplicate.member_id, duplicate.site_id, duplicate.action_status, duplicate.action_type, duplicate.ad_id, duplicate.commission_id, duplicate.country, duplicate.event_date, duplicate.locking_date, duplicate.order_id, duplicate.original, duplicate.original_action_id, duplicate.posting_date, duplicate.website_id, duplicate.advertiser_name, duplicate.commission_amount, duplicate.sale_amount, duplicate.aggregator_affiliate_id)
        AND cicj.created_date = duplicate.first_date
        INTO parent;
        -- Get the children of the original
        children := ARRAY(
            SELECT cicjli
            FROM commission_import_commission_junction_line_items cicjli
            WHERE cicjli.commission_import_commission_junction_id
= parent.commission_import_commission_junction_id);
        --RAISE NOTICE 'parent: %', parent;
        --RAISE NOTICE 'children: %', children;
        -- Now get the duplicates children
        duplicate_children := ARRAY(
            SELECT cicjli
            FROM commission_import_commission_junction_line_items cicjli
            WHERE cicjli.commission_import_commission_junction_id
= duplicate.commission_import_commission_junction_id);
        --RAISE NOTICE 'duplicate_children: %', duplicate_children;
        -- Next, compare the children of the duplicate to the children of the original parent.
        -- First compare size
        IF array_upper(children, 1) = array_upper(duplicate_children, 1) THEN
            --RAISE NOTICE 'Same number of children in duplicate as in parent';
            -- Now compare each set
            SELECT * FROM (SELECT 0) AS value INTO duplicate_child_count;
            FOR child_index IN array_lower(children, 1) .. array_upper(children, 1) LOOP
                child := children[child_index];
                FOR duplicate_child_index IN array_lower(duplicate_children, 1) .. array_upper(duplicate_children, 1) LOOP
                    duplicate_child := duplicate_children[duplicate_child_index];
                    IF (child.sku, child.quantity, child.posting_date, child.sale_amount, child.discount) IS NOT DISTINCT FROM (duplicate_child.sku, duplicate_child.quantity, duplicate_child.posting_date, duplicate_child.sale_amount, duplicate_child.discount) THEN
                        SELECT * FROM (SELECT duplicate_child_count + 1) AS value INTO duplicate_child_count;
                        EXIT;
                    END IF;
                END LOOP;
            END LOOP;
            --RAISE NOTICE 'Duplicate Child Count: %', duplicate_child_count;
            -- If we have the same number of duplicates as there are records
            IF duplicate_child_count = array_upper(duplicate_children, 1) THEN
                -- Update the duplicate record as processed.
                --RAISE NOTICE 'Marking duplicate % as is_processed', duplicate;
                UPDATE commission_import_commission_junction cicj SET is_processed = TRUE WHERE cicj.commission_import_commission_junction_id
= duplicate.commission_import_commission_junction_id;
                SELECT * FROM (SELECT num_updates + 1) AS value INTO num_updates;
            END IF;
        END IF;
    END LOOP;
    --RAISE NOTICE 'Updates: %', num_updates;
    RETURN num_updates; END; $BODY$ LANGUAGE plpgsql;
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文