请有人能帮我优化这个程序，Oracle 10g

发布于 2025-01-06 10:43:19 字数 2028 浏览 1 评论 0原文

create or replace procedure prcdr_Clustering is
 v_sampleCount  number;
 v_sampleFlag   number;
 v_matchPercent number;
 v_SpendAmount  Number(18, 2);
 cursor cur_PDCSample is
 SELECT *
  FROM TBL_BIL
 WHERE UDF_CHK = 'N';       
rec_Pdcsample TBL_BIL%rowtype;
BEGIN
OPEN cur_PDCSample;
LOOP
FETCH cur_PDCSample
  into rec_Pdcsample;
EXIT WHEN cur_PDCSample%NOTFOUND;
SELECT COUNT(*)
  INTO v_sampleCount
  FROM TBL_BIL
 WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
IF v_sampleCount <> 0 THEN
  UPDATE TBL_BIL
     SET UDF_CHK = 'Y'
   WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
  IF v_sampleCount > 1 THEN
    v_sampleFlag := 1;
  ELSE
    IF v_sampleCount = 1 THEN
      v_sampleFlag := 2;
    ELSE
      v_sampleFlag := 0;
    END IF;
  END IF;
  UPDATE TBL_BIL
     SET UDF_SAMPLECOUNT = v_sampleCount, UDF_SAMPLEFLAG = v_sampleFlag
   WHERE uniqueid = rec_Pdcsample.uniqueid;
  UPDATE TBL_BIL
     SET UDF_PID = rec_Pdcsample.uniqueid
   WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
  UPDATE TBL_BIL
     SET UDF_PIDSPEND = v_SpendAmount
   WHERE uniqueid = rec_Pdcsample.uniqueid;
  UPDATE TBL_BIL
     SET UDF_MATCHPERCENT = 1
   WHERE uniqueid <> rec_Pdcsample.uniqueid
     AND UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
 END IF;
IF cur_PDCSample%ISOPEN THEN
   CLOSE cur_PDCSample;
 END IF;
OPEN cur_PDCSample;
END LOOP;
IF cur_PDCSample%ISOPEN THEN
 CLOSE cur_PDCSample;
END IF;
 end PrcdrClustering;

我花了几天时间来执行，我的表有 225,846 行数据。

我的表的结构是：-

UNIQUEID    NUMBER  Notnull primary key
VENDORNAME  VARCHAR2(200)               
SHORTTEXT   VARCHAR2(500)               
SPENDAMT    NUMBER(18,2)                
UDF_TOKENIZED   VARCHAR2(999)               
UDF_PID NUMBER(10)              
UDF_SAMPLEFLAG  NUMBER(4)               
UDF_SAMPLECOUNT NUMBER(4)               
UDF_MATCHPERCENT    NUMBER(4)               
UDF_TOKENCNT    NUMBER(4)               
UDF_PIDSPEND    NUMBER(18,2)                
UDF_CHK VARCHAR2(1)

原文

create or replace procedure prcdr_Clustering is
 v_sampleCount  number;
 v_sampleFlag   number;
 v_matchPercent number;
 v_SpendAmount  Number(18, 2);
 cursor cur_PDCSample is
 SELECT *
  FROM TBL_BIL
 WHERE UDF_CHK = 'N';       
rec_Pdcsample TBL_BIL%rowtype;
BEGIN
OPEN cur_PDCSample;
LOOP
FETCH cur_PDCSample
  into rec_Pdcsample;
EXIT WHEN cur_PDCSample%NOTFOUND;
SELECT COUNT(*)
  INTO v_sampleCount
  FROM TBL_BIL
 WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
IF v_sampleCount <> 0 THEN
  UPDATE TBL_BIL
     SET UDF_CHK = 'Y'
   WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
  IF v_sampleCount > 1 THEN
    v_sampleFlag := 1;
  ELSE
    IF v_sampleCount = 1 THEN
      v_sampleFlag := 2;
    ELSE
      v_sampleFlag := 0;
    END IF;
  END IF;
  UPDATE TBL_BIL
     SET UDF_SAMPLECOUNT = v_sampleCount, UDF_SAMPLEFLAG = v_sampleFlag
   WHERE uniqueid = rec_Pdcsample.uniqueid;
  UPDATE TBL_BIL
     SET UDF_PID = rec_Pdcsample.uniqueid
   WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
  UPDATE TBL_BIL
     SET UDF_PIDSPEND = v_SpendAmount
   WHERE uniqueid = rec_Pdcsample.uniqueid;
  UPDATE TBL_BIL
     SET UDF_MATCHPERCENT = 1
   WHERE uniqueid <> rec_Pdcsample.uniqueid
     AND UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
 END IF;
IF cur_PDCSample%ISOPEN THEN
   CLOSE cur_PDCSample;
 END IF;
OPEN cur_PDCSample;
END LOOP;
IF cur_PDCSample%ISOPEN THEN
 CLOSE cur_PDCSample;
END IF;
 end PrcdrClustering;

It takes me days to execute, my table has 225,846 rows of data.

The structure of my table is :-

UNIQUEID    NUMBER  Notnull primary key
VENDORNAME  VARCHAR2(200)               
SHORTTEXT   VARCHAR2(500)               
SPENDAMT    NUMBER(18,2)                
UDF_TOKENIZED   VARCHAR2(999)               
UDF_PID NUMBER(10)              
UDF_SAMPLEFLAG  NUMBER(4)               
UDF_SAMPLECOUNT NUMBER(4)               
UDF_MATCHPERCENT    NUMBER(4)               
UDF_TOKENCNT    NUMBER(4)               
UDF_PIDSPEND    NUMBER(18,2)                
UDF_CHK VARCHAR2(1)

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

爱，才寂寞 2025-01-13 10:43:19

从哪里开始？我有几点要说。

您正在进行批量更新；这意味着批量收集... forall 会更加高效。
您对同一个表进行多次更新，这会使 DML 量增加一倍。
由于您已经从表中进行了选择，因此重新输入它进行另一次计数是毫无意义的，请使用分析函数来获得您需要的结果。
缩进，缩进，缩进。使您的代码更易于阅读。
您可以使用 elsif 来减少要评估的语句数量（非常非常小的胜利）
如果 uniqueid 是唯一的，您可以使用 rowid更新表。
您将 udf_pidspend 更新为 null，无论这是有意还是无意，都无需为其进行单独更新。
您可以在光标中执行更多操作，但显然无需选择所有内容，这将减少需要从磁盘读取的数据量。
您可能需要在那里进行一些提交；但这意味着如果中途失败则无法回滚。
我希望 tbl_bil 在 uniqueid 上建立索引，
正如 GolzeTrol 指出的那样'多次重新打开光标。没有这个必要。

一般规则：

如果您要从表中选择/更新或删除，请尽可能执行一次，否则尽可能少执行。
如果您要进行批量操作，请使用批量收集。
永远不要写select *
尽可能使用 rowid 以避免所有索引问题。

这仅适用于11G，我回答这个问题最近我提供了自己在 11G 之前的版本中处理此实现限制的方法，并链接到Ollie's、Tom Kyte 和 Sathya's

我不完全确定你想在这里做什么，所以如果逻辑有点偏离，请原谅我。

create or replace procedure prcdr_Clustering is

   cursor c_pdcsample is
    select rowid as rid
         , count(*) over ( partition by udf_tokenized ) as samplecount
         , udf_chk
         , max(uniqueid) over ( partition by udf_tokenized ) as udf_pid
      from tbl_bil
     where udf_chk = 'N';       

   type t__pdcsample is table of c_pdcsample%rowtype index by binary_integer;
   t_pdcsample t__pdcsample;

begin

   open c_pdcsample;
   loop

      fetch c_pdcsample bulk collect into t_pdcsample limit 1000;

      exit when t_pdcsample.count = 0;

      if t_pdcsample.samplecount <> 0 then
         t_pdcsample.udf_chk := 'y';

         if t_pdcsample.samplecount > 1 then
            t_pdcsample.samplecount := 1;
         elsif t_pdcsample.samplecount = 1 then
            t_pdcsample.samplecount := 2;
         else
            t_pdcsample.samplecount := 0;
         end if;

      end if;

      forall i in t_pdcsample.first .. t_pdcsample.last
         update tbl_bil
            set udfsamplecount = t_pdcsample.samplecount
              , udf_sampleflag = t_pdcsample.sampleflag
              , udf_pidspend = null
              , udf_pid = t_pdcsample.udf_pid
          where rowid = t_pdcsample(i).rowid
                ;

      for i in t_pdcsample.first .. t_pdcsample.last loop
         update tbl_bil TBL_BIL
            set udfmatchpercent = 1
          where uniqueid <> t_pdcsample.uniqueid
            and udf_tokenized = t_pdcsample.udf_tokenized;
      end loop;

      commit ;

   end loop;
   close c_pdcsample;

end PrcdrClustering;
/

最后调用所有表tbl_... 有点不必要。

Where to start? I've a number points to make.

You're doing bulk updates; this implies that bulk collect ... forall would be far more efficient.
You're doing multiple updates of the same table, which doubles the amount of DML.
As you've already selected from the table, re-entering it to do another count is pretty pointless, use an analytic function to get the result you need.
Indentation, indentation, indentation. Makes your code much easier to read.
You can use elsif to reduce the amount of statements to be evaluated ( very, very minor win )
If the uniqueid is unique you can use rowid to update the table.
You're updating udf_pidspend to null, whether this is intentional or not there's no need to do a separate update for it.
You can do a lot more in the cursor, but there's obviously no need to select everything, which'll decrease the amount of data you need to read from the disks.
You may need a couple of commits in there; though this means you can't rollback if it fails midway.
I hope tbl_bil is indexed on uniqueid
As GolzeTrol noted you're opening the cursor multiple times. There's no need for this.

As general rules:

If you're going to select / update or delete from a table do it once if possible and as few times as possible if not.
If you're doing bulk operations use bulk collect.
Never write select *
Use rowid where possible it avoids all index problems.

This will only work in 11G, I answered this question recently where I provided my own way of dealing with this implementation restriction in versions prior to 11G and linked to Ollie's, Tom Kyte's and Sathya's

I'm not entirely certain what you're trying to do here so please forgive me if the logic is a little off.

create or replace procedure prcdr_Clustering is

   cursor c_pdcsample is
    select rowid as rid
         , count(*) over ( partition by udf_tokenized ) as samplecount
         , udf_chk
         , max(uniqueid) over ( partition by udf_tokenized ) as udf_pid
      from tbl_bil
     where udf_chk = 'N';       

   type t__pdcsample is table of c_pdcsample%rowtype index by binary_integer;
   t_pdcsample t__pdcsample;

begin

   open c_pdcsample;
   loop

      fetch c_pdcsample bulk collect into t_pdcsample limit 1000;

      exit when t_pdcsample.count = 0;

      if t_pdcsample.samplecount <> 0 then
         t_pdcsample.udf_chk := 'y';

         if t_pdcsample.samplecount > 1 then
            t_pdcsample.samplecount := 1;
         elsif t_pdcsample.samplecount = 1 then
            t_pdcsample.samplecount := 2;
         else
            t_pdcsample.samplecount := 0;
         end if;

      end if;

      forall i in t_pdcsample.first .. t_pdcsample.last
         update tbl_bil
            set udfsamplecount = t_pdcsample.samplecount
              , udf_sampleflag = t_pdcsample.sampleflag
              , udf_pidspend = null
              , udf_pid = t_pdcsample.udf_pid
          where rowid = t_pdcsample(i).rowid
                ;

      for i in t_pdcsample.first .. t_pdcsample.last loop
         update tbl_bil TBL_BIL
            set udfmatchpercent = 1
          where uniqueid <> t_pdcsample.uniqueid
            and udf_tokenized = t_pdcsample.udf_tokenized;
      end loop;

      commit ;

   end loop;
   close c_pdcsample;

end PrcdrClustering;
/

Lastly calling all tables tbl_... is a little bit unnecessary.

回复收藏 0 原文

扬花落满肩 2025-01-13 10:43:19

这是使用单个 SQL 语句的变体。我不能 100% 确定逻辑完全相同，但对于我的测试集来说，确实如此。此外，当您有多个 udf_chk = 'N' 和相同 udf_tokenized 的记录时，当前过程是不确定的...

这是重构的过程

SQL> create procedure prcdr_clustering_refactored
  2  is
  3  begin
  4    merge into tbl_bil t
  5    using ( select tb1.uniqueid
  6                 , count(*) over (partition by tb1.udf_tokenized) cnt
  7                 , max(decode(udf_chk,'N',uniqueid)) over (partition by tb1.udf_tokenized order by tb1.udf_chk) pid
  8              from tbl_bil tb1
  9             where udf_chk = 'N'
 10                or exists
 11                   ( select 'dummy'
 12                       from tbl_bil tb2
 13                      where tb2.udf_tokenized = tb1.udf_tokenized
 14                   )
 15          ) q
 16       on ( t.uniqueid = q.uniqueid )
 17     when matched then
 18          update
 19             set t.udf_samplecount = decode(t.udf_chk,'N',q.cnt,t.udf_samplecount)
 20               , t.udf_sampleflag = decode(t.udf_chk,'N',decode(q.cnt,1,2,1),t.udf_sampleflag)
 21               , t.udf_pid = q.pid
 22               , t.udf_pidspend = decode(t.udf_chk,'N',null,t.udf_pidspend)
 23               , t.udf_matchpercent = decode(t.udf_chk,'N',t.udf_matchpercent,1)
 24               , t.udf_chk = 'Y'
 25    ;
 26  end;
 27  /

Procedure created.

，这是一个测试：

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               0              0               0                0            0            0 N
       2 a          a                 1 bla              0              0               0                0            0            0 N
       3 a          a                 1 bla              0              0               0                0            0            0 Y
       4 a          a                 1 bla              0              0               0                0            0            0 Y
       5 a          a                 1 bla              0              0               0                0            0            0 Y
       6 a          a                 1 blah             0              0               0                0            0            0 N
       7 a          a                 1 blah             0              0               0                0            0            0 Y
       8 a          a                 1 blah             0              0               0                0            0            0 Y
       9 a          a                 1 blah             0              0               0                0            0            0 Y
      10 a          a                 1 blah             0              0               0                0            0            0 Y
      11 a          a                 1 blah             0              0               0                0            0            0 Y

11 rows selected.

SQL> exec prcdr_clustering

PL/SQL procedure successfully completed.

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               1              2               1                0            0              Y
       2 a          a                 1 bla              2              1               4                0            0              Y
       3 a          a                 1 bla              2              0               0                1            0            0 Y
       4 a          a                 1 bla              2              0               0                1            0            0 Y
       5 a          a                 1 bla              2              0               0                1            0            0 Y
       6 a          a                 1 blah             6              1               6                0            0              Y
       7 a          a                 1 blah             6              0               0                1            0            0 Y
       8 a          a                 1 blah             6              0               0                1            0            0 Y
       9 a          a                 1 blah             6              0               0                1            0            0 Y
      10 a          a                 1 blah             6              0               0                1            0            0 Y
      11 a          a                 1 blah             6              0               0                1            0            0 Y

11 rows selected.

SQL> rollback
  2  /

Rollback complete.

SQL> exec prcdr_clustering_refactored

PL/SQL procedure successfully completed.

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               1              2               1                0            0              Y
       2 a          a                 1 bla              2              1               4                0            0              Y
       3 a          a                 1 bla              2              0               0                1            0            0 Y
       4 a          a                 1 bla              2              0               0                1            0            0 Y
       5 a          a                 1 bla              2              0               0                1            0            0 Y
       6 a          a                 1 blah             6              1               6                0            0              Y
       7 a          a                 1 blah             6              0               0                1            0            0 Y
       8 a          a                 1 blah             6              0               0                1            0            0 Y
       9 a          a                 1 blah             6              0               0                1            0            0 Y
      10 a          a                 1 blah             6              0               0                1            0            0 Y
      11 a          a                 1 blah             6              0               0                1            0            0 Y

11 rows selected.

问候，
抢。

Here is a variant using a single SQL statement. I'm not 100% certain that the logic is exactly the same, but for my test set, it is. Also the current procedure is non deterministic when you have more than one record with udf_chk = 'N' and the same udf_tokenized ...

This is the refactored procedure

SQL> create procedure prcdr_clustering_refactored
  2  is
  3  begin
  4    merge into tbl_bil t
  5    using ( select tb1.uniqueid
  6                 , count(*) over (partition by tb1.udf_tokenized) cnt
  7                 , max(decode(udf_chk,'N',uniqueid)) over (partition by tb1.udf_tokenized order by tb1.udf_chk) pid
  8              from tbl_bil tb1
  9             where udf_chk = 'N'
 10                or exists
 11                   ( select 'dummy'
 12                       from tbl_bil tb2
 13                      where tb2.udf_tokenized = tb1.udf_tokenized
 14                   )
 15          ) q
 16       on ( t.uniqueid = q.uniqueid )
 17     when matched then
 18          update
 19             set t.udf_samplecount = decode(t.udf_chk,'N',q.cnt,t.udf_samplecount)
 20               , t.udf_sampleflag = decode(t.udf_chk,'N',decode(q.cnt,1,2,1),t.udf_sampleflag)
 21               , t.udf_pid = q.pid
 22               , t.udf_pidspend = decode(t.udf_chk,'N',null,t.udf_pidspend)
 23               , t.udf_matchpercent = decode(t.udf_chk,'N',t.udf_matchpercent,1)
 24               , t.udf_chk = 'Y'
 25    ;
 26  end;
 27  /

Procedure created.

And here is a test:

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               0              0               0                0            0            0 N
       2 a          a                 1 bla              0              0               0                0            0            0 N
       3 a          a                 1 bla              0              0               0                0            0            0 Y
       4 a          a                 1 bla              0              0               0                0            0            0 Y
       5 a          a                 1 bla              0              0               0                0            0            0 Y
       6 a          a                 1 blah             0              0               0                0            0            0 N
       7 a          a                 1 blah             0              0               0                0            0            0 Y
       8 a          a                 1 blah             0              0               0                0            0            0 Y
       9 a          a                 1 blah             0              0               0                0            0            0 Y
      10 a          a                 1 blah             0              0               0                0            0            0 Y
      11 a          a                 1 blah             0              0               0                0            0            0 Y

11 rows selected.

SQL> exec prcdr_clustering

PL/SQL procedure successfully completed.

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               1              2               1                0            0              Y
       2 a          a                 1 bla              2              1               4                0            0              Y
       3 a          a                 1 bla              2              0               0                1            0            0 Y
       4 a          a                 1 bla              2              0               0                1            0            0 Y
       5 a          a                 1 bla              2              0               0                1            0            0 Y
       6 a          a                 1 blah             6              1               6                0            0              Y
       7 a          a                 1 blah             6              0               0                1            0            0 Y
       8 a          a                 1 blah             6              0               0                1            0            0 Y
       9 a          a                 1 blah             6              0               0                1            0            0 Y
      10 a          a                 1 blah             6              0               0                1            0            0 Y
      11 a          a                 1 blah             6              0               0                1            0            0 Y

11 rows selected.

SQL> rollback
  2  /

Rollback complete.

SQL> exec prcdr_clustering_refactored

PL/SQL procedure successfully completed.

SQL> select *
  2    from tbl_bil
  3   order by uniqueid
  4  /

UNIQUEID VENDORNAME SHORTTEXT  SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
       1 a          a                 1 bl               1              2               1                0            0              Y
       2 a          a                 1 bla              2              1               4                0            0              Y
       3 a          a                 1 bla              2              0               0                1            0            0 Y
       4 a          a                 1 bla              2              0               0                1            0            0 Y
       5 a          a                 1 bla              2              0               0                1            0            0 Y
       6 a          a                 1 blah             6              1               6                0            0              Y
       7 a          a                 1 blah             6              0               0                1            0            0 Y
       8 a          a                 1 blah             6              0               0                1            0            0 Y
       9 a          a                 1 blah             6              0               0                1            0            0 Y
      10 a          a                 1 blah             6              0               0                1            0            0 Y
      11 a          a                 1 blah             6              0               0                1            0            0 Y

11 rows selected.

Regards,
Rob.

回复收藏 0 原文