如何使用SAS中的xmlengine(xmlmapper)添加基于计数器的变量并输出到xml文件?

发布于 2024-11-06 13:35:59 字数 2214 浏览 8 评论 0原文

我有两个数据集,其中一些记录彼此相关。

例如

Dataset1

Var1

abcde

坏人

大爆炸

奇怪

每天

都很

Dataset2

var1

abc

cde

坏坏

人是一个

陌生人

现在我想要使用循环逻辑比较这些记录,这是我的代码。

%let id1=%sysfunc(open(dataset2,in)); %let colterm=%sysfunc(varnum(&id1,var1)); %do %while(%sysfunc(fetch(&id1)) eq 0); %let vterm=%sysfunc(getvarc(&id1,&colterm));

        data dataset1;
            set dataset1;
            if index(strip(var1),strip("&vterm"))>0 or index(strip("&vterm"),strip(var1))>0 then do;/*when one contains the other*/
            match="Fuzzy";
            cnt=cnt+1;
            end;
        run;
    %end;

    proc sql noprint;
      select max(cnt) into:maxnum/*to get max cnt*/
      from dataset1;
    quit;

现在 dataset1 如下所示

Var1 cnt ma​​tch

abcde 2 模糊

坏人 2 模糊

大爆炸 0

奇怪 1 模糊

日常 0

完全 0

我想合并将 dataset2 中的相关记录复制到 dataset1 中,新的 dataset1 应如下所示

Var1 cnt ma​​tch FM_dataset2_1 FM_dataset2_2

abcde 2 模糊 abc cde

坏人 2 模糊坏坏人

大爆炸 0

奇怪 1 模糊陌生人

每天 0

完全 0

正如你所看到的新变量 FM_dataset2_1 和 FM_dataset2_2 是基于一个计数器 cnt 的自动重新分配。但我只是想不出使用 SAS 代码实现此步骤的正确方法。

此外,我需要将数据集输出到 xml 文件中。结果应该如下所示。

<text>abcde</text>
<match>Fuzzy</match>
  <matchitem>abc</matchitem> 
  <matchitem>tecde</matchitem> 

与上面的问题一样,问题也是关于如何确定 matchitem 元素的数量并写入文件。在 xml 映射文件中,我可以如下确定位置,

     <COLUMN name="FM_dataset2_1">
            <PATH syntax="XPath">/../matchitem[position()=**1**]</PATH>
...

     <COLUMN name="FM_dataset2_2">
            <PATH syntax="XPath">/../matchitem[position()=**2**]</PATH>

但这必须根据情况手动完成。是否可以根据cnt计数器(maxnum)自动自定义地图文件?

有人可以建议吗?

I have two data sets in which some records are relevant to each other.

E.g.

Dataset1

Var1

abcde

bad man

big bang

strange

everyday

exactly

Dataset2

var1

abc

cde

bad

bad man a

stranger

Now I want to compare those records using a loop logic, and here is my code.

%let id1=%sysfunc(open(dataset2,in));
%let colterm=%sysfunc(varnum(&id1,var1));
%do %while(%sysfunc(fetch(&id1)) eq 0);
%let vterm=%sysfunc(getvarc(&id1,&colterm));

        data dataset1;
            set dataset1;
            if index(strip(var1),strip("&vterm"))>0 or index(strip("&vterm"),strip(var1))>0 then do;/*when one contains the other*/
            match="Fuzzy";
            cnt=cnt+1;
            end;
        run;
    %end;

    proc sql noprint;
      select max(cnt) into:maxnum/*to get max cnt*/
      from dataset1;
    quit;

Now dataset1 looks like below

Var1 cnt match

abcde 2 Fuzzy

bad man 2 Fuzzy

big bang 0

strange 1 Fuzzy

everyday 0

exactly 0

I want to merge those relevant records in dataset2 into dataset1, and the new dataset1 should look like below

Var1 cnt match FM_dataset2_1 FM_dataset2_2

abcde 2 Fuzzy abc cde

bad man 2 Fuzzy bad bad man a

big bang 0

strange 1 Fuzzy stranger

everyday 0

exactly 0

As you can see the new variables FM_dataset2_1 and FM_dataset2_2 are auto-reassigned ones based one counter, cnt. But I just couldn't think out a proper way of realizing this step using SAS code.

Further more, I need to output the dataset into an xml file. And the result should look like below

<text>abcde</text>
<match>Fuzzy</match>
  <matchitem>abc</matchitem> 
  <matchitem>tecde</matchitem> 

The problem, as with the issue above, is also about how to determine the number of matchitem element and write into the file. In xml map file, I can determine the position as follows

     <COLUMN name="FM_dataset2_1">
            <PATH syntax="XPath">/../matchitem[position()=**1**]</PATH>
...

     <COLUMN name="FM_dataset2_2">
            <PATH syntax="XPath">/../matchitem[position()=**2**]</PATH>

But this has to be done mannually case by case. Is it possible to customize map file based on cnt counter(maxnum) automatically?

Can anybody suggest?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

染年凉城似染瑾 2024-11-13 13:35:59

我确信有比下面更有效的代码,但我试图遵循您的思路。我不熟悉 XML 引擎的使用,所以我将把这部分留给其他人。否则,如果您需要手动创建它,那么您就在正确的轨道上创建了 MAXNUM 宏变量,那么您可以在循环中使用它。

%let id1=%sysfunc(open(dataset2,in));
%let colterm=%sysfunc(varnum(&id1,var1));
%do %while(%sysfunc(fetch(&id1)) eq 0);
%let vterm=%sysfunc(getvarc(&id1,&colterm));

   data dataset1;
      set dataset1;
      format vterm $20.;
      if match eq "Fuzzy" then output;
      if index(strip(var1),strip("&vterm"))>0 or index(strip("&vterm"),strip(var1))>0 then do;
         cnt=sum(cnt,1);
         match="Fuzzy";
         vterm = "&vterm";
         output;
      end;
      else do;
         cnt=sum(cnt,0);
         output;
      end;
   run;

   proc sort data=dataset1;
      by var1 match vterm descending cnt;
   proc sort data=dataset1 nodupkey;
      by var1 match vterm;
   run;
%end;

proc sql;
   create table maxcnt as
      select
         var1,
         match,
         max(cnt) as cnt
      from dataset1
      group by 1,2
      ;
   quit;
run;

proc transpose data=dataset1 out=dataset1(drop=FM_dataset2_0 _name_) prefix=FM_dataset2_;
   by var1 match;
   id cnt;
   var vterm;
run;

data dataset1;
   merge dataset1 maxcnt;
   by var1 match;
run;

%let id2=%sysfunc(close(&id1));    /*closes out dataset2 in case you need it later */

I'm sure there is more efficient code than the following, but I tried to stay with your line of thought. I am not familiar with working with the XML engine, so I'll leave that part to someone else. Otherwise, if you need to create it manually then you were on the right track creating the MAXNUM macro variable, then you can use it in a loop.

%let id1=%sysfunc(open(dataset2,in));
%let colterm=%sysfunc(varnum(&id1,var1));
%do %while(%sysfunc(fetch(&id1)) eq 0);
%let vterm=%sysfunc(getvarc(&id1,&colterm));

   data dataset1;
      set dataset1;
      format vterm $20.;
      if match eq "Fuzzy" then output;
      if index(strip(var1),strip("&vterm"))>0 or index(strip("&vterm"),strip(var1))>0 then do;
         cnt=sum(cnt,1);
         match="Fuzzy";
         vterm = "&vterm";
         output;
      end;
      else do;
         cnt=sum(cnt,0);
         output;
      end;
   run;

   proc sort data=dataset1;
      by var1 match vterm descending cnt;
   proc sort data=dataset1 nodupkey;
      by var1 match vterm;
   run;
%end;

proc sql;
   create table maxcnt as
      select
         var1,
         match,
         max(cnt) as cnt
      from dataset1
      group by 1,2
      ;
   quit;
run;

proc transpose data=dataset1 out=dataset1(drop=FM_dataset2_0 _name_) prefix=FM_dataset2_;
   by var1 match;
   id cnt;
   var vterm;
run;

data dataset1;
   merge dataset1 maxcnt;
   by var1 match;
run;

%let id2=%sysfunc(close(&id1));    /*closes out dataset2 in case you need it later */
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文