用 awk 合并多个“表”

发布于 2024-12-10 14:13:04 字数 424 浏览 0 评论 0原文

我在一个文件中有多个“表”,例如:

col1, col2, col3, col4
1, 2, 3, 4
5, 6, 7, 8

col2, col3, col5
10, 11, 12
13, 14, 15

我想将这两个表折叠为:

col1, col2, col3, col4, col5
1   , 2   , 3   , 4   , 
5   , 6   , 7   , 8   , 
    , 10  , 11  ,     , 12
    , 13  , 14  ,     , 15

(注意:留下额外的空格只是为了让事情更容易理解)

这似乎至少需要两遍处理:一遍用来收集完整的列列表,另一遍用来生成输出表。用 awk 可以做到这一点吗?如果不行,您会推荐什么其他工具?

I have multiple "tables" in a file, such as:

col1, col2, col3, col4
1, 2, 3, 4
5, 6, 7, 8

col2, col3, col5
10, 11, 12
13, 14, 15

And I would like to collapse these 2 tables to:

col1, col2, col3, col4, col5
1   , 2   , 3   , 4   , 
5   , 6   , 7   , 8   , 
    , 10  , 11  ,     , 12
    , 13  , 14  ,     , 15

(Note: extra whitespace left just to make things easier to understand)

This would seem to require at least 2 passes, one to collect the full list of columns, and another one to create the output table. Is it possible to do this with awk? If not, what other tool would you recommend?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3)

原谅过去的我 2024-12-17 14:13:05

尝试一下:

代码:

$ cat s.awk
# s.awk -- merge several comma-separated tables into one wide table.
#
# Run it over the same file twice:  awk -f s.awk file file
# Pass 1 (NR==FNR) finds the highest column number used in any header;
# pass 2 prints the merged header and then every data row re-aligned.
# Header cells are expected to look like "colN".  Fields are split on
# whitespace, so every cell except the last keeps its trailing comma.

# Pass 1: remember the largest N among all "colN" header cells.
NR==FNR{
    if (match($1, /^col/)) {
        for (i=1;i<=NF;i++){
            n=substr($i,4)+0    # "+0" parses the number and drops a trailing comma
            if (n>maxIndex)
                maxIndex=n
        }
    }
    next
}

# Pass 2, first line: emit the merged header col1 .. col<maxIndex>.
FNR==1{
    header=""
    for (i=1;i<=maxIndex;i++)
        header=(i==maxIndex)?header "col"i:header "col" i ", "
    print header
}

# Header line of a table: record which input field holds which column.
/^col[1-9]/{
    for (i in places)
        delete places[i]
    for (i=1;i<=NF;i++){
        n=substr($i,4)+0
        places[n]=i
    }
}

# Data line: print each output column, or a bare comma if this table lacks it.
/^[0-9]/{
    s=""
    for (i=1;i<=maxIndex;i++)
        s=(i in places)? s $places[i] " " : s ", "
    print s
}

调用:

awk -f s.awk file file  | column -t

输出:

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4      ,
5,     6,     7,     8      ,
,      10,    11,    ,      12
,      13,    14,    ,      15

HTH Chris

give this a try:

Code:

$ cat s.awk
# s.awk -- merge several comma-separated tables into one wide table.
#
# Run it over the same file twice:  awk -f s.awk file file
# Pass 1 (NR==FNR) finds the highest column number used in any header;
# pass 2 prints the merged header and then every data row re-aligned.
# Header cells are expected to look like "colN".  Fields are split on
# whitespace, so every cell except the last keeps its trailing comma.

# Pass 1: remember the largest N among all "colN" header cells.
NR==FNR{
    if (match($1, /^col/)) {
        for (i=1;i<=NF;i++){
            n=substr($i,4)+0    # "+0" parses the number and drops a trailing comma
            if (n>maxIndex)
                maxIndex=n
        }
    }
    next
}

# Pass 2, first line: emit the merged header col1 .. col<maxIndex>.
FNR==1{
    header=""
    for (i=1;i<=maxIndex;i++)
        header=(i==maxIndex)?header "col"i:header "col" i ", "
    print header
}

# Header line of a table: record which input field holds which column.
/^col[1-9]/{
    for (i in places)
        delete places[i]
    for (i=1;i<=NF;i++){
        n=substr($i,4)+0
        places[n]=i
    }
}

# Data line: print each output column, or a bare comma if this table lacks it.
/^[0-9]/{
    s=""
    for (i=1;i<=maxIndex;i++)
        s=(i in places)? s $places[i] " " : s ", "
    print s
}

Call with:

awk -f s.awk file file  | column -t

Output:

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4      ,
5,     6,     7,     8      ,
,      10,    11,    ,      12
,      13,    14,    ,      15

HTH Chris

优雅的叶子 2024-12-17 14:13:05

该代码假定表由空行分隔:

awk -F', *' '
# Merge the blank-line-separated tables of one file into a single table.

# A blank line ends a table, so the record after it is a header row.
NF == 0 {
  header_at = NR + 1
  next
}

# Header row: register unseen column names in first-seen order and
# remember which column each field position of this table belongs to.
NR == 1 || NR == header_at {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      order[++ncols] = $f
    name[f] = $f
  }
  next
}

# Data row: file every cell under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name[f]] = $f
}

# Print the merged header, then every row with empty cells where a
# table did not supply the column.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s", (order[k] (k < ncols ? OFS : RS))
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s", (cell[r, order[k]] (k < ncols ? OFS : RS))
}' OFS=', ' tables

如果表位于单独的文件中:

awk -F', *' '
# Merge tables stored one per file into a single table.

# First line of each file is its header: register unseen column names in
# first-seen order and remember which column each field position holds.
FNR == 1 {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      order[++ncols] = $f
    name[f] = $f
  }
  next
}

# Data row: file every cell under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name[f]] = $f
}

# Print the merged header, then every row with empty cells where a
# table did not supply the column.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s", (order[k] (k < ncols ? OFS : RS))
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s", (cell[r, order[k]] (k < ncols ? OFS : RS))
}' OFS=', ' file1 file2 [.. filen] 

The code assumes that the tables are separated by empty lines:

awk -F', *' '
# Merge the blank-line-separated tables of one file into a single table.

# A blank line ends a table, so the record after it is a header row.
NF == 0 {
  header_at = NR + 1
  next
}

# Header row: register unseen column names in first-seen order and
# remember which column each field position of this table belongs to.
NR == 1 || NR == header_at {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      order[++ncols] = $f
    name[f] = $f
  }
  next
}

# Data row: file every cell under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name[f]] = $f
}

# Print the merged header, then every row with empty cells where a
# table did not supply the column.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s", (order[k] (k < ncols ? OFS : RS))
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s", (cell[r, order[k]] (k < ncols ? OFS : RS))
}' OFS=', ' tables

If you have the tables in separate files:

awk -F', *' '
# Merge tables stored one per file into a single table.

# First line of each file is its header: register unseen column names in
# first-seen order and remember which column each field position holds.
FNR == 1 {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      order[++ncols] = $f
    name[f] = $f
  }
  next
}

# Data row: file every cell under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name[f]] = $f
}

# Print the merged header, then every row with empty cells where a
# table did not supply the column.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s", (order[k] (k < ncols ? OFS : RS))
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s", (cell[r, order[k]] (k < ncols ? OFS : RS))
}' OFS=', ' file1 file2 [.. filen] 
メ斷腸人バ 2024-12-17 14:13:05

这是一个只需单遍扫描的 Perl 解决方案。它假设文件中每个表之间至少有一个空行。

perl -00 -ne '
    # Paragraph mode (-00): every record read by -n is one whole table.
    BEGIN {
        %column2idx = ();   # column name     -> output position
        @idx2column = ();   # output position -> column name
        @lines      = ();   # merged data rows, one array ref per row
    }

    chomp;
    my ($header, @body) = split /\n/;

    # Map each field position of this table to its output position,
    # registering column names that have not been seen before.
    my @names = split /, /, $header;
    my @field_map;
    for my $pos (0 .. $#names) {
        my $name = $names[$pos];
        unless (exists $column2idx{$name}) {
            push @idx2column, $name;
            $column2idx{$name} = $#idx2column;
        }
        $field_map[$pos] = $column2idx{$name};
    }

    # Store every data row with its cells moved to their output positions.
    for my $line (@body) {
        my @cells = split /, /, $line;
        my @merged;
        for my $pos (0 .. $#cells) {
            $merged[ $field_map[$pos] ] = $cells[$pos];
        }
        push @lines, \@merged;
    }

    # After the last table: print the full header, then each row padded
    # to the full column count (missing cells print as empty strings).
    END {
        $ncols = @idx2column;
        print join(", ", @idx2column), "\n";

        for my $row (@lines) {
            print join(", ", map { $row->[$_] } 0 .. $ncols - 1), "\n";
        }
    }
' tables | column -t

输出

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4,
5,     6,     7,     8,
,      10,    11,    ,      12
,      13,    14,    ,      15

Here's a one-pass perl solution. It assumes there is at least one blank line between each table in the file.

perl -00 -ne '
    # Paragraph mode (-00): every record read by -n is one whole table.
    BEGIN {
        %column2idx = ();   # column name     -> output position
        @idx2column = ();   # output position -> column name
        @lines      = ();   # merged data rows, one array ref per row
    }

    chomp;
    my ($header, @body) = split /\n/;

    # Map each field position of this table to its output position,
    # registering column names that have not been seen before.
    my @names = split /, /, $header;
    my @field_map;
    for my $pos (0 .. $#names) {
        my $name = $names[$pos];
        unless (exists $column2idx{$name}) {
            push @idx2column, $name;
            $column2idx{$name} = $#idx2column;
        }
        $field_map[$pos] = $column2idx{$name};
    }

    # Store every data row with its cells moved to their output positions.
    for my $line (@body) {
        my @cells = split /, /, $line;
        my @merged;
        for my $pos (0 .. $#cells) {
            $merged[ $field_map[$pos] ] = $cells[$pos];
        }
        push @lines, \@merged;
    }

    # After the last table: print the full header, then each row padded
    # to the full column count (missing cells print as empty strings).
    END {
        $ncols = @idx2column;
        print join(", ", @idx2column), "\n";

        for my $row (@lines) {
            print join(", ", map { $row->[$_] } 0 .. $ncols - 1), "\n";
        }
    }
' tables | column -t

output

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4,
5,     6,     7,     8,
,      10,    11,    ,      12
,      13,    14,    ,      15
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文