联合“表”用 awk

发布于 2024-12-10 14:13:04 字数 424 浏览 0 评论 0原文

我在一个文件中有多个“表”，例如：

col1, col2, col3, col4
1, 2, 3, 4
5, 6, 7, 8

col2, col3, col5
10, 11, 12
13, 14, 15

我想将这两个表折叠为：

col1, col2, col3, col4, col5
1   , 2   , 3   , 4   , 
5   , 6   , 7   , 8   , 
    , 10  , 11  ,     , 12
    , 13  , 14  ,     , 15

（注意：留下额外的空格只是为了让事情更容易理解）

这似乎至少需要两遍处理：一遍用来收集完整的列列表，另一遍用来创建输出表。用 awk 可以做到这一点吗？如果不行，您会推荐什么其他工具？

原文

I have multiple "tables" in a file, such as:

col1, col2, col3, col4
1, 2, 3, 4
5, 6, 7, 8

col2, col3, col5
10, 11, 12
13, 14, 15

And I would like to collapse these 2 tables to:

col1, col2, col3, col4, col5
1   , 2   , 3   , 4   , 
5   , 6   , 7   , 8   , 
    , 10  , 11  ,     , 12
    , 13  , 14  ,     , 15

(Note: extra whitespace left just to make things easier to understand)

This would seem to require at least 2 passes, one to collect the full list of columns, and another one to create the output table. Is it possible to do this with awk? If not, what other tool would you recommend?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

原谅过去的我 2024-12-17 14:13:05

尝试一下：

代码：

$ cat s.awk
# Pass 1 (NR==FNR holds only while the first file argument is read):
# find the highest column number. Only the last field of a header line is
# inspected, so this assumes the highest-numbered column of each table is
# listed last and that column numbers are a single digit.
NR==FNR{
    if (match($1, /^col/)) {
        last=substr($NF,4,1)
        # Keep the running maximum. The else-branch used to assign the unset
        # variable maxColumn, which reset maxIndex whenever a later header
        # did not raise the maximum.
        maxIndex=(last>maxIndex)?last:maxIndex
    }
    next
}

# Pass 2, first line: print the merged header col1 .. col<maxIndex>.
FNR==1{
    for (i=1;i<=maxIndex;i++)
        header=(i==maxIndex)?header "col"i:header "col" i ", "
    print header
}

# Header line of a table: record which input field holds each column number.
/^col[1-9]/{
    for (i in places)
        delete places[i]
    for (i=1;i<=NF;i++){
        n=substr($i,4,1)
        places[n]=i
    }
}

# Data line: emit the field for every column this table has, and a bare
# comma placeholder for every column it lacks. Fields are split on
# whitespace (no -F is given), so each value carries its own trailing comma.
/^[0-9]/{
    s=""
    for (i=1;i<=maxIndex;i++)
        s=(i in places)? s $places[i] " " : s ", "
    print s
}

调用：

awk -f s.awk file file  | column -t

输出：

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4      ,
5,     6,     7,     8      ,
,      10,    11,    ,      12
,      13,    14,    ,      15

HTH Chris

give this a try:

Code:

$ cat s.awk
# Pass 1 (NR==FNR holds only while the first file argument is read):
# find the highest column number. Only the last field of a header line is
# inspected, so this assumes the highest-numbered column of each table is
# listed last and that column numbers are a single digit.
NR==FNR{
    if (match($1, /^col/)) {
        last=substr($NF,4,1)
        # Keep the running maximum. The else-branch used to assign the unset
        # variable maxColumn, which reset maxIndex whenever a later header
        # did not raise the maximum.
        maxIndex=(last>maxIndex)?last:maxIndex
    }
    next
}

# Pass 2, first line: print the merged header col1 .. col<maxIndex>.
FNR==1{
    for (i=1;i<=maxIndex;i++)
        header=(i==maxIndex)?header "col"i:header "col" i ", "
    print header
}

# Header line of a table: record which input field holds each column number.
/^col[1-9]/{
    for (i in places)
        delete places[i]
    for (i=1;i<=NF;i++){
        n=substr($i,4,1)
        places[n]=i
    }
}

# Data line: emit the field for every column this table has, and a bare
# comma placeholder for every column it lacks. Fields are split on
# whitespace (no -F is given), so each value carries its own trailing comma.
/^[0-9]/{
    s=""
    for (i=1;i<=maxIndex;i++)
        s=(i in places)? s $places[i] " " : s ", "
    print s
}

Call with:

awk -f s.awk file file  | column -t

Output:

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4      ,
5,     6,     7,     8      ,
,      10,    11,    ,      12
,      13,    14,    ,      15

HTH Chris

回复收藏 0 原文

优雅的叶子 2024-12-17 14:13:05

该代码假定表由空行分隔：

awk -F', *' '
# An empty record (NF is 0) marks the next record as a table header.
NF == 0 {
  header_at = NR + 1
  next
}

# Header record: register each column name the first time it is seen and
# note which name every field position of the current table carries.
NR == 1 || NR == header_at {
  for (f = 1; f <= NF; f++) {
    if (!known[$f]++)
      names[++ncols] = $f
    name_at[f] = $f
  }
  next
}

# Data record: store each value under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name_at[f]] = $f
}

# Print the merged header, then every row in input order; columns a row
# never received are printed as empty strings.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s%s", names[k], (k < ncols ? OFS : RS)
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s%s", cell[r, names[k]], (k < ncols ? OFS : RS)
}' OFS=', ' tables

如果表位于单独的文件中：

awk -F', *' '
# First record of each input file is the header of that table: register
# each column name the first time it is seen and note which name every
# field position of the current table carries.
FNR == 1 {
  for (f = 1; f <= NF; f++) {
    if (!known[$f]++)
      names[++ncols] = $f
    name_at[f] = $f
  }
  next
}

# Data record: store each value under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name_at[f]] = $f
}

# Print the merged header, then every row in input order; columns a row
# never received are printed as empty strings.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s%s", names[k], (k < ncols ? OFS : RS)
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s%s", cell[r, names[k]], (k < ncols ? OFS : RS)
}' OFS=', ' file1 file2 [.. filen]

The code assumes that the tables are separated by empty lines:

awk -F', *' '
# An empty record (NF is 0) marks the next record as a table header.
NF == 0 {
  header_at = NR + 1
  next
}

# Header record: register each column name the first time it is seen and
# note which name every field position of the current table carries.
NR == 1 || NR == header_at {
  for (f = 1; f <= NF; f++) {
    if (!known[$f]++)
      names[++ncols] = $f
    name_at[f] = $f
  }
  next
}

# Data record: store each value under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name_at[f]] = $f
}

# Print the merged header, then every row in input order; columns a row
# never received are printed as empty strings.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s%s", names[k], (k < ncols ? OFS : RS)
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s%s", cell[r, names[k]], (k < ncols ? OFS : RS)
}' OFS=', ' tables

If you have the tables in separate files:

awk -F', *' '
# First record of each input file is the header of that table: register
# each column name the first time it is seen and note which name every
# field position of the current table carries.
FNR == 1 {
  for (f = 1; f <= NF; f++) {
    if (!known[$f]++)
      names[++ncols] = $f
    name_at[f] = $f
  }
  next
}

# Data record: store each value under (row number, column name).
{
  nrows++
  for (f = 1; f <= NF; f++)
    cell[nrows, name_at[f]] = $f
}

# Print the merged header, then every row in input order; columns a row
# never received are printed as empty strings.
END {
  for (k = 1; k <= ncols; k++)
    printf "%s%s", names[k], (k < ncols ? OFS : RS)
  for (r = 1; r <= nrows; r++)
    for (k = 1; k <= ncols; k++)
      printf "%s%s", cell[r, names[k]], (k < ncols ? OFS : RS)
}' OFS=', ' file1 file2 [.. filen]

回复收藏 0 原文

メ斷腸人バ 2024-12-17 14:13:05

这是一个单遍（one-pass）的 Perl 解决方案。它假设文件中每个表之间至少有一个空行。

perl -00 -ne '
    # Paragraph mode (-00): every record read by -n is one whole table,
    # i.e. a header line followed by its data lines.
    BEGIN {
        %slot_of = ();    # column name -> position in the merged header
        @columns = ();    # position in the merged header -> column name
        @table   = ();    # merged rows, each an array ref indexed by position
    }

    chomp;
    ($header, @data) = split /\n/;

    # Translate each field position of this table into its position in the
    # merged header, registering column names not seen before.
    @slot = ();
    foreach $name (split /, /, $header) {
        unless (exists $slot_of{$name}) {
            push @columns, $name;
            $slot_of{$name} = $#columns;
        }
        push @slot, $slot_of{$name};
    }

    # Store every data line with its values moved to their merged positions.
    foreach $line (@data) {
        @cells  = split /, /, $line;
        $merged = [];
        $merged->[ $slot[$_] ] = $cells[$_] for 0 .. $#cells;
        push @table, $merged;
    }

    # Print the merged header, then each row padded out to the full width;
    # positions a row never filled come out as empty strings.
    END {
        print join(", ", @columns), "\n";
        foreach $merged (@table) {
            print join(", ", map { $merged->[$_] } 0 .. $#columns), "\n";
        }
    }
' tables | column -t

输出

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4,
5,     6,     7,     8,
,      10,    11,    ,      12
,      13,    14,    ,      15

Here's a one-pass perl solution. It assumes there is at least one blank line between each table in the file.

perl -00 -ne '
    # Paragraph mode (-00): every record read by -n is one whole table,
    # i.e. a header line followed by its data lines.
    BEGIN {
        %slot_of = ();    # column name -> position in the merged header
        @columns = ();    # position in the merged header -> column name
        @table   = ();    # merged rows, each an array ref indexed by position
    }

    chomp;
    ($header, @data) = split /\n/;

    # Translate each field position of this table into its position in the
    # merged header, registering column names not seen before.
    @slot = ();
    foreach $name (split /, /, $header) {
        unless (exists $slot_of{$name}) {
            push @columns, $name;
            $slot_of{$name} = $#columns;
        }
        push @slot, $slot_of{$name};
    }

    # Store every data line with its values moved to their merged positions.
    foreach $line (@data) {
        @cells  = split /, /, $line;
        $merged = [];
        $merged->[ $slot[$_] ] = $cells[$_] for 0 .. $#cells;
        push @table, $merged;
    }

    # Print the merged header, then each row padded out to the full width;
    # positions a row never filled come out as empty strings.
    END {
        print join(", ", @columns), "\n";
        foreach $merged (@table) {
            print join(", ", map { $merged->[$_] } 0 .. $#columns), "\n";
        }
    }
' tables | column -t

output

col1,  col2,  col3,  col4,  col5
1,     2,     3,     4,
5,     6,     7,     8,
,      10,    11,    ,      12
,      13,    14,    ,      15

回复收藏 0 原文

~没有更多了~

关于作者

策马西风

暂无简介

0 文章

0 评论

23 人气

关注发私信

醉城メ夜风

文章 0 评论 0

关注

远昼

文章 0 评论 0

关注

平生欢

文章 0 评论 0

关注

微凉

文章 0 评论 0

关注

Honwey

文章 0 评论 0

关注

qq_ikhFfg

文章 0 评论 0

友情链接

文江博客

联合“表”用 awk

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（3）

关于作者

相关话题

热门标签

推荐作者

醉城メ夜风

远昼

平生欢

微凉

Honwey

qq_ikhFfg

友情链接

联合“表”用 awk

如果你对这篇内容有疑问，欢迎到本站社区发帖提问 参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（3）

关于作者

相关话题

热门标签

推荐作者

醉城メ夜风

远昼

平生欢

微凉

Honwey

qq_ikhFfg

友情链接

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。