如何解析CSV数据?

发布于 2024-09-17 12:55:46 字数 41 浏览 3 评论 0原文

在哪里可以找到一些 JavaScript 代码来解析 CSV 数据?

Where could I find some JavaScript code to parse CSV data?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(15

情魔剑神 2024-09-24 12:57:39

只需使用 .split(',')

var str = "How are you doing today?";
var n = str.split(" ");

Just use .split(','):

var str = "How are you doing today?";
var n = str.split(" ");
夏尔 2024-09-24 12:57:32

编辑为接受换行符作为 \r \n\r\n 作为单字节或双字节字符代码...不能找到了明确的答案,所以我就同意了。

function parseCSV(str) {
    let csv = [], pointer = 0, c = null, quote = 0, col = '', row = [];
    while (c = str.charAt(pointer)) {
        pointer++;
        if (c == '"') {
            quote++;
            // first and even numbered quotes are parsed, not used
            if (quote == 1 || quote%2 == 0) {
                continue;
            }
        } else if ([',','\r\n','\n','\r'].includes(c)) {
            // even quotes means a completed col and potential row
            if (quote%2 == 0) {
                row.push(col);
                col = '';
                quote = 0;
                // if delimiter is not a comma, also a completed row
                if (c != ',') {
                    // handle edge case of \r\n being two separate characters
                    if (c == '\r' && str.charAt(pointer) == '\n') {
                        pointer++;
                    }
                    csv.push(row);
                    row = [];
                }
                continue;
            }
        }
        col += c;
    }
    return csv;
}

Edited to accept newlines as \r \n and \r\n as either a single or double byte character code ... couldn't find a definitive answer on that so I rolled with it.

function parseCSV(str) {
    let csv = [], pointer = 0, c = null, quote = 0, col = '', row = [];
    while (c = str.charAt(pointer)) {
        pointer++;
        if (c == '"') {
            quote++;
            // first and even numbered quotes are parsed, not used
            if (quote == 1 || quote%2 == 0) {
                continue;
            }
        } else if ([',','\r\n','\n','\r'].includes(c)) {
            // even quotes means a completed col and potential row
            if (quote%2 == 0) {
                row.push(col);
                col = '';
                quote = 0;
                // if delimiter is not a comma, also a completed row
                if (c != ',') {
                    // handle edge case of \r\n being two separate characters
                    if (c == '\r' && str.charAt(pointer) == '\n') {
                        pointer++;
                    }
                    csv.push(row);
                    row = [];
                }
                continue;
            }
        }
        col += c;
    }
    return csv;
}
空心空情空意 2024-09-24 12:57:20

我构建了这个 JavaScript 脚本来将字符串中的 CSV 解析为数组对象。我发现最好将整个 CSV 分解为行、字段并相应地处理它们。我认为这将使您可以轻松更改代码以满足您的需要。

    //
    //
    // CSV to object
    //
    //

    const new_line_char = '\n';
    const field_separator_char = ',';

    function parse_csv(csv_str) {

        var result = [];

        let line_end_index_moved = false;
        let line_start_index = 0;
        let line_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_str[csr_index];
        let found_new_line_char = get_new_line_char(csv_str);
        let in_quote = false;

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            csv_str = csv_str.split(found_new_line_char).join(new_line_char);
        }
        // Handle the last character is not \n
        if (csv_str[csv_str.length - 1] !== new_line_char) {
            csv_str += new_line_char;
        }

        while (csr_index < csv_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === new_line_char) {
                if (in_quote === false) {
                    if (line_end_index_moved && (line_start_index <= line_end_index)) {
                        result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
                        line_start_index = csr_index + 1;
                    } // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
                } // Else: just ignore because we are in a quote
            }
            csr_index++;
            cursor_val = csv_str[csr_index];
            line_end_index = csr_index;
            line_end_index_moved = true;
        }

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            let new_result = [];
            let curr_row;
            for (var i = 0; i < result.length; i++) {
                curr_row = [];
                for (var j = 0; j < result[i].length; j++) {
                    curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
                }
                new_result.push(curr_row);
            }
            result = new_result;
        }
        return result;
    }

    function parse_csv_line(csv_line_str) {

        var result = [];

        //let field_end_index_moved = false;
        let field_start_index = 0;
        let field_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_line_str[csr_index];
        let in_quote = false;

        // Pretend that the last char is the separator_char to complete the loop
        csv_line_str += field_separator_char;

        while (csr_index < csv_line_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === field_separator_char) {
                if (in_quote === false) {
                    if (field_start_index <= field_end_index) {
                        result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
                        field_start_index = csr_index + 1;
                    } // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
                } // Else: just ignore because we are in quote
            }
            csr_index++;
            cursor_val = csv_line_str[csr_index];
            field_end_index = csr_index;
            field_end_index_moved = true;
        }
        return result;
    }

    function parse_csv_field(csv_field_str) {
        with_quote = (csv_field_str[0] === '"');

        if (with_quote) {
            csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
            csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
        }
        return csv_field_str;
    }

    // Initial method: check the first newline character only
    function get_new_line_char(csv_str) {
        if (csv_str.indexOf('\r\n') > -1) {
            return '\r\n';
        } else {
            return '\n'
        }
    }

I have constructed this JavaScript script to parse a CSV in string to array object. I find it better to break down the whole CSV into lines, fields and process them accordingly. I think that it will make it easy for you to change the code to suit your need.

    //
    //
    // CSV to object
    //
    //

    const new_line_char = '\n';
    const field_separator_char = ',';

    function parse_csv(csv_str) {

        var result = [];

        let line_end_index_moved = false;
        let line_start_index = 0;
        let line_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_str[csr_index];
        let found_new_line_char = get_new_line_char(csv_str);
        let in_quote = false;

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            csv_str = csv_str.split(found_new_line_char).join(new_line_char);
        }
        // Handle the last character is not \n
        if (csv_str[csv_str.length - 1] !== new_line_char) {
            csv_str += new_line_char;
        }

        while (csr_index < csv_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === new_line_char) {
                if (in_quote === false) {
                    if (line_end_index_moved && (line_start_index <= line_end_index)) {
                        result.push(parse_csv_line(csv_str.substring(line_start_index, line_end_index)));
                        line_start_index = csr_index + 1;
                    } // Else: just ignore line_end_index has not moved or line has not been sliced for parsing the line
                } // Else: just ignore because we are in a quote
            }
            csr_index++;
            cursor_val = csv_str[csr_index];
            line_end_index = csr_index;
            line_end_index_moved = true;
        }

        // Handle \r\n
        if (found_new_line_char == '\r\n') {
            let new_result = [];
            let curr_row;
            for (var i = 0; i < result.length; i++) {
                curr_row = [];
                for (var j = 0; j < result[i].length; j++) {
                    curr_row.push(result[i][j].split(new_line_char).join('\r\n'));
                }
                new_result.push(curr_row);
            }
            result = new_result;
        }
        return result;
    }

    function parse_csv_line(csv_line_str) {

        var result = [];

        //let field_end_index_moved = false;
        let field_start_index = 0;
        let field_end_index = 0;
        let csr_index = 0;
        let cursor_val = csv_line_str[csr_index];
        let in_quote = false;

        // Pretend that the last char is the separator_char to complete the loop
        csv_line_str += field_separator_char;

        while (csr_index < csv_line_str.length) {
            if (cursor_val === '"') {
                in_quote = !in_quote;
            } else if (cursor_val === field_separator_char) {
                if (in_quote === false) {
                    if (field_start_index <= field_end_index) {
                        result.push(parse_csv_field(csv_line_str.substring(field_start_index, field_end_index)));
                        field_start_index = csr_index + 1;
                    } // Else: just ignore field_end_index has not moved or field has not been sliced for parsing the field
                } // Else: just ignore because we are in quote
            }
            csr_index++;
            cursor_val = csv_line_str[csr_index];
            field_end_index = csr_index;
            field_end_index_moved = true;
        }
        return result;
    }

    function parse_csv_field(csv_field_str) {
        with_quote = (csv_field_str[0] === '"');

        if (with_quote) {
            csv_field_str = csv_field_str.substring(1, csv_field_str.length - 1); // remove the start and end quotes
            csv_field_str = csv_field_str.split('""').join('"'); // handle double quotes
        }
        return csv_field_str;
    }

    // Initial method: check the first newline character only
    function get_new_line_char(csv_str) {
        if (csv_str.indexOf('\r\n') > -1) {
            return '\r\n';
        } else {
            return '\n'
        }
    }
断舍离 2024-09-24 12:57:06

Edit1(2024)

现在 deno std 托管在 https://jsr.io/ ,可以从 esm.sh 使用它

import { parse } from "https://esm.sh/jsr/@std/csv";

原始答案

我个人喜欢使用 deno std 库,因为大多数模块都与浏览器正式兼容

问题是 std 在打字稿中,但官方解决方案可能会发生将来 https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728

目前有是一个积极维护的动态转译器 https://bundle.deno.dev/

因此您可以使用它就像这样

<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>

,你也可以直接供应它

curl https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts --output src/csv.js

Edit1(2024)

Now deno std is hosted on https://jsr.io/, its possible to use it from esm.sh

import { parse } from "https://esm.sh/jsr/@std/csv";

Original Answer

Personally I like to use deno std library since most modules are officially compatible with the browser

The problem is that the std is in typescript but official solution might happen in the future https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728

For now there is an actively maintained on the fly transpiler https://bundle.deno.dev/

so you can use it simply like this

<script type="module">
import { parse } from "https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts"
console.log(await parse("a,b,c\n1,2,3"))
</script>

and you can also just vendor it

curl https://bundle.deno.dev/https://deno.land/[email protected]/encoding/csv.ts --output src/csv.js
酒几许 2024-09-24 12:57:00

只是把这个扔在那里......我最近遇到需要用 Javascript 解析 CSV 列,我选择了我自己的简单解决方案。它适合我的需要,并且可能对其他人有帮助。

const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';

const parseCSV = text => {
  const lines = text.split('\n');
  const output = [];

  lines.forEach(line => {
      line = line.trim();

      if (line.length === 0) return;

      const skipIndexes = {};
      const columns = line.split(',');

      output.push(columns.reduce((result, item, index) => {
          if (skipIndexes[index]) return result;

          if (item.startsWith('"') && !item.endsWith('"')) {
              while (!columns[index + 1].endsWith('"')) {
                  index++;
                  item += `,${columns[index]}`;
                  skipIndexes[index] = true;
              }

              index++;
              skipIndexes[index] = true;
              item += `,${columns[index]}`;
          }

          result.push(item);
          return result;
      }, []));
  });

  return output;
};

console.log(parseCSV(csvString));

Just throwing this out there.. I recently ran into the need to parse CSV columns with Javascript, and I opted for my own simple solution. It works for my needs, and may help someone else.

const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';

const parseCSV = text => {
  const lines = text.split('\n');
  const output = [];

  lines.forEach(line => {
      line = line.trim();

      if (line.length === 0) return;

      const skipIndexes = {};
      const columns = line.split(',');

      output.push(columns.reduce((result, item, index) => {
          if (skipIndexes[index]) return result;

          if (item.startsWith('"') && !item.endsWith('"')) {
              while (!columns[index + 1].endsWith('"')) {
                  index++;
                  item += `,${columns[index]}`;
                  skipIndexes[index] = true;
              }

              index++;
              skipIndexes[index] = true;
              item += `,${columns[index]}`;
          }

          result.push(item);
          return result;
      }, []));
  });

  return output;
};

console.log(parseCSV(csvString));

心欲静而疯不止 2024-09-24 12:56:54

正则表达式来救援!这几行代码根据 RFC 4180 标准处理带有嵌入逗号、引号和换行符的正确引用字段。

function parseCsv(data, fieldSep, newLine) {
    fieldSep = fieldSep || ',';
    newLine = newLine || '\n';
    var nSep = '\x1D';
    var qSep = '\x1E';
    var cSep = '\x1F';
    var nSepRe = new RegExp(nSep, 'g');
    var qSepRe = new RegExp(qSep, 'g');
    var cSepRe = new RegExp(cSep, 'g');
    var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
    var grid = [];
    data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
        return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
    }).split(/\n/).forEach(function(line) {
        var row = line.split(fieldSep).map(function(cell) {
            return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
        });
        grid.push(row);
    });
    return grid;
}

const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ',';      // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n' 
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]

您不需要解析器生成器,例如 lex/yacc。由于正向查找、负向查找和正向查找,正则表达式可以正确处理 RFC 4180。

克隆/下载代码 https://github.com/peterthoeny/parse-csv-js

Regular expressions to the rescue! These few lines of code handle properly quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.

function parseCsv(data, fieldSep, newLine) {
    fieldSep = fieldSep || ',';
    newLine = newLine || '\n';
    var nSep = '\x1D';
    var qSep = '\x1E';
    var cSep = '\x1F';
    var nSepRe = new RegExp(nSep, 'g');
    var qSepRe = new RegExp(qSep, 'g');
    var cSepRe = new RegExp(cSep, 'g');
    var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
    var grid = [];
    data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
        return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
    }).split(/\n/).forEach(function(line) {
        var row = line.split(fieldSep).map(function(cell) {
            return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
        });
        grid.push(row);
    });
    return grid;
}

const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ',';      // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n' 
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]

You don't need a parser-generator such as lex/yacc. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.

Clone/download code at https://github.com/peterthoeny/parse-csv-js

把时间冻结 2024-09-24 12:56:48

我不知道为什么我无法获取 Kirtan 的示例< /a> 为我工作。它似乎在空字段或带有尾随逗号的字段上失败了......

这个似乎可以处理这两个问题。

我没有编写解析器代码,只是对解析器函数进行了包装以使其适用于文件。参见归属。

    var Strings = {
        /**
         * Wrapped CSV line parser
         * @param s      String delimited CSV string
         * @param sep    Separator override
         * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }

I'm not sure why I couldn't get Kirtan's example to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...

This one seems to handle both.

I did not write the parser code, just a wrapper around the parser function to make this work for a file. See attribution.

    var Strings = {
        /**
         * Wrapped CSV line parser
         * @param s      String delimited CSV string
         * @param sep    Separator override
         * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
         */
        parseCSV : function(s,sep) {
            // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
            var universalNewline = /\r\n|\r|\n/g;
            var a = s.split(universalNewline);
            for(var i in a){
                for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
                    if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
                        if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
                            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
                          } else if (x) {
                        f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
                      } else f = f.shift().split(sep).concat(f);
                    } else f[x].replace(/""/g, '"');
                  } a[i] = f;
        }
        return a;
        }
    }
想挽留 2024-09-24 12:56:44

这是我的简单 JavaScript 代码:

let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))

function splitQuotes(line) {
  if(line.indexOf('"') < 0) 
    return line.split(',')

  let result = [], cell = '', quote = false;
  for(let i = 0; i < line.length; i++) {
    char = line[i]
    if(char == '"' && line[i+1] == '"') {
      cell += char
      i++
    } else if(char == '"') {
      quote = !quote;
    } else if(!quote && char == ',') {
      result.push(cell)
      cell = ''
    } else {
      cell += char
    }
    if ( i == line.length-1 && cell) {
      result.push(cell)
    }
  }
  return result
}

Here's my simple vanilla JavaScript code:

let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))

function splitQuotes(line) {
  if(line.indexOf('"') < 0) 
    return line.split(',')

  let result = [], cell = '', quote = false;
  for(let i = 0; i < line.length; i++) {
    char = line[i]
    if(char == '"' && line[i+1] == '"') {
      cell += char
      i++
    } else if(char == '"') {
      quote = !quote;
    } else if(!quote && char == ',') {
      result.push(cell)
      cell = ''
    } else {
      cell += char
    }
    if ( i == line.length-1 && cell) {
      result.push(cell)
    }
  }
  return result
}
苏大泽ㄣ 2024-09-24 12:56:39

这是另一个解决方案。这使用:

  • 用于分割 CSV 字符串(包括周围引号和尾随逗号)的粗粒
  • 度正则表达式 用于清理周围引号和尾随逗号的细粒度正则表达式
  • ,还具有区分字符串、数字、布尔值和 null 的类型校正 代码

对于以下输入字符串:

"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,

输出:

[
  "This is, a value",
  "Hello",
  4,
  -123,
  3.1415,
  "This is also, possible",
  true,
  null
]

这是我在可运行代码片段中对 parseCSVLine() 的实现:

function parseCSVLine(text) {
  return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) {
    let m;
    if (m = text.match(/^\s*,?$/)) return null; // null value
    if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text
    if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text
    if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean
    if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number
    if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number
    if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text
    return text;
  } );
}

let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );

Here's another solution. This uses:

  • a coarse global regular expression for splitting the CSV string (which includes surrounding quotes and trailing commas)
  • fine-grained regular expression for cleaning up the surrounding quotes and trailing commas
  • also, has type correction differentiating strings, numbers, boolean values and null values

For the following input string:

"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,

The code outputs:

[
  "This is, a value",
  "Hello",
  4,
  -123,
  3.1415,
  "This is also, possible",
  true,
  null
]

Here's my implementation of parseCSVLine() in a runnable code snippet:

function parseCSVLine(text) {
  return text.match( /\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g ).map( function (text) {
    let m;
    if (m = text.match(/^\s*,?$/)) return null; // null value
    if (m = text.match(/^\s*\"([^"]*)\"\s*,?$/)) return m[1]; // Double Quoted Text
    if (m = text.match(/^\s*'([^']*)'\s*,?$/)) return m[1]; // Single Quoted Text
    if (m = text.match(/^\s*(true|false)\s*,?$/)) return m[1] === "true"; // Boolean
    if (m = text.match(/^\s*((?:\+|\-)?\d+)\s*,?$/)) return parseInt(m[1]); // Integer Number
    if (m = text.match(/^\s*((?:\+|\-)?\d*\.\d*)\s*,?$/)) return parseFloat(m[1]); // Floating Number
    if (m = text.match(/^\s*(.*?)\s*,?$/)) return m[1]; // Unquoted Text
    return text;
  } );
}

let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );

放血 2024-09-24 12:56:34

这是我的 PEG(.js) 语法,似乎在 RFC 4180 上表现良好(即它处理 处的示例http://en.wikipedia.org/wiki/Comma-separated_values):

start
  = [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }

line
  = first:field rest:("," text:field { return text; })*
    & { return !!first || rest.length; } // ignore blank lines
    { rest.unshift(first); return rest; }

field
  = '"' text:char* '"' { return text.join(''); }
  / text:[^\n\r,]* { return text.join(''); }

char
  = '"' '"' { return '"'; }
  / [^"]

http: //jsfiddle.net/knvzk/10http://pegjs.majda.cz/online。在 https://gist.github.com/3362830 下载生成的解析器。

Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):

start
  = [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }

line
  = first:field rest:("," text:field { return text; })*
    & { return !!first || rest.length; } // ignore blank lines
    { rest.unshift(first); return rest; }

field
  = '"' text:char* '"' { return text.join(''); }
  / text:[^\n\r,]* { return text.join(''); }

char
  = '"' '"' { return '"'; }
  / [^"]

Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.

红ご颜醉 2024-09-24 12:56:27

csvToArray v1.3

一个紧凑(645 字节)但兼容的函数,用于将 CSV 字符串转换为 2D 数组,符合 RFC4180 标准。

https://code.google.com/archive/p/csv- to-array/downloads

常见用法:jQuery

 $.ajax({
        url: "test.csv",
        dataType: 'text',
        cache: false
 }).done(function(csvAsString){
        csvAsArray=csvAsString.csvToArray();
 });

常见用法:JavaScript

csvAsArray = csvAsString.csvToArray();

覆盖字段分隔符

csvAsArray = csvAsString.csvToArray("|");

覆盖记录分隔符

csvAsArray = csvAsString.csvToArray("", "#");

覆盖跳过标题

csvAsArray = csvAsString.csvToArray("", "", 1);

覆盖全部

csvAsArray = csvAsString.csvToArray("|", "#", 1);

csvToArray v1.3

A compact (645 bytes), but compliant function to convert a CSV string into a 2D array, conforming to the RFC4180 standard.

https://code.google.com/archive/p/csv-to-array/downloads

Common Usage: jQuery

 $.ajax({
        url: "test.csv",
        dataType: 'text',
        cache: false
 }).done(function(csvAsString){
        csvAsArray=csvAsString.csvToArray();
 });

Common usage: JavaScript

csvAsArray = csvAsString.csvToArray();

Override field separator

csvAsArray = csvAsString.csvToArray("|");

Override record separator

csvAsArray = csvAsString.csvToArray("", "#");

Override Skip Header

csvAsArray = csvAsString.csvToArray("", "", 1);

Override all

csvAsArray = csvAsString.csvToArray("|", "#", 1);
_失温 2024-09-24 12:56:22

我有一个实现作为电子表格项目的一部分。

该代码尚未经过彻底测试,但欢迎任何人使用它。

正如一些答案所指出的,如果您确实拥有 DSV 或 TSV 文件,因为它们不允许使用值中的记录和字段分隔符。另一方面,CSV 实际上可以在字段内包含逗号和换行符,这会破坏大多数正则表达式和基于拆分的方法。

var CSV = {
    parse: function(csv, reviver) {
        reviver = reviver || function(r, c, v) { return v; };
        var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
        while (c < cc) {
            table.push(row = []);
            while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
                start = end = c;
                if ('"' === chars[c]){
                    start = end = ++c;
                    while (c < cc) {
                        if ('"' === chars[c]) {
                            if ('"' !== chars[c+1]) {
                                break;
                            }
                            else {
                                chars[++c] = ''; // unescape ""
                            }
                        }
                        end = ++c;
                    }
                    if ('"' === chars[c]) {
                        ++c;
                    }
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        ++c;
                    }
                } else {
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        end = ++c;
                    }
                }
                row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
                if (',' === chars[c]) {
                    ++c;
                }
            }
            if ('\r' === chars[c]) {
                ++c;
            }
            if ('\n' === chars[c]) {
                ++c;
            }
        }
        return table;
    },

    stringify: function(table, replacer) {
        replacer = replacer || function(r, c, v) { return v; };
        var csv = '', c, cc, r, rr = table.length, cell;
        for (r = 0; r < rr; ++r) {
            if (r) {
                csv += '\r\n';
            }
            for (c = 0, cc = table[r].length; c < cc; ++c) {
                if (c) {
                    csv += ',';
                }
                cell = replacer(r, c, table[r][c]);
                if (/[,\r\n"]/.test(cell)) {
                    cell = '"' + cell.replace(/"/g, '""') + '"';
                }
                csv += (cell || 0 === cell) ? cell : '';
            }
        }
        return csv;
    }
};

I have an implementation as part of a spreadsheet project.

This code is not yet tested thoroughly, but anyone is welcome to use it.

As some of the answers noted though, your implementation can be much simpler if you actually have DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand, can actually have commas and newlines inside a field, which breaks most regular expression and split-based approaches.

var CSV = {
    parse: function(csv, reviver) {
        reviver = reviver || function(r, c, v) { return v; };
        var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
        while (c < cc) {
            table.push(row = []);
            while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
                start = end = c;
                if ('"' === chars[c]){
                    start = end = ++c;
                    while (c < cc) {
                        if ('"' === chars[c]) {
                            if ('"' !== chars[c+1]) {
                                break;
                            }
                            else {
                                chars[++c] = ''; // unescape ""
                            }
                        }
                        end = ++c;
                    }
                    if ('"' === chars[c]) {
                        ++c;
                    }
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        ++c;
                    }
                } else {
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        end = ++c;
                    }
                }
                row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
                if (',' === chars[c]) {
                    ++c;
                }
            }
            if ('\r' === chars[c]) {
                ++c;
            }
            if ('\n' === chars[c]) {
                ++c;
            }
        }
        return table;
    },

    stringify: function(table, replacer) {
        replacer = replacer || function(r, c, v) { return v; };
        var csv = '', c, cc, r, rr = table.length, cell;
        for (r = 0; r < rr; ++r) {
            if (r) {
                csv += '\r\n';
            }
            for (c = 0, cc = table[r].length; c < cc; ++c) {
                if (c) {
                    csv += ',';
                }
                cell = replacer(r, c, table[r][c]);
                if (/[,\r\n"]/.test(cell)) {
                    cell = '"' + cell.replace(/"/g, '""') + '"';
                }
                csv += (cell || 0 === cell) ? cell : '';
            }
        }
        return csv;
    }
};
墨离汐 2024-09-24 12:56:17

这是一个极其简单的 CSV 解析器,它处理带逗号、换行符和转义双引号的引用字段。没有分割或正则表达式。它一次扫描输入字符串 1-2 个字符并构建一个数组。

http://jsfiddle.net/vHKYH/ 进行测试。

function parseCSV(str) {
    const arr = [];
    let quote = false;  // 'true' means we're inside a quoted field

    // Iterate over each character, keep track of current row and column (of the returned array)
    for (let row = 0, col = 0, c = 0; c < str.length; c++) {
        let cc = str[c], nc = str[c+1];        // Current character, next character
        arr[row] = arr[row] || [];             // Create a new row if necessary
        arr[row][col] = arr[row][col] || '';   // Create a new column (start with empty string) if necessary

        // If the current character is a quotation mark, and we're inside a
        // quoted field, and the next character is also a quotation mark,
        // add a quotation mark to the current column and skip the next character
        if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }

        // If it's just one quotation mark, begin/end quoted field
        if (cc == '"') { quote = !quote; continue; }

        // If it's a comma and we're not in a quoted field, move on to the next column
        if (cc == ',' && !quote) { ++col; continue; }

        // If it's a newline (CRLF) and we're not in a quoted field, skip the next character
        // and move on to the next row and move to column 0 of that new row
        if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }

        // If it's a newline (LF or CR) and we're not in a quoted field,
        // move on to the next row and move to column 0 of that new row
        if (cc == '\n' && !quote) { ++row; col = 0; continue; }
        if (cc == '\r' && !quote) { ++row; col = 0; continue; }

        // Otherwise, append the current character to the current column
        arr[row][col] += cc;
    }
    return arr;
}

Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or regular expression. It scans the input string 1-2 characters at a time and builds an array.

Test it at http://jsfiddle.net/vHKYH/.

function parseCSV(str) {
    const arr = [];
    let quote = false;  // 'true' means we're inside a quoted field

    // Iterate over each character, keep track of current row and column (of the returned array)
    for (let row = 0, col = 0, c = 0; c < str.length; c++) {
        let cc = str[c], nc = str[c+1];        // Current character, next character
        arr[row] = arr[row] || [];             // Create a new row if necessary
        arr[row][col] = arr[row][col] || '';   // Create a new column (start with empty string) if necessary

        // If the current character is a quotation mark, and we're inside a
        // quoted field, and the next character is also a quotation mark,
        // add a quotation mark to the current column and skip the next character
        if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }

        // If it's just one quotation mark, begin/end quoted field
        if (cc == '"') { quote = !quote; continue; }

        // If it's a comma and we're not in a quoted field, move on to the next column
        if (cc == ',' && !quote) { ++col; continue; }

        // If it's a newline (CRLF) and we're not in a quoted field, skip the next character
        // and move on to the next row and move to column 0 of that new row
        if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }

        // If it's a newline (LF or CR) and we're not in a quoted field,
        // move on to the next row and move to column 0 of that new row
        if (cc == '\n' && !quote) { ++row; col = 0; continue; }
        if (cc == '\r' && !quote) { ++row; col = 0; continue; }

        // Otherwise, append the current character to the current column
        arr[row][col] += cc;
    }
    return arr;
}
纵情客 2024-09-24 12:56:08

jQuery-CSV

这是一个 jQuery 插件,旨在作为用于将 CSV 解析为 JavaScript 数据的端到端解决方案。它处理 RFC 4180 中出现的每一个边缘情况,以及一些弹出的情况对于 Excel/Google 电子表格导出(即主要涉及空值),缺少规范。

示例:

曲目、艺术家、专辑、年份

危险,“Busta Rhymes”,“灾难来袭时”,1997

// Calling this
music = $.csv.toArrays(csv)

// Outputs...
[
  ["track", "artist", "album", "year"],
  ["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]

console.log(music[1][2]) // Outputs: 'When Disaster Strikes'

更新:

哦,是的,我还应该提到它是完全可配置的。

music = $.csv.toArrays(csv, {
  delimiter: "'", // Sets a custom value delimiter character
  separator: ';', // Sets a custom field separator character
});

更新 2:

现在它也可以与 Node.js 上的 jQuery 配合使用。因此,您可以选择使用同一库进行客户端或服务器端解析。

更新 3

自 Google 代码关闭以来,jquery-csv 已迁移到 GitHub

免责声明:我也是 jQuery-CSV 的作者。

jQuery-CSV

It's a jQuery plugin designed to work as an end-to-end solution for parsing CSV into JavaScript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google spreadsheet exports (i.e., mostly involving null values) that the specification is missing.

Example:

track,artist,album,year

Dangerous,'Busta Rhymes','When Disaster Strikes',1997

// Calling this
music = $.csv.toArrays(csv)

// Outputs...
[
  ["track", "artist", "album", "year"],
  ["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]

console.log(music[1][2]) // Outputs: 'When Disaster Strikes'

Update:

Oh yeah, I should also probably mention that it's completely configurable.

music = $.csv.toArrays(csv, {
  delimiter: "'", // Sets a custom value delimiter character
  separator: ';', // Sets a custom field separator character
});

Update 2:

It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same library.

Update 3:

Since the Google Code shutdown, jquery-csv has been migrated to GitHub.

Disclaimer: I am also the author of jQuery-CSV.

趁年轻赶紧闹 2024-09-24 12:56:01

您可以使用 CSVToArray() 本博客文章中提到的函数。

      console.log(CSVToArray(`"foo, the column",bar
2,3
"4, the value",5`));

      // ref: http://stackoverflow.com/a/1293163/2343
        // This will parse a delimited string into an array of
        // arrays. The default delimiter is the comma, but this
        // can be overriden in the second argument.
        function CSVToArray( strData, strDelimiter ){
            // Check to see if the delimiter is defined. If not,
            // then default to comma.
            strDelimiter = (strDelimiter || ",");
     
            // Create a regular expression to parse the CSV values.
            var objPattern = new RegExp(
                (
                    // Delimiters.
                    "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
     
                    // Quoted fields.
                    "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
     
                    // Standard fields.
                    "([^\"\\" + strDelimiter + "\\r\\n]*))"
                ),
                "gi"
                );
     
     
            // Create an array to hold our data. Give the array
            // a default empty first row.
            var arrData = [[]];
     
            // Create an array to hold our individual pattern
            // matching groups.
            var arrMatches = null;
     
     
            // Keep looping over the regular expression matches
            // until we can no longer find a match.
            while (arrMatches = objPattern.exec( strData )){
     
                // Get the delimiter that was found.
                var strMatchedDelimiter = arrMatches[ 1 ];
     
                // Check to see if the given delimiter has a length
                // (is not the start of string) and if it matches
                // field delimiter. If id does not, then we know
                // that this delimiter is a row delimiter.
                if (
                    strMatchedDelimiter.length &&
                    strMatchedDelimiter !== strDelimiter
                    ){
     
                    // Since we have reached a new row of data,
                    // add an empty row to our data array.
                    arrData.push( [] );
     
                }
     
                var strMatchedValue;
     
                // Now that we have our delimiter out of the way,
                // let's check to see which kind of value we
                // captured (quoted or unquoted).
                if (arrMatches[ 2 ]){
     
                    // We found a quoted value. When we capture
                    // this value, unescape any double quotes.
                    strMatchedValue = arrMatches[ 2 ].replace(
                        new RegExp( "\"\"", "g" ),
                        "\""
                        );
     
                } else {
     
                    // We found a non-quoted value.
                    strMatchedValue = arrMatches[ 3 ];
     
                }
     
     
                // Now that we have our value string, let's add
                // it to the data array.
                arrData[ arrData.length - 1 ].push( strMatchedValue );
            }
     
            // Return the parsed data.
            return( arrData );
        }

You can use the CSVToArray() function mentioned in this blog entry.

      console.log(CSVToArray(`"foo, the column",bar
2,3
"4, the value",5`));

      // ref: http://stackoverflow.com/a/1293163/2343
        // This will parse a delimited string into an array of
        // arrays. The default delimiter is the comma, but this
        // can be overriden in the second argument.
        function CSVToArray( strData, strDelimiter ){
            // Check to see if the delimiter is defined. If not,
            // then default to comma.
            strDelimiter = (strDelimiter || ",");
     
            // Create a regular expression to parse the CSV values.
            var objPattern = new RegExp(
                (
                    // Delimiters.
                    "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
     
                    // Quoted fields.
                    "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
     
                    // Standard fields.
                    "([^\"\\" + strDelimiter + "\\r\\n]*))"
                ),
                "gi"
                );
     
     
            // Create an array to hold our data. Give the array
            // a default empty first row.
            var arrData = [[]];
     
            // Create an array to hold our individual pattern
            // matching groups.
            var arrMatches = null;
     
     
            // Keep looping over the regular expression matches
            // until we can no longer find a match.
            while (arrMatches = objPattern.exec( strData )){
     
                // Get the delimiter that was found.
                var strMatchedDelimiter = arrMatches[ 1 ];
     
                // Check to see if the given delimiter has a length
                // (is not the start of string) and if it matches
                // field delimiter. If id does not, then we know
                // that this delimiter is a row delimiter.
                if (
                    strMatchedDelimiter.length &&
                    strMatchedDelimiter !== strDelimiter
                    ){
     
                    // Since we have reached a new row of data,
                    // add an empty row to our data array.
                    arrData.push( [] );
     
                }
     
                var strMatchedValue;
     
                // Now that we have our delimiter out of the way,
                // let's check to see which kind of value we
                // captured (quoted or unquoted).
                if (arrMatches[ 2 ]){
     
                    // We found a quoted value. When we capture
                    // this value, unescape any double quotes.
                    strMatchedValue = arrMatches[ 2 ].replace(
                        new RegExp( "\"\"", "g" ),
                        "\""
                        );
     
                } else {
     
                    // We found a non-quoted value.
                    strMatchedValue = arrMatches[ 3 ];
     
                }
     
     
                // Now that we have our value string, let's add
                // it to the data array.
                arrData[ arrData.length - 1 ].push( strMatchedValue );
            }
     
            // Return the parsed data.
            return( arrData );
        }

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文