需要帮助解析 filebeat json

发布于 2025-01-09 09:11:00 字数 5764 浏览 0 评论 0原文

我想将日志文件的每一行作为 json 文档发送到 elastic。
我有一个如下所示的日志文件:

{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:35:16.397023'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:35:16.397296'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.357111'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:39:58.357738'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:39:58.357904'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:39:58.358029'}
{'client_id': 1, 'logger': 'instameister_event', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358149'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '1', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358363'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '2', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358562'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '3', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358728'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:41:00.466514'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:41:00.466931'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:41:00.467042'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:41:00.467141'}

我的 filebeat 配置如下:

filebeat.inputs:
- type: log
  paths: /home/philip/Devel/InstaMeister/instameister.log
  json.keys_under_root: true
  json.overwrite_keys: true


output.logstash:
  hosts: ["219.34.99.125:5044"]

这是我的 Logstash 管道配置:

input {
  beats {
    port => 5044
  }
}

output {
  elasticsearch {
    hosts => ["http://10.136.95.164:9200"]
    user => "elastic"
    password => ""
    index => "instameister"
    manage_template => false
  }
  stdout { codec => json_lines }
}

filebeat 似乎没有将键放在根文档下,因为当我的文件发送到 Elasticsearch 时,文档看起来像这样:

{
  "_index": "instameister",
  "_id": "-6zaJn8BxuuGm2MUXt8x",
  "_version": 1,
  "_score": 1,
  "_source": {
    "message": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}",
    "@timestamp": "2022-02-23T13:51:56.173Z",
    "json": {},
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "event": {
      "original": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    },
    "@version": "1",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 21192
    }
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "tags.keyword": [
      "beats_input_codec_plain_applied"
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "event.original": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      21192
    ],
    "message": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "@timestamp": [
      "2022-02-23T13:51:56.173Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "message.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "event.original.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

我根本不关心 filebeat 元数据。我只想将日志文件中的每一行作为 Elasticsearch 中的单独文档。但我很难找到实现这一目标的方法。有人可以帮我吗?

I want to send each line of my log file as a json document to elastic.
I have a log file that looks like this:

{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:35:16.397023'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:35:16.397296'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.357111'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:39:58.357738'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:39:58.357904'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:39:58.358029'}
{'client_id': 1, 'logger': 'instameister_event', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358149'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '1', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358363'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '2', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358562'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '3', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358728'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:41:00.466514'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:41:00.466931'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:41:00.467042'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:41:00.467141'}

I have filebeat configured like this:

filebeat.inputs:
- type: log
  paths: /home/philip/Devel/InstaMeister/instameister.log
  json.keys_under_root: true
  json.overwrite_keys: true


output.logstash:
  hosts: ["219.34.99.125:5044"]

And here is my pipeline config for logstash:

input {
  beats {
    port => 5044
  }
}

output {
  elasticsearch {
    hosts => ["http://10.136.95.164:9200"]
    user => "elastic"
    password => ""
    index => "instameister"
    manage_template => false
  }
  stdout { codec => json_lines }
}

It doesn't appear that filebeat is putting the keys under the root document, because when my files are sent to elastic the documents look like this:

{
  "_index": "instameister",
  "_id": "-6zaJn8BxuuGm2MUXt8x",
  "_version": 1,
  "_score": 1,
  "_source": {
    "message": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}",
    "@timestamp": "2022-02-23T13:51:56.173Z",
    "json": {},
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "event": {
      "original": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    },
    "@version": "1",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 21192
    }
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "tags.keyword": [
      "beats_input_codec_plain_applied"
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "event.original": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      21192
    ],
    "message": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "@timestamp": [
      "2022-02-23T13:51:56.173Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "message.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "event.original.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

I don't care at all about the filebeat metadata. I just want to have each line in my log file as a separate document in elastic. But I'm having a hard time searching for how to achieve this. Can anyone help me out here?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

梦在深巷 2025-01-16 09:11:00

正如 @Val 所指出的,单引号不是有效的 json。我更新了我的应用程序以将有效的 json(双引号)输出到日志文件,现在所有键都附加到“根”文档中。

{
  "_index": "instameister",
  "_id": "HqwUJ38BxuuGm2MUgOAA",
  "_version": 1,
  "_score": 1,
  "_source": {
    "@timestamp": "2022-02-23T14:55:26.510Z",
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_raw_event"
    ],
    "event": "1",
    "level": "warning",
    "client_id": 1,
    "@version": "1",
    "logger": "instameister",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 1488
    },
    "date_created": "2022-02-23T15:55:17.234136"
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "logger": [
      "instameister"
    ],
    "tags.keyword": [
      "beats_input_raw_event"
    ],
    "client_id": [
      1
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "event": [
      "1"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "event.keyword": [
      "1"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "level": [
      "warning"
    ],
    "date_created": [
      "2022-02-23T15:55:17.234Z"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      1488
    ],
    "tags": [
      "beats_input_raw_event"
    ],
    "@timestamp": [
      "2022-02-23T14:55:26.510Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "level.keyword": [
      "warning"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "logger.keyword": [
      "instameister"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

我仍然想删除 filebeat 创建的冗余键。但我的主要问题现在已经解决了。
编辑:
我通过添加以下内容摆脱了额外的键:

processors:
  - drop_fields:
      fields: ["date_created", "ecs.version", "agent.version", "agent.type", "agent.id", "agent.name", "input.type", "log.file.path", "log.offset", "agent.ephemeral_id", "host.name"]

到我的 filebeat 配置

As @Val suggested, the single quotes were not valid json. I updated my application to output valid json (double quotes) to the log file, and now all the keys are appended to the "root" document.

{
  "_index": "instameister",
  "_id": "HqwUJ38BxuuGm2MUgOAA",
  "_version": 1,
  "_score": 1,
  "_source": {
    "@timestamp": "2022-02-23T14:55:26.510Z",
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_raw_event"
    ],
    "event": "1",
    "level": "warning",
    "client_id": 1,
    "@version": "1",
    "logger": "instameister",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 1488
    },
    "date_created": "2022-02-23T15:55:17.234136"
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "logger": [
      "instameister"
    ],
    "tags.keyword": [
      "beats_input_raw_event"
    ],
    "client_id": [
      1
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "event": [
      "1"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "event.keyword": [
      "1"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "level": [
      "warning"
    ],
    "date_created": [
      "2022-02-23T15:55:17.234Z"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      1488
    ],
    "tags": [
      "beats_input_raw_event"
    ],
    "@timestamp": [
      "2022-02-23T14:55:26.510Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "level.keyword": [
      "warning"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "logger.keyword": [
      "instameister"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

I still would like to remove the redundant keys filebeat creates. But my main problem is solved now.
Edit:
I got rid of the extra keys by adding this:

processors:
  - drop_fields:
      fields: ["date_created", "ecs.version", "agent.version", "agent.type", "agent.id", "agent.name", "input.type", "log.file.path", "log.offset", "agent.ephemeral_id", "host.name"]

To my filebeat config

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文