对于循环,不处理R中的整个XML脚本
我正在编写一个例程来解析XML文件,该文件是系统日志文件以捕获登录和登录消息。正如您在我的代码中看到的那样,有几个检查点我验证XML文件是否已被完全读取,但是,当我打印数据框时,仅捕获第一个事件。
LG.df <- read_xml("Syslog.xml")
xml_ns(LG.df)
NS<- xml_children(xml_children(LG.df))
EV<- ""
DT<- ""
AN<- ""
AD<- ""
WSN<- ""
SA<- ""
LogEvents.df<- data.frame(EV, DT, AN, AD, WSN, SA)
print(LG.df)
for (i in sequence(length(NS))) {
DT<- NS[1]
DT<- gsub("<DateTime>","",as.character(DT))
DT<- gsub("</DateTime>","",as.character(DT))
#print(DT)
Pr<- NS[2]
Pr<- gsub("<Priority>","",as.character(Pr))
Pr<- gsub("</Priority>","",as.character(Pr))
#print(Pr)
SH<- NS[3]
SH<- gsub("<Source_Host>","",as.character(SH))
SH<- gsub("</Source_Host>","",as.character(SH))
#print(SH)
MT<- NS[4]
MT<- gsub("<MessageText>","",as.character(MT))
MT<- gsub("</MessageText>","",as.character(MT))
#print(MT)
EV<- str_match(MT, "\\d{4}(?=\\tMicrosoft-Windows-Security-Auditing)")
#print(EV)
AN<- str_match(MT, "(?<=Account Name:\\t\\t).*?(?=\\n)")
SID<- str_match(MT, "(?<=Security ID:\\t\\t).*?(?=\\n)")
AD<- str_match(MT, "(?<=Account Domain:\\t\\t).*?(?=\\n)")
WSN<- str_match(MT, "(?<=Workstation Name:\\t\\t).*?(?=\\n)")
SA<- str_match(MT, "(?<=Source Network Address:\\t\\t).*?(?=\\n)")
if(EV == 4624){(LogEvents.df$EV <-(EV))
(LogEvents.df$DT<- (DT))
(LogEvents.df$AN<- (AN))
(LogEvents.df$AD<- (AD))
(LogEvents.df$WSN<- (WSN))
(LogEvents.df$SA<- (SA))
} else if(EV == 4634){(LogEvents.df$EV <-( EV))}
(LogEvents.df$DT<- (DT))
(LogEvents.df$AN<- (AN))
(LogEvents.df$AD<- (AD))
(LogEvents.df$WSN<- (WSN))
(LogEvents.df$SA<- (SA))
}
欢迎任何想法。谢谢!
I am writing a routine to parse xml files that are system log files to capture the logon and logoff messages. As you can see in my code, there are several check points where I verify that the xml file is being completely read, however, when I print the dataframe, only the first event is captured.
LG.df <- read_xml("Syslog.xml")
xml_ns(LG.df)
NS<- xml_children(xml_children(LG.df))
EV<- ""
DT<- ""
AN<- ""
AD<- ""
WSN<- ""
SA<- ""
LogEvents.df<- data.frame(EV, DT, AN, AD, WSN, SA)
print(LG.df)
for (i in sequence(length(NS))) {
DT<- NS[1]
DT<- gsub("<DateTime>","",as.character(DT))
DT<- gsub("</DateTime>","",as.character(DT))
#print(DT)
Pr<- NS[2]
Pr<- gsub("<Priority>","",as.character(Pr))
Pr<- gsub("</Priority>","",as.character(Pr))
#print(Pr)
SH<- NS[3]
SH<- gsub("<Source_Host>","",as.character(SH))
SH<- gsub("</Source_Host>","",as.character(SH))
#print(SH)
MT<- NS[4]
MT<- gsub("<MessageText>","",as.character(MT))
MT<- gsub("</MessageText>","",as.character(MT))
#print(MT)
EV<- str_match(MT, "\\d{4}(?=\\tMicrosoft-Windows-Security-Auditing)")
#print(EV)
AN<- str_match(MT, "(?<=Account Name:\\t\\t).*?(?=\\n)")
SID<- str_match(MT, "(?<=Security ID:\\t\\t).*?(?=\\n)")
AD<- str_match(MT, "(?<=Account Domain:\\t\\t).*?(?=\\n)")
WSN<- str_match(MT, "(?<=Workstation Name:\\t\\t).*?(?=\\n)")
SA<- str_match(MT, "(?<=Source Network Address:\\t\\t).*?(?=\\n)")
if(EV == 4624){(LogEvents.df$EV <-(EV))
(LogEvents.df$DT<- (DT))
(LogEvents.df$AN<- (AN))
(LogEvents.df$AD<- (AD))
(LogEvents.df$WSN<- (WSN))
(LogEvents.df$SA<- (SA))
} else if(EV == 4634){(LogEvents.df$EV <-( EV))}
(LogEvents.df$DT<- (DT))
(LogEvents.df$AN<- (AN))
(LogEvents.df$AD<- (AD))
(LogEvents.df$WSN<- (WSN))
(LogEvents.df$SA<- (SA))
}
Any ideas are welcome. Thanks!
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论