使用 VB.NET 比较和合并多个文件的文本文件

发布于 2024-11-16 23:39:49 字数 576 浏览 5 评论 0原文

我有多个文本文件需要合并。但我需要在合并之前比较参考号。

下面是文本文件

Text 1    
001Email
002Video
003SocialNetwork

Text 2
001Gmail
001Yahoo
002Youtube
002Metacafe
003Facebook
003Myspace

Text 3
www.gmail.com001
www.yahoo.com001
www.youtube.com002
www.metacafe.com002
www.facebook.com003
www.myspace.com003


Output


001Email
001Gmail
www.gmail.com001
001Yahoo
www.yahoo.com001
002Video
002Youtube
www.youtube.com002
002Metacafe
www.metacafe.com002
003SocialNetwork
003Facebook
www.facebook.com003
003Myspace
www.myspace.com003

什么是最快的处理方法,逐行读取进行比较。文本文件由数千行组成

I have multiple text files that I need to merge, but I need to compare the reference numbers before merging them.

below is the text file

Text 1    
001Email
002Video
003SocialNetwork

Text 2
001Gmail
001Yahoo
002Youtube
002Metacafe
003Facebook
003Myspace

Text 3
www.gmail.com001
www.yahoo.com001
www.youtube.com002
www.metacafe.com002
www.facebook.com003
www.myspace.com003


Output


001Email
001Gmail
www.gmail.com001
001Yahoo
www.yahoo.com001
002Video
002Youtube
www.youtube.com002
002Metacafe
www.metacafe.com002
003SocialNetwork
003Facebook
www.facebook.com003
003Myspace
www.myspace.com003

What would be the fastest way to handle this: reading line by line and comparing? Each text file consists of thousands of lines.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

何止钟意 2024-11-23 23:39:49

这可能是一个过于复杂的解决方案。代码中的注释应该能够解释一切。输出与您所拥有的不完全匹配,因为我不知道顺序对所有事情有多重要。它首先按参考号对所有内容进行排序,然后按字符串的文本部分(不包括 www.)进行排序。您发布的结果按参考编号顺序排列,然后按文件解析顺序排列,然后按字母顺序排列(002Metacafe 在 002Video 之后)。让我知道这是否重要。

Option Explicit On
Option Strict On

Imports System.IO
Imports System.Text.RegularExpressions

Public Class Form1
    ''//Reads Text1.txt, Text2.txt and Text3.txt from the desktop, parses every non-blank line into a
    ''//Data item, sorts the combined list and writes each item to the trace output, then closes the form.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ''//List of files to process
        Dim Files As New List(Of String)
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text1.txt"))
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text2.txt"))
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text3.txt"))

        ''//Will hold the current line being read
        Dim Line As String

        ''//Holds our main collection of data
        Dim MyData As New List(Of Data)

        ''//Loop through each file
        For Each F In Files
            ''//Open the file for reading
            Using FS As New FileStream(F, FileMode.Open, FileAccess.Read, FileShare.Read)
                Using SR As New StreamReader(FS)

                    ''//Read each line; ReadLine returns Nothing at the end of the file
                    Line = SR.ReadLine()
                    Do While Line IsNot Nothing
                        ''//Skip blank lines (for example a trailing empty line): the Data constructor
                        ''//throws on an empty string, which would otherwise abort the whole merge
                        If Line.Trim().Length > 0 Then
                            ''//The data constructor handles parsing of the line
                            MyData.Add(New Data(Line))
                        End If
                        ''//Read next line
                        Line = SR.ReadLine()
                    Loop

                End Using
            End Using
        Next

        ''//Our data implements IComparable(Of Data) so we can just sort the list
        MyData.Sort()

        ''//Output our data (Trace.WriteLine calls ToString on each item)
        For Each D In MyData
            Trace.WriteLine(D)
        Next

        Me.Close()
    End Sub
End Class
''//One parsed input line: a numeric reference found at the start or at the end of the line, plus the
''//remaining text. Items sort by reference number first and then by text (ignoring case and "www.").
Public Class Data
    Implements IComparable(Of Data)

    ''//Our RegEx pattern for looking for a string that either starts or ends with numbers.
    ''//Every group is optional, so the constructor must check that a reference was actually captured.
    Private Shared ReadOnly Pattern As String = "^(?<RefStart>\d+)?(?<Text>.*?)(?<RefEnd>\d+)?$"

    Public Text As String                      ''//The _text_ portion of the data
    Public Reference As String                 ''//The reference number stored as text
    Public ReferenceAtStart As Boolean         ''//Whether the reference number was found at the start or end of the line
    Public ReadOnly Property ReferenceAsNum() As Integer  ''//Numeric version of the reference number for sorting
        Get
            Return Integer.Parse(Me.Reference)
        End Get
    End Property
    Public ReadOnly Property TextComparable() As String   ''//Remove the www for sorting
        Get
            Return Me.Text.Replace("www.", "")
        End Get
    End Property

    ''//Parses a line into Reference, Text and ReferenceAtStart.
    ''//Throws ArgumentNullException when the line is Nothing or empty, and ArgumentException when
    ''//the line carries no leading or trailing reference number.
    Public Sub New(ByVal line As String)
        ''//Sanity check
        If String.IsNullOrEmpty(line) Then Throw New ArgumentNullException("line")

        ''//Parse the line. Regex.Match never returns Nothing; failure is reported through Success
        Dim M = Regex.Match(line, Pattern)
        If Not M.Success Then Throw New ArgumentException("Line does not conform to expected pattern")

        ''//If the RefStart has a value then the number is at the beginning of the string
        If M.Groups("RefStart").Success Then
            Me.ReferenceAtStart = True
            Me.Reference = M.Groups("RefStart").Value
        ElseIf M.Groups("RefEnd").Success Then ''//Otherwise it may be at the end
            Me.ReferenceAtStart = False
            Me.Reference = M.Groups("RefEnd").Value
        Else
            ''//No reference at either end: fail here with a clear error instead of letting
            ''//Integer.Parse fail on an empty string later, in the middle of a sort
            Throw New ArgumentException("Line does not conform to expected pattern")
        End If
        Me.Text = M.Groups("Text").Value
    End Sub

    ''//Orders by numeric reference, then by TextComparable ignoring case.
    ''//Any instance sorts after Nothing, following the IComparable convention.
    Public Function CompareTo(ByVal other As Data) As Integer Implements System.IComparable(Of Data).CompareTo
        If other Is Nothing Then Return 1

        ''//Compare the reference numbers first
        Dim Ret = Me.ReferenceAsNum.CompareTo(other.ReferenceAsNum)
        ''//If they are the same then compare the strings
        If Ret = 0 Then Ret = String.Compare(Me.TextComparable, other.TextComparable, StringComparison.InvariantCultureIgnoreCase)

        Return Ret
    End Function
    Public Overrides Function ToString() As String
        ''//Reproduce the original string
        If Me.ReferenceAtStart Then
            Return String.Format("{0}{1}", Me.Reference, Me.Text)
        Else
            Return String.Format("{1}{0}", Me.Reference, Me.Text)
        End If
    End Function
End Class

Here's what might possibly be an overly complex solution. The comments in the code should explain everything hopefully. The output doesn't match exactly what you have because I don't know how much order is important for everything. It sorts everything first by the reference number and then by the text portion of the string (excluding www.). The results you posted were in reference number order and then file parsing order and then alphabetical (002Metacafe came after 002Video). Let me know if that's important.

Option Explicit On
Option Strict On

Imports System.IO
Imports System.Text.RegularExpressions

Public Class Form1
    ''//Reads Text1.txt, Text2.txt and Text3.txt from the desktop, parses every non-blank line into a
    ''//Data item, sorts the combined list and writes each item to the trace output, then closes the form.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ''//List of files to process
        Dim Files As New List(Of String)
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text1.txt"))
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text2.txt"))
        Files.Add(Path.Combine(My.Computer.FileSystem.SpecialDirectories.Desktop, "Text3.txt"))

        ''//Will hold the current line being read
        Dim Line As String

        ''//Holds our main collection of data
        Dim MyData As New List(Of Data)

        ''//Loop through each file
        For Each F In Files
            ''//Open the file for reading
            Using FS As New FileStream(F, FileMode.Open, FileAccess.Read, FileShare.Read)
                Using SR As New StreamReader(FS)

                    ''//Read each line; ReadLine returns Nothing at the end of the file
                    Line = SR.ReadLine()
                    Do While Line IsNot Nothing
                        ''//Skip blank lines (for example a trailing empty line): the Data constructor
                        ''//throws on an empty string, which would otherwise abort the whole merge
                        If Line.Trim().Length > 0 Then
                            ''//The data constructor handles parsing of the line
                            MyData.Add(New Data(Line))
                        End If
                        ''//Read next line
                        Line = SR.ReadLine()
                    Loop

                End Using
            End Using
        Next

        ''//Our data implements IComparable(Of Data) so we can just sort the list
        MyData.Sort()

        ''//Output our data (Trace.WriteLine calls ToString on each item)
        For Each D In MyData
            Trace.WriteLine(D)
        Next

        Me.Close()
    End Sub
End Class
''//One parsed input line: a numeric reference found at the start or at the end of the line, plus the
''//remaining text. Items sort by reference number first and then by text (ignoring case and "www.").
Public Class Data
    Implements IComparable(Of Data)

    ''//Our RegEx pattern for looking for a string that either starts or ends with numbers.
    ''//Every group is optional, so the constructor must check that a reference was actually captured.
    Private Shared ReadOnly Pattern As String = "^(?<RefStart>\d+)?(?<Text>.*?)(?<RefEnd>\d+)?$"

    Public Text As String                      ''//The _text_ portion of the data
    Public Reference As String                 ''//The reference number stored as text
    Public ReferenceAtStart As Boolean         ''//Whether the reference number was found at the start or end of the line
    Public ReadOnly Property ReferenceAsNum() As Integer  ''//Numeric version of the reference number for sorting
        Get
            Return Integer.Parse(Me.Reference)
        End Get
    End Property
    Public ReadOnly Property TextComparable() As String   ''//Remove the www for sorting
        Get
            Return Me.Text.Replace("www.", "")
        End Get
    End Property

    ''//Parses a line into Reference, Text and ReferenceAtStart.
    ''//Throws ArgumentNullException when the line is Nothing or empty, and ArgumentException when
    ''//the line carries no leading or trailing reference number.
    Public Sub New(ByVal line As String)
        ''//Sanity check
        If String.IsNullOrEmpty(line) Then Throw New ArgumentNullException("line")

        ''//Parse the line. Regex.Match never returns Nothing; failure is reported through Success
        Dim M = Regex.Match(line, Pattern)
        If Not M.Success Then Throw New ArgumentException("Line does not conform to expected pattern")

        ''//If the RefStart has a value then the number is at the beginning of the string
        If M.Groups("RefStart").Success Then
            Me.ReferenceAtStart = True
            Me.Reference = M.Groups("RefStart").Value
        ElseIf M.Groups("RefEnd").Success Then ''//Otherwise it may be at the end
            Me.ReferenceAtStart = False
            Me.Reference = M.Groups("RefEnd").Value
        Else
            ''//No reference at either end: fail here with a clear error instead of letting
            ''//Integer.Parse fail on an empty string later, in the middle of a sort
            Throw New ArgumentException("Line does not conform to expected pattern")
        End If
        Me.Text = M.Groups("Text").Value
    End Sub

    ''//Orders by numeric reference, then by TextComparable ignoring case.
    ''//Any instance sorts after Nothing, following the IComparable convention.
    Public Function CompareTo(ByVal other As Data) As Integer Implements System.IComparable(Of Data).CompareTo
        If other Is Nothing Then Return 1

        ''//Compare the reference numbers first
        Dim Ret = Me.ReferenceAsNum.CompareTo(other.ReferenceAsNum)
        ''//If they are the same then compare the strings
        If Ret = 0 Then Ret = String.Compare(Me.TextComparable, other.TextComparable, StringComparison.InvariantCultureIgnoreCase)

        Return Ret
    End Function
    Public Overrides Function ToString() As String
        ''//Reproduce the original string
        If Me.ReferenceAtStart Then
            Return String.Format("{0}{1}", Me.Reference, Me.Text)
        Else
            Return String.Format("{1}{0}", Me.Reference, Me.Text)
        End If
    End Function
End Class
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文