python中的文件读取

发布于 2024-10-21 22:35:16 字数 529 浏览 7 评论 0原文

所以我的整个问题是我有两个文件，其中一个具有以下格式（对于Python 2.6）：

#comments
config = {
    #comments
    'name': 'hello',
    'see?': 'world':'ABC',CLASS=3
}

该文件有很多这样的部分。第二个文件具有格式：

[23]
[config]
 'name'='abc'
 'see?'=
[23]

现在的要求是我需要比较两个文件并生成文件为：

#comments
config = {
    #comments
    'name': 'abc',
    'see?': 'world':'ABC',CLASS=3
}

因此结果文件将包含第一个文件中的值，除非第二个文件中存在相同属性的值，这将覆盖价值。现在我的问题是如何使用Python 操作这些文件。

提前感谢您之前在短时间内的回答，我需要使用 python 2.6

原文

My problem is that I have two files. The first one has the following format (for Python 2.6):

#comments
config = {
    #comments
    'name': 'hello',
    'see?': 'world':'ABC',CLASS=3
}

This file has a number of sections like this. The second file has this format:

[23]
[config]
 'name'='abc'
 'see?'=
[23]

Now the requirement is that I need to compare both files and generate a file like this:

#comments
config = {
    #comments
    'name': 'abc',
    'see?': 'world':'ABC',CLASS=3
}

So the result file will contain the values from the first file, unless a value for the same attribute is present in the second file, in which case that value overrides the one from the first file. My question is how to manipulate these files using Python.

Thanks in advance, and thank you for your previous quick answers. I need to use Python 2.6.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

孤芳又自赏 2024-10-28 22:35:16

由于文件中含有注释，我没能找到一个优雅的解决方案。以下代码经过测试，在我这里可以正常工作，但需要 Python 3.1 或更高版本：

from collections import OrderedDict

indenting = '\t'

def almost_py_read(f):
    """Parse the Python-like config file into nested ordered mappings.

    f is any iterable of text lines (an open file, a list of strings, ...).

    Returns an OrderedDict in which:
      * a comment line ('#...') is stored as a key mapped to None, so that
        its position is preserved;
      * a line ending with '{' opens a section: the text before the first
        '=' becomes the section name, mapped to a new OrderedDict;
      * a line ending with '}' closes the current section;
      * any other line containing ':' is stored as key -> value in the
        currently active mapping.

    Lines without a ':' (for instance blank lines) are ignored.
    """
    sections = OrderedDict()
    active = sections
    for line in f:
        line = line.strip()
        if line.startswith('#'):
            active[line] = None
        elif line.endswith('{'):
            name = line.split('=')[0].strip()
            active = sections[name] = OrderedDict()
        elif line.endswith('}'):
            active = sections
        else:
            # Split on the FIRST ':' only, so values that contain ':'
            # themselves (e.g. 'world':'ABC',CLASS=3) are kept instead of
            # being silently dropped.
            key, sep, value = line.partition(':')
            if not sep:
                continue
            # Drop the entry separator: output() appends its own ','.
            active[key.strip()] = value.strip().rstrip(',').rstrip()
    return sections

def almost_ini_read(f):
    """Parse the INI-like override file into ordered sections.

    f is any iterable of text lines.

    A line of the form '[name]' starts a new section called 'name' (a
    repeated name replaces the earlier section's contents). Every other
    line is split on its FIRST '=' into key and value; entries with an
    empty value are skipped, because an empty value means "do not
    override". Entries that appear before any section header have no
    section to belong to and are ignored.

    Returns an OrderedDict mapping section name -> OrderedDict of entries.
    """
    sections = OrderedDict()
    contents = None
    for line in f:
        line = line.strip()
        if line.startswith('[') and line.endswith(']'):
            contents = OrderedDict()
            sections[line[1:-1]] = contents
            continue
        # partition() keeps any further '=' inside the value.
        key, sep, value = line.partition('=')
        value = value.strip()
        if sep and value and contents is not None:
            contents[key.strip()] = value
    return sections

def compilefiles(pyname, ininame):
    """Merge the overrides from ininame into the sections read from pyname.

    pyname is parsed with almost_py_read() and ininame with
    almost_ini_read(). For every section of the override file, each of its
    entries replaces (or is added to) the entry with the same key in the
    section of the same name. A section that is missing or empty in the
    first file is created.

    Returns the merged OrderedDict of sections.
    """
    # Context managers make sure both files are closed again.
    with open(pyname, 'rt') as py_file:
        sections = almost_py_read(py_file)
    with open(ininame, 'rt') as ini_file:
        override_sections = almost_ini_read(ini_file)
    for section_key, section_value in override_sections.items():
        if not sections.get(section_key):
            sections[section_key] = OrderedDict()
        sections[section_key].update(section_value)
    return sections

def output(d, indent=''):
    """Print the mapping d in the Python-like config format.

    For each key/value pair of d:
      * a value of None marks a comment line; the key is printed as is;
      * an empty value (empty string or empty section) prints nothing;
      * a string value is printed as 'key: value,';
      * any other value is treated as a section: it is printed as
        'key = {', followed by its own entries one indentation level
        deeper (using the module-level 'indenting' string), then '}'.

    indent is the prefix written in front of every line of this level.
    """
    for k, v in d.items():
        if v is None:
            print(indent + k)
        elif not v:
            continue
        elif isinstance(v, str):
            print(indent + k + ': ' + v + ',')
        else:
            print(indent + k + ' = {')
            output(v, indent + indenting)
            print(indent + '}')

# Merge the overrides from 'b.ini' into the sections read from 'a.txt',
# then print the merged result.
d = compilefiles('a.txt', 'b.ini')
output(d)

输出：

#comments
config = {
    #comments
    'name': 'abc',
    'see?': 'world',
}

I was unable to find a beautiful solution because of the comments in the file. This is tested and works for me, but requires Python 3.1 or higher:

from collections import OrderedDict

indenting = '\t'

def almost_py_read(f):
    """Parse the Python-like config file into nested ordered mappings.

    f is any iterable of text lines (an open file, a list of strings, ...).

    Returns an OrderedDict in which:
      * a comment line ('#...') is stored as a key mapped to None, so that
        its position is preserved;
      * a line ending with '{' opens a section: the text before the first
        '=' becomes the section name, mapped to a new OrderedDict;
      * a line ending with '}' closes the current section;
      * any other line containing ':' is stored as key -> value in the
        currently active mapping.

    Lines without a ':' (for instance blank lines) are ignored.
    """
    sections = OrderedDict()
    active = sections
    for line in f:
        line = line.strip()
        if line.startswith('#'):
            active[line] = None
        elif line.endswith('{'):
            name = line.split('=')[0].strip()
            active = sections[name] = OrderedDict()
        elif line.endswith('}'):
            active = sections
        else:
            # Split on the FIRST ':' only, so values that contain ':'
            # themselves (e.g. 'world':'ABC',CLASS=3) are kept instead of
            # being silently dropped.
            key, sep, value = line.partition(':')
            if not sep:
                continue
            # Drop the entry separator: output() appends its own ','.
            active[key.strip()] = value.strip().rstrip(',').rstrip()
    return sections

def almost_ini_read(f):
    """Parse the INI-like override file into ordered sections.

    f is any iterable of text lines.

    A line of the form '[name]' starts a new section called 'name' (a
    repeated name replaces the earlier section's contents). Every other
    line is split on its FIRST '=' into key and value; entries with an
    empty value are skipped, because an empty value means "do not
    override". Entries that appear before any section header have no
    section to belong to and are ignored.

    Returns an OrderedDict mapping section name -> OrderedDict of entries.
    """
    sections = OrderedDict()
    contents = None
    for line in f:
        line = line.strip()
        if line.startswith('[') and line.endswith(']'):
            contents = OrderedDict()
            sections[line[1:-1]] = contents
            continue
        # partition() keeps any further '=' inside the value.
        key, sep, value = line.partition('=')
        value = value.strip()
        if sep and value and contents is not None:
            contents[key.strip()] = value
    return sections

def compilefiles(pyname, ininame):
    """Merge the overrides from ininame into the sections read from pyname.

    pyname is parsed with almost_py_read() and ininame with
    almost_ini_read(). For every section of the override file, each of its
    entries replaces (or is added to) the entry with the same key in the
    section of the same name. A section that is missing or empty in the
    first file is created.

    Returns the merged OrderedDict of sections.
    """
    # Context managers make sure both files are closed again.
    with open(pyname, 'rt') as py_file:
        sections = almost_py_read(py_file)
    with open(ininame, 'rt') as ini_file:
        override_sections = almost_ini_read(ini_file)
    for section_key, section_value in override_sections.items():
        if not sections.get(section_key):
            sections[section_key] = OrderedDict()
        sections[section_key].update(section_value)
    return sections

def output(d, indent=''):
    """Print the mapping d in the Python-like config format.

    For each key/value pair of d:
      * a value of None marks a comment line; the key is printed as is;
      * an empty value (empty string or empty section) prints nothing;
      * a string value is printed as 'key: value,';
      * any other value is treated as a section: it is printed as
        'key = {', followed by its own entries one indentation level
        deeper (using the module-level 'indenting' string), then '}'.

    indent is the prefix written in front of every line of this level.
    """
    for k, v in d.items():
        if v is None:
            print(indent + k)
        elif not v:
            continue
        elif isinstance(v, str):
            print(indent + k + ': ' + v + ',')
        else:
            print(indent + k + ' = {')
            output(v, indent + indenting)
            print(indent + '}')

# Merge the overrides from 'b.ini' into the sections read from 'a.txt',
# then print the merged result.
d = compilefiles('a.txt', 'b.ini')
output(d)

Output:

#comments
config = {
    #comments
    'name': 'abc',
    'see?': 'world',
}

回复收藏 0 原文

ま柒月 2024-10-28 22:35:16

我花了很长一段时间才写出下面的代码。

逗号的处理让我很头疼。我希望文件在更新之后保持与更新之前相同的格式：每一行都以逗号结尾，最后一行除外。

该代码是针对提问者提出的特定问题而设计的，不能按原样用于其他类型的问题。我知道。这是使用基于正则表达式而不是解析器的代码的问题，我完全意识到这一点。但我认为，通过更改正则表达式，它是一个可以相对容易地适应其他情况的画布，由于正则表达式的可塑性，这是一个相对容易的过程。

def file_updating(updating_filename,updating_data_extractor,filename_to_update):
    """Rewrite filename_to_update in place using values from an updating file.

    updating_data_extractor is called with updating_filename and must
    return a tuple (updating dictionary, compiled regex). The regex is
    searched in every line of the file to update and must expose:
      group(1) - the key together with its leading whitespace,
      group(2) - the leading whitespace alone,
      group(3) - the bare key, looked up in the updating dictionary.

    Matching lines whose key is in the dictionary get the new value.
    Other matching lines are kept, with a trailing comma ensured. Lines
    that do not match, and comment lines starting with '#', are copied
    unchanged. Keys of the dictionary that were not found in the file are
    appended, indented like the first matching line. The last key/value
    line loses its trailing comma and a closing '}' ends the file.

    Only a single '{ ... }' section is supported: new keys are always
    appended just before the final closing brace.
    """
    updating_dico, pat = updating_data_extractor(updating_filename)
    # Work on a copy so the extractor's dictionary is left untouched.
    pending = dict(updating_dico)

    with open(filename_to_update, 'r+') as f:
        # Strip line endings up front so commas are appended to the text
        # itself and not after the newline.
        body = [line.rstrip('\r\n') for line in f]

        # Drop trailing blank lines, then the closing brace, which is
        # written again at the end.
        while body and not body[-1].strip():
            body.pop()
        if body and body[-1].strip() == '}':
            body.pop()

        updated = []
        front = None        # indentation of the first key/value line
        last_data = None    # index in 'updated' of the last key/value line
        for line in body:
            mat = None
            if not line.lstrip().startswith('#'):
                mat = pat.search(line)
            if mat is None:
                # Comments, headers and braces must not receive a comma.
                updated.append(line)
                continue
            if front is None:
                front = mat.group(2)
            key = mat.group(3)
            if key in pending:
                line = '%s: %s,' % (mat.group(1), pending.pop(key))
            else:
                line = line.rstrip().rstrip(',') + ','
            last_data = len(updated)
            updated.append(line)

        # Without any key/value line there is no indentation to copy.
        if front is None:
            front = ''
        for item in pending.items():
            last_data = len(updated)
            updated.append(front + '%s: %s,' % item)

        if last_data is not None:
            updated[last_data] = updated[last_data].rstrip(',')
        updated.append('}')

        f.seek(0, 0)
        f.write('\n'.join(updated))
        # Cut off whatever is left over from the previous, longer content.
        f.truncate()

示例代码：

import re

# Sample content for the file to update: a flat section whose last entry
# has no trailing comma.
bef1 = '''#comments
config =
{
#comments
    'name': 'hello',
    'arctic':01011101,
    'summu': 456,
    'see?': 'world',
    'armorique': 'bretagne'
}'''

# Sample content in which one value is itself brace-delimited and the last
# entry ends with a comma.
bef2 = '''#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}'''



def one_extractor(data_containing_filename):
    """Extract the updating data from a '[N] [config] ... [N]' file.

    The file must contain a block that opens with '[<digits>]', then
    '[config]', and is closed by the same '[<digits>]' marker. Every
    'key=value' line between '[config]' and the closing marker becomes an
    entry of the updating dictionary; lines with an empty value are skipped.

    Returns a tuple (updating dictionary, compiled regex), where the regex
    matches a 'key: value' line of the file to update (group(1): indented
    key, group(2): indentation, group(3): bare key, group(4): value).

    Exits through SystemExit, with a message naming the file, when the
    expected block is not found.
    """
    with open(data_containing_filename) as g:
        contg = re.search(r'\[(\d+)\].+\[config\](.*?)\[(\1)\]', g.read(), re.DOTALL)

    if not contg:
        # Same effect as sys.exit(message); the message names the file
        # that was actually read.
        raise SystemExit(data_containing_filename + " isn't a valid file for updating")

    updating_data = {}
    for line in contg.group(2).splitlines():
        mi = re.match("([^=]+)=[ \f\t\v]*([^ \f\t\v].*|)", line.strip())
        if mi and mi.group(2):
            # Strip the key so that "'name' = x" and "'name'=x" agree.
            updating_data[mi.group(1).strip()] = mi.group(2)

    pat = re.compile(r"(([ \t]*)([^:]+)):\s*(.+),?")

    return (updating_data,pat)



# Demonstration: run the update once for each sample content.
# Every print below takes a single argument in parentheses, which prints the
# same text whether print is a statement or a function.
for bef in (bef1, bef2):

    # Recreate both input files from scratch:
    #   rudu.txt        - the file to update
    #   renew_rudu.txt  - the file holding the updating data
    for path, text in (('rudu.txt', bef),
                       ('renew_rudu.txt', '''[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]''')):
        with open(path, 'w') as out:
            out.write(text)

    # Show both inputs as they are before the update.
    for title, path in (('BEFORE ---------------------------------', 'rudu.txt'),
                        ('\nUPDATING DATA --------------------------', 'renew_rudu.txt')):
        print(title)
        with open(path) as lir:
            print(lir.read())

    file_updating('renew_rudu.txt', one_extractor, 'rudu.txt')

    # Show the updated file.
    print('\nAFTER =================================')
    with open('rudu.txt', 'r') as f:
        print(f.read())

    print('\n\nX#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#\n')

结果：

>>> 
BEFORE ---------------------------------
#comments
config =
{
#comments
    'name': 'hello',
    'arctic':01011101,
    'summu': 456,
    'see?': 'world',
    'armorique': 'bretagne'
}

UPDATING DATA --------------------------
[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]

AFTER =================================
#comments,
config =,
{,
#comments,
    'name': 'abc',
    'arctic':01011101,
    'summu': 'tumulus',
    'see?': 'world',
    'armorique': 'BRETAGNE',
    'boloni': 7600,
    'nuclear': 'apocalypse'
}


X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#

BEFORE ---------------------------------
#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}

UPDATING DATA --------------------------
[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]

AFTER =================================
#comments,
config =,
{,
#comments,
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
    'armorique': 'BRETAGNE',
    'boloni': 7600,
    'summu': 'tumulus',
    'nuclear': 'apocalypse'
}


X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#

>>>

编辑：

我改进了代码，因为我仍然不满意。现在，“变量”front 捕获包含数据的行开头的空白字符（ ' ' 或 '\t' ）要更新的文件。

我还忘记了指令 f.truncate() ，该指令对于不保留不需要的字符的尾部非常重要。

我很高兴看到我的代码即使在以下文件中也能正常工作，其中值是字典，如 Jagdev 所示：

#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}

这证实了我选择逐行处理，而不是尝试使用一个正则表达式。

。

编辑2：

我再次更改了代码。更新是由一个函数执行的，该函数接受以下参数：

更新文件的名称（包含用于更新另一个文件的数据的文件）
以及适合从该特定更新文件中提取数据的函数

因此，可以更新给定文件包含来自各种更新文件的数据。这使得代码更加通用。

It took me a really long and hard time to write the following code.

I had difficulty handling the commas. I wanted the updated file to keep the same format it had before the update: every line ends with a comma, except for the last one.

This code is crafted for the particular problem as exposed by the questioner and can't be used as-is for another type of problem. I know. It's the problem of using a code based on regex and not on a parser, I'm fully aware of that. But I think that it is a canvas that can be relatively easily adapted to other cases, by changing the regexes, which is a relatively readily process thanks to the malleability of regexes.

def file_updating(updating_filename,updating_data_extractor,filename_to_update):
    """Rewrite filename_to_update in place using values from an updating file.

    updating_data_extractor is called with updating_filename and must
    return a tuple (updating dictionary, compiled regex). The regex is
    searched in every line of the file to update and must expose:
      group(1) - the key together with its leading whitespace,
      group(2) - the leading whitespace alone,
      group(3) - the bare key, looked up in the updating dictionary.

    Matching lines whose key is in the dictionary get the new value.
    Other matching lines are kept, with a trailing comma ensured. Lines
    that do not match, and comment lines starting with '#', are copied
    unchanged. Keys of the dictionary that were not found in the file are
    appended, indented like the first matching line. The last key/value
    line loses its trailing comma and a closing '}' ends the file.

    Only a single '{ ... }' section is supported: new keys are always
    appended just before the final closing brace.
    """
    updating_dico, pat = updating_data_extractor(updating_filename)
    # Work on a copy so the extractor's dictionary is left untouched.
    pending = dict(updating_dico)

    with open(filename_to_update, 'r+') as f:
        # Strip line endings up front so commas are appended to the text
        # itself and not after the newline.
        body = [line.rstrip('\r\n') for line in f]

        # Drop trailing blank lines, then the closing brace, which is
        # written again at the end.
        while body and not body[-1].strip():
            body.pop()
        if body and body[-1].strip() == '}':
            body.pop()

        updated = []
        front = None        # indentation of the first key/value line
        last_data = None    # index in 'updated' of the last key/value line
        for line in body:
            mat = None
            if not line.lstrip().startswith('#'):
                mat = pat.search(line)
            if mat is None:
                # Comments, headers and braces must not receive a comma.
                updated.append(line)
                continue
            if front is None:
                front = mat.group(2)
            key = mat.group(3)
            if key in pending:
                line = '%s: %s,' % (mat.group(1), pending.pop(key))
            else:
                line = line.rstrip().rstrip(',') + ','
            last_data = len(updated)
            updated.append(line)

        # Without any key/value line there is no indentation to copy.
        if front is None:
            front = ''
        for item in pending.items():
            last_data = len(updated)
            updated.append(front + '%s: %s,' % item)

        if last_data is not None:
            updated[last_data] = updated[last_data].rstrip(',')
        updated.append('}')

        f.seek(0, 0)
        f.write('\n'.join(updated))
        # Cut off whatever is left over from the previous, longer content.
        f.truncate()

Exemplifying code:

import re

# Sample content for the file to update: a flat section whose last entry
# has no trailing comma.
bef1 = '''#comments
config =
{
#comments
    'name': 'hello',
    'arctic':01011101,
    'summu': 456,
    'see?': 'world',
    'armorique': 'bretagne'
}'''

# Sample content in which one value is itself brace-delimited and the last
# entry ends with a comma.
bef2 = '''#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}'''



def one_extractor(data_containing_filename):
    """Extract the updating data from a '[N] [config] ... [N]' file.

    The file must contain a block that opens with '[<digits>]', then
    '[config]', and is closed by the same '[<digits>]' marker. Every
    'key=value' line between '[config]' and the closing marker becomes an
    entry of the updating dictionary; lines with an empty value are skipped.

    Returns a tuple (updating dictionary, compiled regex), where the regex
    matches a 'key: value' line of the file to update (group(1): indented
    key, group(2): indentation, group(3): bare key, group(4): value).

    Exits through SystemExit, with a message naming the file, when the
    expected block is not found.
    """
    with open(data_containing_filename) as g:
        contg = re.search(r'\[(\d+)\].+\[config\](.*?)\[(\1)\]', g.read(), re.DOTALL)

    if not contg:
        # Same effect as sys.exit(message); the message names the file
        # that was actually read.
        raise SystemExit(data_containing_filename + " isn't a valid file for updating")

    updating_data = {}
    for line in contg.group(2).splitlines():
        mi = re.match("([^=]+)=[ \f\t\v]*([^ \f\t\v].*|)", line.strip())
        if mi and mi.group(2):
            # Strip the key so that "'name' = x" and "'name'=x" agree.
            updating_data[mi.group(1).strip()] = mi.group(2)

    pat = re.compile(r"(([ \t]*)([^:]+)):\s*(.+),?")

    return (updating_data,pat)



# Demonstration: run the update once for each sample content.
# Every print below takes a single argument in parentheses, which prints the
# same text whether print is a statement or a function.
for bef in (bef1, bef2):

    # Recreate both input files from scratch:
    #   rudu.txt        - the file to update
    #   renew_rudu.txt  - the file holding the updating data
    for path, text in (('rudu.txt', bef),
                       ('renew_rudu.txt', '''[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]''')):
        with open(path, 'w') as out:
            out.write(text)

    # Show both inputs as they are before the update.
    for title, path in (('BEFORE ---------------------------------', 'rudu.txt'),
                        ('\nUPDATING DATA --------------------------', 'renew_rudu.txt')):
        print(title)
        with open(path) as lir:
            print(lir.read())

    file_updating('renew_rudu.txt', one_extractor, 'rudu.txt')

    # Show the updated file.
    print('\nAFTER =================================')
    with open('rudu.txt', 'r') as f:
        print(f.read())

    print('\n\nX#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#\n')

Result:

>>> 
BEFORE ---------------------------------
#comments
config =
{
#comments
    'name': 'hello',
    'arctic':01011101,
    'summu': 456,
    'see?': 'world',
    'armorique': 'bretagne'
}

UPDATING DATA --------------------------
[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]

AFTER =================================
#comments,
config =,
{,
#comments,
    'name': 'abc',
    'arctic':01011101,
    'summu': 'tumulus',
    'see?': 'world',
    'armorique': 'BRETAGNE',
    'boloni': 7600,
    'nuclear': 'apocalypse'
}


X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#

BEFORE ---------------------------------
#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}

UPDATING DATA --------------------------
[23]
    [config]
     'nuclear'= 'apocalypse'
     'name'='abc'
     'armorique'= 'BRETAGNE'
     'arctic'=
     'boloni'=7600
     'see?'=
     'summu'='tumulus'
    [23]

AFTER =================================
#comments,
config =,
{,
#comments,
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
    'armorique': 'BRETAGNE',
    'boloni': 7600,
    'summu': 'tumulus',
    'nuclear': 'apocalypse'
}


X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#

>>>

EDIT:

I have improved the code because I was still dissatisfied. Now the variable front captures the whitespace characters (' ' or '\t') at the beginning of the data-containing lines in the file to be updated.

I had also forgotten the instruction f.truncate(), which is essential to avoid leaving a tail of unwanted characters at the end of the file.

I am pleased to see that my code works well even with the following file, in which a value is a dictionary, as presented by Jagdev:

#comments
config =
{
#comments
    'name': 'abc',
    'see?': { 'world':'india':'jagdev'},
}

That confirms my choice to process the file line by line rather than trying to run through the entire file with a single regex.

EDIT 2:

I again changed the code. The updating is performed by a function that takes as arguments :

the name of the updating file (the file containing the data used to update another file)
and the function that is suited to extract the data from this particular updating file

Hence, it is possible to update a given file with data from various updating files. That makes the code more generic.

回复收藏 0 原文

孤千羽 2024-10-28 22:35:16

非常粗略（即这根本没有经过测试，并且可以进行许多改进，例如使用正则表达式和/或漂亮打印）：

# Rough sketch, described by its author as untested. Written with Python 2
# syntax (print statement, backtick repr).
# Idea: collect dict names from file1, apply the overrides found in file2,
# then write the repr of every collected dict to file3.
dicts = []
with open('file1') as file1:
  try:
    file1content = file1.read()
    # NOTE(review): eval() executes whatever file1 contains, so this is only
    # acceptable for trusted files. It evaluates one expression and the result
    # is discarded here; input that is not a valid expression raises and ends
    # up in the bare except below.
    eval(file1content )
    # NOTE(review): str.strip() returns a new string; both results are
    # discarded, so file1content is left unchanged.
    file1content.strip(' ')
    file1content.strip('\t')
    for line in file1content.splitlines():
      if '={' in line: 
        # NOTE(review): split() returns a list, which has no strip() method;
        # this raises AttributeError, swallowed by the bare except below.
        dicts.append(line.split('={').strip())
  except:
    # Any failure above is reported as an invalid file1.
    print 'file1 not valid'
with open('file2') as file2:
  filelines = file2.readlines()
  while filelines:
    # Skip ahead to the next '[23]' marker line.
    while filelines and '[23]' not in filelines[0]:
      filelines.pop(0)
    if filelines:
      # Drop the marker itself; the next line carries the section name
      # between square brackets.
      filelines.pop(0)
      dictname = filelines.pop(0).split('[')[1].split(']')[0]
      if dictname not in dicts:
        dicts.append(dictname)
        # Bind an empty dict to a variable named after the section.
        # NOTE(review): exec on text read from a file; trusted input only.
        exec(dictname + ' = {}')
      # Consume key=value lines up to the next '[23]' marker.
      while filelines and '[23]' not in filelines[0]:
        line = filelines.pop(0)
        # NOTE(review): raises ValueError unless the line holds exactly one '='.
        [k,v] = line.split('=')
        # NOTE(review): the results of strip() are discarded, so k and v keep
        # their surrounding whitespace; v still ends with the newline kept by
        # readlines(), which makes the test below true even for an empty value.
        k.strip()
        v.strip()
        if v:
          exec(dictname + '[k] = v')
with open('file3', 'w') as file3:
  # One repr per collected name, joined with newlines.
  file3content = '\n'.join([`eval(dictname)` for dictname in dicts])
  file3.write(file3content)

Very roughly (i.e. this hasn't been tested at all, and there are numerous improvements that could be made, such as the use of regexes and/or pretty-printing):

# Rough sketch, described by its author as untested. Written with Python 2
# syntax (print statement, backtick repr).
# Idea: collect dict names from file1, apply the overrides found in file2,
# then write the repr of every collected dict to file3.
dicts = []
with open('file1') as file1:
  try:
    file1content = file1.read()
    # NOTE(review): eval() executes whatever file1 contains, so this is only
    # acceptable for trusted files. It evaluates one expression and the result
    # is discarded here; input that is not a valid expression raises and ends
    # up in the bare except below.
    eval(file1content )
    # NOTE(review): str.strip() returns a new string; both results are
    # discarded, so file1content is left unchanged.
    file1content.strip(' ')
    file1content.strip('\t')
    for line in file1content.splitlines():
      if '={' in line: 
        # NOTE(review): split() returns a list, which has no strip() method;
        # this raises AttributeError, swallowed by the bare except below.
        dicts.append(line.split('={').strip())
  except:
    # Any failure above is reported as an invalid file1.
    print 'file1 not valid'
with open('file2') as file2:
  filelines = file2.readlines()
  while filelines:
    # Skip ahead to the next '[23]' marker line.
    while filelines and '[23]' not in filelines[0]:
      filelines.pop(0)
    if filelines:
      # Drop the marker itself; the next line carries the section name
      # between square brackets.
      filelines.pop(0)
      dictname = filelines.pop(0).split('[')[1].split(']')[0]
      if dictname not in dicts:
        dicts.append(dictname)
        # Bind an empty dict to a variable named after the section.
        # NOTE(review): exec on text read from a file; trusted input only.
        exec(dictname + ' = {}')
      # Consume key=value lines up to the next '[23]' marker.
      while filelines and '[23]' not in filelines[0]:
        line = filelines.pop(0)
        # NOTE(review): raises ValueError unless the line holds exactly one '='.
        [k,v] = line.split('=')
        # NOTE(review): the results of strip() are discarded, so k and v keep
        # their surrounding whitespace; v still ends with the newline kept by
        # readlines(), which makes the test below true even for an empty value.
        k.strip()
        v.strip()
        if v:
          exec(dictname + '[k] = v')
with open('file3', 'w') as file3:
  # One repr per collected name, joined with newlines.
  file3content = '\n'.join([`eval(dictname)` for dictname in dicts])
  file3.write(file3content)

回复收藏 0 原文

~没有更多了~