解析 PDF 文件时，获取表单字段得到的是 None

发布于 2025-01-22 04:15:01 字数 1607 浏览 0 评论 0原文

我正在尝试解析一个 PDF 文件。我想把所有复选框的值提取到一个列表或字典中，但是遇到了下面这个错误。

"return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())
AttributeError: 'NoneType' object has no attribute 'items'"

我正在尝试的代码是

from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader

def _getFields(obj, tree=None, retval=None, fileobj=None):
    """Gather form field data from a PDF reader into an ordered mapping.

    Args:
        obj: Reader-like object exposing ``trailer``, ``_checkKids`` and
            ``_buildField`` (the caller in this file passes a PyPDF2
            ``PdfFileReader``).
        tree: Form (sub)tree to inspect. Ignored when ``retval`` is None;
            in that case the catalog's ``/AcroForm`` entry is used instead.
        retval: Mapping handed to the reader's helpers to be filled in.
            A new ``OrderedDict`` is created when omitted.
        fileobj: Passed through unchanged to ``_checkKids`` and
            ``_buildField``.

    Returns:
        ``retval``, or ``None`` when ``retval`` was not supplied and the
        document catalog has no ``/AcroForm`` entry.
    """
    # PDF dictionary key -> human-readable label; handed to _buildField.
    attribute_labels = {
        '/FT': 'Field Type',
        '/Parent': 'Parent',
        '/T': 'Field Name',
        '/TU': 'Alternate Field Name',
        '/TM': 'Mapping Name',
        '/Ff': 'Field Flags',
        '/V': 'Value',
        '/DV': 'Default Value',
    }

    if retval is None:
        # Initial call: start from the document catalog's /AcroForm tree.
        root = obj.trailer["/Root"]
        if "/AcroForm" not in root:
            return None
        retval = OrderedDict()
        tree = root["/AcroForm"]

    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)

    # If the tree carries any field attribute, it is itself a field.
    if any(key in tree for key in attribute_labels):
        obj._buildField(tree, retval, fileobj, attribute_labels)

    if "/Fields" in tree:
        for ref in tree["/Fields"]:
            obj._buildField(ref.getObject(), retval, fileobj, attribute_labels)

    return retval

def get_form_fields(infile):
    """Return the '/V' value of every form field in a PDF file.

    Args:
        infile: Path of the PDF file to read.

    Returns:
        An ``OrderedDict`` mapping each field name to its ``'/V'`` entry
        (``''`` when the field has none). The mapping is empty when
        ``_getFields`` reports no form data (it returns ``None`` if the
        document catalog has no ``/AcroForm`` entry).
    """
    # Use a context manager so the file handle is always closed; the
    # original left the handle from open() dangling.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No /AcroForm in this PDF: return an empty result instead of
            # failing with "'NoneType' object has no attribute 'items'".
            return OrderedDict()
        # Build the result while the file is still open, in case the
        # reader fetches objects from the stream on demand.
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())

if __name__ == '__main__':
    # Script entry point: pretty-print the field values of the sample PDF.
    from pprint import pprint

    pprint(get_form_fields('Guild.pdf'))

原文

I am trying to parse a PDF file. I want to collect all of the checkbox values into a list or dictionary, but I am getting this error.

"return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())
AttributeError: 'NoneType' object has no attribute 'items'"

The code I am trying is this

from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader

def _getFields(obj, tree=None, retval=None, fileobj=None):
    """Gather form field data from a PDF reader into an ordered mapping.

    Args:
        obj: Reader-like object exposing ``trailer``, ``_checkKids`` and
            ``_buildField`` (the caller in this file passes a PyPDF2
            ``PdfFileReader``).
        tree: Form (sub)tree to inspect. Ignored when ``retval`` is None;
            in that case the catalog's ``/AcroForm`` entry is used instead.
        retval: Mapping handed to the reader's helpers to be filled in.
            A new ``OrderedDict`` is created when omitted.
        fileobj: Passed through unchanged to ``_checkKids`` and
            ``_buildField``.

    Returns:
        ``retval``, or ``None`` when ``retval`` was not supplied and the
        document catalog has no ``/AcroForm`` entry.
    """
    # PDF dictionary key -> human-readable label; handed to _buildField.
    attribute_labels = {
        '/FT': 'Field Type',
        '/Parent': 'Parent',
        '/T': 'Field Name',
        '/TU': 'Alternate Field Name',
        '/TM': 'Mapping Name',
        '/Ff': 'Field Flags',
        '/V': 'Value',
        '/DV': 'Default Value',
    }

    if retval is None:
        # Initial call: start from the document catalog's /AcroForm tree.
        root = obj.trailer["/Root"]
        if "/AcroForm" not in root:
            return None
        retval = OrderedDict()
        tree = root["/AcroForm"]

    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)

    # If the tree carries any field attribute, it is itself a field.
    if any(key in tree for key in attribute_labels):
        obj._buildField(tree, retval, fileobj, attribute_labels)

    if "/Fields" in tree:
        for ref in tree["/Fields"]:
            obj._buildField(ref.getObject(), retval, fileobj, attribute_labels)

    return retval

def get_form_fields(infile):
    """Return the '/V' value of every form field in a PDF file.

    Args:
        infile: Path of the PDF file to read.

    Returns:
        An ``OrderedDict`` mapping each field name to its ``'/V'`` entry
        (``''`` when the field has none). The mapping is empty when
        ``_getFields`` reports no form data (it returns ``None`` if the
        document catalog has no ``/AcroForm`` entry).
    """
    # Use a context manager so the file handle is always closed; the
    # original left the handle from open() dangling.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No /AcroForm in this PDF: return an empty result instead of
            # failing with "'NoneType' object has no attribute 'items'".
            return OrderedDict()
        # Build the result while the file is still open, in case the
        # reader fetches objects from the stream on demand.
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())

if __name__ == '__main__':
    # Script entry point: pretty-print the field values of the sample PDF.
    from pprint import pprint

    pprint(get_form_fields('Guild.pdf'))

分享到QQ

分享到微博