@@ -33,11 +33,11 @@ def pdfua(pdf, metadata, document, page_streams):
33
33
structure = {}
34
34
document .build_element_structure (structure )
35
35
36
- elements = []
36
+ document_children = []
37
37
content_mapping ['Nums' ] = pydyf .Array ()
38
38
links = []
39
39
for page_number , page_stream in enumerate (page_streams ):
40
- page_elements = []
40
+ parents = [None ] * len ( page_stream . marked )
41
41
for mcid , (key , box ) in enumerate (page_stream .marked ):
42
42
# Build structure elements
43
43
kids = [mcid ]
@@ -78,31 +78,40 @@ def pdfua(pdf, metadata, document, page_streams):
78
78
'P' : child .reference ,
79
79
})
80
80
pdf .add_object (real_child )
81
+ for kid in kids :
82
+ if isinstance (kid , int ):
83
+ parents [kid ] = real_child .reference
81
84
child ['K' ] = pydyf .Array ([real_child .reference ])
82
85
structure_data ['element' ] = real_child
86
+ else :
87
+ for kid in kids :
88
+ if isinstance (kid , int ):
89
+ parents [kid ] = child .reference
83
90
else :
84
91
child = structure_data ['element' ]
85
92
child ['K' ].extend (kids )
93
+ for kid in kids :
94
+ if isinstance (kid , int ):
95
+ parents [kid ] = child .reference
86
96
kid = child .reference
87
97
if key == 'Link' :
88
98
links .append ((kid , box .link_annotation ))
89
99
if child_structure_data_element is not None :
90
100
child_structure_data_element ['P' ] = kid
91
101
if not new_element :
92
102
break
93
- page_elements .append (kid )
94
103
kids = [kid ]
95
104
child_structure_data_element = child
96
105
if structure_data ['parent' ] is None :
97
106
child ['P' ] = structure_document .reference
107
+ document_children .append (child .reference )
98
108
break
99
109
else :
100
110
etree_element = structure_data ['parent' ]
101
111
key = page_stream .get_marked_content_tag (etree_element .tag )
102
112
content_mapping ['Nums' ].append (page_number )
103
- content_mapping ['Nums' ].append (pydyf .Array (page_elements ))
104
- elements .extend (page_elements )
105
- structure_document ['K' ] = pydyf .Array (elements )
113
+ content_mapping ['Nums' ].append (pydyf .Array (parents ))
114
+ structure_document ['K' ] = pydyf .Array (document_children )
106
115
for i , (link , annotation ) in enumerate (links , start = page_number + 1 ):
107
116
content_mapping ['Nums' ].append (i )
108
117
content_mapping ['Nums' ].append (link )
0 commit comments