% \iffalse meta-comment
%
%% File: tagpdf-struct.dtx
%
% Copyright (C) 2019-2024 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-struct module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-struct} module\\ Commands to create the structure  ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%  Ulrike Fischer\thanks
%   {^^A
%     E-mail:
%     \href{mailto:fischer@troubleshooting-tex.de}
%       {fischer@troubleshooting-tex.de}^^A
%   }^^A
% }
%
% \date{Version 0.99b, released 2024-04-12}
% \maketitle
% \begin{documentation}
% \section{Public Commands}
% \begin{function}{\tag_struct_begin:n,\tag_struct_end:,\tag_struct_end:n}
% \begin{syntax}
% \cs{tag_struct_begin:n}\Arg{key-values}\\
% \cs{tag_struct_end:}\\
% \cs{tag_struct_end:n}\Arg{tag}
% \end{syntax}
% These commands start and end a new structure.
% They don't start a group. They set all their values globally.
% \cs{tag_struct_end:n} does nothing special normally (apart from
% swallowing its argument), but if \texttt{tagpdf-debug} is loaded,
% it will check if the \Arg{tag} (after expansion)
% is identical to the current structure on the stack. The tag is not role mapped!
% \end{function} % \begin{function}{\tag_struct_use:n,\tag_struct_use_num:n} % \begin{syntax} % \cs{tag_struct_use:n}\Arg{label}\\ % \cs{tag_struct_use_num:n}\Arg{structure number} % \end{syntax} % These commands insert a structure previously stashed away as kid % into the currently active structure. % A structure should be used only once; % if the structure already has a parent a warning is issued. % \end{function} % \begin{function}{\tag_struct_object_ref:n,\tag_struct_object_ref:e} % \begin{syntax} % \cs{tag_struct_object_ref:n}\Arg{struct number} % \end{syntax} % This is a small wrapper around |\pdf_object_ref:n| to retrieve the % object reference of the structure with the number \meta{struct number}. % This number can be retrieved and stored for the current structure % for example with \cs{tag_get:n}\Arg{struct_num}. Be aware that it can only % be used if the structure has already been created and that it doesn't check % if the object actually exists! % \end{function} % % The following two functions are used to add annotations. They must be used % together and with care to get the same numbers. Perhaps some improvements are needed % here. % \begin{function}{\tag_struct_insert_annot:nn} % \begin{syntax} % \cs{tag_struct_insert_annot:nn}\Arg{object reference}\Arg{struct parent number} % \end{syntax} % This inserts an annotation in the structure. \meta{object reference} % is the reference to the annotation. \meta{struct parent number} % should be the same number as had been inserted with \cs{tag_struct_parent_int:} % as |StructParent| value to the dictionary of the annotation. % The command will increase the value of the counter % used by \cs{tag_struct_parent_int:}. % \end{function} % \begin{function}{\tag_struct_parent_int:} % \begin{syntax} % \cs{tag_struct_parent_int:} % \end{syntax} % This gives back the next free /StructParent number (assuming that it is used % together with \cs{tag_struct_insert_annot:nn}, which will increase the number). 
% \end{function} % % \begin{function}{\tag_struct_gput:nnn} % \begin{syntax} % \cs{tag_struct_gput:nnn}\Arg{structure number}\Arg{keyword}\Arg{value} % \end{syntax} % This is a command that allows updating the data of a structure. % This often can't be done simply by replacing the value, as we have to % preserve and extend existing content. We therefore use dedicated functions % adjusted to the key in question. % The first argument is the number of the structure, % the second a keyword referring to a function, % the third the value. Currently the only keyword is \texttt{ref} which updates % the Ref key (an array). % \end{function} % % \section{Public keys} % \subsection{Keys for the structure commands} % \begin{function}{tag (struct-key)} % This is required. The value of the key is normally one of the % standard types listed in the main tagpdf documentation. % It is possible to set up new tags/types. % The value can also be of the form |type/NS|, where |NS| is the % shorthand of a declared name space. % Currently the name spaces |pdf|, |pdf2|, |mathml| and |user| are defined. % This allows using a different name space than % the one connected by default to the tag. But normally this should not be needed. % \end{function} % \begin{function}{stash (struct-key)} % Normally a new structure inserts itself as a kid % into the currently active structure. This key prohibits this. % The structure is nevertheless from now on % \enquote{the current active structure} % and parent for following marked content and structures. % \end{function} % \begin{function}{label (struct-key)} % This key sets a label by which % one can refer to the structure. It is e.g. % used by \cs{tag_struct_use:n} (where a real label is actually not % needed as you can only use structures already defined), and by the % |ref| key (which can refer to future structures). 
% Internally the label name will start with \texttt{tagpdfstruct-} and it stores % the two attributes |tagstruct| (the structure number) and |tagstructobj| (the % object reference). % \end{function} % \begin{function}{parent (struct-key)} % By default a structure is added as kid to the currently active structure. % With the parent key one can choose another parent. The value is a structure number which % must refer to an already existing, previously created structure. Such a structure % number can for example have been stored with \cs{tag_get:n}, but one can also use % a label on the parent structure and then use % \cs{property_ref:nn}|{tagpdfstruct-label}{tagstruct}| to retrieve it. % \end{function} % \begin{function}{title (struct-key),title-o (struct-key)} % These keys allow setting the dictionary entry % \texttt{/T} (the title) in the structure object. % The value is handled as verbatim string and hex encoded. % Commands are not expanded. |title-o| will expand the value once. % \end{function} % % \begin{function}{alt (struct-key)} % This key inserts an \texttt{/Alt} value in the dictionary of the structure object. % The value is handled as verbatim string and hex encoded. % The value will be expanded once first. If it is empty, nothing will happen. % \end{function} % \begin{function}{actualtext (struct-key)} % This key inserts an \texttt{/ActualText} value in the dictionary of the structure object. % The value is handled as verbatim string and hex encoded. % The value will be expanded once first. If it is empty, nothing will happen. % \end{function} % \begin{function}{lang (struct-key)} % This key allows setting the language for a structure element. The value should be a bcp-identifier, % e.g. |de-DE|. % \end{function} % \begin{function}{ref (struct-key)} % This key allows adding references to other structure elements; % it adds the |/Ref| array to the structure. % The value should be a comma separated list of structure labels % set with the |label| key, e.g.
|ref={label1,label2}|. % \end{function} % \begin{function}{E (struct-key)} % This key sets the |/E| key, the expanded form of an % abbreviation or an acronym % (I couldn't think of a better name, so I stuck to E). % \end{function} % \begin{function}{AF (struct-key),AFref (struct-key), % AFinline (struct-key),AFinline-o (struct-key),texsource,mathml} % \begin{syntax} % AF = \meta{object name}\\ % AFref = \meta{object reference}\\ % AF-inline = \meta{text content}\\ % \end{syntax} % These keys allow referencing an associated file in the structure element. % The value \meta{object name} should be the name of an object pointing % to the \texttt{/Filespec} dictionary as expected by % |\pdf_object_ref:n| from a current \texttt{l3kernel}. % % The value of |AF-inline| is some text, % which is embedded in the PDF as a text file with mime type text/plain. % |AF-inline-o| is like |AF-inline| but expands the value once. % % Future versions will perhaps extend this to more mime types, but it is % still a research task to find out what is really needed. % % |texsource| is a special variant of |AF-inline-o| which embeds the file % as |.tex| source with the |/AFRelationship| key set to |/Source|. It also sets the |/Desc| key % to a (currently) fixed text. % % |mathml| is a special variant of |AF-inline-o| which embeds the file % as |.xml| file with the |/AFRelationship| key set to |/Supplement|. % It also sets the |/Desc| key to a (currently) fixed text. % % The argument of |AF| is an object name referring to an embedded file as declared for example with % \cs{pdf_object_new:n} or with the l3pdffile module. |AF| expands its argument % (this allows e.g. to use some variable for automatic numbering) % and can be used more than once, to associate more than one file. % % The argument of |AFref| is an object reference to an embedded file % or a variable expanding to such an object reference in the format % as you would get e.g.
from \cs{pdf_object_ref_last:} or \cs{pdf_object_ref:n} % (and which is different for the various engines!). The key allows making % use of anonymous objects. Like |AF| the |AFref| key expands its argument % and can be used more than once, to associate more than one file. \emph{It % does not check if the reference is valid!} % % The inline keys can be used only once per structure. Additional calls are ignored. % \end{function} % % \begin{function}{attribute (struct-key)} % This key takes as argument a comma list of attribute names % (use braces to protect the commas from the external key-val parser) % and allows adding one or more attribute dictionary entries in % the structure object. As an example: % \begin{verbatim} % \tagstructbegin{tag=TH,attribute= TH-row} % \end{verbatim} % Attribute names and their content must be declared first in \cs{tagpdfsetup}. % % \end{function} % % \begin{function}{attribute-class (struct-key)} % This key takes as argument a comma list of attribute class names % (use braces to protect the commas from the external key-val parser) % and allows adding one or more attribute classes to the structure object. % % Attribute class names and their content % must be declared first in \cs{tagpdfsetup}. % \end{function} % \subsection{Setup keys} % \begin{function}{role/new-attribute (setup-key), newattribute (deprecated)} % \begin{syntax} % role/new-attribute = \Arg{name}\Arg{Content} % \end{syntax} % This key can be used in the setup command \cs{tagpdfsetup} and allows declaring a % new attribute, which can be used as attribute or attribute class. % The value consists of two brace groups, the first contains the name, the second the content.
% \begin{verbatim}
% \tagpdfsetup
%  {
%   role/new-attribute =
%    {TH-col}{/O /Table /Scope /Column},
%   role/new-attribute =
%    {TH-row}{/O /Table /Scope /Row},
%  }
% \end{verbatim}
%
% \end{function}
% \begin{function}{root-AF (setup-key)}
% \begin{syntax}
% root-AF = \meta{object name}
% \end{syntax}
% This key can be used in the setup command \cs{tagpdfsetup} and allows
% to add associated files to the root structure. Like |AF| it can be used more than
% once to add more than one file.
% \end{function}
% \end{documentation}
% \begin{implementation}
%    \begin{macrocode}
%<@@=tag>
%<*header>
\ProvidesExplPackage {tagpdf-struct-code} {2024-04-12} {0.99b}
 {part of tagpdf - code related to storing structure}
%</header>
%    \end{macrocode}
% \section{Variables}
% \begin{variable}{\c@g_@@_struct_abs_int}
% Every structure will have a unique, absolute number.
% I will use a latex counter for the structure count
% to have a chance to avoid double structures in align etc.
% The counter is created in the \texttt{base} part and initialized to 1.
%
%    \begin{macrocode}
%<base>\newcounter  { g_@@_struct_abs_int }
%<base>\int_gset:Nn \c@g_@@_struct_abs_int { 1 }
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_objR_seq}
% A sequence to store the mapping between the
% structure number and the object number.
% We assume that structure numbers are assigned
% consecutively and so the index of the seq can be used.
% A seq allows easy mapping over the structures.
%    \begin{macrocode}
%<*package>
\@@_seq_new:N  \g_@@_struct_objR_seq
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\c_@@_struct_null_tl}
% In lua mode we have to test if the kids are null.
%    \begin{macrocode}
\tl_const:Nn\c_@@_struct_null_tl {null}
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\g_@@_struct_cont_mc_prop}
% In generic mode it can happen after
% a page break that we have to inject into a structure
% sequence an additional mc after. We will store this additional
% info in a property. The key is the absolute mc num, the value the pdf directory.
%    \begin{macrocode}
\@@_prop_new:N  \g_@@_struct_cont_mc_prop
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_stack_seq}
% A stack sequence for the structure stack.
% When a structure is opened its number is put on the stack.
% The stack is initialized with the number 1.
%    \begin{macrocode}
\seq_new:N    \g_@@_struct_stack_seq
\seq_gpush:Nn \g_@@_struct_stack_seq {1}
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_tag_stack_seq}
% We will perhaps also need the tags. While it is possible to get them from the
% numbered stack, let's build a tag stack too.
% Each item consists of two brace groups; the initial item is
% |{Root}{StructTreeRoot}|.
%    \begin{macrocode}
\seq_new:N    \g_@@_struct_tag_stack_seq
\seq_gpush:Nn \g_@@_struct_tag_stack_seq {{Root}{StructTreeRoot}}
%    \end{macrocode}
% \end{variable}
%
%
% \begin{variable}{\g_@@_struct_stack_current_tl,\l_@@_struct_stack_parent_tmpa_tl}
% The global variable will hold the current structure number. It is already
% defined in \texttt{tagpdf-base}.
% The local temporary variable will hold the parent when we fetch it from the stack.
%    \begin{macrocode}
%</package>
%<base>\tl_new:N \g_@@_struct_stack_current_tl
%<base>\tl_gset:Nn \g_@@_struct_stack_current_tl {\int_use:N\c@g_@@_struct_abs_int}
%<*package>
\tl_new:N     \l_@@_struct_stack_parent_tmpa_tl
%    \end{macrocode}
% \end{variable}
%
% I will need at least one structure: the StructTreeRoot.
% Normally it should have only one kid, e.g. the document element.
% The data of the StructTreeRoot and the StructElem are in properties:
% |\g_@@_struct_1_prop| for the root and
% |\g_@@_struct_N_prop|, $N \geq 2$ for the other.
%
% This creates quite a number of properties, so perhaps we will have to
% do this more efficiently in the future.
%
% All properties have at least the keys
% \begin{description}
% \item[Type] StructTreeRoot or StructElem
% \end{description}
% and the keys from the two following lists
% (the root has a special set of properties).
% The values of the prop should be already escaped properly
% when the entries are created (title, lang, alt, E, actualtext).
% \begin{variable}
%   {
%     \c_@@_struct_StructTreeRoot_entries_seq,
%     \c_@@_struct_StructElem_entries_seq
%   }
% These seq contain the keys we support in the two object types.
% They are currently no longer used, but are provided as documentation and
% for potential future checks.
% They should be adapted if there are changes in the PDF format.
%    \begin{macrocode}
\seq_const_from_clist:Nn \c_@@_struct_StructTreeRoot_entries_seq
  {%p. 857/858
    Type,              % always /StructTreeRoot
    K,                 % kid, dictionary or array of dictionaries
    IDTree,            % currently unused
    ParentTree,        % required, obj ref to the parent tree
    ParentTreeNextKey, % optional
    RoleMap,
    ClassMap,
    Namespaces,
    AF                 % pdf 2.0
  }

\seq_const_from_clist:Nn \c_@@_struct_StructElem_entries_seq
  {%p 858 f
    Type,              % always /StructElem
    S,                 % tag/type
    P,                 % parent
    ID,                % optional
    Ref,               % optional, pdf 2.0 Use?
    Pg,                % obj num of starting page, optional
    K,                 % kids
    A,                 % attributes, probably unused
    C,                 % class ""
    %R,                % attribute revision number, irrelevant for us as we
                       % don't update/change existing PDF and (probably)
                       % deprecated in PDF 2.0
    T,                 % title, value in () or <>
    Lang,              % language
    Alt,               % value in () or <>
    E,                 % abbreviation
    ActualText,
    AF,                % pdf 2.0, array of dict, associated files
    NS,                % pdf 2.0, dict, namespace
    PhoneticAlphabet,  % pdf 2.0
    Phoneme            % pdf 2.0
  }
%    \end{macrocode}
% \end{variable}
%
% \subsection{Variables used by the keys}
% \begin{variable}{\g_@@_struct_tag_tl,\g_@@_struct_tag_NS_tl,
%    \l_@@_struct_roletag_tl,\l_@@_struct_roletag_NS_tl}
% Used by the tag key to store the tag and the namespace.
% The role tag variables will hold locally rolemapping info needed
% for the parent-child checks.
%    \begin{macrocode}
\tl_new:N \g_@@_struct_tag_tl
\tl_new:N \g_@@_struct_tag_NS_tl
\tl_new:N \l_@@_struct_roletag_tl
\tl_new:N \l_@@_struct_roletag_NS_tl
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_struct_key_label_tl}
% This will hold the label value.
%    \begin{macrocode}
\tl_new:N \l_@@_struct_key_label_tl
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_struct_elem_stash_bool}
% This will keep track of the stash status.
%    \begin{macrocode}
\bool_new:N \l_@@_struct_elem_stash_bool
%    \end{macrocode}
% \end{variable}
%
% \subsection{Variables used by tagging code of basic elements}
%
% \begin{variable}{\g_@@_struct_dest_num_prop}
% This variable records for (some or all, not clear yet)
% destination names the related structure number to allow
% to reference them in a Ref. The key is the destination.
% It is currently used by the toc-tagging and sec-tagging code.
%    \begin{macrocode}
%</package>
%<base>\prop_new_linked:N \g_@@_struct_dest_num_prop
%<*package>
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_ref_by_dest_prop}
% This variable contains structures whose Ref key should be updated
% at the end to point to structures related to this destination.
% As this is probably needed in other places too, it is not only a toc-variable.
%    \begin{macrocode}
\prop_new_linked:N \g_@@_struct_ref_by_dest_prop
%    \end{macrocode}
% \end{variable}
%
% \section{Commands}
%
% The properties must be in some places handled expandably.
% So I need an output handler for each prop, to get expandable output
% see \url{https://tex.stackexchange.com/questions/424208}.
% There is probably room here for a more efficient implementation.
% TODO check if this can now be implemented with the pdfdict commands.
% The property contains currently non pdf keys, but e.g. object numbers are
% perhaps no longer needed as we have named object anyway.
%
% \begin{macro}{\@@_struct_output_prop_aux:nn,\@@_new_output_prop_handler:n}
% \cs{@@_struct_output_prop_aux:nn} expands to
% \verb*| /|\meta{key}\verb*| |\meta{value} if \meta{key} (|#2|) exists in the
% property list of structure |#1|, and to nothing otherwise.
% \cs{@@_new_output_prop_handler:n} defines for the structure number |#1|
% the expandable handler |\@@_struct_output_prop_|\meta{num}|:n|, which
% calls the auxiliary with this number.
%    \begin{macrocode}
\cs_new:Npn \@@_struct_output_prop_aux:nn #1 #2 %#1 num, #2 key
  {
    \prop_if_in:cnT
      { g_@@_struct_#1_prop }
      { #2 }
      {
        \c_space_tl/#2~ \prop_item:cn{ g_@@_struct_#1_prop } { #2 }
      }
  }

\cs_new_protected:Npn \@@_new_output_prop_handler:n #1
  {
    \cs_new:cn { @@_struct_output_prop_#1:n }
      {
        \@@_struct_output_prop_aux:nn {#1}{##1}
      }
  }
%</package>
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_prop_gput:nnn}
% The structure props must be filled in various places.
% For this we use a common command which also takes care of the debug package:
% |#1| is the structure number, |#2| the key and |#3| the value. In the
% debug variant the entry is additionally stored in
% |g_@@_struct_debug_|\meta{num}|_prop|.
%    \begin{macrocode}
%<*package|debug>
%<package>\cs_new_protected:Npn \@@_struct_prop_gput:nnn #1 #2 #3
%<debug>\cs_set_protected:Npn \@@_struct_prop_gput:nnn #1 #2 #3
  {
    \@@_prop_gput:cnn
      { g_@@_struct_#1_prop }{#2}{#3}
%<debug>\prop_gput:cnn { g_@@_struct_debug_#1_prop } {#2} {#3}
  }
\cs_generate_variant:Nn \@@_struct_prop_gput:nnn {nne,nee,nno}
%</package|debug>
%    \end{macrocode}
% \end{macro}
% \subsection{Initialization of the StructTreeRoot}
% The first structure element, the StructTreeRoot is special, so
% created manually. The underlying object is |@@/struct/1| which is currently
% created in the tree code (TODO move it here).
% The |ParentTree| and |RoleMap| entries are added at begin document
% in the tree code as they refer to objects which are set up in other parts of the
% code. This avoids timing issues.
%
%    \begin{macrocode}
%<*package>
\tl_gset:Nn \g_@@_struct_stack_current_tl {1}
%    \end{macrocode}
% \begin{macro}{\@@_pdf_name_e:n}
% A short internal wrapper around \cs{pdf_name_from_unicode_e:n}.
%    \begin{macrocode}
\cs_new:Npn \@@_pdf_name_e:n #1{\pdf_name_from_unicode_e:n{#1}}
%</package>
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{g_@@_struct_1_prop,g_@@_struct_kids_1_seq}
% The property list and the kids sequence of the root, together with its
% output handler. Besides |Type| and |S| the non-PDF entries |rolemap|
% and |parentrole| are stored, both with the value |{StructTreeRoot}{pdf}|.
%    \begin{macrocode}
%<*package>
\@@_prop_new:c { g_@@_struct_1_prop }
\@@_new_output_prop_handler:n {1}
\@@_seq_new:c  { g_@@_struct_kids_1_seq }

\@@_struct_prop_gput:nne
  { 1 }
  { Type }
  { \pdf_name_from_unicode_e:n {StructTreeRoot} }

\@@_struct_prop_gput:nne
  { 1 }
  { S }
  { \pdf_name_from_unicode_e:n {StructTreeRoot} }

\@@_struct_prop_gput:nne
  { 1 }
  { rolemap }
  { {StructTreeRoot}{pdf} }

\@@_struct_prop_gput:nne
  { 1 }
  { parentrole }
  { {StructTreeRoot}{pdf} }

%    \end{macrocode}
% Namespaces are pdf 2.0, so the entry is only added if the
% PDF version is not lower than 2.0.
% If the code moves into the kernel, the setting must be probably delayed.
%    \begin{macrocode}
\pdf_version_compare:NnF < {2.0}
 {
   \@@_struct_prop_gput:nne
     { 1 }
     { Namespaces }
     { \pdf_object_ref:n { @@/tree/namespaces } }
 }
%</package>
%    \end{macrocode}
% In debug mode we have to copy the root manually as it is already set up:
%    \begin{macrocode}
%<debug>\prop_new:c { g_@@_struct_debug_1_prop }
%<debug>\seq_new:c  { g_@@_struct_debug_kids_1_seq }
%<debug>\prop_gset_eq:cc { g_@@_struct_debug_1_prop }{ g_@@_struct_1_prop }
%<debug>\prop_gremove:cn { g_@@_struct_debug_1_prop }{Namespaces}
%    \end{macrocode}
% \end{variable}
%
% \subsection{Adding the /ID key}
% Every structure gets automatically an ID which is currently
% simply calculated from the structure number.
% \begin{macro}{\@@_struct_get_id:n}
% The function is expandable and gives back a PDF string of the
% form |(ID.|\meta{zeros}\meta{number}|)|. The number of padding zeros is the
% absolute value of the difference between \cs{g_@@_tree_id_pad_int}
% and the number of digits of the structure number.
%    \begin{macrocode}
%<*package>
\cs_new:Npn \@@_struct_get_id:n #1 %#1=struct num
  {
    (
     ID.
     \prg_replicate:nn
      { \int_abs:n{\g_@@_tree_id_pad_int - \tl_count:e { \int_to_arabic:n { #1 } }} }
      { 0 }
     \int_to_arabic:n { #1 }
    )
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Filling in the tag info}
% \begin{macro}{\@@_struct_set_tag_info:nnn }
% This adds or updates the tag info to a structure given by a number.
% We need also the original data, so we store both.
% The definition depends on the PDF version: below 2.0 only the |S| entry
% is stored. Otherwise the |NS| entry is stored too, if the
% name space |#3| is found in \cs{g_@@_role_NS_prop}.
%    \begin{macrocode}
\pdf_version_compare:NnTF < {2.0}
 {
   \cs_new_protected:Npn \@@_struct_set_tag_info:nnn #1 #2 #3
     %#1 structure number, #2 tag, #3 NS
     {
       \@@_struct_prop_gput:nne
         { #1 }
         { S }
         { \pdf_name_from_unicode_e:n {#2} } %
     }
 }
 {
   \cs_new_protected:Npn \@@_struct_set_tag_info:nnn #1 #2 #3
     {
       \@@_struct_prop_gput:nne
         { #1 }
         { S }
         { \pdf_name_from_unicode_e:n {#2} } %
       \prop_get:NnNT \g_@@_role_NS_prop {#3} \l_@@_get_tmpc_tl
         {
           \@@_struct_prop_gput:nne
             { #1 }
             { NS }
             { \l_@@_get_tmpc_tl } %
         }
     }
 }
\cs_generate_variant:Nn \@@_struct_set_tag_info:nnn {eVV}
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_get_parentrole:nNN}
% We also need a way to get the tag info needed for parent child
% check from parent structures.
% The |parentrole| entry of structure |#1| consists of two brace groups.
% The first is stored in |#2|, the second in |#3|. If the entry does not
% exist, both variables are cleared.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_struct_get_parentrole:nNN #1 #2 #3
  %#1 struct num, #2 tlvar for tag , #3 tlvar for NS
  {
    \prop_get:cnNTF
      { g_@@_struct_#1_prop }
      { parentrole }
      \l_@@_get_tmpc_tl
      {
        \tl_set:Ne #2{\exp_last_unbraced:NV\use_i:nn \l_@@_get_tmpc_tl}
        \tl_set:Ne #3{\exp_last_unbraced:NV\use_ii:nn \l_@@_get_tmpc_tl}
      }
      {
        \tl_clear:N#2
        \tl_clear:N#3
      }
  }
\cs_generate_variant:Nn\@@_struct_get_parentrole:nNN {eNN}
%    \end{macrocode}
% \end{macro}
% \subsection{Handling kids}
% Commands to store the kids. Kids in a structure can be a reference to a mc-chunk,
% an object reference to another structure element, or an object reference to an
% annotation (through an OBJR object).
% \begin{macro}{\@@_struct_kid_mc_gput_right:nn,\@@_struct_kid_mc_gput_right:ne}
% The command to store an mc-chunk, this is a dictionary of type MCR.
% It would be possible to write out the content directly as unnamed object
% and to store only the object reference, but probably this would be slower,
% and the PDF is more readable like this.
% The code doesn't try to avoid the use of the /Pg key by checking page numbers.
% That imho only slows down without much gain.
% In generic mode the page break code will perhaps have to insert
% an additional mcid after an existing one. For this we use a property list.
% At first an auxiliary to write the MCID dict. This should normally be expanded!
%    \begin{macrocode}
\cs_new:Npn \@@_struct_mcid_dict:n #1 %#1 MCID absnum
  {
     <<
      /Type \c_space_tl /MCR \c_space_tl
      /Pg
        \c_space_tl
      \pdf_pageobject_ref:n { \@@_property_ref:enn{mcid-#1}{tagabspage}{1} }
       /MCID \c_space_tl \@@_property_ref:enn{mcid-#1}{tagmcid}{1}
     >>
  }
%</package>
%    \end{macrocode}
% The kid command stores two items in the kids sequence of structure |#1|:
% first the (expanded) MCR dictionary, then an unexpanded
% \cs{prop_item:Nn} call which retrieves a possible additional entry for
% this mc from \cs{g_@@_struct_cont_mc_prop}.
%    \begin{macrocode}
%<*package|debug>
%<package>\cs_new_protected:Npn \@@_struct_kid_mc_gput_right:nn #1 #2 %#1 structure num, #2 MCID absnum%
%<debug>\cs_set_protected:Npn \@@_struct_kid_mc_gput_right:nn #1 #2 %#1 structure num, #2 MCID absnum%
  {
    \@@_seq_gput_right:ce
      { g_@@_struct_kids_#1_seq }
      {
        \@@_struct_mcid_dict:n {#2}
      }
%<debug>    \seq_gput_right:cn
%<debug>      { g_@@_struct_debug_kids_#1_seq }
%<debug>      {
%<debug>        MC~#2
%<debug>      }
    \@@_seq_gput_right:cn
      { g_@@_struct_kids_#1_seq }
      {
        \prop_item:Nn \g_@@_struct_cont_mc_prop {#2}
      }
  }
%<package>\cs_generate_variant:Nn \@@_struct_kid_mc_gput_right:nn {ne}
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {
%    \@@_struct_kid_struct_gput_right:nn,\@@_struct_kid_struct_gput_right:ee
%  }
% This command adds a structure as kid. We only need to record the object
% reference in the sequence.
%    \begin{macrocode}
%<package>\cs_new_protected:Npn\@@_struct_kid_struct_gput_right:nn #1 #2 %#1 num of parent struct, #2 kid struct
%<debug>\cs_set_protected:Npn\@@_struct_kid_struct_gput_right:nn #1 #2 %#1 num of parent struct, #2 kid struct
  {
    \@@_seq_gput_right:ce
      { g_@@_struct_kids_#1_seq }
      {
        \pdf_object_ref_indexed:nn { @@/struct }{ #2 }
      }
%<debug>    \seq_gput_right:cn
%<debug>      { g_@@_struct_debug_kids_#1_seq }
%<debug>      {
%<debug>        Struct~#2
%<debug>      }
  }
%<package>\cs_generate_variant:Nn \@@_struct_kid_struct_gput_right:nn {ee}
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {\@@_struct_kid_OBJR_gput_right:nnn,\@@_struct_kid_OBJR_gput_right:eee}
% At last the command to add an OBJR object. This has to write an object first.
% The first argument is the number of the parent structure, the second the
% (expanded) object reference of the annotation. The last argument is the page
% object reference.
%
%    \begin{macrocode}
%<package>\cs_new_protected:Npn\@@_struct_kid_OBJR_gput_right:nnn #1 #2 #3 %#1 num of parent struct,
%<package>                                                                 %#2 obj reference
%<package>                                                                 %#3 page object reference
%<debug>\cs_set_protected:Npn\@@_struct_kid_OBJR_gput_right:nnn #1 #2 #3
  {
    \pdf_object_unnamed_write:nn
      { dict }
      {
        /Type/OBJR/Obj~#2/Pg~#3
      }
    \@@_seq_gput_right:ce
      { g_@@_struct_kids_#1_seq }
      {
        \pdf_object_ref_last:
      }
%<debug>    \seq_gput_right:ce
%<debug>      { g_@@_struct_debug_kids_#1_seq }
%<debug>      {
%<debug>        OBJR~reference
%<debug>      }
  }
%</package|debug>
%<*package>
\cs_generate_variant:Nn\@@_struct_kid_OBJR_gput_right:nnn { eee }
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {\@@_struct_exchange_kid_command:N, \@@_struct_exchange_kid_command:c}
% In luamode it can happen that a single kid in a structure is split at a page
% break into two or more mcid. In this case the lua code has to
% put the dictionary of the kid into an array. See issue 13 at tagpdf repo.
% We exchange the dummy command for the kids to mark this case:
% the leftmost item is popped from the sequence, the first
% \cs{@@_mc_insert_mcid_kids:n} in it is replaced by
% \cs{@@_mc_insert_mcid_single_kids:n}, and the item is put back at the left.
% Change 2024-03-19: don't use a regex - that is slow.
%    \begin{macrocode}
\cs_new_protected:Npn\@@_struct_exchange_kid_command:N #1 %#1 = seq var
  {
    \seq_gpop_left:NN #1 \l_@@_tmpa_tl
    \tl_replace_once:Nnn \l_@@_tmpa_tl
      {\@@_mc_insert_mcid_kids:n}
      {\@@_mc_insert_mcid_single_kids:n}
    \seq_gput_left:NV #1 \l_@@_tmpa_tl
  }

\cs_generate_variant:Nn\@@_struct_exchange_kid_command:N { c }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ \@@_struct_fill_kid_key:n }
% This command adds the kid info to the K entry. In lua mode the
% content contains commands which are expanded later. The argument is the structure
% number.
%
% In generic mode every item of the kids sequence is first expanded and
% empty items are removed. Then, depending on the number of kids,
% nothing (no kid), the single kid, or an array with all kids is
% stored in the |K| entry.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_struct_fill_kid_key:n #1 %#1 is the struct num
  {
    \bool_if:NF\g_@@_mode_lua_bool
      {
        \seq_clear:N \l_@@_tmpa_seq
        \seq_map_inline:cn { g_@@_struct_kids_#1_seq }
          { \seq_put_right:Ne \l_@@_tmpa_seq { ##1 } }
        %\seq_show:c { g_@@_struct_kids_#1_seq }
        %\seq_show:N \l_@@_tmpa_seq
        \seq_remove_all:Nn \l_@@_tmpa_seq {}
        %\seq_show:N \l_@@_tmpa_seq
        \seq_gset_eq:cN { g_@@_struct_kids_#1_seq } \l_@@_tmpa_seq
      }

    \int_case:nnF
      {
        \seq_count:c
          {
            g_@@_struct_kids_#1_seq
          }
      }
      {
        { 0 }
          { } %no kids, do nothing
        { 1 } % 1 kid, insert
          {
            % in this case we need a special command in
            % luamode to get the array right. See issue #13
            \bool_if:NTF\g_@@_mode_lua_bool
              {
                \@@_struct_exchange_kid_command:c
                  {g_@@_struct_kids_#1_seq}
%    \end{macrocode}
% check if we get null
%    \begin{macrocode}
                \tl_set:Ne\l_@@_tmpa_tl
                  {\use:e{\seq_item:cn {g_@@_struct_kids_#1_seq} {1}}}
                \tl_if_eq:NNF\l_@@_tmpa_tl \c_@@_struct_null_tl
                  {
                    \@@_struct_prop_gput:nne
                      {#1}
                      {K}
                      {
                        \seq_item:cn
                          {
                            g_@@_struct_kids_#1_seq
                          }
                          {1}
                      }
                  }
              }
              {
                \@@_struct_prop_gput:nne
                  {#1}
                  {K}
                  {
                    \seq_item:cn
                      {
                        g_@@_struct_kids_#1_seq
                      }
                      {1}
                  }
              }
          } %
      }
      { %many kids, use an array
        \@@_struct_prop_gput:nne
          {#1}
          {K}
          {
            [
              \seq_use:cn
                {
                  g_@@_struct_kids_#1_seq
                }
                {
                  \c_space_tl
                }
            ]
          }
      }
  }

%    \end{macrocode}
% \end{macro}
% \subsection{Output of the object}
% \begin{macro}{\@@_struct_get_dict_content:nN}
% This maps the dictionary content of a structure into a tl-var.
% Basically it does what |\pdfdict_use:n| does.
% This is used a lot so should be rather fast.
% The first argument is the structure number, the second the tl-var,
% which is cleared first.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_struct_get_dict_content:nN #1 #2 %#1: structure num
  {
    \tl_clear:N #2
    \prop_map_inline:cn { g_@@_struct_#1_prop }
      {
        \tl_put_right:Ne #2
          {
%    \end{macrocode}
% Some keys need the option to format the value, e.g. add brackets for an
% array, we also need the option to ignore some entries in the properties.
% If a function |\@@_struct_format_|\meta{key}|:nn| exists, it is used with
% the key and the value as arguments, otherwise the default
% \verb*| /|\meta{key}\verb*| |\meta{value} is added.
%    \begin{macrocode}
            \cs_if_exist_use:cTF {@@_struct_format_##1:nn}
              {{##1}{##2}}
              {\c_space_tl/##1~##2}
          }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_format_rolemap:nn,\@@_struct_format_parentrole:nn}
% These two entries should not end up in the PDF, so the format
% functions are empty.
%    \begin{macrocode}
\cs_new:Nn\@@_struct_format_rolemap:nn{}
\cs_new:Nn\@@_struct_format_parentrole:nn{}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_struct_format_Ref:nn}
% Ref is an array, we store only the content to be able to extend it
% so the formatting command adds the brackets:
%    \begin{macrocode}
\cs_new:Nn\@@_struct_format_Ref:nn{\c_space_tl/#1~[#2]}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_struct_write_obj:n}
% This writes out the structure object.
% This is done in the finish code, in the tree module and
% guarded by the tree boolean.
% If the property list of the structure does not exist, the error
% |struct-no-objnum| is issued.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_struct_write_obj:n #1 % #1 is the struct num
  {
    \prop_if_exist:cTF { g_@@_struct_#1_prop }
      {
%    \end{macrocode}
% It can happen that a structure is not used and so has no parent.
% Simply ignoring it is problematic as it is also recorded in
% the IDTree, so we make an artifact out of it: the root is set
% as parent, |S| is set to |/Artifact|, and a warning is issued if the
% structure has kids.
%    \begin{macrocode}
        \prop_get:cnNF { g_@@_struct_#1_prop } {P}\l_@@_tmpb_tl
          {
            \prop_gput:cne { g_@@_struct_#1_prop } {P}{\pdf_object_ref_indexed:nn { @@/struct }{1}}
            \prop_gput:cne { g_@@_struct_#1_prop } {S}{/Artifact}
            \seq_if_empty:cF {g_@@_struct_kids_#1_seq}
              {
                \msg_warning:nnee
                  {tag}
                  {struct-orphan}
                  { #1 }
                  {\seq_count:c{g_@@_struct_kids_#1_seq}}
              }
          }
        \@@_struct_fill_kid_key:n { #1 }
        \@@_struct_get_dict_content:nN { #1 } \l_@@_tmpa_tl
        \pdf_object_write_indexed:nnne
          { @@/struct }{ #1 }
          {dict}
          {
            \l_@@_tmpa_tl\c_space_tl
            /ID~\@@_struct_get_id:n{#1}
          }
      }
      {
        \msg_error:nnn { tag } { struct-no-objnum } { #1}
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_struct_insert_annot:nn}
% This is the command to insert an annotation into the structure.
% It can probably be used for xform too.
%
% Annotations used as structure content must
% \begin{enumerate}
% \item add a StructParent integer to their dictionary
% \item push the object reference as OBJR object in the structure
% \item add a StructParent/obj-nr reference to the parent tree.
% \end{enumerate}
% For a link this looks like this
% \begin{verbatim}
%        \tag_struct_begin:n { tag=Link }
%        \tag_mc_begin:n { tag=Link }
% (1)    \pdfannot_dict_put:nne
%           { link/URI }
%           { StructParent }
%           { \int_use:N\c@g_@@_parenttree_obj_int }
%        link text
% (2+3)  \@@_struct_insert_annot:nn {obj ref}{parent num}
%        \tag_mc_end:
%        \tag_struct_end:
% \end{verbatim}
% The command does nothing if \cs{g_@@_active_struct_bool} is false.
% Otherwise it takes the parent structure from the top of the structure
% stack, records the page with the property label
% \verb*|@tag@objr@page@|\meta{struct parent number}\verb*| |, adds the OBJR
% kid to the parent structure, adds the parent to the parent tree, and
% at last increases the counter |g_@@_parenttree_obj_int|.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_struct_insert_annot:nn #1 #2 %#1 object reference to the annotation/xform
                                                      %#2 structparent number
  {
    \bool_if:NT \g_@@_active_struct_bool
      {
        %get the number of the parent structure:
        \seq_get:NNF
          \g_@@_struct_stack_seq
          \l_@@_struct_stack_parent_tmpa_tl
          {
            \msg_error:nn { tag } { struct-faulty-nesting }
          }
        %put the obj number of the annot in the kid entry, this also creates
        %the OBJR object
        \@@_property_record:nn {@tag@objr@page@#2 }{ tagabspage }
        \@@_struct_kid_OBJR_gput_right:eee
          {
            \l_@@_struct_stack_parent_tmpa_tl
          }
          {
            #1 %
          }
          {
            \pdf_pageobject_ref:n { \@@_property_ref:nnn {@tag@objr@page@#2 }{ tagabspage }{1} }
          }
        % add the parent obj number to the parent tree:
        \exp_args:Nne
        \@@_parenttree_add_objr:nn
          {
            #2
          }
          {
            \pdf_object_ref_indexed:nn { @@/struct }{ \l_@@_struct_stack_parent_tmpa_tl }
          }
        % increase the int:
        \int_gincr:N \c@g_@@_parenttree_obj_int
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_get_data_struct_tag:}
% This command allows \cs{tag_get:n} to get the current
% structure tag with the keyword |struct_tag|.
%    \begin{macrocode}
\cs_new:Nn \@@_get_data_struct_tag:
  {
    % \tl_tail:n drops the first token of the expanded S entry
    % (for example the slash of /Artifact).
    \exp_args:Ne \tl_tail:n
      {
        \prop_item:cn
          { g_@@_struct_\g_@@_struct_stack_current_tl _prop }
          { S }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_get_data_struct_id:}
% This command allows \cs{tag_get:n} to get the current
% structure id with the keyword |struct_id|.
%    \begin{macrocode}
\cs_new:Nn \@@_get_data_struct_id:
  {
    \@@_struct_get_id:n { \g_@@_struct_stack_current_tl }
  }
%
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_get_data_struct_num:}
% This command allows \cs{tag_get:n} to get the current
% structure number with the keyword |struct_num|.
% We will need to handle nesting.
%    \begin{macrocode}
%<*base>
\cs_new:Nn \@@_get_data_struct_num:
  {
    \g_@@_struct_stack_current_tl
  }
%
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_get_data_struct_counter:}
% This command allows \cs{tag_get:n} to get the current
% state of the structure counter with the keyword |struct_counter|.
% By comparing the numbers it can be used to check the number of
% structure commands in a piece of code.
%    \begin{macrocode}
%<*base>
\cs_new:Nn \@@_get_data_struct_counter:
  {
    \int_use:N \c@g_@@_struct_abs_int
  }
%
%    \end{macrocode}
% \end{macro}
% \section{Keys}
% These are the keys for the user commands.
% We store the tag in a variable. But we should be careful, it is only reliable
% at the beginning.
%
% This socket is used by the tag key. It allows switching between
% the latex-tags and the standard tags.
% Both plugs split the value at the slash into tag and namespace. If the
% value contains no namespace, the one stored for the tag in
% |\g_@@_role_tags_NS_prop| is used. The pdf-tags plug additionally
% passes tag and namespace through |\@@_role_get:VVNN| and stores the
% result instead.
%    \begin{macrocode}
%<*package>
\socket_new:nn { tag/struct/tag }{1}
\socket_new_plug:nnn { tag/struct/tag }{ latex-tags }
  {
    \seq_set_split:Nne \l_@@_tmpa_seq { / }
      {#1/\prop_item:Ne\g_@@_role_tags_NS_prop{#1}}
    \tl_gset:Ne \g_@@_struct_tag_tl   { \seq_item:Nn\l_@@_tmpa_seq {1} }
    \tl_gset:Ne \g_@@_struct_tag_NS_tl{ \seq_item:Nn\l_@@_tmpa_seq {2} }
    \@@_check_structure_tag:N \g_@@_struct_tag_tl
  }

\socket_new_plug:nnn { tag/struct/tag }{ pdf-tags }
  {
    \seq_set_split:Nne \l_@@_tmpa_seq { / }
      {#1/\prop_item:Ne\g_@@_role_tags_NS_prop{#1}}
    \tl_gset:Ne \g_@@_struct_tag_tl   { \seq_item:Nn\l_@@_tmpa_seq {1} }
    \tl_gset:Ne \g_@@_struct_tag_NS_tl{ \seq_item:Nn\l_@@_tmpa_seq {2} }
    \@@_role_get:VVNN
      \g_@@_struct_tag_tl\g_@@_struct_tag_NS_tl\l_@@_tmpa_tl\l_@@_tmpb_tl
    \tl_gset:Ne \g_@@_struct_tag_tl   {\l_@@_tmpa_tl}
    \tl_gset:Ne \g_@@_struct_tag_NS_tl{\l_@@_tmpb_tl}
    \@@_check_structure_tag:N \g_@@_struct_tag_tl
  }
\socket_assign_plug:nn { tag/struct/tag } {latex-tags}
%    \end{macrocode}
% \begin{macro}
%   {
%     label (struct-key),
%     stash (struct-key),
%     parent (struct-key),
%     tag (struct-key),
%     title (struct-key),
%     title-o (struct-key),
%     alt (struct-key),
%     alttext (struct-key),
%     actualtext (struct-key),
%     lang (struct-key),
%     ref (struct-key),
%     E (struct-key)
%   }
% The keys write their value into the property list of the structure
% with the current number of the structure counter.
% \texttt{alttext} is an alias for \texttt{alt}.
% Empty values of \texttt{alt} and \texttt{actualtext} are ignored.
%    \begin{macrocode}
\keys_define:nn { @@ / struct }
  {
    label    .tl_set:N    = \l_@@_struct_key_label_tl,
    stash    .bool_set:N  = \l_@@_struct_elem_stash_bool,
    parent   .code:n      =
      {
        % the parent must exist and must have been created before
        % the current structure
        \bool_lazy_and:nnTF
          {
            \prop_if_exist_p:c { g_@@_struct_\int_eval:n {#1}_prop }
          }
          {
            \int_compare_p:nNn {#1}<{\c@g_@@_struct_abs_int}
          }
          {
            \tl_set:Ne \l_@@_struct_stack_parent_tmpa_tl { \int_eval:n {#1} }
          }
          {
            \msg_warning:nnee { tag } { struct-unknown }
              { \int_eval:n {#1} }
              { parent~key~ignored }
          }
      },
    parent   .default:n   = {-1},
    tag      .code:n      = % S property
      {
        \socket_use:nn { tag/struct/tag }{#1}
      },
    title    .code:n      = % T property
      {
        \str_set_convert:Nnnn
          \l_@@_tmpa_str
          { #1 }
          { default }
          { utf16/hex }
        \@@_struct_prop_gput:nne
          { \int_use:N \c@g_@@_struct_abs_int }
          { T }
          { <\l_@@_tmpa_str> }
      },
    title-o  .code:n      = % T property
      {
        \str_set_convert:Nonn
          \l_@@_tmpa_str
          { #1 }
          { default }
          { utf16/hex }
        \@@_struct_prop_gput:nne
          { \int_use:N \c@g_@@_struct_abs_int }
          { T }
          { <\l_@@_tmpa_str> }
      },
    alt      .code:n      = % Alt property
      {
        \tl_if_empty:oF{#1}
          {
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            \@@_struct_prop_gput:nne
              { \int_use:N \c@g_@@_struct_abs_int }
              { Alt }
              { <\l_@@_tmpa_str> }
          }
      },
    alttext  .meta:n      = {alt=#1},
    actualtext .code:n    = % ActualText property
      {
        \tl_if_empty:oF{#1}
          {
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            \@@_struct_prop_gput:nne
              { \int_use:N \c@g_@@_struct_abs_int }
              { ActualText }
              { <\l_@@_tmpa_str>}
          }
      },
    lang     .code:n      = % Lang property
      {
        \@@_struct_prop_gput:nne
          { \int_use:N \c@g_@@_struct_abs_int }
          { Lang }
          { (#1) }
      },
%    \end{macrocode}
% Ref is an array, the brackets are added through the formatting command.
%    \begin{macrocode}
    ref      .code:n      = % ref property
      {
        \tl_clear:N\l_@@_tmpa_tl
        \clist_map_inline:on {#1}
          {
            \tl_put_right:Ne \l_@@_tmpa_tl
              {~\@@_property_ref:en{tagpdfstruct-##1}{tagstructobj} }
          }
        \@@_struct_gput_data_ref:ee
          { \int_use:N \c@g_@@_struct_abs_int } {\l_@@_tmpa_tl}
      },
    E        .code:n      = % E property
      {
        \str_set_convert:Nnon
          \l_@@_tmpa_str
          { #1 }
          { default }
          { utf16/hex }
        \@@_struct_prop_gput:nne
          { \int_use:N \c@g_@@_struct_abs_int }
          { E }
          { <\l_@@_tmpa_str> }
      },
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{AF (struct-key), AFref (struct-key),
%   AFinline (struct-key),AFinline-o (struct-key)}
% Keys for associated files (AF). They use commands from l3pdffile!
% The stream variants use txt as extension to get the mimetype.
% TODO: check if this should be configurable. For math we will perhaps need another
% extension.
% AF/AFref is an array and can be used more than once, so we store it in a tl
% which is expanded.
% AFinline currently uses the fixed extension txt.
% texsource is a special variant which creates a tex-file, it expects a
% tl-var as value (e.g.
from math grabbing)
%
% \begin{variable}{\g_@@_struct_AFobj_int}
% This variable is used to number the AF-object names.
%    \begin{macrocode}
\int_new:N \g_@@_struct_AFobj_int
%    \end{macrocode}
% \end{variable}
%
% |\@@_struct_add_inline_AF:nn| embeds non-empty content as a stream
% with a numbered file name, adds the resulting reference to the AF list
% of the current structure and updates its |AF| entry.
%    \begin{macrocode}
\cs_generate_variant:Nn \pdffile_embed_stream:nnN {neN}
\cs_new_protected:Nn \@@_struct_add_inline_AF:nn
  % #1: content, #2: extension
  {
    \tl_if_empty:nF {#1}
      {
        \group_begin:
          \int_gincr:N \g_@@_struct_AFobj_int
          \pdffile_embed_stream:neN
            {#1}
            { tag-AFfile\int_use:N\g_@@_struct_AFobj_int.#2 }
            \l_@@_tmpa_tl
          \@@_struct_add_AF:ee
            { \int_use:N \c@g_@@_struct_abs_int }
            { \l_@@_tmpa_tl }
          \@@_struct_prop_gput:nne
            { \int_use:N \c@g_@@_struct_abs_int }
            { AF }
            {
              [
                \tl_use:c
                  { g_@@_struct_\int_eval:n {\c@g_@@_struct_abs_int}_AF_tl }
              ]
            }
        \group_end:
      }
  }

\cs_generate_variant:Nn \@@_struct_add_inline_AF:nn {on}
%    \end{macrocode}
%
% |\@@_struct_add_AF:nn| appends an object reference to the AF list of a
% structure; the tl holding the list is created on first use.
%    \begin{macrocode}
\cs_new_protected:Nn \@@_struct_add_AF:nn
  % #1: struct num, #2: object reference
  {
    \tl_if_exist:cTF { g_@@_struct_#1_AF_tl }
      {
        \tl_gput_right:ce { g_@@_struct_#1_AF_tl } { \c_space_tl #2 }
      }
      {
        \tl_new:c  { g_@@_struct_#1_AF_tl }
        \tl_gset:ce { g_@@_struct_#1_AF_tl } { #2 }
      }
  }
\cs_generate_variant:Nn \@@_struct_add_AF:nn {en,ee}
\keys_define:nn { @@ / struct }
  {
    AF .code:n = % AF property
      {
        \pdf_object_if_exist:eTF {#1}
          {
            \@@_struct_add_AF:ee
              { \int_use:N \c@g_@@_struct_abs_int }
              { \pdf_object_ref:e {#1} }
            \@@_struct_prop_gput:nne
              { \int_use:N \c@g_@@_struct_abs_int }
              { AF }
              {
                [
                  \tl_use:c
                    { g_@@_struct_\int_eval:n {\c@g_@@_struct_abs_int}_AF_tl }
                ]
              }
          }
          {
            % message?
          }
      },
    AFref .code:n = % AF property
      {
        \tl_if_empty:eF {#1}
          {
            \@@_struct_add_AF:ee
              { \int_use:N \c@g_@@_struct_abs_int }
              {#1}
            \@@_struct_prop_gput:nne
              { \int_use:N \c@g_@@_struct_abs_int }
              { AF }
              {
                [
                  \tl_use:c
                    { g_@@_struct_\int_eval:n {\c@g_@@_struct_abs_int}_AF_tl }
                ]
              }
          }
      },
    AFinline .code:n =
      {
        \@@_struct_add_inline_AF:nn {#1} {txt}
      },
    AFinline-o .code:n =
      {
        \@@_struct_add_inline_AF:on {#1} {txt}
      },
    texsource .code:n =
      {
        % the Filespec settings are local to this group
        \group_begin:
          \pdfdict_put:nnn { l_pdffile/Filespec } { Desc } { (TeX~source) }
          \pdfdict_put:nnn { l_pdffile/Filespec } { AFRelationship } { /Source }
          \@@_struct_add_inline_AF:on {#1} {tex}
        \group_end:
      },
    mathml .code:n =
      {
        % the Filespec settings are local to this group
        \group_begin:
          \pdfdict_put:nnn { l_pdffile/Filespec } { Desc } { (mathml~representation) }
          \pdfdict_put:nnn { l_pdffile/Filespec } { AFRelationship } { /Supplement }
          \@@_struct_add_inline_AF:on {#1} {xml}
        \group_end:
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{root-AF (setup-key)}
% The root structure can take AF keys too, so we provide a key for it.
% This key is used with |\tagpdfsetup|, not in a structure!
% \begin{macrocode} \keys_define:nn { @@ / setup } { root-AF .code:n = { \pdf_object_if_exist:nTF {#1} { \@@_struct_add_AF:ee { 1 }{\pdf_object_ref:n {#1}} \@@_struct_prop_gput:nne { 1 } { AF } { [ \tl_use:c { g_@@_struct_1_AF_tl } ] } } { } }, } % \end{macrocode} % \end{macro} % \section{User commands} % A default language can be set: if this variable is not empty, % its content is stored as |Lang| entry of every new structure. % \begin{macro}{\l_@@_struct_lang_tl} % \begin{macrocode} \tl_new:N \l_@@_struct_lang_tl % % \end{macrocode} % \end{macro} % % \begin{macro}{\tag_struct_begin:n,\tag_struct_end:} % \cs{tag_struct_begin:n} steps the structure counter, creates the property list, % the kids sequence and the PDF object of the new structure, processes the keys, % determines the parent and pushes the structure on the stack. % Unless the structure is stashed, it is also recorded as kid of its parent. % \cs{tag_struct_end:} pops the structure from the stack and makes the previous % one the current structure again. % Note that the |Type| entry is currently written twice, before and after the % |Lang| entry; the second write is redundant. % \begin{macrocode} %\cs_new_protected:Npn \tag_struct_begin:n #1 {\int_gincr:N \c@g_@@_struct_abs_int} %\cs_new_protected:Npn \tag_struct_end:{} %\cs_new_protected:Npn \tag_struct_end:n{} %<*package|debug> %\cs_set_protected:Npn \tag_struct_begin:n #1 %#1 key-val %\cs_set_protected:Npn \tag_struct_begin:n #1 %#1 key-val { %\@@_check_if_active_struct:T %\@@_check_if_active_struct:TF { \group_begin: \int_gincr:N \c@g_@@_struct_abs_int \@@_prop_new:c { g_@@_struct_\int_eval:n { \c@g_@@_struct_abs_int }_prop } % \prop_new:c { g_@@_struct_debug_\int_eval:n {\c@g_@@_struct_abs_int}_prop } \@@_new_output_prop_handler:n {\int_eval:n { \c@g_@@_struct_abs_int }} \@@_seq_new:c { g_@@_struct_kids_\int_eval:n { \c@g_@@_struct_abs_int }_seq} % \seq_new:c { g_@@_struct_debug_kids_\int_eval:n {\c@g_@@_struct_abs_int}_seq } \pdf_object_new_indexed:nn { @@/struct } { \c@g_@@_struct_abs_int } \@@_struct_prop_gput:nnn { \int_use:N \c@g_@@_struct_abs_int } { Type } { /StructElem } \tl_if_empty:NF \l_@@_struct_lang_tl { \@@_struct_prop_gput:nne { \int_use:N \c@g_@@_struct_abs_int } { Lang } { (\l_@@_struct_lang_tl) } } \@@_struct_prop_gput:nnn { \int_use:N \c@g_@@_struct_abs_int } { Type } { /StructElem } \tl_set:Nn \l_@@_struct_stack_parent_tmpa_tl {-1} \keys_set:nn { @@ / struct} { #1 } % \end{macrocode} % \begin{macrocode} \@@_struct_set_tag_info:eVV { \int_use:N \c@g_@@_struct_abs_int } \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl \@@_check_structure_has_tag:n {
\int_use:N \c@g_@@_struct_abs_int } \tl_if_empty:NF \l_@@_struct_key_label_tl { \@@_property_record:eV {tagpdfstruct-\l_@@_struct_key_label_tl} \c_@@_property_struct_clist } % \end{macrocode} % The structure number of the parent is either taken from the stack or % has been set with the parent key. % \begin{macrocode} \int_compare:nNnT { \l_@@_struct_stack_parent_tmpa_tl } = { -1 } { \seq_get:NNF \g_@@_struct_stack_seq \l_@@_struct_stack_parent_tmpa_tl { \msg_error:nn { tag } { struct-faulty-nesting } } } \seq_gpush:NV \g_@@_struct_stack_seq \c@g_@@_struct_abs_int \@@_role_get:VVNN \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl \l_@@_struct_roletag_tl \l_@@_struct_roletag_NS_tl % \end{macrocode} % We store the target role and its namespace in the rolemap entry: % \begin{macrocode} \@@_struct_prop_gput:nne { \int_use:N \c@g_@@_struct_abs_int } { rolemap } { {\l_@@_struct_roletag_tl}{\l_@@_struct_roletag_NS_tl} } % \end{macrocode} % We also store which role to use for the parent/child test. If the role is % one of Part, Div, NonStruct we have to retrieve it from the parent. % If the structure is stashed, this must be updated! % \begin{macrocode} \str_case:VnTF \l_@@_struct_roletag_tl { {Part} {} {Div} {} {NonStruct} {} } { \prop_get:cnNT { g_@@_struct_ \l_@@_struct_stack_parent_tmpa_tl _prop } { parentrole } \l_@@_get_tmpc_tl { \@@_struct_prop_gput:nno { \int_use:N \c@g_@@_struct_abs_int } { parentrole } { \l_@@_get_tmpc_tl } } } { \@@_struct_prop_gput:nne { \int_use:N \c@g_@@_struct_abs_int } { parentrole } { {\l_@@_struct_roletag_tl}{\l_@@_struct_roletag_NS_tl} } } % \end{macrocode} % \begin{macrocode} \seq_gpush:Ne \g_@@_struct_tag_stack_seq {{\g_@@_struct_tag_tl}{\l_@@_struct_roletag_tl}} \tl_gset:NV \g_@@_struct_stack_current_tl \c@g_@@_struct_abs_int %\seq_show:N \g_@@_struct_stack_seq \bool_if:NF \l_@@_struct_elem_stash_bool { % \end{macrocode} % Check if the tag can be used inside the parent. It only makes sense % if the structure is actually used here, so it is guarded by the stash boolean.
% For now we ignore the namespace! % \begin{macrocode} \@@_struct_get_parentrole:eNN {\l_@@_struct_stack_parent_tmpa_tl} \l_@@_get_parent_tmpa_tl \l_@@_get_parent_tmpb_tl \@@_check_parent_child:VVVVN \l_@@_get_parent_tmpa_tl \l_@@_get_parent_tmpb_tl \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl \l_@@_parent_child_check_tl \int_compare:nNnT {\l_@@_parent_child_check_tl}<0 { \prop_get:cnN { g_@@_struct_ \l_@@_struct_stack_parent_tmpa_tl _prop} {S} \l_@@_tmpa_tl \msg_warning:nneee { tag } {role-parent-child} { \l_@@_get_parent_tmpa_tl/\l_@@_get_parent_tmpb_tl } { \g_@@_struct_tag_tl/\g_@@_struct_tag_NS_tl } { not~allowed~ (struct~\l_@@_struct_stack_parent_tmpa_tl,~\l_@@_tmpa_tl \c_space_tl-->~struct~\int_eval:n {\c@g_@@_struct_abs_int}) } \cs_set_eq:NN \l_@@_role_remap_tag_tl \g_@@_struct_tag_tl \cs_set_eq:NN \l_@@_role_remap_NS_tl \g_@@_struct_tag_NS_tl \@@_role_remap: \cs_gset_eq:NN \g_@@_struct_tag_tl \l_@@_role_remap_tag_tl \cs_gset_eq:NN \g_@@_struct_tag_NS_tl \l_@@_role_remap_NS_tl \@@_struct_set_tag_info:eVV { \int_use:N \c@g_@@_struct_abs_int } \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl } % \end{macrocode} % Set the Parent. % \begin{macrocode} \@@_struct_prop_gput:nne { \int_use:N \c@g_@@_struct_abs_int } { P } { \pdf_object_ref_indexed:nn { @@/struct} { \l_@@_struct_stack_parent_tmpa_tl } } % \end{macrocode} % \begin{macrocode} %record this structure as kid: %\tl_show:N \g_@@_struct_stack_current_tl %\tl_show:N \l_@@_struct_stack_parent_tmpa_tl \@@_struct_kid_struct_gput_right:ee { \l_@@_struct_stack_parent_tmpa_tl } { \g_@@_struct_stack_current_tl } %\prop_show:c { g_@@_struct_\g_@@_struct_stack_current_tl _prop } %\seq_show:c {g_@@_struct_kids_\l_@@_struct_stack_parent_tmpa_tl _seq} } % \end{macrocode} % The debug mode stores the data in a second prop and replaces values with more suitable ones. % (If the structure is updated later this gets perhaps lost, but well ...) % This must be done outside of the stash boolean.
% \begin{macrocode} % \prop_gset_eq:cc % { g_@@_struct_debug_\int_eval:n {\c@g_@@_struct_abs_int}_prop } % { g_@@_struct_\int_eval:n {\c@g_@@_struct_abs_int}_prop } % \prop_gput:cne % { g_@@_struct_debug_\int_eval:n {\c@g_@@_struct_abs_int}_prop } % { P } % { % \bool_if:NTF \l_@@_struct_elem_stash_bool % {no~parent:~stashed} % { % parent~structure:~\l_@@_struct_stack_parent_tmpa_tl\c_space_tl =~ % \prop_item:cn{ g__tag_struct_\l_@@_struct_stack_parent_tmpa_tl _prop }{S} % } % } % \prop_gput:cne % { g_@@_struct_debug_\int_eval:n {\c@g_@@_struct_abs_int}_prop } % { NS } % { \g_@@_struct_tag_NS_tl } % \end{macrocode} % \begin{macrocode} %\prop_show:c { g_@@_struct_\g_@@_struct_stack_current_tl _prop } %\seq_show:c {g_@@_struct_kids_\l_@@_struct_stack_parent_tmpa_tl _seq} % \@@_debug_struct_begin_insert:n { #1 } \group_end: } %{ \@@_debug_struct_begin_ignore:n { #1 }} } %\cs_set_protected:Nn \tag_struct_end: %\cs_set_protected:Nn \tag_struct_end: { %take the current structure num from the stack: %the objects are written later, lua mode hasn't all needed info yet %\seq_show:N \g_@@_struct_stack_seq %\@@_check_if_active_struct:T %\@@_check_if_active_struct:TF { \seq_gpop:NN \g_@@_struct_tag_stack_seq \l_@@_tmpa_tl \seq_gpop:NNTF \g_@@_struct_stack_seq \l_@@_tmpa_tl { \@@_check_info_closing_struct:o { \g_@@_struct_stack_current_tl } } { \@@_check_no_open_struct: } % get the previous one, shouldn't be empty as the root should be there \seq_get:NNTF \g_@@_struct_stack_seq \l_@@_tmpa_tl { \tl_gset:NV \g_@@_struct_stack_current_tl \l_@@_tmpa_tl } { \@@_check_no_open_struct: } \seq_get:NNT \g_@@_struct_tag_stack_seq \l_@@_tmpa_tl { \tl_gset:Ne \g_@@_struct_tag_tl { \exp_last_unbraced:NV\use_i:nn \l_@@_tmpa_tl } \prop_get:NVNT\g_@@_role_tags_NS_prop \g_@@_struct_tag_tl\l_@@_tmpa_tl { \tl_gset:Ne \g_@@_struct_tag_NS_tl { \l_@@_tmpa_tl } } } %\@@_debug_struct_end_insert: } %{\@@_debug_struct_end_ignore:} } \cs_set_protected:Npn \tag_struct_end:n #1 { %
\@@_check_if_active_struct:T{\@@_debug_struct_end_check:n{#1}} \tag_struct_end: } % % \end{macrocode} % \end{macro} % \begin{macro}{\tag_struct_use:n} % This command allows a stashed structure to be used in another place. % The argument is the label of the stashed structure. If no structure is known % for the label, the warning |struct-label-unknown| is issued. % TODO: decide how it should be guarded. Probably by the struct-check. % \begin{macrocode} %\cs_new_protected:Npn \tag_struct_use:n #1 {} %<*package|debug> \cs_set_protected:Npn \tag_struct_use:n #1 %#1 is the label { \@@_check_if_active_struct:T { \prop_if_exist:cTF { g_@@_struct_\@@_property_ref:enn{tagpdfstruct-#1}{tagstruct}{unknown}_prop } % { \@@_check_struct_used:n {#1} %add the label structure as kid to the current structure (can be the root) \@@_struct_kid_struct_gput_right:ee { \g_@@_struct_stack_current_tl } { \@@_property_ref:enn{tagpdfstruct-#1}{tagstruct}{1} } %add the current structure to the labeled one as parents \@@_prop_gput:cne { g_@@_struct_\@@_property_ref:enn{tagpdfstruct-#1}{tagstruct}{1}_prop } { P } { \pdf_object_ref_indexed:nn { @@/struct } { \g_@@_struct_stack_current_tl } } % \end{macrocode} % debug code % \begin{macrocode} % \prop_gput:cne % { g_@@_struct_debug_\@@_property_ref:enn{tagpdfstruct-#1}{tagstruct}{1}_prop } % { P } % { % parent~structure:~\g_@@_struct_stack_current_tl\c_space_tl=~ % \g_@@_struct_tag_tl % } % \end{macrocode} % Check if the tag is allowed as child.
Here we have to retrieve the % tag info for the child, while the data for the parent is in % the global tl-vars: % \begin{macrocode} \@@_struct_get_parentrole:eNN {\@@_property_ref:enn{tagpdfstruct-#1}{tagstruct}{1}} \l_@@_tmpa_tl \l_@@_tmpb_tl \@@_check_parent_child:VVVVN \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl \l_@@_tmpa_tl \l_@@_tmpb_tl \l_@@_parent_child_check_tl \int_compare:nNnT {\l_@@_parent_child_check_tl}<0 { \cs_set_eq:NN \l_@@_role_remap_tag_tl \g_@@_struct_tag_tl \cs_set_eq:NN \l_@@_role_remap_NS_tl \g_@@_struct_tag_NS_tl \@@_role_remap: \cs_gset_eq:NN \g_@@_struct_tag_tl \l_@@_role_remap_tag_tl \cs_gset_eq:NN \g_@@_struct_tag_NS_tl \l_@@_role_remap_NS_tl \@@_struct_set_tag_info:eVV { \int_use:N \c@g_@@_struct_abs_int } \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl } } { \msg_warning:nnn{ tag }{struct-label-unknown}{#1} } } } % % \end{macrocode} % \end{macro} % \begin{macro}{\tag_struct_use_num:n} % This command allows a stashed structure to be used in another place. % Unlike the previous command it doesn't use a label but directly % the structure number to identify the stashed structure. % TODO: decide how it should be guarded. Probably by the struct-check.
% If the structure \#1 already has a parent, the warning |struct-used-twice| is issued, % but the structure is nevertheless added as kid to the current structure and % the current structure is stored as its parent. % If no structure with the number \#1 exists, the warning |struct-label-unknown| is issued. % \begin{macrocode} %\cs_new_protected:Npn \tag_struct_use_num:n #1 {} %<*package|debug> \cs_set_protected:Npn \tag_struct_use_num:n #1 %#1 is structure number { \@@_check_if_active_struct:T { \prop_if_exist:cTF { g_@@_struct_#1_prop } % { \prop_get:cnNT {g_@@_struct_#1_prop} {P} \l_@@_tmpa_tl { \msg_warning:nnn { tag } {struct-used-twice} {#1} } %add the \#1 structure as kid to the current structure (can be the root) \@@_struct_kid_struct_gput_right:ee { \g_@@_struct_stack_current_tl } { #1 } %add the current structure to \#1 as parent \@@_struct_prop_gput:nne { #1 } { P } { \pdf_object_ref_indexed:nn { @@/struct }{ \g_@@_struct_stack_current_tl } } % \prop_gput:cne % { g_@@_struct_debug_#1_prop } % { P } % { % parent~structure:~\g_@@_struct_stack_current_tl\c_space_tl=~ % \g_@@_struct_tag_tl % } % \end{macrocode} % Check if the tag is allowed as child. Here we have to retrieve the % tag info for the child, while the data for the parent is in % the global tl-vars. % NOTE(review): after a remap the tag info is written to the structure whose number % is the current value of the structure counter, which is not necessarily the % current structure on the stack --- confirm that this is intended. % \begin{macrocode} \@@_struct_get_parentrole:eNN {#1} \l_@@_tmpa_tl \l_@@_tmpb_tl \@@_check_parent_child:VVVVN \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl \l_@@_tmpa_tl \l_@@_tmpb_tl \l_@@_parent_child_check_tl \int_compare:nNnT {\l_@@_parent_child_check_tl}<0 { \cs_set_eq:NN \l_@@_role_remap_tag_tl \g_@@_struct_tag_tl \cs_set_eq:NN \l_@@_role_remap_NS_tl \g_@@_struct_tag_NS_tl \@@_role_remap: \cs_gset_eq:NN \g_@@_struct_tag_tl \l_@@_role_remap_tag_tl \cs_gset_eq:NN \g_@@_struct_tag_NS_tl \l_@@_role_remap_NS_tl \@@_struct_set_tag_info:eVV { \int_use:N \c@g_@@_struct_abs_int } \g_@@_struct_tag_tl \g_@@_struct_tag_NS_tl } } { \msg_warning:nnn{ tag }{struct-label-unknown}{#1} } } } % % \end{macrocode} % \end{macro} % \begin{macro}[EXP]{\tag_struct_object_ref:n} % This command gives the object reference of a structure. The argument is the % structure number, which can be obtained for the current structure with |\tag_get:n{struct_num}|. % TODO check if it should be in base too.
%    \begin{macrocode}
%<*package>
\cs_new:Nn \tag_struct_object_ref:n
  {
    \pdf_object_ref_indexed:nn { @@/struct } {#1}
  }
\cs_generate_variant:Nn \tag_struct_object_ref:n {e}
%    \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\tag_struct_gput:nnn}
% This is a command that allows updating the data of a structure.
% This often can't be done simply by replacing the value, as we have to
% preserve and extend existing content. We therefore use dedicated functions
% adjusted to the key in question.
% The first argument is the number of the structure,
% the second a keyword referring to a function,
% the third the value. Currently the only keyword is \texttt{ref} which updates
% the Ref key (an array).
% If no function exists for the keyword, nothing is done.
%    \begin{macrocode}
\cs_new_protected:Nn \tag_struct_gput:nnn
  {
    \cs_if_exist:cT { @@_struct_gput_data_#2:nn }
      {
        \use:c { @@_struct_gput_data_#2:nn } {#1} {#3}
      }
    % unknown keyword: the call is silently ignored (warning??)
  }
\cs_generate_variant:Nn \tag_struct_gput:nnn {ene,nne}
%
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_gput_data_ref:nn}
% This appends a list of object references to the |Ref| entry of a
% structure. If there is no |Ref| entry yet, the list becomes its value.
%    \begin{macrocode}
%<*package>
\cs_new_protected:Nn \@@_struct_gput_data_ref:nn
  % #1: receiving struct num, #2: list of object ref
  {
    \prop_get:cnN { g_@@_struct_#1_prop } { Ref } \l_@@_get_tmpc_tl
    \quark_if_no_value:NTF \l_@@_get_tmpc_tl
      {
        \@@_struct_prop_gput:nne {#1} { Ref } {#2}
      }
      {
        \@@_struct_prop_gput:nne {#1} { Ref }
          { \l_@@_get_tmpc_tl \c_space_tl #2 }
      }
  }
\cs_generate_variant:Nn \@@_struct_gput_data_ref:nn {ee}
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {
%    \tag_struct_insert_annot:nn,
%    \tag_struct_insert_annot:xx,
%    \tag_struct_insert_annot:ee
%  }
% \begin{macro}[EXP]
%  {
%    \tag_struct_parent_int:
%  }
% These are the user commands to insert annotations. They must be used
% together to get the numbers right. They use a counter for the
% |StructParent| and \cs{tag_struct_insert_annot:nn} increases the
% counter given back by \cs{tag_struct_parent_int:}.
%
% It must be used together with |\tag_struct_parent_int:| to insert an
% annotation.
% TODO: decide how it should be guarded if tagging is deactivated.
%    \begin{macrocode}
\cs_new_protected:Nn \tag_struct_insert_annot:nn
  % #1: should be an object reference
  % #2: struct parent num
  {
    \@@_check_if_active_struct:T
      {
        \@@_struct_insert_annot:nn {#1} {#2}
      }
  }
\cs_generate_variant:Nn \tag_struct_insert_annot:nn {xx,ee}
\cs_new:Nn \tag_struct_parent_int:
  {
    \int_use:c { c@g_@@_parenttree_obj_int }
  }
%
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \section{Attributes and attribute classes}
%    \begin{macrocode}
%<*header>
\ProvidesExplPackage {tagpdf-attr-code} {2024-04-12} {0.99b}
  {part of tagpdf - code related to attributes and attribute classes}
%
%    \end{macrocode}
% \subsection{Variables}
% \begin{variable}
%   {
%    ,\g_@@_attr_entries_prop
%    ,\g_@@_attr_class_used_prop
%    ,\g_@@_attr_objref_prop
%    ,\l_@@_attr_value_tl
%   }
% |\g_@@_attr_entries_prop| will store attribute names and their dictionary content.\\
% |\g_@@_attr_class_used_prop| will hold the attributes which have been used as
% class name.
% |\l_@@_attr_value_tl| is used to build the attribute array or key.
% Every time an attribute is used for the first time, an object is created
% with its content. The name-object reference relation is stored in
% |\g_@@_attr_objref_prop|.
%    \begin{macrocode}
%<*package>
\prop_new:N        \g_@@_attr_entries_prop
\prop_new_linked:N \g_@@_attr_class_used_prop
\tl_new:N          \l_@@_attr_value_tl
\prop_new:N        \g_@@_attr_objref_prop %will contain obj num of used attributes
%    \end{macrocode}
% This seq is currently kept for compatibility with the table code.
%    \begin{macrocode}
\seq_new:N \g_@@_attr_class_used_seq
%    \end{macrocode}
% \end{variable}
% \subsection{Commands and keys}
% \begin{macro}{\@@_attr_new_entry:nn,role/new-attribute (setup-key), newattribute (deprecated)}
% This allows attributes to be defined. Defined attributes
% are stored in a global property. |role/new-attribute| expects
% two brace groups, the name and the content. The content typically
% needs an |/O| key for the owner. An example looks like
% this.
%
% TODO: consider to put them directly in the ClassMap, that is perhaps
% more effective.
% \begin{verbatim}
% \tagpdfsetup
%  {
%   role/new-attribute =
%    {TH-col}{/O /Table /Scope /Column},
%   role/new-attribute =
%    {TH-row}{/O /Table /Scope /Row},
%   }
% \end{verbatim}
% The attribute name is converted to a PDF name before it is stored.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_attr_new_entry:nn #1 #2 %#1:name, #2: content
  {
    \prop_gput:Nen \g_@@_attr_entries_prop
      {\pdf_name_from_unicode_e:n{#1}}{#2}
  }

\cs_generate_variant:Nn \@@_attr_new_entry:nn {ee}
\keys_define:nn { @@ / setup }
  {
    % the value consists of two brace groups which become the two arguments
    role/new-attribute .code:n =
      {
        \@@_attr_new_entry:nn #1
      }
%    \end{macrocode}
% deprecated name
%    \begin{macrocode}
   ,newattribute .code:n =
      {
        \@@_attr_new_entry:nn #1
      },
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{attribute-class (struct-key)}
% attribute-class has to store the used attribute names so that
% they can be added to the ClassMap later.
% The value is a comma list of attribute names. The |C| entry of the
% structure is set to the PDF name, or to an array of names if more than
% one is given. An unknown name raises the error |attr-unknown|.
%    \begin{macrocode}
\keys_define:nn { @@ / struct }
  {
    attribute-class .code:n =
     {
       \clist_set:Ne \l_@@_tmpa_clist { #1 }
       \seq_set_from_clist:NN \l_@@_tmpb_seq \l_@@_tmpa_clist
%    \end{macrocode}
% we convert the names into pdf names with slash
%    \begin{macrocode}
       \seq_set_map_e:NNn \l_@@_tmpa_seq \l_@@_tmpb_seq
         {
           \pdf_name_from_unicode_e:n {##1}
         }
       \seq_map_inline:Nn \l_@@_tmpa_seq
         {
           \prop_if_in:NnF \g_@@_attr_entries_prop {##1}
             {
               \msg_error:nnn { tag } { attr-unknown } { ##1 }
             }
           \prop_gput:Nnn\g_@@_attr_class_used_prop { ##1} {}
         }
       % brackets are only added if there is more than one name
       \tl_set:Ne \l_@@_tmpa_tl
         {
           \int_compare:nT { \seq_count:N \l_@@_tmpa_seq > 1 }{[}
           \seq_use:Nn \l_@@_tmpa_seq { \c_space_tl }
           \int_compare:nT { \seq_count:N \l_@@_tmpa_seq > 1 }{]}
         }
       \int_compare:nT { \seq_count:N \l_@@_tmpa_seq > 0 }
         {
           \@@_struct_prop_gput:nne
             { \int_use:N \c@g_@@_struct_abs_int }
             { C }
             { \l_@@_tmpa_tl }
          %\prop_show:c  { g_@@_struct_\int_eval:n {\c@g_@@_struct_abs_int}_prop }
         }
     }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{attribute (struct-key)}
% The value is a comma list of attribute names; an empty list is ignored.
% The first time an attribute is used, an unnamed dict object with the
% content stored in |\g_@@_attr_entries_prop| is written and its object
% reference is recorded in |\g_@@_attr_objref_prop|; later uses take the
% reference from there. The |A| entry of the structure is set to the
% reference, or to an array of references if more than one attribute is given.
% An unknown name raises the error |attr-unknown|.
%    \begin{macrocode}
\keys_define:nn { @@ / struct }
  {
    attribute .code:n  = % A property (attribute, value currently a dictionary)
      {
        \clist_set:Ne          \l_@@_tmpa_clist { #1 }
        \clist_if_empty:NF \l_@@_tmpa_clist
          {
            \seq_set_from_clist:NN \l_@@_tmpb_seq \l_@@_tmpa_clist
%    \end{macrocode}
% we convert the names into pdf names with slash
%    \begin{macrocode}
            \seq_set_map_e:NNn \l_@@_tmpa_seq \l_@@_tmpb_seq
              {
                \pdf_name_from_unicode_e:n {##1}
              }
            \tl_set:Ne \l_@@_attr_value_tl
              {
                \int_compare:nT { \seq_count:N \l_@@_tmpa_seq > 1 }{[}%]
              }
            \seq_map_inline:Nn \l_@@_tmpa_seq
              {
                \prop_if_in:NnF \g_@@_attr_entries_prop {##1}
                  {
                    \msg_error:nnn { tag } { attr-unknown } { ##1 }
                  }
                \prop_if_in:NnF \g_@@_attr_objref_prop {##1}
                  {%\prop_show:N \g_@@_attr_entries_prop
                    \pdf_object_unnamed_write:ne
                      { dict }
                      {
                        \prop_item:Nn\g_@@_attr_entries_prop {##1}
                      }
                    \prop_gput:Nne \g_@@_attr_objref_prop {##1} {\pdf_object_ref_last:}
                  }
                \tl_put_right:Ne \l_@@_attr_value_tl
                  {
                    \c_space_tl
                    \prop_item:Nn \g_@@_attr_objref_prop {##1}
                  }
     %     \tl_show:N \l_@@_attr_value_tl
              }
            \tl_put_right:Ne \l_@@_attr_value_tl
              { %[
                \int_compare:nT { \seq_count:N \l_@@_tmpa_seq > 1 }{]}%
              }
     %    \tl_show:N \l_@@_attr_value_tl
            \@@_struct_prop_gput:nne
              { \int_use:N \c@g_@@_struct_abs_int }
              { A }
              { \l_@@_attr_value_tl }
          }
      },
  }
%
%    \end{macrocode}
% \end{macro}
% \end{implementation}