% \iffalse %% Source File: l3sys-query.dtx %% Copyright (C) 2024 %% %% The LaTeX Project and any individual authors listed elsewhere %% in this file. %% %% This file may be distributed under the terms of the LPPL. %% See README for details. % %<*dtx> \ProvidesFile{l3sys-query.dtx} % %\ProvidesPackage{l3sys-query} % \ProvidesFile{l3sys-query.drv} % \fi % \ProvidesFile{l3sys-query.dtx} [2024-03-13 v1.0a LaTeX2e interface for l3sys file queries] % % \iffalse %<*driver> \documentclass{l3doc}%{ltxdoc} \begin{document} \DocInput{l3sys-query.dtx} \end{document} % % \fi % % \GetFileInfo{l3sys-query.dtx} % % \title{\LaTeX\ document interface to the \pkg{l3sys-query} script: % System queries for LaTeX using Lua\thanks{This file % has version number \fileversion, last % revised \filedate.}} % \author{\LaTeX\ project} % \date{\filedate} % % % \maketitle % % \tableofcontents % % \section{Introduction} % % \TeX{} engines provide only very limited access to information about the system % they are used on: using primitives, one can for example get the size of a single % file, but not a list of files in a given location. For most documents, this is % not an issue as they are self-contained. However, for cases where % \enquote{dynamic} construction of parts of a document is needed based on file % lists or other system-dependent data, methods to obtain this from (restricted) % shell escape are desirable. % % Security considerations mean that directly querying the system shell is % problematic for general use. Instead, \emph{restricted} shell escape may be used % to get many details, provided a suitable tool is available to provide the % information in a platform-neutral and security-conscious way. The Java program % \texttt{texosquery}, written by Nicola Talbot, has been available for a number % of years to provide this facility. As well as file system insight, % \texttt{texosquery} also provides for example locale data and other system % information. 
However, the requirement for Java means that the script is not % automatically usable when a \TeX{} system is installed. % % The \LaTeX{} team have therefore provided a Lua-based script, % \texttt{l3sys-query}, which conforms to the security requirements of \TeX{} Live % using Lua to obtain the system information. This means that it can be used % \enquote{out of the box} across platforms. The facilities provided by % \texttt{l3sys-query} are more limited than \texttt{texosquery}, partly as some % information is available in modern \TeX{} systems using primitives, and partly % as the aim of \texttt{l3sys-query} is to provide information where there are % defined use cases. Requests for additional data interfaces are welcome. % % This package provides a document level \textit{Key-Value} interface % to the \texttt{l3sys-query} functionality, although as a convenience % it also summarizes the documentation for the L3 programming % interface layer (provided by the \pkg{l3sys} module of % \pkg{l3kernel}), and of the command line Lua script % \texttt{l3sys-query} which is distributed separately and provides % the underlying functionality. % % % % \section{The document level key-value interface\label{sec:kv}} % % This provides an interface to \texttt{l3sys-query} based on % \pkg{l3keys} that allows the options to be specified and checked % individually. Internally the supplied key values are used to build % up the arguments to the L3 % programming layer commands described in section~\ref{sec:expl3}. % % Results containing path separators \emph{always} % use~|/|, irrespective of the platform in use. % % % \begin{function} % {\QueryWorkingDirectory} % \begin{syntax} % \cs{QueryWorkingDirectory} \Arg{result cmd} % \end{syntax} % Defines the supplied command \meta{result cmd} to hold % the absolute path to the current working directory of the % \TeX\ system. 
% This is the directory (folder) from which \TeX\ was started, so usually % the location of the main document file. % \end{function} % % \begin{function} % {\QueryFiles, \QueryFilesTF} % \begin{syntax} % \cs{QueryFiles} \oarg{options} \Arg{spec} \Arg{function} % \cs{QueryFilesTF} \oarg{options} \Arg{spec} \Arg{function} \Arg{pre code} \Arg{empty list code} % \end{syntax} % This generates a file list based on the \meta{spec} and \meta{options}. % The command then applies \meta{function} to each item in the sequence of filenames. % The \meta{function} should be a macro body which will be passed the file path as |#1|. % % The TF version executes the T (\meta{pre code}) argument before iterating over a non-empty list % and the F (\meta{empty list code}) argument if the list is empty. % \end{function} % % Note that this interface, in mapping directly over the sequence of % filenames, does not allow some uses which are provided by the % programming interface described in the following section, which allows % the sequence to be manipulated before being used. % % The defined keys map very closely to the options of the % \texttt{l3sys-query} command which is described in % section~\ref{sec:lua}. % % \begin{itemize} % \item % The keys \texttt{recursive}, \texttt{ignore-case}, \texttt{reverse}, \texttt{pattern} % take no values and map directly to the command line options of the same name. % % \item % The key \texttt{sort} accepts the values \texttt{date} and \texttt{name}. % % \item % The key \texttt{type} accepts \texttt{d} or \texttt{f}. % % \item % The key \texttt{exclude} accepts a glob (or Lua pattern) matching % files to be excluded. The package arranges that the quoting of the % argument is automatically added if unrestricted shell escape is % enabled. % % \end{itemize} % % Within the main \meta{spec} argument, and the value of the \texttt{exclude} key, % the following characters may need special handling. 
% |~| may be used (which \texttt{kpathsea} uses to denote the users home directory). % |\%|, or at the top level |%|, may be used to produce a literal |%| % which may be especially useful if the \texttt{pattern} key is used % as \% is the escape character in Lua patterns. % % \subsection{Examples\label{sec:examples2e}} % % \begin{itemize} % \item % Include every |png| file in the current directory. % \begin{verbatim} % \QueryFiles{*.png}{\includegraphics{#1}\par} % \end{verbatim} % % \item % Input every \TeX\ file with a filename matching \texttt{chapter[0-9].tex}. % \begin{verbatim} % \QueryFiles[pattern]{chapter%d.*%.tex}{\input{#1}} % \end{verbatim} % \end{itemize} % % \section{The L3 programming layer interface\label{sec:expl3}} % % \begin{function} % {\sys_get_query:nN, \sys_get_query:nnN, \sys_get_query:nnnN} % \begin{syntax} % \cs{sys_get_query:nN} \Arg{cmd} \Arg{tl var} % \cs{sys_get_query:nnN} \Arg{cmd} \Arg{spec} \Arg{tl var} % \cs{sys_get_query:nnnN} \Arg{cmd} \Arg{options} \Arg{spec} \Arg{tl var} % \end{syntax} % Sets the \meta{tl var} to the information returned by the % \texttt{l3sys-query} \meta{cmd}, potentially supplying the \meta{options} % and \meta{spec} to the query call. The valid \meta{cmd} names are at present % \begin{itemize} % \item \texttt{pwd} Returns the absolute path to the current working directory % \item \texttt{ls} Returns a directory listing, using the \meta{spec} to % select files and applying the \meta{options} if given % \end{itemize} % The \meta{spec} should be a file glob and will automatically be passed to % the script without shell expansion. 
% \end{function} % % \begin{function} % {\sys_split_query:nN, \sys_split_query:nnN, \sys_split_query:nnnN} % \begin{syntax} % \cs{sys_split_query:nN} \Arg{cmd} \Arg{seq} % \cs{sys_split_query:nnN} \Arg{cmd} \Arg{spec} \Arg{seq} % \cs{sys_split_query:nnnN} \Arg{cmd} \Arg{options} \Arg{spec} \Arg{seq} % \end{syntax} % Works as described for \cs{sys_get_query:nnnN}, but sets the \meta{seq} % to contain one entry for each line returned by \texttt{l3sys-query}. % \end{function} % % \section{The \texttt{l3sys-query} Lua script\label{sec:lua}} % \subsection{The command line interface} % % The command line interface to \texttt{l3sys-query} is % \begin{center} % \ttfamily % l3sys-query \meta{cmd} [\meta{option(s)}] [\meta{args}] % \end{center} % where \texttt{\meta{cmd}} can be one of the following: % \begin{itemize}[noitemsep]\ttfamily % \item ls % \item ls \meta{args} % \item pwd % \end{itemize} % The \meta{cmd} are described below. The result of the \meta{cmd} will be printed % to the terminal in an interactive run; in normal usage, this will be piped to % the calling \TeX{} process. Results containing path separators \emph{always} % use~|/|, irrespective of the platform in use. % % As well as these commands, the script recognizes the options % \begin{itemize} % \item |--exclude| Specification for directory entries to exclude % \item |--ignore-case| Ignores case when sorting directory listings % \item |--pattern| (|-p|) Treat the \meta{args} as Lua patterns rather % than converting from wildcards % \item |--recursive| (|-r|) Enables recursive searching during directory % listings % \item |--reverse| Causes sorting to go from highest to lowest rather % than lowest to highest % \item |--sort| Sets the method used to sort entries returned by |ls| % \item |--type| Selects the type of entry returned by |ls| % \end{itemize} % The action of these options on the appropriate \meta{cmd(s)} is detailed below. 
% % \subsubsection{\texttt{ls [\meta{args}]}} % % Lists the contents of one or more directories, in a manner somewhat reminiscent % of the Unix command |ls| or the Windows command |dir|. The exact nature of the % output will depend on the \meta{args}, if given, along with the prevailing % options. Note that the options names are inspired by ideas from the Unix % commands |ls| and |find| as well as the Windows command |dir|: they therefore do % not map directly to those of any one of the command line tools that they % somewhat mirror. % % When no \meta{args} are given, all entries in the current directory will be % listed, one per line in the output. This will include both files and % subdirectories. Each entry will include a path relative to the current % directory: for files \emph{in} the current directory, this will be |./|. The % order of results will be determined by the underlying operating system process: % unless requested \emph{via} an option, no sorting takes place. % % As standard, the \meta{args} are treated as a file/path name potentially % including |?| and |*| as wildcards, for example |*.png| or |file?.txt|. % \begin{verbatim} % l3sys-query ls '*.png' % \end{verbatim} % Some care is needed in preventing expansion of such wildcards by the shell or % \texttt{texlua} process: these are detailed in Section~\ref{sec:wildcard}. In % this section, |'| is used to indicate a character being used to suppress % expansion: this is for example normal on macOS and Linux. % % Removal of entries from the listing can be achieved using the |--exclude| % option, which should be given with a \meta{xarg}, for example % \begin{verbatim} % l3sys-query ls --exclude '*.bak' 'graphics/*' % \end{verbatim} % Directory entries starting |.| are traditionally hidden on Linux and macOS % systems: these \enquote{dot} entries are excluded from the output of % \texttt{l3sys-query}. 
The entries |.| and |..| for the current and parent % directory are also excluded from the results returned by \texttt{l3sys-query} as % they can always be assumed. % % For more complex matching, the \meta{args} can be treated as a Lua pattern using % the |--pattern| (|-p|) option; this also applies to the \meta{xarg} argument to % the |--exclude| option. For example, the equivalent to wildcard |*.png| could be % obtained using % \begin{verbatim} % l3sys-query ls --pattern '^.*%.png$' % \end{verbatim} % % The results returned by |ls| can be sorted using the |--sort| option. This can % be set to |none| (use the order from the file system: the default), |name| (sort % by file name) or |date| (sort by date last modified). The sorting order can be % reversed using |--reverse|. Sorting normally takes account of case: this can be % suppressed with the |--ignore-case| option. % % The listing can be filtered based on the type of entry using the |--type| % option. This takes a string argument, one of |d| (directory) or |f| (file). % % As standard, only the path specified as part of the \meta{args} is queried. % However, if the |--recursive| (|-r|) option is set, the query is applied within % all subdirectories. Subdirectories starting with~|.| (macOS and Linux hidden) % are excluded from recursion. % % For security reasons, only paths within the current working directory can be % queried, thus for example |graphics/*.png| will list all |png| files in the % |graphics| subdirectory, but |../graphics/*.png| will yield no output. % % \subsubsection{\texttt{pwd}} % % Returns the current working directory from which \texttt{l3sys-query} is run. % From within a \TeX{} run, this will (usually) be the directory containing the % main file, assuming a command such as % \begin{verbatim} % pdflatex main.tex % \end{verbatim} % The \texttt{pwd} command is unaffected by any options. 
% % \subsection{Spaces in arguments} % % Since \texttt{l3sys-query} is intended primarily for use with restricted shell % escape calls from \TeX{} processes, handling of spaces is unusual. It is not % possible to quote spaces in such a call, so for example whilst % \begin{verbatim} % l3sys-query ls "foo *" % \end{verbatim} % does work from the command prompt to find all files with names starting % \verb*|foo |, it would not work \emph{via} restricted shell escape. To % circumvent this, \texttt{l3sys-query} will collect all command line arguments % after any \meta{options}, and combine these as a space-separated \meta{args}, % for example allowing % \begin{verbatim} % l3sys-query ls foo '*' % \end{verbatim} % to achieve the same result as the first example. The result is that the % \meta{args} will only ever be interpreted by \texttt{l3sys-query} as a single % argument. It also means that spaces cannot be used at the start or end of the % argument, nor can multiple spaces appear between non-space arguments. % % \subsection{Wildcard expansion handling\label{sec:wildcard}} % % The handling of wildcards needs some further comment for those using % \texttt{l3sys-query} from the command line: the \pkg{expl3} interface described % in Section~\ref{sec:expl3} handles this aspect automatically for the user. % % On macOS and Linux, the shell normally expands globs, which include the % wildcards |*| and |?|, before passing arguments to the appropriate command. % This can be suppressed by surrounding the argument with |'| characters, hence % the formulation % \begin{verbatim} % l3sys-query ls '*.png' % \end{verbatim} % earlier. % % On Windows, the shell does no expansion, and thus arguments are passed as-is to % the relevant command. As such, |'| has no special meaning here. However, to % allow quoting of wildcards from the shell in a platform-neutral manner, % \texttt{l3sys-query} will strip exactly one set of |'| characters around each % argument before further processing. 
%
% It is not possible to use |"| quotes at all in the argument passed to
% \texttt{l3sys-query} from \TeX{}, as the \TeX{} system removes all |"| in
% |\input| while handling space quoting.
%
% Restricted shell escape prevents shell expansion of wildcards entirely. On
% non-Windows systems, it does this by ensuring that each argument is |'| quoted
% to prevent further expansion. Thus a \TeX{} call such as
% \begin{verbatim}
% \input|"l3sys-query ls '*.png'"
% \end{verbatim}
% will work if |--shell-escape| is used as the argument is passed directly to the
% shell, but in restricted shell escape will give an error such as:
% \begin{verbatim}
% ! I can't find file `"|l3sys-query ls '*.png'"'.
% \end{verbatim}
% The \LaTeX{} interfaces described above adjust the quoting used depending on the
% |shell-escape| status.
%
%
% \MaybeStop{}
%
% \section{The \LaTeXe\ package implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
% The implementation is written in \pkg{expl3} syntax.
%    \begin{macrocode}
\ExplSyntaxOn
%<@@=queryfiles>
%    \end{macrocode}
%
% The package should eventually work in restricted shell escape but will do
% nothing useful if the process was started with |--no-shell-escape|.
% In that case a warning is issued once, when the package is loaded.
%    \begin{macrocode}
\sys_if_shell:TF
  { }
  {
    \PackageWarningNoLine { l3sys-query }
      {
        Shell ~Escape ~is ~disabled.\MessageBreak
        All ~queries ~will ~return ~empty ~results
      }
  }
%    \end{macrocode}
%
% \subsection{\cs{QueryWorkingDirectory}}
% |\QueryWorkingDirectory| is a direct call to the |pwd| command provided by
% \texttt{l3sys-query}: the single mandatory argument is the command that
% receives the result and is handed unchanged to \cs{sys_get_query:nN}.
%    \begin{macrocode}
\NewDocumentCommand \QueryWorkingDirectory { m }
  { \sys_get_query:nN { pwd } #1 }
%    \end{macrocode}
%
% \subsection{\cs{QueryFiles} and \cs{QueryFilesTF}}
%
% The declarations of these commands are done in two steps to allow catcode changes
% before the arguments are read. This allows the use of |%| and |^^| in patterns
% at least if the command is not nested in another command argument. (|\%| may be used
% to generate |%| in all cases).
%
% Variables for saving the current definition of |\%| and |~|.
% They are only ever assigned with \cs{cs_set_eq:NN}, i.e.\ they hold a
% copy of the meaning of the two commands while a query is being run.
%    \begin{macrocode}
\tl_new:N \l_query_percent_tl
\tl_new:N \l_query_tilde_tl
%    \end{macrocode}
%
% Allow |%| and |^^| at the top level. The group opened here is closed by
% \cs{QueryFiles_inner} as soon as the \meta{options} and \meta{spec}
% arguments have been read, so the \meta{function} argument is read with
% the normal catcode regime.
%    \begin{macrocode}
\NewDocumentCommand \QueryFiles { }
  {
    \group_begin:
    \char_set_catcode_other:N \%
    \char_set_catcode_other:N \^
    \QueryFiles_inner
  }
%    \end{macrocode}
%
% The definitions that follow have to refer to the \emph{active} character
% |~|, so its catcode is changed for the duration of these definitions.
% Until the catcode is reset below, |~| must not be used to mean a space.
%    \begin{macrocode}
\char_set_catcode_active:N \~
%    \end{macrocode}
%
% The work shared by \cs{QueryFiles} and \cs{QueryFilesTF}: |#1| is the
% key--value \meta{options} list and |#2| the \meta{spec}.
% The option string is built up in \cs{l_tmpa_tl} by the keys, |\%| and |~|
% are temporarily made equal to the corresponding string constants so that
% they survive being passed to the script, and the resulting file list is
% stored in \cs{l_tmpa_seq}. The previous meanings of |\%| and |~| are
% restored before returning.
%    \begin{macrocode}
\cs_new_protected:Npn \__queryfiles_list:nn #1#2
  {
    \tl_clear:N \l_tmpa_tl
    \cs_set_eq:NN \l_query_percent_tl \%
    \cs_set_eq:NN \% \c_percent_str
    \cs_set_eq:NN \l_query_tilde_tl ~
    \cs_set_eq:NN ~ \c_tilde_str
    \keys_set:nn { QueryFiles } {#1}
    \exp_args:NnV \sys_split_query:nnnN { ls } \l_tmpa_tl {#2} \l_tmpa_seq
    \cs_set_eq:NN \% \l_query_percent_tl
    \cs_set_eq:NN ~ \l_query_tilde_tl
  }
%    \end{macrocode}
%
% The trailing \cs{seq_map_inline:Nn} picks up the \meta{function} argument
% that follows \cs{QueryFiles} in the document.
%    \begin{macrocode}
\NewDocumentCommand \QueryFiles_inner { O{} m }
  {
    \group_end:
    \__queryfiles_list:nn {#1} {#2}
    \seq_map_inline:Nn \l_tmpa_seq
  }
%    \end{macrocode}
%
% The TF variant uses its own outer/inner pair so that the function and TF
% arguments are read with the normal catcode regime.
% It applies the same catcode changes as \cs{QueryFiles}, so that |%| and
% |^^| can be used at the top level with either command.
%    \begin{macrocode}
\NewDocumentCommand \QueryFilesTF { }
  {
    \group_begin:
    \char_set_catcode_other:N \%
    \char_set_catcode_other:N \^
    \QueryFilesTF_inner
  }
%    \end{macrocode}
%
% The three arguments that follow in the document are \meta{function},
% \meta{pre code} and \meta{empty list code}: for an empty list only the third
% is used, otherwise they are passed to \cs{__queryfiles_aux:nnn}.
%    \begin{macrocode}
\NewDocumentCommand \QueryFilesTF_inner { O{} m }
  {
    \group_end:
    \__queryfiles_list:nn {#1} {#2}
    \seq_if_empty:NTF \l_tmpa_seq
      \use_iii:nnn
      \__queryfiles_aux:nnn
  }
%    \end{macrocode}
%
% Restore the usual \pkg{expl3} meaning of |~|.
%    \begin{macrocode}
\char_set_catcode_space:N \~
%    \end{macrocode}
%
% For a non-empty list: run the \meta{pre code} (|#2|), then apply the
% \meta{function} (|#1|) to each entry; the \meta{empty list code} (|#3|) is
% discarded. The function is protected as \cs{seq_map_inline:Nn} is not
% expandable.
%    \begin{macrocode}
\cs_new_protected:Npn \__queryfiles_aux:nnn #1#2#3
  {
    #2
    \seq_map_inline:Nn \l_tmpa_seq {#1}
  }
%    \end{macrocode}
%
% Defining the keys.
% Most take no value and simply add a \texttt{l3sys-query} \texttt{-{}-}
% option to the command line being constructed.
% Each key appends its text, followed by a space, to \cs{l_tmpa_tl}.
%    \begin{macrocode}
\keys_define:nn { QueryFiles }
  {
%    \end{macrocode}
%
% The |recursive|, |ignore-case| and |reverse| keys add the option of the
% same name; giving them a value is an error.
%    \begin{macrocode}
    recursive   .code:n            = \tl_put_right:Nn \l_tmpa_tl { --recursive ~ } ,
    recursive   .value_forbidden:n = true ,
%    \end{macrocode}
%
%    \begin{macrocode}
    ignore-case .code:n            = \tl_put_right:Nn \l_tmpa_tl { --ignore-case ~ } ,
    ignore-case .value_forbidden:n = true ,
%    \end{macrocode}
%
%    \begin{macrocode}
    reverse     .code:n            = \tl_put_right:Nn \l_tmpa_tl { --reverse ~ } ,
    reverse     .value_forbidden:n = true ,
%    \end{macrocode}
%
% The |exclude| key requires a value. The value is surrounded by |'|
% characters unless restricted shell escape is in use, and is protected from
% expansion while the option string is being built.
%    \begin{macrocode}
    exclude .code:n =
      \tl_put_right:Ne \l_tmpa_tl
        {
          --exclude ~
          \sys_if_shell_restricted:F { ' }
          \exp_not:n {#1}
          \sys_if_shell_restricted:F { ' } ~
        } ,
    exclude .value_required:n = true ,
%    \end{macrocode}
%
% The |type| key checks the supplied value is valid: |d| for directories
% or |f| for files. The default behaviour lists both.
%    \begin{macrocode}
    type .choices:nn =
      { d , f }
      { \tl_put_right:Nn \l_tmpa_tl { --type ~ #1 ~ } } ,
%    \end{macrocode}
%
% The |sort| key checks the supplied value is valid: |date| or |name|.
%    \begin{macrocode}
    sort .choices:nn =
      { date , name }
      { \tl_put_right:Nn \l_tmpa_tl { --sort ~ #1 ~ } } ,
%    \end{macrocode}
%
% The |pattern| key takes no value and adds the option of the same name.
%    \begin{macrocode}
    pattern     .code:n            = \tl_put_right:Nn \l_tmpa_tl { --pattern ~ } ,
    pattern     .value_forbidden:n = true ,
%    \end{macrocode}
%
%    \begin{macrocode}
  }
%    \end{macrocode}
%
%    \begin{macrocode}
\ExplSyntaxOff
%    \end{macrocode}
%
% Close the docstrip guard opened at the start of the implementation.
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \Finale
%