% \iffalse meta-comment % %% Copyright (C) 2020--2021 by Marcel Krueger %% %% This file may be distributed and/or modified under the %% conditions of the LaTeX Project Public License, either %% version 1.3 of this license or (at your option) any later %% version. The latest version of this license is in: %% %% http://www.latex-project.org/lppl.txt %% %% and version 1.3 or later is part of all distributions of %% LaTeX version 2005/12/01 or later. % %<*batch> %<*gobble> \ifx\jobname\relax\let\documentclass\undefined\fi \ifx\documentclass\undefined \csname fi\endcsname % \input docstrip.tex \keepsilent \generate{% \file{inputnormalization.sty}{\from{inputnormalization.dtx}{package}} \file{inputnormalization.tex}{\from{inputnormalization.dtx}{tex-package}}} \endbatchfile % %<*gobble> \fi \expandafter\ifx\csname @currname\endcsname\empty \csname fi\endcsname % %<*driver> \RequirePackage{inputnormalization} \documentclass{article} \usepackage{csquotes,doc,metalogo,hyperref,luacolor,tikzducks,pict2e} \RecordChanges \MakeShortVerb\| \begin{document} \DocInput{inputnormalization.dtx} \PrintIndex \PrintChanges \end{document} % %<*gobble> \fi % % \fi % % \GetFileInfo{inputnormalization.sty} % \title{The inputnormalization package\thanks{This document % corresponds to inputnormalization~\fileversion, dated~\filedate.}} % \author{Marcel Kr\"uger \\ % \href{mailto:tex@2krueger.de}{tex@2krueger.de}} % % \maketitle % Add support for normalizing input files for \LuaTeX\ and provide a common interface for \LuaTeX\ and \XeTeX. % % \section{Motivation} % Modern \TeX\ engines like \XeTeX\ or \LuaTeX\ natively accept Unicode input. % Unicode is a rather special encoding since many characters can be encoded in % different ways which are officially considered equivalent. This can sometimes % lead to surprising behavior since many parts of \TeX\ are not aware of these % equivalences and therefore treat different encodings as different strings.
% This can show itself during rendering when the same text might appear % in different ways depending on the input, but it might also show itself in % macro or option names: When e.g.\ non-English macro names are in use, the % different encodings of the same name can name different macros, leading to % errors that are hard to understand and fix. % % Unicode defines a mechanism to solve such issues: It defines the normalization % forms NFC and NFD. When text is normalized to one of these forms, then two % equivalent strings are always encoded in the same way, leading to unique names % and consistent rendering. % % This package provides a uniform way to enable input normalization to either % of these forms in both \XeTeX\ and \LuaTeX. % % \section{Usage} % Just loading the package is enough to enable NFC normalization. This is the % right option for almost all users: % % \begin{verbatim} % \documentclass{article} % \usepackage{inputnormalization} % \begin{document} % Everything here gets normalized before it's processed by \TeX. % \end{document} % \end{verbatim} % % If you are a plain \LuaTeX/\XeTeX\ user, you can use % % \begin{verbatim} % \input inputnormalization % Everything here gets normalized before it's processed by \TeX. % \bye % \end{verbatim} % instead. % % \section{Advanced usage} % In addition to enabling NFC normalization by default, the package makes % \verb|\Uinputnormalization| available as a cross-engine version of % \verb|\XeTeXinputnormalization| to make the normalization controllable. % See the \XeTeX\ documentation for detailed usage. E.g.\ you could write % \begin{verbatim} % \documentclass{article} % \usepackage{inputnormalization} % \begin{document} % Everything here gets normalized to NFC before it's processed by \TeX. % % \Uinputnormalization=0 % Now normalization is disabled. % % \Uinputnormalization=2 % Here we normalize to NFD instead.
% \end{document} % \end{verbatim} % % \paragraph{Warning:} It is almost never a good idea to use different kinds % of normalization in the same document. Therefore you should set one kind of % normalization directly after loading the package and not modify it afterwards. % % Additionally NFC works much better in a \TeX\ context than NFD, so you should % not set this at all unless you know exactly what you are doing. % % \StopEventually{} % \section{The implementation} % \changes{0.0.1}{2020-05-17}{Initial release} % \iffalse %<*package|tex-package> % \fi % \begin{macrocode} %<*package> \NeedsTeXFormat{LaTeX2e} \ProvidesPackage {inputnormalization} [2021/07/05 v0.2 Unicode input normalization] % % \end{macrocode} % Only \LuaTeX\ and \XeTeX\ are supported. % For other engines we show an error. % If |\PackageError| is undefined, the plain \TeX\ variant defines a minimal % substitute built on |\errhelp| and |\errmessage| inside a group. % \begin{macrocode} \ifx\directlua\undefined \ifx\XeTeXinputnormalization\undefined % \end{macrocode} % \iffalse %<*gobble> \iffalse % % \fi \fi % \begin{macrocode} %<*tex-package> \begingroup \ifx\PackageError\undefined \def\PackageError#1#2#3{\errhelp{#3}\errmessage{#1: #2}} \fi % % \end{macrocode} % \iffalse \iffalse %<*gobble> \fi % % \fi % \begin{macrocode} \PackageError{inputnormalization}{LuaTeX or XeTeX required}% {inputnormalization requires LuaTeX or XeTeX. Maybe you forgot to switch the engine in your editor?} % \end{macrocode} % \iffalse %<*gobble> \iffalse % % \fi \fi % \begin{macrocode} %<*tex-package> \endgroup % % \end{macrocode} % \iffalse \iffalse %<*gobble> \fi % % \fi % \begin{macrocode} \else % \end{macrocode} % First deal with \XeTeX: Define |\Uinputnormalization| as an alias for |\XeTeXinputnormalization|. % Nothing else has to be set up in this branch; \texttt{ltluatex} is only needed for \LuaTeX. % \begin{macrocode} \let\Uinputnormalization\XeTeXinputnormalization \fi \else % \end{macrocode} % In \LuaTeX\ we emulate |\Uinputnormalization| using a |process_input_buffer| callback.
% First ensure that |ltluatex| is loaded to have proper callback handling.
% The test compares against |\undefined| like the other engine checks in this
% file: this code is shared with the plain \TeX\ variant, where |@| is not a
% letter and |\@undefined| would therefore not be read as a single control
% sequence.
%    \begin{macrocode}
  \ifx\newluafunction\undefined
    \input ltluatex
  \fi
%    \end{macrocode}
% We need an integer register to control the normalization and then the actual implementation of the callback.
% Nothing particularly interesting is happening here, the actual normalization is handled by \texttt{lua-uni-algos}.
% The register value selects the handler: 0 leaves the buffer unchanged,
% 1 applies NFC and 2 applies NFD.
% Any other value has no entry in the table, so we fall back to the identity
% function instead of raising a Lua error for every line of input.
%    \begin{macrocode}
  \newcount\Uinputnormalization
  \directlua{
    local getcount = tex.getcount
    local function ident(buf) return buf end
    local uni_normalize = require'lua-uni-normalize'
    local normalize = {[0] = ident, uni_normalize.NFC, uni_normalize.NFD}
    luatexbase.add_to_callback('process_input_buffer', function(buf)
      return (normalize[getcount(\the\allocationnumber)] or ident)(buf)
    end, 'inputnormalization')
  }
\fi
%    \end{macrocode}
% \changes{0.2}{2021-07-05}{Enable NFC by default}
% Finally we enable NFC normalization as a reasonable default:
%    \begin{macrocode}
\Uinputnormalization=1
\endinput
%    \end{macrocode}
% \iffalse
%</package|tex-package>
% \fi
% \Finale