-- Seed the pseudo-random generator once at load time; math.random supplies
-- the random bytes of generated UUIDs (see UUID:initialize).
RANDOM_SEED = math.randomseed(os.time())

-- Truth tables consumed by BITWISE, indexed as matrix[bit_of_x + 1][bit_of_y + 1].
MATRIX_AND = {{0,0},{0,1}}
MATRIX_OR = {{0,1},{1,1}}
-- Digit alphabet used by INT2HEX.
HEXES = '0123456789abcdef'

local SciKGTeX = {}
SciKGTeX.whole_string = ""
-- property name -> true once an annotation with that property was registered
SciKGTeX.properties_used = {}
-- names of the property commands defined through make_new_command
SciKGTeX.property_commands = {}
-- properties for which warn_unused_command reports a missing annotation
SciKGTeX.mandatory_properties = {
    'researchproblem',
    'objective',
    'method',
    'result',
    'conclusion'
}
SciKGTeX.PRODUCE_XMP_FILE = true
SciKGTeX.WARNING_LEVEL = 1

local XMP = {}
XMP.lines = {}
XMP.namespaces = {}
XMP.property_ns = {}
-- NOTE(review): the three packet constants below are empty in this copy of the
-- file; they look like markup that was lost in extraction. Confirm against the
-- released package before relying on the generated packet.
XMP.XMP_TOP = [[]]
XMP.XMP_BOTTOM = [[]]
XMP.PACKET_END = [[]]

local UUID = {}

---------------------------- utilities -------------------------------

-- performs the bitwise operation specified by truth matrix on two numbers.
-- x, y   : non-negative integers
-- matrix : 2x2 truth table such as MATRIX_AND or MATRIX_OR
-- returns the combined integer
function BITWISE(x, y, matrix)
    local z = 0
    local pow = 1
    while x > 0 or y > 0 do
        z = z + (matrix[x%2+1][y%2+1] * pow)
        pow = pow * 2
        x = math.floor(x/2)
        y = math.floor(y/2)
    end
    return z
end

-- Converts a non-negative integer to lowercase hexadecimal, left-padded with
-- a zero so the result has at least two digits.
function INT2HEX(x)
    local s = ''
    local base = 16
    while x > 0 do
        local d = x % base + 1
        x = math.floor(x/base)
        s = string.sub(HEXES, d, d)..s
    end
    if #s == 0 then
        -- zero never enters the loop; without this a zero byte would vanish
        -- from the formatted UUID
        s = "00"
    elseif #s == 1 then
        s = "0" .. s
    end
    return s
end

-- Returns the value of a `--output-directory=...` entry in the global `arg`
-- table, or nil when `arg` is absent or contains no such entry.
function get_output_dir()
    if arg == nil then
        return nil
    end
    for _, v in ipairs(arg) do
        local val, is_output_argument = v:gsub('%-%-output%-directory=(.*)','%1')
        if is_output_argument > 0 then
            return val
        end
    end
    return nil
end

-- Returns the first line of the file at `path`.
-- Returns nil (after printing a notice) when the file cannot be opened, and
-- nil when the file is empty.
function read_header_of_file(path)
    local fh = io.open(path, "rb")
    if not fh then
        print ("No xmp metadata file found!")
        return nil
    end
    -- read() yields nil on an empty file; close the handle in every case
    local first_line = fh:read()
    fh:close()
    return first_line
end

-- Extracts the value of the first id="..." attribute found in `header`.
-- Returns nil when there is none, so callers can fall back to a fresh id.
function extract_uuid_from_header(header)
    return header:match('id=\"(.-)\"')
end

-- Builds a new UUID string; the node part is a fixed hardware address.
function generate_UUID()
    UUID:initialize('00:0c:29:69:41:c6')
    return UUID:toString()
end

-- Splits the string into the maximal runs of characters that are not in
-- `sep`. `sep` is spliced into a pattern character class, so every character
-- of it acts as a separator; the default is whitespace.
function string:split(sep)
    if sep == nil then
        sep = "%s"
    end
    local t = {}
    for str in self:gmatch("([^"..sep.."]+)") do
        table.insert(t, str)
    end
    return t
end

-- Replaces each run of whitespace by a single underscore.
-- Returns the gsub results (new string, number of replacements).
function spaces_to_underscores(s)
    return s:gsub('%s+','_')
end

-- Strips \begin{...}{...}, \begin{...} and \end{...} markers, together with
-- the whitespace following them, and returns the remaining text.
function remove_environments(s)
    s = s:gsub('\\begin%s*{.-}{.-}%s*','')
    s = s:gsub('\\begin%s*{.-}%s*','')
    s = s:gsub('\\end%s*{.-}%s*','')
    return s
end

-- Removes generic LaTeX commands, keeping the content of a braced argument.
-- Whenever one of the patterns matched, the result is passed through
-- remove_latex_commands again until nothing matches any more.
function remove_any_latex_command(s)
    local c
    -- command with a numeric optional argument: \cmd[1]{content}
    s, c = s:gsub('\\%w+%s*%[%d*%]%s*{(.*)}','%1')
    if c > 0 then
        return remove_latex_commands(s)
    end
    -- command with one braced argument: \cmd{content}
    s, c = s:gsub('\\%w+%s*{(.*)}','%1')
    if c > 0 then
        return remove_latex_commands(s)
    end
    -- bare command without arguments: \cmd
    s, c = s:gsub('\\%w+%s*','')
    if c > 0 then
        return remove_latex_commands(s)
    end
    return s
end

-- For every pattern key of `repls`, looks up its first match in `s` and
-- returns {start, stop, pattern} for the match that starts furthest to the
-- right, or nil when no pattern matches.
function find_last_occurence(s, repls)
    local occurences = {}
    for pattern, _ in pairs(repls) do
        local i, j = s:find(pattern)
        if i ~= nil then
            table.insert(occurences, {i,j,pattern})
        end
    end
    table.sort(occurences, function(l, r) return l[1]>r[1] end)
    if #occurences > 0 then
        return occurences[1]
    end
    return nil
end

-- Repeatedly applies the replacement belonging to the right-most matching
-- pattern (see find_last_occurence) until no pattern of `repls` matches.
-- repls : table mapping Lua patterns to gsub replacement values
function exhaustively_replace_last_occurence_of_pattern(s, repls)
    local last_occurence = find_last_occurence(s, repls)
    while last_occurence ~= nil do
        local starts, ends, pattern = table.unpack(last_occurence)
        local to_replace = s:sub(starts,ends)
        -- only the matched span is rewritten; text around it is kept as is
        s = s:sub(0,starts-1)
            .. to_replace:gsub(pattern, repls[pattern], 1)
            .. s:sub(ends+1)
        last_occurence = find_last_occurence(s, repls)
    end
    return s
end

-- Reduces LaTeX source to plain text: environments are dropped, annotation
-- commands are replaced by their content (starred forms are removed
-- completely), remaining commands are stripped and backslashes deleted.
function remove_latex_commands(s)
    local replacements = {
        -- contribution with * and []
        ['\\contribution%s*%*%s*%[%d*%]%s*{.-}{.*}%s*'] = '',
        -- contribution with *
        ['\\contribution%s*%*%s*{.-}%s*{.*}%s*'] = '',
        -- contribution with []
        ['\\contribution%s*%[%d*%]%s*{.-}{(.*)}'] = '%1',
        -- contribution normal
        ['\\contribution%s*{.-}%s*{(.*)}'] = '%1',
        ['\\uri%s*{.-}%s*{(.*)}'] = '%1',
    }
    for cmd, _ in pairs(SciKGTeX.property_commands) do
        -- []
        replacements['\\'.. cmd .. '%s*%[%d*%]%s*{(.*)}'] = '%1'
        -- normal command
        replacements['\\'.. cmd .. '%s*{(.*)}'] = '%1'
        -- with * and []
        replacements['\\'.. cmd .. '%s*%*%s*%[%d*%]%s*{.*}%s*'] = ''
        -- with *
        replacements['\\'.. cmd .. '%s*%*%s*{.*}%s*'] =''
    end
    s = remove_environments(s)
    s = exhaustively_replace_last_occurence_of_pattern(s, replacements)
    s = remove_any_latex_command(s)
    -- remove escape chars
    s = s:gsub('\\','')
    return s
end

-- A URI is accepted when it starts with 'http'.
function uri_valid(s)
    return s:find('http') == 1
end

-- Turns a \uri{...}{...} or \uri{...} occurrence in `s` into an entity string.
-- Returns false when `s` still contains a mandatory property command or
-- \contribution, or when no \uri command is found.
function resolve_entity(s)
    -- make sure the entity is only resolved at the innermost of nested commands.
    for _, cmd in ipairs(SciKGTeX.mandatory_properties) do
        if s:find('\\' .. cmd) then
            return false
        end
    end
    if s:find('\\contribution') then
        return false
    end
    -- NOTE(review): both format strings below look truncated in this copy
    -- (the second has no %s at all and `label` has no slot in the first).
    -- They are reproduced unchanged; confirm the intended markup.
    local uri, found = s:gsub('.*\\uri%s*{(.-)}%s*{.*}.*', '%1')
    if found == 1 then
        local label = s:gsub('.*\\uri%s*{.-}%s*{(.*)}.*', '%1')
        return string.format('%s', uri, label)
    end
    uri, found = s:gsub('.*\\uri%s*{(.-)}.*', '%1')
    if found == 1 then
        return string.format('', uri)
    end
    return false
end

---------------------------- UUID class methods -------------------------------------

-- hwaddr is a string: hexes delimited by colons.
-- e.g.: 00:0c:29:69:41:c6
-- Fills self._bytes with 16 values: ten random bytes followed by the six
-- bytes parsed from hwaddr, then stamps the version and variant bits.
function UUID:initialize(hwaddr)
    local bytes = {}
    for i = 1, 10 do
        bytes[i] = math.random(0, 255)
    end
    -- should come from mac address
    for i = 0, 5 do
        -- every address byte occupies two hex digits plus one delimiter
        local first = 1 + 3 * i
        bytes[11 + i] = tonumber(hwaddr:sub(first, first + 1), 16)
    end
    self._bytes = bytes
    -- set the version
    bytes[7] = BITWISE(bytes[7], 0x0f, MATRIX_AND)
    bytes[7] = BITWISE(bytes[7], 0x40, MATRIX_OR)
    -- set the variant; the mask and the marker are both applied to byte 9
    -- itself so that its random low bits survive
    bytes[9] = BITWISE(bytes[9], 0x3f, MATRIX_AND)
    bytes[9] = BITWISE(bytes[9], 0x80, MATRIX_OR)
    self._string = nil
end

-- lazy string creation.
-- Formats the 16 bytes as hex groups of 4-2-2-2-6 bytes joined by hyphens and
-- caches the result in self._string.
function UUID:toString()
    if self._string == nil then
        local group_ends = { [4] = true, [6] = true, [8] = true, [10] = true }
        local pieces = {}
        for i = 1, 16 do
            pieces[#pieces + 1] = INT2HEX(self._bytes[i])
            if group_ends[i] then
                pieces[#pieces + 1] = "-"
            end
        end
        self._string = table.concat(pieces)
    end
    return self._string
end

---------------------------- Main class methods -------------------------------

-- Sets the warning level; warnings are printed only for levels above 0.
function SciKGTeX:set_warning_level(wl)
    self.WARNING_LEVEL = wl
end

-- Writes a formatted package warning to terminal and log.
-- warning_message : string.format template, filled with the remaining arguments
function SciKGTeX:warn(warning_message, ...)
    if self.WARNING_LEVEL > 0 then
        texio.write_nl("term and log",
            [[Package SciKGTeX Warning: ]] .. string.format(warning_message, ...))
        texio.write_nl("term and log","\n")
    end
end

-- Raises a TeX error with a formatted package error message.
function SciKGTeX:error(warning_message, ...)
    tex.error([[Package SciKGTeX Error: ]] .. string.format(warning_message, ...))
end

-- Templates from which the LaTeX definitions of property commands are built.
-- The command name is substituted for %s.
SciKGTeX.command_factory = {}
SciKGTeX.command_factory.cmd_top = [[\newcommand{\%s}[2][]{]]
SciKGTeX.command_factory.cmd_top_star = [[\WithSuffix\newcommand\%s*[2][]{]]
SciKGTeX.command_factory.cmd_top_override = [[\renewcommand{\%s}[2][]{]]
SciKGTeX.command_factory.cmd_top_star_override = [[\WithSuffix\renewcommand\%s*[2][]{]]
SciKGTeX.command_factory.directlua_part =
    [[ \directlua{ local content = "\luaescapestring{\unexpanded{#2}}"]] ..
    [[ local belongs_to_contribution = "\luaescapestring{\unexpanded{#1}}"]] ..
    [[ SciKGTeX.XMP:add_annotation(belongs_to_contribution, '%s', content, 'annotation-id') }]]
SciKGTeX.command_factory.cmd_bottom = [[}]]
SciKGTeX.command_factory.cmd_bottom_star = [[\ignorespaces}]]

-- Substitutes command_name into both %s slots of `template` (the command
-- header and the add_annotation call) and hands the definition to TeX one
-- line at a time, each line terminated by a TeX comment character.
local function print_command_definition(template, command_name)
    local formatted_cmd = string.format(template, command_name, command_name)
    for _, line in ipairs(formatted_cmd:split("\n")) do
        tex.print(line .. "%")
    end
end

-- Defines \<command_name>: records the annotation and typesets argument #2.
function SciKGTeX.command_factory:build_command(command_name)
    print_command_definition(
        self.cmd_top .. "\n" .. self.directlua_part .. "\n #2\n" .. self.cmd_bottom,
        command_name)
end

-- Defines \<command_name>*: records the annotation without typesetting it.
function SciKGTeX.command_factory:build_star_command(command_name)
    print_command_definition(
        self.cmd_top_star .. "\n" .. self.directlua_part .. "\n" .. self.cmd_bottom_star,
        command_name)
end

-- Same as build_command, but redefines an already existing command.
function SciKGTeX.command_factory:override_command(command_name)
    print_command_definition(
        self.cmd_top_override .. "\n" .. self.directlua_part .. "\n #2\n" .. self.cmd_bottom,
        command_name)
end

-- Same as build_star_command, but redefines an already existing command.
function SciKGTeX.command_factory:override_star_command(command_name)
    print_command_definition(
        self.cmd_top_star_override .. "\n" .. self.directlua_part .. "\n" .. self.cmd_bottom_star,
        command_name)
end

-- Creates the LaTeX commands (plain and starred) for a new property and
-- registers the property. A repeated definition triggers a warning and
-- overrides the existing commands.
-- namespace : 'prefix, URI' specification or ''
function SciKGTeX:make_new_command(new_property, namespace)
    -- check if property already exists
    if self.property_commands[new_property]~=nil then
        self:warn(
            [[Method newpropertycommand: Repeated definition. Command %s already exists! ]] ..
            [[Are you sure you want to override it?]],
            new_property)
        self:add_property(new_property, namespace)
        self.command_factory:override_command(new_property, namespace)
        self.command_factory:override_star_command(new_property, namespace)
    else
        self.property_commands[new_property] = false
        self:add_property(new_property, namespace)
        self.command_factory:build_command(new_property, namespace)
        self.command_factory:build_star_command(new_property, namespace)
    end
end

-- Makes a property known (initially marked unused) and stores the namespace
-- prefix extracted from `namespace` for it. Warns when the property was
-- already added.
function SciKGTeX:add_property(new_property, namespace)
    new_property = self.XMP:escape_xml_tags(new_property)
    -- check if property already exists
    if self.properties_used[new_property]~=nil then
        self:warn(
            [[Method addmetaproperty: Repeated definition. Property %s already added! ]] ..
            [[Are you sure you want to replace it?]],
            new_property)
    -- if not make it known to the object
    else
        self.properties_used[new_property] = false
    end
    local ns_prefix = self.XMP:extract_namespace_prefix(namespace)
    self.XMP.property_ns[new_property] = ns_prefix
end

-- Marks a property as used in the document.
function SciKGTeX:register_property(prop_type)
    self.properties_used[prop_type] = true
end

-- Warns about every mandatory property that was never registered as used.
function SciKGTeX:warn_unused_command()
    local warning_message =
        [[No %s annotation found! Are you sure you don't want to mark an entity with %s?]]
    for _, p in ipairs(self.mandatory_properties) do
        if not self.properties_used[p] then
            self:warn(warning_message, p, p);
        end
    end
end

-- Typesets an entity: a hyperlink when hyperref is loaded, otherwise the
-- label, falling back to the URI when the label is empty.
function SciKGTeX:print_entity(uri, label, hyperrefloaded)
    if label ~= "" and hyperrefloaded then
        tex.print(string.format('\\href{%s}{%s}', uri , label))
    elseif label ~= "" then
        tex.print(label)
    elseif hyperrefloaded then
        tex.print(string.format('\\url{%s}',uri))
    else
        tex.print(uri)
    end
end

---------------------------- XMP class methods -------------------------------

-- Sanitizes a property name for use as an XML tag name: whitespace becomes
-- underscores, characters other than letters, digits, periods, hyphens and
-- underscores are removed, and a leading "xml" (any case) gets an underscore
-- prefix. A warning is issued for each kind of correction.
-- Returns the gsub results of the last step (name, replacement count).
function XMP:escape_xml_tags(s)
    local i
    s = spaces_to_underscores(s)
    s, i = s:gsub('[^%a%d%.-_]','')
    if i > 0 then
        SciKGTeX:warn(
            [[Method escape_xml_tags: Forbidden characters. Property %s can only contain letters, ]] ..
            [[digits, underscores, hyphens and periods! Forbidden characters removed.]],
            s)
    end
    s, i = s:gsub('^([Xx][Mm][Ll])','_%1')
    if i > 0 then
        SciKGTeX:warn(
            [[Method escape_xml_tags: Forbidden characters. Property %s can not start with xml! ]] ..
            [[Changed to _xml.]],
            s)
    end
    return s, i
end

-- Escapes the XML special characters &, > and < in text content.
-- The ampersand is handled first so the entities produced for > and < are
-- not escaped a second time.
function XMP:escape_xml_content(s)
    s = s:gsub('&', '&amp;')
    s = s:gsub('>', '&gt;')
    s = s:gsub('<', '&lt;')
    return s
end

-- Appends one string.format-ed line to the packet buffer.
function XMP:add_line(...)
    table.insert(self.lines, string.format(...))
end

-- Creates the paper record all later metadata is attached to.
function XMP:add_paper_node(paper_iri)
    self.paper = {
        contributions = {},
        id = paper_iri,
        title = nil,
        authors = {},
        researchfield = nil,
    }
end

-- Registers an empty contribution under `key`. The default-contribution
-- placeholder inside the IRI is replaced by "ORKG_default".
function XMP:add_contribution(key, contribution_iri)
    local contribution = {}
    contribution.properties = {}
    contribution.id = contribution_iri:gsub("<(default_contribution)>", "ORKG_default")
    self.paper.contributions[key] = contribution
end

-- Parses a 'prefix, URI' specification, registers the namespace when the
-- prefix is new, and returns the prefix.
-- Returns nil for an empty specification, a missing prefix or an invalid URI
-- (the latter two also raise a TeX error).
function XMP:extract_namespace_prefix(ns_arg)
    if ns_arg == '' then
        return nil
    end
    local uri_and_prefix = ns_arg:split(',%s+?')
    if #uri_and_prefix < 2 then
        SciKGTeX:error(
            [[Method addmetaproperty: No prefix found. Unknown prefix, URI specification: %s. ]] ..
            [[Please specify the arguments as [prefix, URI]!]],
            ns_arg)
        return nil
    elseif #uri_and_prefix > 2 then
        SciKGTeX:warn(
            [[Method addmetaproperty: Too many arguments. Too many arguments in prefix, ]] ..
            [[URI specification: %s. Excess arguments are ignored.]],
            ns_arg)
    end
    local prefix, uri = uri_and_prefix[1], uri_and_prefix[2]
    if not uri_valid(uri) then
        local message =
            [[Method addmetaproperty: Invalid URI. The given URI %s is not a valid choice! ]] ..
            [[Please use a resolvable URI starting with 'http'.]]
        SciKGTeX:error(message, uri)
        return nil
    end
    -- add the namespace if it has not been added yet
    if self.namespaces[prefix]==nil then
        self:add_namespace(prefix, uri)
    end
    return prefix
end

-- Prepares annotation content for the packet: XML-escapes it, then returns
-- either the resolved entity (when the content is a \uri command) or the
-- content with LaTeX commands removed.
function XMP:process_content(c)
    c = self:escape_xml_content(c)
    local entity = resolve_entity(c)
    if entity ~= false then
        return entity
    end
    return remove_latex_commands(c)
end

-- Splits 'prefix:name' into its parts.
-- Returns prefix (nil when there is no colon) and the bare property name.
function XMP:property_has_namespace(annotation_type)
    local parts = annotation_type:split(':')
    if #parts > 1 then
        return parts[1], parts[2]
    end
    return nil, parts[1]
end

function XMP:set_title(title)
    self.paper.title = title
end

function XMP:add_author(author)
    table.insert(self.paper.authors, author)
end

function XMP:set_researchfield(researchfield)
    self.paper.researchfield = researchfield
end

-- Records an annotation for one or more contributions.
-- contribution_ids : separated list of contribution keys; '' selects the
--                    default contribution
-- annotation_type  : property name, optionally written as 'prefix:name'
-- content          : annotated LaTeX source
-- annotation_id    : identifier stored with the annotation
function XMP:add_annotation(contribution_ids, annotation_type, content, annotation_id)
    -- check if a namespace is attached to the property specification
    local prefix, bare_type = self:property_has_namespace(annotation_type)
    local annotation = {}
    annotation.content = content
    annotation.id = annotation_id
    annotation.type = self:escape_xml_tags(bare_type)
    -- take the prefix given, the prefix saved in the namespace dictionary or the default ns
    annotation.prefix = prefix or self.property_ns[annotation.type] or 'orkg_property'
    -- register the use of the property in text
    SciKGTeX:register_property(annotation.type)
    -- check if the annotation was numbered
    if contribution_ids == '' then
        -- an empty id list would split into nothing and silently drop the
        -- annotation; use the placeholder that add_contribution rewrites to
        -- "ORKG_default"
        contribution_ids = '<default_contribution>'
    end
    local contributions_ids_t = contribution_ids:split(',%s+?')
    -- add the annotations at the specified contribution
    for _, contribution_id in ipairs(contributions_ids_t) do
        -- add a new contribution if it has not been added yet
        if self.paper.contributions[contribution_id] == nil then
            self:add_contribution(contribution_id, 'contribution_'..contribution_id)
        end
        -- add the property annotation to the list of properties of a contribution
        -- check if the same annotation already exists (in case of double evaluation
        -- of the LaTeX command for example)
        local properties = self.paper.contributions[contribution_id].properties
        local already_there = false
        for _, prop in pairs(properties) do
            if prop.content == annotation.content and prop.type == annotation.type then
                already_there = true
                break
            end
        end
        if not already_there then
            table.insert(properties, annotation)
        end
    end
end

-- Registers a namespace URI under its abbreviation.
function XMP:add_namespace(abbr, uri)
    self.namespaces[abbr] = uri
end

-- Returns the opening line of the RDF root element.
-- NOTE(review): the root template is empty in this copy of the file and the
-- collected namespace keys are not used; this looks like lost markup and is
-- left as found.
function XMP:generate_rdf_root()
    local ns_key_array = {}
    for ns, _ in pairs(self.namespaces) do
        table.insert(ns_key_array, ns)
    end
    local root_string = [[]]
    return root_string
end

-- Serializes the paper record into the packet buffer and returns all
-- buffered lines joined by lb_char.
-- lb_char : line separator, "\n" by default; the value "r" selects "\r"
-- NOTE(review): several add_line templates below are blank or lack the
-- markup their arguments suggest (e.g. '' with an id argument); they are
-- reproduced unchanged from this copy of the file.
function XMP:generate_xmp_string(lb_char)
    lb_char = lb_char or "\n"
    if lb_char == "r" then
        lb_char = "\r"
    end
    -- emit contributions in sorted key order so the output is stable
    local sorted_contributions = {}
    for cb_id, _ in pairs(self.paper.contributions) do
        table.insert(sorted_contributions,cb_id)
    end
    table.sort(sorted_contributions)
    self:add_line('',self.paper.id)
    self:add_line(self.XMP_TOP)
    self:add_line(self:generate_rdf_root())
    --print(debug.traceback())
    if self.paper then
        self:add_line( ' ', self.paper.id )
        self:add_line(' ')
        if self.paper.title ~= nil then
            self:add_line( ' %s', self:process_content(self.paper.title) )
        end
        for _, author in ipairs(self.paper.authors) do
            self:add_line( ' %s', self:process_content(author))
        end
        if self.paper.researchfield ~= nil then
            self:add_line( ' %s', self:process_content(self.paper.researchfield) )
        end
        for i, cb_id in pairs(sorted_contributions) do
            local contribution = self.paper.contributions[cb_id]
            if i==1 then
                if #sorted_contributions > 1 then
                    self:add_line(' ')
                else
                    self:add_line(' ')
                end
            end
            self:add_line( ' ', self.paper.id .. "/" ..contribution.id )
            for _, property in ipairs(contribution.properties) do
                self:add_line(
                    ' <%s:%s>%s',
                    property.prefix,
                    property.type,
                    self:process_content(property.content),
                    property.prefix,
                    property.type
                )
            end
            self:add_line(' ')
            if i == #sorted_contributions then
                self:add_line(' ')
            end
        end
        self:add_line(' ')
    end
    self:add_line('')
    self:add_line(self.XMP_BOTTOM)
    self:add_line(self.PACKET_END)
    return table.concat(self.lines, lb_char)
end

-- Writes the packet as an uncompressed PDF stream object of the given
-- /Type, clears the line buffer and returns the value of pdf.obj.
function XMP:attach_metadata_pdfstream(metadata_type)
    local xmp_string = self:generate_xmp_string()
    local new_pdf = pdf.obj {
        type = 'stream',
        attr = '/Type /'..metadata_type..' /Subtype /XML',
        immediate = true,
        compresslevel = 0,
        string = xmp_string,
    }
    self.lines = {}
    return new_pdf
end

-- Writes the packet to <output dir>/<jobname>.xmp_metadata.xml.
-- A file that cannot be opened produces a warning instead of a Lua error.
function XMP:dump_metadata()
    local xmp_string = self:generate_xmp_string()
    -- start the next serialization from an empty buffer, as
    -- attach_metadata_pdfstream does
    self.lines = {}
    local dir = get_output_dir() or '.'
    local path = dir .. '/' .. tex.jobname .. '.xmp_metadata.xml'
    -- use the handle directly instead of redirecting the default output file
    local f, err = io.open(path,'w')
    if not f then
        SciKGTeX:warn('Could not write metadata file %s: %s', path, tostring(err))
        return
    end
    f:write(xmp_string)
    f:close()
end

luatexbase.add_to_callback('stop_run', function()
    SciKGTeX:warn_unused_command()
    if SciKGTeX.PRODUCE_XMP_FILE then
        XMP:dump_metadata()
    end
end, 'at_end')

-- Writing metadata packets
luatexbase.add_to_callback('finish_pdffile', function()
    if XMP.paper then
        local catalog_key
        if CONFORM_TO_PDFA then
            catalog_key='SciKGMetadata'
        else
            catalog_key='Metadata'
        end
        local metadata_obj = XMP:attach_metadata_pdfstream(catalog_key)
        local catalog = pdf.getcatalog() or ''
        pdf.setcatalog(catalog..string.format('/%s %s 0 R', catalog_key, metadata_obj))
    end
end, 'finish')

-- TODO: real identifier assigned
-- get the id or generate UUID
local output_dir = get_output_dir() or '.'
local header = read_header_of_file(output_dir .. '/' .. tex.jobname .. '.xmp_metadata.xml')
local id
if header ~= nil then
    id = extract_uuid_from_header(header)
    -- a header without an id attribute must not be adopted as the identifier
    if id == header or id == '' then
        id = nil
    end
end
if id == nil then
    id = generate_UUID()
    print('generate new id:', id)
end
XMP:add_paper_node(id)
XMP:add_namespace("rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")
XMP:add_namespace("rdfs","http://www.w3.org/2000/01/rdf-schema#")
XMP:add_namespace("orkg","http://orkg.org/core#")
XMP:add_namespace("orkg_property","http://orkg.org/property/")
SciKGTeX.XMP = XMP
return SciKGTeX