-- This is not actually a full DVI reader. It just calculates a hash for each
-- page, so it can be detected whether a page changed between compilations and
-- needs to be converted to an image again using Dvisvgm or Dvipng.
--
-- Information about the DVI format is from here:
-- https://web.archive.org/web/20070403030353/http://www.math.umd.edu/~asnowden/comp-cont/dvi.html
--
-- NOTE(review): `md5` is not defined in this file; it is presumably a global
-- supplied by the host interpreter (e.g. the md5 library built into LuaTeX).
-- Confirm before using this module elsewhere.

-- the file after post_post is filled with bytes 223
local endfill = 223

-- opcode of the "end of page" command
local eop = 140

-- The "previous page" pointer of the first page is -1. Integers are read as
-- unsigned here, so -1 shows up as 2^32 - 1.
local no_page = 4294967295

-- numbers of bytes for each data type in DVI file
local int = 4
local byte = 1
local sixteen = 2

-- Read `size` bytes starting at 1-based position `pos` as one big endian
-- number. Returns nil when the requested range is not completely inside `str`,
-- so truncated files are reported instead of raising an arithmetic error.
--
-- DVI file format uses signed big endian integers. This code doesn't take into
-- account the sign, so it will return incorrect result for negative numbers.
-- It doesn't matter for the original purpose of this library, but it should be
-- fixed for general use.
local function read_unsigned(str, pos, size)
  if not pos or pos < 1 or pos + size - 1 > #str then
    return nil
  end
  local num = 0
  -- integer arithmetic only, so positions stay integers on Lua 5.3+
  for i = pos, pos + size - 1 do
    num = num * 256 + string.byte(str, i)
  end
  return num
end

-- Value of the single byte at `pos`, or nil when `pos` is outside the string.
local function read_byte(str, pos)
  return read_unsigned(str, pos, byte)
end

-- Unsigned 32-bit big endian value at `pos`, or nil when truncated.
local function read_integer(str, pos)
  return read_unsigned(str, pos, int)
end

-- Unsigned 16-bit big endian value at `pos`, or nil when truncated.
local function read_sixteen(str, pos)
  return read_unsigned(str, pos, sixteen)
end

-- select reader function with number of bytes of an argument
local readers = {
  [byte] = read_byte,
  [int] = read_integer,
  [sixteen] = read_sixteen
}

-- Layout of the commands this module needs: the opcode byte and the ordered
-- list of arguments that follow it.
local opcodes = {
  post_post = {
    opcode = 249,
    args = {
      {name = "q", type = int}, -- postamble address
      {name = "i", type = byte}
    }
  },
  post = {
    opcode = 248,
    args = {
      {name = "p", type = int}, -- address of the last page
      {name = "num", type = int},
      {name = "den", type = int},
      {name = "mag", type = int},
      {name = "l", type = int},
      {name = "u", type = int},
      {name = "s", type = sixteen},
      {name = "t", type = sixteen},
    }
  },
  bop = {
    opcode = 139,
    args = {
      {name = "c0", type = int},
      {name = "c1", type = int},
      {name = "c2", type = int},
      {name = "c3", type = int},
      {name = "c4", type = int},
      {name = "c5", type = int},
      {name = "c6", type = int},
      {name = "c7", type = int},
      {name = "c8", type = int},
      {name = "c9", type = int},
      {name = "p", type = int}, -- previous page
    }
  }
}

-- Read the arguments described by `args`, starting at position `pos`.
-- Returns a table indexed by argument name, or nil and a message when the
-- data ends before all arguments could be read.
local function read_arguments(str, pos, args)
  local values = {}
  for _, spec in ipairs(args) do
    local value = readers[spec.type](str, pos)
    if value == nil then
      return nil, "Unexpected end of DVI data at position " .. tostring(pos)
    end
    values[spec.name] = value
    -- seek the position. spec.type contains size of the current data type in bytes
    pos = pos + spec.type
  end
  return values
end

-- Read the command named `opcode` (a key of `opcodes`) located at `pos`.
-- Returns the table of its arguments, or nil and an error message.
local function read_opcode(opcode, str, pos)
  local format = opcodes[opcode]
  if not format then
    return nil, "Cannot find opcode format: " .. tostring(opcode)
  end
  -- check that opcode byte in the current position is the same as required opcode
  local op = read_byte(str, pos)
  if op ~= format.opcode then
    -- op is nil when pos lies outside of the file, hence the tostring
    return nil, "Wrong opcode " .. tostring(op) .. " at position " .. tostring(pos)
  end
  return read_arguments(str, pos + 1, format.args)
end

-- Find the postamble address.
-- Returns the 1-based string position of the post command, or nil and a message.
local function get_postamble_addr(dvicontent)
  local pos = #dvicontent
  -- skip endfill bytes at the end of file
  while pos > 0 and read_byte(dvicontent, pos) == endfill do
    pos = pos - 1
  end
  -- pos now points to the last argument of post_post; the command itself is
  -- five bytes earlier (four bytes of q precede the one byte of i)
  local post_postamble, msg = read_opcode("post_post", dvicontent, pos - 5)
  if not post_postamble then
    return nil, msg
  end
  -- DVI addresses are zero based, Lua strings are one based
  return post_postamble.q + 1
end

-- Scan backwards from `pos` down to `lowest` for the end of page byte.
-- Returns its position, or nil when the range doesn't contain one.
local function get_end_of_page(str, pos, lowest)
  for i = pos, lowest, -1 do
    if read_byte(str, i) == eop then
      return i
    end
  end
  return nil
end

-- Read the page whose bop command is at the zero-based DVI address `start`.
-- `stop` is the string position where the backward search for the end of the
-- page begins.
-- Returns the page object {number, hash}, the address of the previous page and
-- `start`; or nil when there is no readable page at `start`.
local function read_page(str, start, stop)
  -- we reached the end of file
  if not start or start == no_page then
    return nil
  end
  local current_page = read_opcode("bop", str, start + 1)
  if not current_page then
    return nil
  end
  -- bop takes 45 bytes (opcode and eleven integers), so the page contents
  -- start at string position start + 46
  local contents_start = start + 46
  local endofpage = get_end_of_page(str, stop, contents_start)
  if not endofpage then
    return nil
  end
  -- get the page contents, but skip all parameters, because they can change
  -- (especially pointer to the previous page)
  local page = str:sub(contents_start, endofpage)
  local page_obj = {
    number = current_page.c0, -- the page number
    hash = md5.sumhexa(page) -- hash the page contents
  }
  return page_obj, current_page.p, start
end

-- Calculate hashes of all pages in a DVI file.
-- `dvicontent` is the full contents of the DVI file as a string.
-- Returns a table that maps page numbers (the first counter of bop) to hashes
-- of the page contents. When the postamble cannot be located, nil and an error
-- message are returned instead.
local function get_pages(dvicontent)
  if type(dvicontent) ~= "string" then
    return nil, "DVI content must be a string"
  end
  local postamble_pos, msg = get_postamble_addr(dvicontent)
  if not postamble_pos then
    return nil, msg
  end
  local postamble
  postamble, msg = read_opcode("post", dvicontent, postamble_pos)
  if not postamble then
    return nil, msg
  end
  local pages = {}
  local next_page_pos = postamble.p
  -- the search for the end of a page starts just before the command that
  -- follows it: the postamble first, later the bop of the following page
  local stop = postamble_pos
  -- Pages are linked from the last one to the first one, so every pointer must
  -- be lower than the previous one. The comparison also ends the loop on the
  -- "no previous page" marker and protects against cycles in corrupted files.
  while next_page_pos and next_page_pos < stop do
    local page
    page, next_page_pos, stop = read_page(dvicontent, next_page_pos, stop)
    if page then
      -- when several pages share a number, the one closest to the start of
      -- the file is kept, because it is processed last
      pages[page.number] = page.hash
    end
  end
  return pages
end

-- if arg[1] then
--   local f = io.open(arg[1], "rb")
--   local dvicontent = f:read("*all")
--   f:close()
--   local pages = get_pages(dvicontent)
--   for k,v in pairs(pages) do
--     print(k,v)
--   end
-- end

return {
  get_pages = get_pages
}