# Copyright 2020 Facundo Batista
# All Rights Reserved
# Licensed under Apache 2.0

"""USAGE: jupynote.py notebook.ipynb cells

    cells is a string with which cells to include, separate groups with comma,
    ranges with dash (with defaults to start and end).
"""

import base64
import json
import sys
import tempfile
import traceback


def _as_lines(text):
    """Return a notebook multi-line field as a list of lines.

    Multi-line fields may come either as a single string or as a list of strings; a single
    string is split keeping the line endings, so both shapes can be handled the same way.
    """
    if isinstance(text, str):
        return text.splitlines(keepends=True)
    return list(text)


def _verbatimize(lines):
    """Wrap a series of lines around a verbatim indication.

    Trailing whitespace (including the line ending) is removed from every line. Returns a
    new list of lines, the first and last ones being the verbatim begin/end markers.
    """
    result = [r"\begin{verbatim}"]
    result.extend(line.rstrip() for line in lines)
    result.append(r"\end{verbatim}")
    return result


def _save_content(data):
    """Save the received b64encoded data to a temp file and return its path.

    The file is created with a '.png' suffix and is NOT removed automatically, as it needs
    to exist when the produced LaTeX is compiled.
    """
    # using NamedTemporaryFile as a context manager ensures the file handle is closed
    # (mkstemp returns an already open file descriptor that needs to be closed by hand)
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
        fh.write(base64.b64decode(data))
    return fh.name


def _render_data(data):
    """Convert the data of an output (a mapping from mime type to content) to latex lines.

    The richest supported representation is used: a PNG image, then latex, then plain text.

    Raises ValueError if none of those representations is present.
    """
    if 'image/png' in data:
        fname = _save_content(data['image/png'])
        return [r"\includegraphics{{{}}}".format(fname)]

    if 'text/latex' in data:
        # remove the line endings, as the lines will be joined with newlines later (keeping
        # them would produce blank lines, which are not allowed inside latex math)
        return [line.rstrip('\r\n') for line in _as_lines(data['text/latex'])]

    if 'text/plain' in data:
        return _verbatimize(_as_lines(data['text/plain']))

    raise ValueError(
        "No supported representation found in output data (got {})".format(sorted(data)))


class Notebook:
    """The notebook converter to latex."""

    def __init__(self, path):
        """Load the cells from the notebook file (JSON, UTF-8 encoded) in the given path."""
        with open(path, 'rt', encoding='utf8') as fh:
            nb_data = json.load(fh)
        self._cells = nb_data['cells']

    def __len__(self):
        """Return the quantity of cells in the notebook."""
        return len(self._cells)

    def _proc_src(self, content):
        """Process the source of a cell.

        Returns the source as a verbatim latex block (a string ending in newline).

        Raises ValueError if the cell type is not 'code' or 'markdown'.
        """
        cell_type = content['cell_type']
        if cell_type not in ('code', 'markdown'):
            raise ValueError(
                "Cell type not supported when processing source: {!r}".format(cell_type))

        # XXX: maybe we could parse the markdown instead of just showing it verbatim?
        result = _verbatimize(_as_lines(content['source']))
        return '\n'.join(result) + '\n'

    def _proc_out(self, content):
        """Process the output of a cell.

        Returns the latex for all the outputs (a string ending in newline), or None if the
        cell has no outputs.

        Raises ValueError if an output is of a type that is not supported.
        """
        outputs = content.get('outputs')
        if not outputs:
            return None

        result = []
        for item in outputs:
            output_type = item['output_type']
            if output_type in ('execute_result', 'display_data'):
                result.extend(_render_data(item['data']))
            elif output_type == 'stream':
                result.extend(_verbatimize(_as_lines(item['text'])))
            else:
                raise ValueError("Output type not supported in item {!r}".format(item))

        return '\n'.join(result) + '\n'

    def get(self, cell_idx):
        """Return the content from a specific cell in the notebook.

        The cell index starts in 1. The content is already split in source and output,
        and converted to latex; the output is None if the cell has none.
        """
        content = self._cells[cell_idx - 1]
        source = self._proc_src(content)
        output = self._proc_out(content)
        return source, output


def _parse_cells(spec, maxlen):
    """Convert the cells spec to a sorted list of unique ints.

    The spec is a comma separated sequence of groups; each group is a cell number or a
    'from-to' range (both ends included), where a missing 'from' means the first cell and
    a missing 'to' means the last one (maxlen).

    Raises ValueError if the spec is malformed or refers to cells outside the notebook.
    """
    if not spec:
        raise ValueError("Empty cells spec not allowed")
    if set(spec) - set('0123456789-,'):
        raise ValueError(
            "Found forbidden characters in cells definition (allowed digits, '-' and ',')")

    cells = set()
    for group in spec.split(','):
        if not group:
            raise ValueError("Empty group found in cells definition (got {!r})".format(spec))

        if '-' not in group:
            cells.add(int(group))
            continue

        cfrom, _, cto = group.partition('-')
        if '-' in cto:
            raise ValueError("Range can have only one '-' (got {!r})".format(group))
        cfrom = int(cfrom) if cfrom else 1
        cto = int(cto) if cto else maxlen
        if cfrom >= cto:
            raise ValueError(
                "Range 'from' need to be smaller than 'to' (got {!r})".format(group))
        cells.update(range(cfrom, cto + 1))

    # every group added at least one cell, so the list is never empty here
    cells = sorted(cells)
    if cells[0] < 1:
        raise ValueError("Cells need to be >=1")
    if maxlen < cells[-1]:
        raise ValueError(
            "Notebook loaded of len {}, smaller than requested cells: {}".format(maxlen, cells))

    return cells


def main(notebook_path, cells_spec):
    """Main entry point.

    Print to stdout one tcolorbox per requested cell, with its source and (if any) its
    output. If a cell fails to be converted a red box with the traceback is printed
    instead, and the process continues with the rest of the cells.
    """
    nb = Notebook(notebook_path)
    cells = _parse_cells(cells_spec, len(nb))

    for cell in cells:
        try:
            src, out = nb.get(cell)
        except Exception:
            # a broken cell must not abort the whole document: show the problem in place
            title = "ERROR when parsing cell {}".format(cell)
            print(
                r"\begin{{tcolorbox}}"
                r"[colback=red!5!white,colframe=red!75!,title={{{}}}]".format(title))
            tb = traceback.format_exc()
            print('\n'.join(_verbatimize(tb.split('\n'))))
            print(r"\end{tcolorbox}")
            continue

        print(r"\begin{{tcolorbox}}[title=Cell {{{:02d}}}]".format(cell))
        print(src)
        if out:
            print(r"\tcblower")
            print(out)
        print(r"\end{tcolorbox}")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print(__doc__)
        # wrong usage: signal the problem to the caller
        sys.exit(1)

    main(*sys.argv[1:3])