2022-06-29 20:08:16 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module IpynbDiff
|
2022-07-08 14:10:05 -04:00
|
|
|
require 'oj'
|
|
|
|
|
2022-06-29 20:08:16 -04:00
|
|
|
# Raised by Transformer#validate_notebook when the input cannot be parsed as
# JSON or the parsed document has no 'cells' key.
class InvalidNotebookError < StandardError; end
|
|
|
|
|
|
|
|
# Transforms a Jupyter Notebook into a markdown version of itself
|
|
|
|
class Transformer
|
|
|
|
require 'json'
|
|
|
|
require 'yaml'
|
|
|
|
require 'output_transformer'
|
|
|
|
require 'symbolized_markdown_helper'
|
2022-07-08 14:10:05 -04:00
|
|
|
require 'symbol_map'
|
2022-06-29 20:08:16 -04:00
|
|
|
require 'transformed_notebook'
|
|
|
|
include SymbolizedMarkdownHelper
|
|
|
|
|
|
|
|
# NOTE(review): this runs in the class body, so it sets an instance variable
# on the Transformer class object itself, not on Transformer instances.
# Instances get their own @include_frontmatter in #initialize (keyword
# default: true); nothing in this file reads the class-level value — confirm
# whether it is still needed.
@include_frontmatter = true
|
|
|
|
|
|
|
|
# Builds a transformer.
#
# @param include_frontmatter [Boolean] stored as-is; #transform_document
#   checks it before prepending the metadata block
# @param hide_images [Boolean] stored as-is and passed on to
#   OutputTransformer.new
def initialize(include_frontmatter: true, hide_images: false)
  @hide_images = hide_images
  @include_frontmatter = include_frontmatter
  @out_transformer = OutputTransformer.new(@hide_images)
end
|
|
|
|
|
|
|
|
# Parses the raw notebook and checks that it has the minimal notebook shape.
#
# @param notebook [String] raw notebook JSON
# @return [Hash] the parsed notebook; it always contains a 'cells' key
# @raise [InvalidNotebookError] when the input is not parseable JSON, is not
#   a JSON object, or has no 'cells' key
def validate_notebook(notebook)
  notebook_json = Oj::Parser.usual.parse(notebook)

  # Valid JSON is not necessarily an object: `null`, `[]`, `1` or `"text"`
  # parse successfully but do not respond to #key?. Check the type first so
  # such input is reported as InvalidNotebookError instead of NoMethodError.
  return notebook_json if notebook_json.is_a?(Hash) && notebook_json.key?('cells')

  raise InvalidNotebookError
rescue EncodingError, Oj::ParseError, JSON::ParserError
  raise InvalidNotebookError
end
|
|
|
|
|
|
|
|
# Entry point: turns a raw notebook into a TransformedNotebook.
#
# @param notebook [String, nil] raw notebook JSON
# @return [TransformedNotebook] built without arguments when +notebook+ is
#   nil/false; otherwise built from the transformed rows and the result of
#   SymbolMap.parse on the raw input
# @raise [InvalidNotebookError] propagated from #validate_notebook
def transform(notebook)
  return TransformedNotebook.new unless notebook

  blocks = transform_document(validate_notebook(notebook))

  TransformedNotebook.new(blocks, SymbolMap.parse(notebook))
end
|
|
|
|
|
|
|
|
# Transforms every cell of the parsed notebook and flattens the result into
# a single list of rows.
#
# @param notebook [Hash] parsed notebook; must contain 'cells'
# @return [Array] flattened rows; starts with the rows from
#   #transform_metadata when @include_frontmatter is truthy
def transform_document(notebook)
  cells_symbol = JsonSymbol.new('.cells')

  blocks = notebook['cells'].each_with_index.map do |cell, index|
    rows = transform_cell(cell, notebook, cells_symbol / index)
    decorate_cell(rows, cell, cells_symbol / index)
  end

  blocks.unshift(transform_metadata(notebook)) if @include_frontmatter

  blocks.flatten
end
|
|
|
|
|
|
|
|
# Wraps the rows of one cell with a "%% Cell" header row and separator rows.
#
# @param rows [Array] already transformed rows of the cell
# @param cell [Hash] raw cell; 'cell_type' falls back to 'raw', and 'id' and
#   metadata 'tags' may be absent
# @param symbol [Object] symbol attached to the header row
# @return [Array] header row, separator, +rows+, separator (not flattened)
def decorate_cell(rows, cell, symbol)
  tags = cell['metadata']&.fetch('tags', [])
  type = cell['cell_type'] || 'raw'
  tag_list = tags&.join(',')

  header = "%% Cell type:#{type} id:#{cell['id']} tags:#{tag_list}"

  # A bare `_` calls the row helper with no arguments; presumably it yields
  # an empty separator row (helper is defined outside this file).
  [_(symbol, header), _, rows, _]
end
|
|
|
|
|
|
|
|
# Dispatches a cell to the matching transformer: 'code' cells go to
# #transform_code_cell, every other cell type to #transform_text_cell.
#
# @param cell [Hash] raw cell
# @param notebook [Hash] parsed notebook (only passed on for code cells)
# @param symbol [Object] symbol of the cell
# @return [Array] rows produced by the selected transformer
def transform_cell(cell, notebook, symbol)
  return transform_text_cell(cell, symbol) unless cell['cell_type'] == 'code'

  transform_code_cell(cell, notebook, symbol)
end
|
|
|
|
|
|
|
|
# Renders a code cell as a fenced code block followed by its outputs.
#
# @param cell [Hash] raw code cell; reads 'source' and 'outputs'
# @param notebook [Hash] parsed notebook; the kernelspec language, when
#   present, is appended to the opening fence
# @param symbol [Object] symbol of the cell; sub-symbols are derived with `/`
# @return [Array] opening fence, source rows, closing fence, output rows
#   (nested, not flattened)
def transform_code_cell(cell, notebook, symbol)
  language = notebook.dig('metadata', 'kernelspec', 'language') || ''

  opening_fence = _(symbol / 'source', "``` #{language}")
  source_rows = symbolize_array(symbol / 'source', cell['source'], &:rstrip)
  closing_fence = _(nil, '```')
  output_rows = transform_outputs(cell['outputs'], symbol)

  [opening_fence, source_rows, closing_fence, output_rows]
end
|
|
|
|
|
|
|
|
# Transforms the outputs of a code cell.
#
# @param outputs [Array, nil] raw outputs of the cell; nil (cell has no
#   'outputs' key) is treated like an empty list
# @param symbol [Object] symbol of the owning cell; each output is passed to
#   the output transformer with `symbol / ['outputs', index]`
# @return [Array] two-element array: the "%% Output" header rows (empty when
#   nothing was rendered) and the rendered outputs, each preceded by a
#   separator row
def transform_outputs(outputs, symbol)
  # `cell['outputs']` is nil for a code cell without that key; fall back to
  # an empty list instead of failing with NoMethodError on nil.
  rendered = (outputs || []).each_with_index.map do |output, index|
    @out_transformer.transform(output, symbol / ['outputs', index])
  end

  # The transformer may return nil for an output; those entries are dropped,
  # but indices above still refer to positions in the original list.
  transformed = rendered.compact.map { |el| [_, el] }

  [
    transformed.empty? ? [] : [_, _(symbol / 'outputs', '%% Output')],
    transformed
  ]
end
|
|
|
|
|
|
|
|
# Transforms a non-code cell: its 'source' entries are passed to
# symbolize_array with trailing whitespace stripped.
#
# @param cell [Hash] raw cell; reads 'source'
# @param symbol [Object] symbol of the cell
# @return [Object] whatever symbolize_array returns for the source lines
def transform_text_cell(cell, symbol)
  source_symbol = symbol / 'source'

  symbolize_array(source_symbol, cell['source'], &:rstrip)
end
|
|
|
|
|
|
|
|
# Builds the frontmatter rows: a YAML dump of selected notebook metadata
# followed by a closing '---' row and a separator row.
#
# @param notebook_json [Hash] parsed notebook; 'metadata', 'nbformat' and
#   'nbformat_minor' may be absent, in which case the corresponding YAML
#   values are nil
# @return [Array] one row per YAML line (no symbol attached), then the
#   '---' row and a separator row
def transform_metadata(notebook_json)
  as_yaml = {
    'jupyter' => {
      # dig instead of chained [] so a notebook without a 'metadata' key
      # yields nil values rather than raising NoMethodError on nil.
      'kernelspec' => notebook_json.dig('metadata', 'kernelspec'),
      'language_info' => notebook_json.dig('metadata', 'language_info'),
      'nbformat' => notebook_json['nbformat'],
      'nbformat_minor' => notebook_json['nbformat_minor']
    }
  }.to_yaml

  as_yaml.split("\n").map { |l| _(nil, l) }.append(_(nil, '---'), _)
end
|
|
|
|
end
|
|
|
|
end
|