Source code for lookatme.parser

"""
This module defines the parser for the markdown presentation file
"""


from collections import defaultdict
from marshmallow import fields, Schema
import mistune
import re
import yaml


from lookatme.schemas import MetaSchema
from lookatme.slide import Slide


class Parser(object):
    """A parser for markdown presentation files
    """

    # A front-matter delimiter is a line made up solely of three or more
    # dashes (surrounding whitespace is stripped before matching).
    _META_MARKER = re.compile(r"-{3,}")

    def __init__(self):
        """Create a new Parser instance
        """

    def parse(self, input_data):
        """Parse the provided input data into its meta values and slides

        :param str input_data: The input markdown presentation to parse
        :returns: tuple of (meta, slides)
        """
        input_data, meta = self.parse_meta(input_data)
        input_data, slides = self.parse_slides(meta, input_data)
        return meta, slides

    def parse_slides(self, meta, input_data):
        """Parse the slides out of the input data

        If the markdown contains at least one hrule, slides are delimited by
        hrules and the hrule tokens themselves are dropped. Otherwise the
        document is split on the heading level reported as
        ``lowest_non_title`` by :meth:`_scan_for_smart_split`, the splitting
        heading is kept as the first token of its slide, heading levels are
        shifted down by ``title_level`` (never below 1), and an empty
        ``meta["title"]`` is filled in from the detected title heading.

        :param dict meta: The parsed meta values (may be updated in place)
        :param str input_data: The input data string
        :returns: tuple of (remaining_data, slides); remaining_data is
            always an empty string
        """
        md = mistune.Markdown()
        state = {}
        tokens = md.block.parse(input_data, state)

        num_hrules, hinfo = self._scan_for_smart_split(tokens)
        # without explicit hrules, fall back to splitting on headings
        smart_split = num_hrules == 0

        if smart_split and meta.get("title") in ("", None):
            meta["title"] = hinfo["title"]

        split_level = hinfo["lowest_non_title"]
        level_shift = hinfo["title_level"] or 0

        slides = []
        curr_slide_tokens = []
        for token in tokens:
            if smart_split:
                is_heading = token["type"] == "heading"
                # decide on the split BEFORE the heading level is rewritten
                should_split = is_heading and token["level"] == split_level
                if is_heading:
                    token["level"] = max(token["level"] - level_shift, 1)
            else:
                should_split = token["type"] == "hrule"

            if not should_split:
                curr_slide_tokens.append(token)
                continue

            # new slide! In heading mode, skip the empty slide that would
            # otherwise be emitted before the very first heading.
            if not smart_split or slides or curr_slide_tokens:
                slides.append(Slide(curr_slide_tokens, md, len(slides)))

            # headings stay with the slide they start; hrules are dropped
            curr_slide_tokens = [token] if smart_split else []

        slides.append(Slide(curr_slide_tokens, md, len(slides)))
        return "", slides

    def _scan_for_smart_split(self, tokens):
        """Scan the provided tokens for the number of hrules and for the
        heading layout used when splitting slides on headings.

        The first heading is treated as the presentation title when its
        level occurs exactly once in the document; it is then removed from
        the per-level counts.

        :param list tokens: Token dicts with a ``"type"`` key; heading tokens
            also carry ``"level"`` and ``"text"``
        :returns: tuple (num_hrules, hinfo) where hinfo is a dict with keys
            ``"title"`` (title text or ``""``), ``"lowest_non_title"``
            (smallest remaining heading level, 10 if there is none),
            ``"title_level"`` (``lowest_non_title - 1``) and ``"counts"``
            (heading level -> number of occurrences, title excluded)
        """
        counts = defaultdict(int)
        title = ""
        num_hrules = 0
        first_heading = None

        for token in tokens:
            if token["type"] == "hrule":
                num_hrules += 1
            elif token["type"] == "heading":
                counts[token["level"]] += 1
                if first_heading is None:
                    first_heading = token

        # the first heading's level is unique in the document: it's the title
        if counts and counts[first_heading["level"]] == 1:
            title = first_heading["text"]
            del counts[first_heading["level"]]

        # 10 acts as the "no headings" sentinel and as an upper bound
        low_level = min([*counts, 10])

        hinfo = {
            # always one above the slide level so that slide headings are
            # normalized to level 1 by the caller
            "title_level": low_level - 1,
            "lowest_non_title": low_level,
            "counts": counts,
            "title": title,
        }
        return num_hrules, hinfo

    def _split_front_matter(self, input_data):
        """Separate a leading ``---`` delimited block from the rest of the input.

        Blank lines before the opening marker are skipped. Any other content
        before the opening marker, or a missing closing marker, means there
        is no front matter.

        :param str input_data: The input data string
        :returns: tuple of (remaining_data, yaml_text); yaml_text is None
            (and remaining_data is the untouched input) when there is no
            complete front-matter block
        """
        yaml_lines = []
        consumed = 0
        opened = False
        closed = False

        for line in input_data.split("\n"):
            # +1 accounts for the newline removed by split()
            consumed += len(line) + 1
            stripped = line.strip()

            if self._META_MARKER.fullmatch(stripped):
                if opened:
                    closed = True
                    break
                opened = True
                continue

            if opened:
                yaml_lines.append(line)
            elif stripped:
                # real content before any marker: no front matter
                break

        if not closed:
            # never opened, or opened but never terminated; don't swallow
            # the whole presentation as YAML
            return input_data, None

        return input_data[consumed:], "\n".join(yaml_lines)

    def parse_meta(self, input_data):
        """Parse the presentation meta values out of the input data

        The meta values are read from a leading block delimited by two lines
        of three or more dashes. When no complete block is present, or the
        block is blank, the schema defaults are returned.

        :param str input_data: The input data string
        :returns: tuple of (remaining_data, meta)
        """
        remaining, yaml_text = self._split_front_matter(input_data)

        if yaml_text is None or not yaml_text.strip():
            return remaining, MetaSchema().load({})

        return remaining, MetaSchema().loads(yaml_text)