Source code for dedoc.structure_extractors.patterns.abstract_pattern

from abc import ABC, abstractmethod
from typing import Optional, Union

from dedoc.data_structures.hierarchy_level import HierarchyLevel
from dedoc.data_structures.line_with_meta import LineWithMeta


[docs]class AbstractPattern(ABC): """ Base class for all patterns to configure structure extraction by :class:`~dedoc.structure_extractors.DefaultStructureExtractor`. """ _name = ""
[docs] def __init__(self, line_type: Optional[str], level_1: Optional[int], level_2: Optional[int], can_be_multiline: Optional[Union[bool, str]]) -> None: """ Initialize pattern with default values of :class:`~dedoc.data_structures.HierarchyLevel` attributes. They can be used in :meth:`~dedoc.structure_extractors.patterns.abstract_pattern.AbstractPattern.get_hierarchy_level` according to specific pattern logic. :param line_type: type of the line, e.g. "header", "bullet_list_item", "chapter", etc. :param level_1: value of a line primary importance :param level_2: level of the line inside specific class :param can_be_multiline: is used to unify lines inside tree node by :class:`~dedoc.structure_constructors.TreeConstructor`, if line can be multiline, it can be joined with another line. If ``None`` is given, can_be_multiline is set to ``True``. """ from dedoc.utils.parameter_utils import get_bool_value self._line_type = line_type self._level_1 = level_1 self._level_2 = level_2 self._can_be_multiline = get_bool_value(can_be_multiline, default_value=True)
[docs] @classmethod def name(cls: "AbstractPattern") -> str: """ Returns ``_name`` attribute, is used in parameters configuration to choose a specific pattern. Each pattern has a unique non-empty name. """ return cls._name
[docs] @abstractmethod def match(self, line: LineWithMeta) -> bool: """ Check if the given line satisfies to the pattern requirements. Line text, annotations or metadata (``metadata.tag_hierarchy_level``) can be used to decide, if the line matches the pattern or not. """ pass
[docs] @abstractmethod def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel: """ This method should be applied only when :meth:`~dedoc.structure_extractors.patterns.abstract_pattern.AbstractPattern.match` returned ``True`` for the given line. Get :class:`~dedoc.data_structures.HierarchyLevel` for initialising ``line.metadata.hierarchy_level`` attribute. Please see :ref:`add_structure_type_hierarchy_level` to get more information about :class:`~dedoc.data_structures.HierarchyLevel`. """ pass