from functools import total_ordering
from typing import Optional
@total_ordering
class HierarchyLevel:
    """
    This class defines the level of the document line.
    The lower is its value, the more important the line is.

    The level of the line consists of two parts:

    - level_1 defines primary importance (e.g. root - level_1=0, header - level_1=1, etc.);
    - level_2 defines the level inside lines of equal type (e.g. for list items - "1." - level_2=1, "1.1." - level_2=2, etc.).

    For the least important lines like raw_text both levels are None.

    Instances define ``__eq__`` without ``__hash__``, so they are unhashable
    and cannot be used as dictionary keys or set members.
    """

    # Known line types; these strings are the values stored in ``line_type``.
    root = "root"
    toc = "toc"
    header = "header"
    toc_item = "toc_item"
    list = "list"  # noqa
    list_item = "list_item"
    bullet_list_item = "bullet_list_item"
    raw_text = "raw_text"
    footer = "footer"
    page_id = "page_id"
    unknown = "unknown"

    def __init__(self, level_1: Optional[int], level_2: Optional[int], can_be_multiline: bool, line_type: str) -> None:
        """
        :param level_1: value of a line's primary importance
        :param level_2: level of the line inside specific class
        :param can_be_multiline: is used to unify lines inside tree node, if line can be multiline, it can be joined with another line
        :param line_type: type of the line, e.g. raw text, list item, header, etc.
        :raises AssertionError: if a given level is negative (only when assertions are enabled)
        """
        # Levels are either absent (None) or non-negative integers.
        assert level_1 is None or level_1 >= 0
        assert level_2 is None or level_2 >= 0
        self.level_1 = level_1
        self.level_2 = level_2
        self.can_be_multiline = can_be_multiline
        self.line_type = line_type

    def __is_defined(self, other: "HierarchyLevel") -> bool:
        """
        Check that both levels of both objects are set, i.e. that the
        (level_1, level_2) tuples can be compared directly.
        """
        return self.level_1 is not None and self.level_2 is not None and other.level_1 is not None and other.level_2 is not None

    def __eq__(self, other: object) -> bool:
        """
        Defines the equality of two hierarchy levels:

        - two raw text lines or lines with unknown type are equal;
        - two lines with equal level_1, level_2 are equal.

        :param other: object to compare with
        :return: NotImplemented for objects of other types, so that ``==`` evaluates to False
            and ``!=`` to True unless the other operand implements the comparison itself
        """
        if not isinstance(other, HierarchyLevel):
            return NotImplemented

        if self.__is_defined(other) and (self.level_1, self.level_2) == (other.level_1, other.level_2):
            return True

        # Raw text and unknown lines are matched by their type, regardless of the levels.
        if self.line_type == HierarchyLevel.raw_text and other.line_type == HierarchyLevel.raw_text:
            return True
        if self.line_type == HierarchyLevel.unknown and other.line_type == HierarchyLevel.unknown:
            return True
        return False

    def __lt__(self, other: "HierarchyLevel") -> bool:
        """
        Defines the comparison of hierarchy levels:

        - line1 < line2 if (level_1, level_2) of line1 < (level_1, level_2) of line2 (both lines have both levels set);
        - line1 < line2 if line2 has no levels at all (e.g. raw text or unknown), and line1 has at least one level set;
        - line1 < line2 if line1 has both levels set and line2 lacks one of them.

        Else line1 >= line2.

        :param other: hierarchy level of the line2
        :return: NotImplemented for objects of other types, which makes ``<`` raise TypeError
        """
        if not isinstance(other, HierarchyLevel):
            return NotImplemented

        if self.__is_defined(other):
            return (self.level_1, self.level_2) < (other.level_1, other.level_2)

        # From here on at least one of the four levels is None, so tuples cannot be compared.
        self_lacks_level = self.level_1 is None or self.level_2 is None
        self_has_level = self.level_1 is not None or self.level_2 is not None
        other_lacks_level = other.level_1 is None or other.level_2 is None
        other_has_level = other.level_1 is not None or other.level_2 is not None

        # An incomplete line is never more important than a line with any level set.
        if self_lacks_level and other_has_level:
            return False

        # Remaining cases: self has a level and other lacks one (True),
        # or neither line has any level (False).
        return self_has_level and other_lacks_level

    def __str__(self) -> str:
        return (
            f"HierarchyLevel(level_1={self.level_1}, level_2={self.level_2}, "
            f"can_be_multiline={self.can_be_multiline}, line_type={self.line_type})"
        )

    def is_raw_text(self) -> bool:
        """
        Check if the line is raw text.
        """
        return self.line_type == HierarchyLevel.raw_text

    def is_unknown(self) -> bool:
        """
        Check if the type of the line is unknown (only for levels from readers).
        """
        return self.line_type == HierarchyLevel.unknown

    def is_list_item(self) -> bool:
        """
        Check if the line is a list item.
        """
        return self.line_type == HierarchyLevel.list_item

    @staticmethod
    def create_raw_text() -> "HierarchyLevel":
        """
        Create hierarchy level for a raw textual line.
        """
        return HierarchyLevel(level_1=None, level_2=None, can_be_multiline=True, line_type=HierarchyLevel.raw_text)

    @staticmethod
    def create_unknown() -> "HierarchyLevel":
        """
        Create hierarchy level for a line with unknown type.
        """
        return HierarchyLevel(level_1=None, level_2=None, can_be_multiline=True, line_type=HierarchyLevel.unknown)

    @staticmethod
    def create_root() -> "HierarchyLevel":
        """
        Create hierarchy level for the document root.
        """
        return HierarchyLevel(level_1=0, level_2=0, can_be_multiline=True, line_type=HierarchyLevel.root)