dedoc
v0.11.0
Getting started:
Dedoc installation
Dedoc usage tutorial
Dedoc API usage
Using dedoc via API
Description of the API output format
Structure types
Default document structure type
Law structure type
Technical specification structure type
Diploma structure type
Package Reference
Dedoc pipeline
dedoc.data_structures
dedoc.converters
dedoc.readers
dedoc.attachments_extractors
dedoc.metadata_extractors
dedoc.structure_extractors
dedoc.structure_constructors
Notes
Changelog
dedoc
Index
Edit on GitHub
Index
_ | A | B | C | D | E | F | G | H | I | J | L | M | N | P | R | S | T | U | W | X | Y
_
__add__() (dedoc.data_structures.LineWithMeta method)
__eq__() (dedoc.data_structures.HierarchyLevel method)
__getitem__() (dedoc.data_structures.LineWithMeta method)
__init__() (dedoc.attachments_extractors.PDFAttachmentsExtractor method)
(dedoc.attachments_handler.AttachmentsHandler method)
(dedoc.converters.AbstractConverter method)
(dedoc.converters.FileConverterComposition method)
(dedoc.data_structures.AlignmentAnnotation method)
(dedoc.data_structures.Annotation method)
(dedoc.data_structures.AttachAnnotation method)
(dedoc.data_structures.AttachedFile method)
(dedoc.data_structures.BBox method)
(dedoc.data_structures.BBoxAnnotation method)
(dedoc.data_structures.BoldAnnotation method)
(dedoc.data_structures.CellProperty method)
(dedoc.data_structures.ColorAnnotation method)
(dedoc.data_structures.ConfidenceAnnotation method)
(dedoc.data_structures.DocumentContent method)
(dedoc.data_structures.DocumentMetadata method)
(dedoc.data_structures.HierarchyLevel method)
(dedoc.data_structures.IndentationAnnotation method)
(dedoc.data_structures.ItalicAnnotation method)
(dedoc.data_structures.LineMetadata method)
(dedoc.data_structures.LineWithMeta method)
(dedoc.data_structures.LinkedTextAnnotation method)
(dedoc.data_structures.ParsedDocument method)
(dedoc.data_structures.SizeAnnotation method)
(dedoc.data_structures.SpacingAnnotation method)
(dedoc.data_structures.StrikeAnnotation method)
(dedoc.data_structures.StyleAnnotation method)
(dedoc.data_structures.SubscriptAnnotation method)
(dedoc.data_structures.SuperscriptAnnotation method)
(dedoc.data_structures.Table method)
(dedoc.data_structures.TableAnnotation method)
(dedoc.data_structures.TableMetadata method)
(dedoc.data_structures.TreeNode method)
(dedoc.data_structures.UnderlinedAnnotation method)
(dedoc.data_structures.UnstructuredDocument method)
(dedoc.DedocManager method)
(dedoc.metadata_extractors.ImageMetadataExtractor method)
(dedoc.metadata_extractors.MetadataExtractorComposition method)
(dedoc.metadata_extractors.NoteMetadataExtractor method)
(dedoc.metadata_extractors.PdfMetadataExtractor method)
(dedoc.readers.ArchiveReader method)
(dedoc.readers.CSVReader method)
(dedoc.readers.DocxReader method)
(dedoc.readers.EmailReader method)
(dedoc.readers.ExcelReader method)
(dedoc.readers.HtmlReader method)
(dedoc.readers.JsonReader method)
(dedoc.readers.MhtmlReader method)
(dedoc.readers.NoteReader method)
(dedoc.readers.PdfAutoReader method)
(dedoc.readers.PdfBaseReader method)
(dedoc.readers.PdfImageReader method)
(dedoc.readers.PdfTabbyReader method)
(dedoc.readers.PdfTxtlayerReader method)
(dedoc.readers.PptxReader method)
(dedoc.readers.RawTextReader method)
(dedoc.readers.ReaderComposition method)
(dedoc.structure_constructors.StructureConstructorComposition method)
(dedoc.structure_extractors.AbstractLawStructureExtractor method)
(dedoc.structure_extractors.ClassifyingLawStructureExtractor method)
(dedoc.structure_extractors.DiplomaStructureExtractor method)
(dedoc.structure_extractors.FoivLawStructureExtractor method)
(dedoc.structure_extractors.LawStructureExtractor method)
(dedoc.structure_extractors.StructureExtractorComposition method)
(dedoc.structure_extractors.TzStructureExtractor method)
__len__() (dedoc.data_structures.LineWithMeta method)
__lt__() (dedoc.data_structures.HierarchyLevel method)
A
AbstractAttachmentsExtractor (class in dedoc.attachments_extractors)
AbstractConverter (class in dedoc.converters)
AbstractLawStructureExtractor (class in dedoc.structure_extractors)
AbstractMetadataExtractor (class in dedoc.metadata_extractors)
AbstractOfficeAttachmentsExtractor (class in dedoc.attachments_extractors)
AbstractStructureConstructor (class in dedoc.structure_constructors)
AbstractStructureExtractor (class in dedoc.structure_extractors)
add_child() (dedoc.data_structures.TreeNode method)
add_metadata() (dedoc.metadata_extractors.AbstractMetadataExtractor method)
(dedoc.metadata_extractors.BaseMetadataExtractor method)
(dedoc.metadata_extractors.DocxMetadataExtractor method)
(dedoc.metadata_extractors.ImageMetadataExtractor method)
(dedoc.metadata_extractors.MetadataExtractorComposition method)
(dedoc.metadata_extractors.NoteMetadataExtractor method)
(dedoc.metadata_extractors.PdfMetadataExtractor method)
add_text() (dedoc.data_structures.TreeNode method)
AlignmentAnnotation (class in dedoc.data_structures)
Annotation (class in dedoc.data_structures)
annotations (dedoc.data_structures.LineWithMeta property)
ArchiveReader (class in dedoc.readers)
AttachAnnotation (class in dedoc.data_structures)
AttachedFile (class in dedoc.data_structures)
AttachmentsHandler (class in dedoc.attachments_handler)
B
BaseMetadataExtractor (class in dedoc.metadata_extractors)
BaseReader (class in dedoc.readers)
BBox (class in dedoc.data_structures)
BBoxAnnotation (class in dedoc.data_structures)
BinaryConverter (class in dedoc.converters)
BoldAnnotation (class in dedoc.data_structures)
C
can_convert() (dedoc.converters.AbstractConverter method)
(dedoc.converters.BinaryConverter method)
(dedoc.converters.DocxConverter method)
(dedoc.converters.ExcelConverter method)
(dedoc.converters.PDFConverter method)
(dedoc.converters.PNGConverter method)
(dedoc.converters.PptxConverter method)
(dedoc.converters.TxtConverter method)
can_extract() (dedoc.attachments_extractors.AbstractAttachmentsExtractor method)
(dedoc.attachments_extractors.DocxAttachmentsExtractor method)
(dedoc.attachments_extractors.ExcelAttachmentsExtractor method)
(dedoc.attachments_extractors.JsonAttachmentsExtractor method)
(dedoc.attachments_extractors.PDFAttachmentsExtractor method)
(dedoc.attachments_extractors.PptxAttachmentsExtractor method)
(dedoc.metadata_extractors.AbstractMetadataExtractor method)
(dedoc.metadata_extractors.BaseMetadataExtractor method)
(dedoc.metadata_extractors.DocxMetadataExtractor method)
(dedoc.metadata_extractors.ImageMetadataExtractor method)
(dedoc.metadata_extractors.NoteMetadataExtractor method)
(dedoc.metadata_extractors.PdfMetadataExtractor method)
can_read() (dedoc.readers.ArchiveReader method)
(dedoc.readers.BaseReader method)
(dedoc.readers.CSVReader method)
(dedoc.readers.DocxReader method)
(dedoc.readers.EmailReader method)
(dedoc.readers.ExcelReader method)
(dedoc.readers.HtmlReader method)
(dedoc.readers.JsonReader method)
(dedoc.readers.MhtmlReader method)
(dedoc.readers.NoteReader method)
(dedoc.readers.PdfAutoReader method)
(dedoc.readers.PdfImageReader method)
(dedoc.readers.PdfTabbyReader method)
(dedoc.readers.PdfTxtlayerReader method)
(dedoc.readers.PptxReader method)
(dedoc.readers.RawTextReader method)
CellProperty (class in dedoc.data_structures)
ClassifyingLawStructureExtractor (class in dedoc.structure_extractors)
ColorAnnotation (class in dedoc.data_structures)
ConfidenceAnnotation (class in dedoc.data_structures)
create() (dedoc.data_structures.TreeNode static method)
create_raw_text() (dedoc.data_structures.HierarchyLevel static method)
create_root() (dedoc.data_structures.HierarchyLevel static method)
create_unknown() (dedoc.data_structures.HierarchyLevel static method)
CSVReader (class in dedoc.readers)
D
DedocManager (class in dedoc)
DefaultStructureExtractor (class in dedoc.structure_extractors)
DiplomaStructureExtractor (class in dedoc.structure_extractors)
do_convert() (dedoc.converters.AbstractConverter method)
(dedoc.converters.BinaryConverter method)
(dedoc.converters.DocxConverter method)
(dedoc.converters.ExcelConverter method)
(dedoc.converters.PDFConverter method)
(dedoc.converters.PNGConverter method)
(dedoc.converters.PptxConverter method)
(dedoc.converters.TxtConverter method)
do_converting() (dedoc.converters.FileConverterComposition method)
document_type (dedoc.structure_extractors.ClassifyingLawStructureExtractor attribute)
(dedoc.structure_extractors.DefaultStructureExtractor attribute)
(dedoc.structure_extractors.DiplomaStructureExtractor attribute)
(dedoc.structure_extractors.FoivLawStructureExtractor attribute)
(dedoc.structure_extractors.LawStructureExtractor attribute)
(dedoc.structure_extractors.TzStructureExtractor attribute)
DocumentContent (class in dedoc.data_structures)
DocumentMetadata (class in dedoc.data_structures)
DocxAttachmentsExtractor (class in dedoc.attachments_extractors)
DocxConverter (class in dedoc.converters)
DocxMetadataExtractor (class in dedoc.metadata_extractors)
DocxReader (class in dedoc.readers)
E
EmailReader (class in dedoc.readers)
ExcelAttachmentsExtractor (class in dedoc.attachments_extractors)
ExcelConverter (class in dedoc.converters)
ExcelReader (class in dedoc.readers)
extend_other_fields() (dedoc.data_structures.DocumentMetadata method)
(dedoc.data_structures.LineMetadata method)
extract_structure() (dedoc.structure_extractors.AbstractLawStructureExtractor method)
(dedoc.structure_extractors.AbstractStructureExtractor method)
(dedoc.structure_extractors.ClassifyingLawStructureExtractor method)
(dedoc.structure_extractors.DefaultStructureExtractor method)
(dedoc.structure_extractors.DiplomaStructureExtractor method)
(dedoc.structure_extractors.StructureExtractorComposition method)
(dedoc.structure_extractors.TzStructureExtractor method)
F
FileConverterComposition (class in dedoc.converters)
FoivLawStructureExtractor (class in dedoc.structure_extractors)
from_two_points() (dedoc.data_structures.BBox static method)
G
get_attachments() (dedoc.attachments_extractors.AbstractAttachmentsExtractor method)
(dedoc.attachments_extractors.DocxAttachmentsExtractor method)
(dedoc.attachments_extractors.ExcelAttachmentsExtractor method)
(dedoc.attachments_extractors.JsonAttachmentsExtractor method)
(dedoc.attachments_extractors.PDFAttachmentsExtractor method)
(dedoc.attachments_extractors.PptxAttachmentsExtractor method)
get_root() (dedoc.data_structures.TreeNode method)
H
handle_attachments() (dedoc.attachments_handler.AttachmentsHandler method)
have_intersection_with_box() (dedoc.data_structures.BBox method)
height (dedoc.data_structures.BBox attribute)
HierarchyLevel (class in dedoc.data_structures)
HtmlReader (class in dedoc.readers)
I
ImageMetadataExtractor (class in dedoc.metadata_extractors)
IndentationAnnotation (class in dedoc.data_structures)
is_list_item() (dedoc.data_structures.HierarchyLevel method)
is_raw_text() (dedoc.data_structures.HierarchyLevel method)
is_unknown() (dedoc.data_structures.HierarchyLevel method)
ItalicAnnotation (class in dedoc.data_structures)
J
JsonAttachmentsExtractor (class in dedoc.attachments_extractors)
JsonReader (class in dedoc.readers)
L
LawStructureExtractor (class in dedoc.structure_extractors)
line (dedoc.data_structures.LineWithMeta property)
LinearConstructor (class in dedoc.structure_constructors)
LineMetadata (class in dedoc.data_structures)
LineWithMeta (class in dedoc.data_structures)
LinkedTextAnnotation (class in dedoc.data_structures)
M
metadata (dedoc.data_structures.LineWithMeta property)
MetadataExtractorComposition (class in dedoc.metadata_extractors)
MhtmlReader (class in dedoc.readers)
N
name (dedoc.data_structures.AlignmentAnnotation attribute)
(dedoc.data_structures.AttachAnnotation attribute)
(dedoc.data_structures.BBoxAnnotation attribute)
(dedoc.data_structures.BoldAnnotation attribute)
(dedoc.data_structures.ColorAnnotation attribute)
(dedoc.data_structures.ConfidenceAnnotation attribute)
(dedoc.data_structures.IndentationAnnotation attribute)
(dedoc.data_structures.ItalicAnnotation attribute)
(dedoc.data_structures.LinkedTextAnnotation attribute)
(dedoc.data_structures.SizeAnnotation attribute)
(dedoc.data_structures.SpacingAnnotation attribute)
(dedoc.data_structures.StrikeAnnotation attribute)
(dedoc.data_structures.StyleAnnotation attribute)
(dedoc.data_structures.SubscriptAnnotation attribute)
(dedoc.data_structures.SuperscriptAnnotation attribute)
(dedoc.data_structures.TableAnnotation attribute)
(dedoc.data_structures.UnderlinedAnnotation attribute)
NoteMetadataExtractor (class in dedoc.metadata_extractors)
NoteReader (class in dedoc.readers)
P
parse() (dedoc.DedocManager method)
parse_file() (dedoc.readers.ReaderComposition method)
ParsedDocument (class in dedoc.data_structures)
PDFAttachmentsExtractor (class in dedoc.attachments_extractors)
PdfAutoReader (class in dedoc.readers)
PdfBaseReader (class in dedoc.readers)
PDFConverter (class in dedoc.converters)
PdfImageReader (class in dedoc.readers)
PdfMetadataExtractor (class in dedoc.metadata_extractors)
PdfTabbyReader (class in dedoc.readers)
PdfTxtlayerReader (class in dedoc.readers)
PNGConverter (class in dedoc.converters)
PptxAttachmentsExtractor (class in dedoc.attachments_extractors)
PptxConverter (class in dedoc.converters)
PptxReader (class in dedoc.readers)
R
RawTextReader (class in dedoc.readers)
read() (dedoc.readers.ArchiveReader method)
(dedoc.readers.BaseReader method)
(dedoc.readers.CSVReader method)
(dedoc.readers.DocxReader method)
(dedoc.readers.EmailReader method)
(dedoc.readers.ExcelReader method)
(dedoc.readers.HtmlReader method)
(dedoc.readers.JsonReader method)
(dedoc.readers.MhtmlReader method)
(dedoc.readers.NoteReader method)
(dedoc.readers.PdfAutoReader method)
(dedoc.readers.PdfBaseReader method)
(dedoc.readers.PdfTabbyReader method)
(dedoc.readers.PptxReader method)
(dedoc.readers.RawTextReader method)
ReaderComposition (class in dedoc.readers)
S
Serializable (class in dedoc.data_structures)
set_line() (dedoc.data_structures.LineWithMeta method)
SizeAnnotation (class in dedoc.data_structures)
SpacingAnnotation (class in dedoc.data_structures)
split() (dedoc.data_structures.LineWithMeta method)
square (dedoc.data_structures.BBox property)
StrikeAnnotation (class in dedoc.data_structures)
structure_document() (dedoc.structure_constructors.AbstractStructureConstructor method)
(dedoc.structure_constructors.LinearConstructor method)
(dedoc.structure_constructors.StructureConstructorComposition method)
(dedoc.structure_constructors.TreeConstructor method)
StructureConstructorComposition (class in dedoc.structure_constructors)
StructureExtractorComposition (class in dedoc.structure_extractors)
StyleAnnotation (class in dedoc.data_structures)
SubscriptAnnotation (class in dedoc.data_structures)
SuperscriptAnnotation (class in dedoc.data_structures)
T
Table (class in dedoc.data_structures)
TableAnnotation (class in dedoc.data_structures)
TableMetadata (class in dedoc.data_structures)
to_dict() (dedoc.data_structures.Serializable method)
TreeConstructor (class in dedoc.structure_constructors)
TreeNode (class in dedoc.data_structures)
TxtConverter (class in dedoc.converters)
TzStructureExtractor (class in dedoc.structure_extractors)
U
uid (dedoc.data_structures.LineWithMeta property)
UnderlinedAnnotation (class in dedoc.data_structures)
UnstructuredDocument (class in dedoc.data_structures)
W
width (dedoc.data_structures.BBox attribute)
with_attachments() (dedoc.attachments_extractors.AbstractAttachmentsExtractor static method)
X
x_bottom_right (dedoc.data_structures.BBox attribute)
x_top_left (dedoc.data_structures.BBox attribute)
Y
y_bottom_right (dedoc.data_structures.BBox attribute)
y_top_left (dedoc.data_structures.BBox attribute)