Source code for dedoc.data_structures.document_metadata
from typing import Dict, Optional, Union
from dedoc.api.schema.document_metadata import DocumentMetadata as ApiDocumentMetadata
from dedoc.data_structures.serializable import Serializable
class DocumentMetadata(Serializable):
    """
    This class holds information about document metadata.

    :ivar file_name: original document name (before rename and conversion, so it can contain non-ascii symbols, spaces and so on)
    :ivar temporary_file_name: file name during parsing (unique name after rename and conversion)
    :ivar size: size of the original file in bytes
    :ivar modified_time: time of the last modification in unix time format (seconds since the epoch)
    :ivar created_time: time of the creation in unix time format
    :ivar access_time: time of the last access to the file in unix time format
    :ivar file_type: mime type of the file
    :ivar uid: document unique identifier (useful for attached files)

    :vartype file_name: str
    :vartype temporary_file_name: str
    :vartype size: int
    :vartype modified_time: int
    :vartype created_time: int
    :vartype access_time: int
    :vartype file_type: str
    :vartype uid: str

    Additional variables may be added with other file metadata.
    """

    def __init__(self,
                 file_name: str,
                 temporary_file_name: str,
                 size: int,
                 modified_time: int,
                 created_time: int,
                 access_time: int,
                 file_type: str,
                 uid: Optional[str] = None,
                 **kwargs: Union[str, int, float]) -> None:
        """
        :param uid: document unique identifier; if None, an identifier of the form ``doc_uid_auto_<uuid1>`` is generated
        :param file_name: original document name
        :param temporary_file_name: file name during parsing
        :param size: size of the original file in bytes
        :param modified_time: time of the last modification in unix time format
        :param created_time: time of the creation in unix time format
        :param access_time: time of the last access to the file in unix time format
        :param file_type: mime type of the file
        :param kwargs: additional metadata fields, each one is stored as an attribute of the instance under its keyword name
        """
        import uuid

        self.file_name: str = file_name
        self.temporary_file_name: str = temporary_file_name
        self.size: int = size
        self.modified_time: int = modified_time
        self.created_time: int = created_time
        self.access_time: int = access_time
        self.file_type: str = file_type

        # Extra keyword arguments become plain instance attributes.
        # Their names cannot clash with the explicit parameters above,
        # because Python binds those names to the parameters first.
        for key, value in kwargs.items():
            self.add_attribute(key, value)

        self.uid: str = f"doc_uid_auto_{uuid.uuid1()}" if uid is None else uid

    def add_attribute(self, key: str, value: Union[str, int, float]) -> None:
        """
        Store an additional metadata field as an attribute of this instance.
        If an attribute with the same name already exists, it is overwritten.

        :param key: name of the attribute
        :param value: value of the attribute
        """
        setattr(self, key, value)