Source code for dedoc.data_structures.document_metadata

import uuid

from dedoc.api.schema.document_metadata import DocumentMetadata as ApiDocumentMetadata
from dedoc.data_structures.serializable import Serializable


[docs]class DocumentMetadata(Serializable): """ This class holds information about document metadata. """
[docs] def __init__(self, file_name: str, temporary_file_name: str, size: int, modified_time: int, created_time: int, access_time: int, file_type: str, other_fields: dict = None, uid: str = None) -> None: """ :param uid: document unique identifier (useful for attached files) :param file_name: original document name (before rename and conversion, so it can contain non-ascii symbols, spaces and so on) :param temporary_file_name: file name during parsing (unique name after rename and conversion); :param size: size of the original file in bytes :param modified_time: time of the last modification in unix time format (seconds since the epoch) :param created_time: time of the creation in unixtime :param access_time: time of the last access to the file in unixtime :param file_type: mime type of the file :param other_fields: additional fields of user metadata """ self.file_name = file_name self.temporary_file_name = temporary_file_name self.size = size self.modified_time = modified_time self.created_time = created_time self.access_time = access_time self.file_type = file_type self.other_fields = {} if other_fields is not None and len(other_fields) > 0: self.extend_other_fields(other_fields) self.uid = f"doc_uid_auto_{uuid.uuid1()}" if uid is None else uid
def set_uid(self, uid: str) -> None: self.uid = uid # noqa
[docs] def extend_other_fields(self, new_fields: dict) -> None: """ Add new attributes to the class and to the other_fields dictionary. :param new_fields: fields to add """ assert (new_fields is not None) assert (len(new_fields) > 0) for key, value in new_fields.items(): setattr(self, key, value) self.other_fields[key] = value
[docs] def to_api_schema(self) -> ApiDocumentMetadata: api_document_metadata = ApiDocumentMetadata(uid=self.uid, file_name=self.file_name, temporary_file_name=self.temporary_file_name, size=self.size, modified_time=self.modified_time, created_time=self.created_time, access_time=self.access_time, file_type=self.file_type, other_fields=self.other_fields) if self.other_fields is not None: for (key, value) in self.other_fields.items(): setattr(api_document_metadata, key, value) return api_document_metadata