# Source code for dedoc.data_structures.document_metadata

import uuid
from collections import OrderedDict

from flask_restx import Api, Model, fields

from dedoc.api.models.custom_fields import wild_any_fields
from dedoc.data_structures.serializable import Serializable


class DocumentMetadata(Serializable):
    """
    This class holds information about document metadata.

    Besides the fixed set of file attributes, arbitrary user metadata can be attached:
    it is kept in the ``other_fields`` dictionary and mirrored as instance attributes.
    """

    def __init__(self,
                 file_name: str,
                 temporary_file_name: str,
                 size: int,
                 modified_time: int,
                 created_time: int,
                 access_time: int,
                 file_type: str,
                 other_fields: dict = None,
                 uid: str = None) -> None:
        """
        :param uid: document unique identifier (useful for attached files);
            when omitted, an identifier of the form ``doc_uid_auto_<uuid1>`` is generated
        :param file_name: original document name (before rename and conversion, so it can contain non-ascii symbols, spaces and so on)
        :param temporary_file_name: file name during parsing (unique name after rename and conversion)
        :param size: size of the original file in bytes
        :param modified_time: time of the last modification in unix time format (seconds since the epoch)
        :param created_time: time of the creation in unixtime
        :param access_time: time of the last access to the file in unixtime
        :param file_type: mime type of the file
        :param other_fields: additional fields of user metadata (``None`` or an empty dict means no additional fields)
        """
        self.file_name = file_name
        self.temporary_file_name = temporary_file_name
        self.size = size
        self.modified_time = modified_time
        self.created_time = created_time
        self.access_time = access_time
        self.file_type = file_type

        self.other_fields = {}
        # extend_other_fields() rejects None/empty input, so call it only when there is something to add
        if other_fields:
            self.extend_other_fields(other_fields)

        # Assigned after the user fields are applied, so a "uid" key among them does not replace this attribute
        self.uid = f"doc_uid_auto_{uuid.uuid1()}" if uid is None else uid

    def set_uid(self, uid: str) -> None:
        """
        Replace the document unique identifier.

        :param uid: new identifier
        """
        self.uid = uid  # noqa

    def extend_other_fields(self, new_fields: dict) -> None:
        """
        Add new attributes to the class and to the other_fields dictionary.

        Every key/value pair is stored in ``self.other_fields``.
        Each key is also set as an instance attribute, unless it is ``"other_fields"`` or the name of a
        class-level attribute (for example the ``to_dict`` method): mirroring such a key would replace
        the storage dictionary or shadow a method and break the object.

        The preconditions are checked with ``assert`` statements, so ``AssertionError`` is raised for
        ``None`` or empty input (these checks are skipped when Python runs with ``-O``).

        :param new_fields: fields to add, a non-empty dictionary
        """
        assert (new_fields is not None)
        assert (len(new_fields) > 0)

        cls = type(self)
        for key, value in new_fields.items():
            # Mirror the field as an attribute only when that cannot clobber the object's own machinery
            if key != "other_fields" and not hasattr(cls, key):
                setattr(self, key, value)
            self.other_fields[key] = value

    def to_dict(self) -> dict:
        """
        Convert the metadata to an ordered dictionary.

        The fixed fields come first, then every entry of ``other_fields`` is written as a top-level key
        (an entry named like a fixed field replaces that field's value in the result),
        and finally the ``other_fields`` dictionary itself is stored under the ``"other_fields"`` key.

        :return: ordered dictionary with the document metadata
        """
        res = OrderedDict()
        res["uid"] = self.uid
        res["file_name"] = self.file_name
        res["temporary_file_name"] = self.temporary_file_name
        res["size"] = self.size
        res["modified_time"] = self.modified_time
        res["created_time"] = self.created_time
        res["access_time"] = self.access_time
        res["file_type"] = self.file_type

        if self.other_fields is not None:
            res.update(self.other_fields)
        # Written last: the nested dictionary always wins over a user field that is itself named "other_fields"
        res["other_fields"] = self.other_fields
        return res

    @staticmethod
    def get_api_dict(api: Api) -> Model:
        """
        Register and return the ``DocumentMetadata`` model on the given API object.

        :param api: API object on which the model is registered
        :return: the model returned by ``api.model``
        """
        return api.model("DocumentMetadata", {
            "uid": fields.String(description="unique document identifier", example="doc_uid_auto_ba73d76a-326a-11ec-8092-417272234cb0"),
            "file_name": fields.String(description="file name", example="example.odt"),
            "temporary_file_name": fields.String(description="file name", example="123.odt"),
            "size": fields.Integer(description="file size in bytes", example="20060"),
            "modified_time": fields.Integer(description="modification time of the document in the format UnixTime", example="1590579805"),
            "created_time": fields.Integer(description="creation time of the document in the format UnixTime", example="1590579805"),
            "access_time": fields.Integer(description="file access time in format UnixTime", example="1590579805"),
            "file_type": fields.String(description="mime-type file", example="application/vnd.oasis.opendocument.text"),
            "[a-z]*": wild_any_fields
        })