Source code for diskette.core.dumper

import datetime
import json
import shutil
import tarfile
import tempfile
from pathlib import Path

from django.conf import settings
from django.template.defaultfilters import filesizeformat

from ..exceptions import (
    ApplicationConfigError, ApplicationRegistryError, DumperError
)
from ..utils import versionning
from ..utils.lists import get_duplicates, unduplicated_merge_lists
from ..utils.loggers import NoOperationLogger

from .applications import ApplicationConfig, DrainApplicationConfig
from .serializers import DumpdataSerializerAbstract
from .storages import StorageMixin



[docs]
class Dumper(StorageMixin, DumpdataSerializerAbstract):
    """
    Dump manager is in charge of storing application model objects, return serialized
    datas and storage files to dump them.

    Arguments:
        apps (list): List of dictionnaries, each dictionnary is a data dump definition.
            Each dictionnary will be turned to ``DrainApplicationConfig`` or
            ``ApplicationConfig`` object, depending it is a drain or not.

    Keyword Arguments:
        executable (string): A path to prefix commands, commonly the path to
            django-admin (or equivalent). This path will suffixed with a single space
            to ensure separation with command arguments. This is only used with
            ``command``.
        storages_basepath (Path): Basepath for reference in some path resolution.
            Currently used by storage dump to make relative path for storage files.
            On default this is based on current working directory. If given, the
            storage paths must be in the same leaf else this will be an error.
        storages (list): A list of storage Path objects.
        storages_excludes (list): A list of patterns to exclude storage files from
            dump.
        indent (integer): Indentation level in data dumps. If not given, dumps won't
            be indented.
        logger (object): Instance of a logger object to use. Logger object must
            implement common logging message methods (like error, info, etc..). See
            ``diskette.utils.loggers`` for available loggers. If not given, a dummy
            logger will be used that ignores any messages and won't output anything.
    """
    MANIFEST_FILENAME = "manifest.json"
    TEMPDIR_PREFIX = "diskette_"

    def __init__(self, apps, executable=None, storages_basepath=None, storages=None,
                 storages_excludes=None, logger=None, indent=None):
        self.storages_basepath = storages_basepath or Path.cwd()
        self.executable = executable + " " if executable else ""
        self.logger = logger or NoOperationLogger()
        self.storages = storages or []
        self.storages_excludes = storages_excludes or []
        self.indent = indent
        self.now = datetime.datetime.now()

        self.apps = self.load(apps)


[docs]
    def get_diskette_version(self):
        """
        Shortand to return diskette version.

        Useful with manifest building so it can be easily mocked from tests.

        Returns:
            string: The version string.
        """
        return versionning.get_package_version()["version"]



[docs]
    def get_drain_exclusions(self, apps, drain_excluded=False):
        """
        Get all model labels that should be excluded from a Drain to respect drainage
        policy.

        Arguments:
            apps (list): List of ``ApplicationConfig`` or ``DrainApplicationConfig``
                objects.

        Keyword Arguments:
            drain_excluded (boolean): If True, the excluded models are also returned for
                application which allows it with their ``allow_drain`` option. If False
                the excluded model won't be included. Default is False.

        Returns:
            list: List of FQM labels.
        """
        labels = []

        for app in apps:
            # If drain accept exclusions, blindly follow the apps retentions
            if drain_excluded:
                labels.extend(app.retention)
            # If drain reject exclusions, ignores app policy and force exclusions
            else:
                labels.extend(
                    unduplicated_merge_lists(app.retention, app.excludes)
                )

        return labels



[docs]
    def load(self, apps):
        """
        Load application objects from given definitions.

        Arguments:
            apps (list): List of dictionnaries where each one is an application.

        Returns:
            list: List of application objects built from given definitions.
        """
        objects = []
        drains = []

        # Check for duplicated name
        twices = list(get_duplicates([name for name, options in apps]))
        if twices:
            raise DumperError(
                "There was some duplicate names from applications: {}".format(
                    ", ".join(twices)
                )
            )

        # Build registry of application models
        objects = []
        for name, options in apps:
            if not options.get("is_drain"):
                # 'is_drain' is not meant to be passed as model argument
                options.pop("is_drain", None)
                # Append initialized app model
                objects.append(ApplicationConfig(name, **options))

        # Add drain models to registry
        drains = []
        for name, options in apps:
            if options.get("is_drain"):
                # Merge explicit drain exclusions with involved app models to
                # exclude
                options["excludes"] = unduplicated_merge_lists(
                    self.get_drain_exclusions(
                        objects,
                        drain_excluded=options.get("drain_excluded", False)
                    ),
                    options.get("excludes", []),
                )
                # 'is_drain' is not meant to be passed as model argument
                options.pop("is_drain", None)
                # Append initialized drain model
                drains.append(DrainApplicationConfig(name, **options))

        return objects + drains



[docs]
    def dump_options(self):
        """
        Build a dictionnary of options for each application.

        By option we means dump options given to dumpdata.

        Returns:
            list: List of dictionnaries, each dictionnary is a payload of application
                options.
        """
        return [app.as_options() for app in self.apps]



[docs]
    def payload(self):
        """
        Build a dictionnary of configuration payload for each application.

        Returns:
            list: List of dictionnaries, each dictionnary is a payload of application
                definition parameters.
        """
        return [app.as_config() for app in self.apps]



[docs]
    def build_commands(self, destination=None, indent=None):
        """
        Build dumpdata command line for each application.

        Keyword Arguments:
            destination (string or Path): Destination file where to write dump if
                given. The file will be created by the dump command when executed, not
                during this method.
            indent (integer): Indentation level for dump data.

        Returns:
            list: List of tuples for processed applications, each tuple contains
                firstly application name then the built dump command.
        """
        return [
            (
                app.name,
                self.command(app, destination=destination, indent=indent)
            )
            for app in self.apps
        ]



[docs]
    def dump_data(self, destination=None, indent=None, check=False):
        """
        Call dumpdata command to dump each application data.

        Keyword Arguments:
            destination (string or Path): Destination file where to write dump if
                given. The file will be created by the dump command when executed, not
                during this method.
            indent (integer): Indentation level for dump data.
            check (boolean): Perform operations writhout writing or querying anything.

        Returns:
            list: List of tuples for processed applications, each tuple contains
                firstly application name then the command output.
        """
        return [
            (
                app.name,
                self.call(app, destination=destination, indent=indent, check=check)
            )
            for app in self.apps
        ]



[docs]
    def validate_applications(self):
        """
        Call validators from all enabled application model objects.

        Raises:
            ApplicationRegistryError: An error with all possible collected errors if
            there is any.
        """
        errors = {}

        # Collect all model errors
        for app in self.apps:
            try:
                app.validate()
            except ApplicationConfigError as e:
                errors[app.name] = str(e)

        # Raise a single exception containing all collected errors
        if errors:
            raise ApplicationRegistryError(error_messages=errors)



[docs]
    def format_archive_filename(self, filename, with_data=False, with_storages=False):
        """
        Format archive filename depending features.

        Keyword Arguments:
            filename (string): Filename to use instead. It must end with ``.tar.gz``.
                Filename format may be like ``diskette{features}_{date}.tar.gz`` where
                ``features`` pattern can include either ``_data``, ``_storages`` or both
                depending enabled dump kinds, and ``date`` pattern would be a datetime
                string like ``2025-02-03T175309``.
            with_data (boolean): Enable dump of application datas.
            with_storages (boolean): Enable dump of media storages.

        Returns:
            string: Formatted filename with features.
        """
        filename_features = ""
        if with_data is True:
            filename_features += "_data"
        if with_storages is True:
            filename_features += "_storages"

        return filename.format(
            features=filename_features,
            date=self.now.isoformat(timespec="seconds").replace(":", ""),
        )



[docs]
    def build_dump_manifest(self, destination, data_path, with_data=True,
                            with_storages=True):
        """
        Build dump JSON manifest.

        Example of built file (real build is not indented): ::

            {
                "version": "0.0.0",
                "creation": "2024-01-01T12:12:12",
                "datas": [
                    "data/djangocontribsites.json",
                    "data/djangocontribauth.json"
                ],
                "storages": [
                    "var/media"
                ]
            }

        .. Note::
            Involves relative path resolving so it implies that storage paths are
            proper children of given destination path (that is removed from lead of
            storage paths).

            So storage paths must all start with a starting portion of value from
            Dumper attribute ``storages_basepath``.

            As an example ``/foo/bar/storage`` would not be in a 'storages_basepath'
            ``/ping/`` (so it is invalid) but would be (and valid) in ``/foo`` or
            ``/foo/bar``.

        .. Note::
            Manifest preserve order of registered applications when writing data dump
            list so it safe for loading them.

        Arguments:
            destination (Path): Destination file where to write manifest.

        Keyword Arguments:
            with_data (boolean): Enable dump of application datas.
            with_storages (boolean): Enable dump of media storages.

        Returns:
            Path: Path to the written manifest file.
        """
        manifest_path = destination / self.MANIFEST_FILENAME
        data = {
            "version": self.get_diskette_version(),
            "creation": self.now.isoformat(timespec="seconds"),
            "datas": None,
            "storages": None,
        }

        # Build a list of expected data dump filenames from registered applications
        if with_data is True:
            data["datas"] = [
                str((data_path / app.filename).relative_to(destination))
                for app in self.apps
            ]

        # Build a list of expected storage dump directories from registered storages
        if with_storages is True:
            data["storages"] = [
                str(storage.relative_to(self.storages_basepath))
                for storage in self.storages
            ]

        # Write built manifest into destination path
        manifest_path.write_text(json.dumps(data))

        return manifest_path



[docs]
    def validate(self):
        """
        Call all validators
        """
        self.validate_applications()
        self.validate_storages()



[docs]
    def make_archive(self, destination, filename, with_data=True, with_storages=True,
                     with_storages_excludes=True, destination_chmod=None):
        """
        Dump data and storages then archive everything in an archive.

        .. Note::
            Arguments 'with_data' and 'with_storages' can not be both disabled, at
            least one must be enabled else it is assumed as an error.

        Arguments:
            destination (Path): Directory where to write archive file.

        Keyword Arguments:
            filename (string): Custom archive filename to use instead of the default
                one. Your custom filename must end with ``.tar.gz``. Default filename
                is ``diskette[_data][_storages].tar.gz`` (parts depend from options).
            with_data (boolean): Enable dump of application datas.
            with_storages (boolean): Enable dump of media storages.
            with_storages_excludes (boolean): Enable usage of excluding patterns when
                collecting storages files.
            destination_chmod (integer): File permission to apply on archive files and
                also on destination directory if it did not exists. Value must be in
                an octal notation, default is ``0o755``.

        Returns:
            Path: Path to the written archive file.
        """
        destination_chmod = (
            destination_chmod or settings.DISKETTE_DUMP_PERMISSIONS or 0o755
        )

        if not with_data and not with_storages:
            raise DumperError(
                "Arguments 'with_data' and 'with_storages' can not be both 'False'"
            )

        # Temporary directory where the manager will work
        destination_tmpdir = Path(tempfile.mkdtemp(prefix=self.TEMPDIR_PREFIX))

        # Build data dump destination path
        data_tmpdir = destination_tmpdir / "data"
        data_tmpdir.mkdir()

        # Dump data into temp directory
        if with_data is True:
            self.dump_data(destination=data_tmpdir, indent=self.indent)

        # Compute history/stats file
        manifest_path = self.build_dump_manifest(
            destination_tmpdir,
            data_tmpdir,
            with_data=with_data,
            with_storages=with_storages
        )

        # Build dump archive paths
        archive_filename = self.format_archive_filename(
            filename,
            with_data=with_data,
            with_storages=with_storages
        )
        archive_path = destination_tmpdir / archive_filename
        archive_destination = destination / archive_filename

        # Then add everything to the archive
        try:
            with tarfile.open(archive_path, "w:gz") as tar:
                # Add data dumps dir
                if with_data is True:
                    self.logger.info("Appending data to the archive")
                    tar.add(data_tmpdir, arcname="data")
                    # Clear space from data dumps
                    shutil.rmtree(data_tmpdir)

                # Append collected storages files
                if with_storages is True:
                    self.logger.info("Appending storages to the archive")
                    for path, arcname in self.iter_storages_files(
                        allow_excludes=with_storages_excludes
                    ):
                        self.logger.debug("- {name} ({size})".format(
                            name=arcname,
                            size=filesizeformat(self.get_file_size(path)),
                        ))
                        tar.add(path, arcname=arcname)

                # Append dump manifest
                tar.add(manifest_path, arcname=self.MANIFEST_FILENAME)

            # Create destination directory with the right permission if needed
            if not destination.exists():
                destination.mkdir(
                    mode=destination_chmod,
                    parents=True,
                    exist_ok=True
                )

            # Use shutil instead of Path.rename since the latter does not work well
            # with different devices
            shutil.move(archive_path, archive_destination)
            archive_destination.chmod(destination_chmod)

        finally:
            # Always remove temporary working directory
            if destination_tmpdir.exists():
                shutil.rmtree(destination_tmpdir)

        return archive_destination



[docs]
    def make_script(self, destination, with_data=True, with_storages=True,
                    with_storages_excludes=True):
        """
        Create shellscript command lines to dump data.

        Arguments:
            destination (Path): Directory where to write archive file.

        Keyword Arguments:
            with_data (boolean): Enable dump of application datas.
            with_storages (boolean): Enable dump of media storages.
            with_storages_excludes (boolean): Enable usage of excluding patterns when
                collecting storages files.

        Returns:
            string: All commands to dump data, each command on its line with a previous
                comment line with the dump name.
        """
        if not with_data and not with_storages:
            raise DumperError(
                "Arguments 'with_data' and 'with_storages' can not be both 'False'"
            )

        commandlines = []

        # Dump data into temp directory
        if with_data is True:
            commandlines += [
                "# {}\n{}".format(name, cmd)
                for name, cmd in self.build_commands(
                    destination=destination,
                    indent=self.indent
                )
            ]

        return "\n".join(commandlines)



[docs]
    def check(self, destination, filename, with_data=True, with_storages=True,
              with_storages_excludes=True):
        """
        Check what would be done.

        Arguments:
            destination (Path): Directory where to write archive file.

        Keyword Arguments:
            filename (string): Custom archive filename to use instead of the default
                one. Your custom filename must end with ``.tar.gz``. Default filename
                is ``diskette[_data][_storages].tar.gz`` (parts depend from options).
            with_data (boolean): Enable dump of application datas.
            with_storages (boolean): Enable dump of media storages.
            with_storages_excludes (boolean): Enable usage of excluding patterns when
                collecting storages files.

        Returns:
            Path: Path to the written archive file.
        """
        if not with_data and not with_storages:
            raise DumperError(
                "Arguments 'with_data' and 'with_storages' can not be both 'False'"
            )

        # Dump data into temp directory
        if with_data is True:
            self.dump_data(destination=destination, indent=self.indent, check=True)

        if with_storages is True:
            self.logger.info("- Scanning storages to archive")
            files_length = 0
            files_total = 0
            for path, arcname in self.iter_storages_files(
                allow_excludes=with_storages_excludes
            ):
                files_length += 1
                size = self.get_file_size(path)
                files_total += size
                self.logger.debug("- {name} ({size})".format(
                    name=arcname,
                    size=filesizeformat(size),
                ))

            if not files_length:
                self.logger.warning("  - No file has been found in any storage")
            else:
                msg = "- {length} file(s) would be collected for a total of {size}"
                self.logger.info(
                    msg.format(
                        length=files_length,
                        size=filesizeformat(files_total),
                    )
                )

            archive_filename = self.format_archive_filename(
                filename,
                with_data=with_data,
                with_storages=with_storages
            )

            return destination / archive_filename