diff --git a/.gitignore b/.gitignore index 66e519a..8e30f48 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,3 @@ target .venv docker-squash.iml **/image.tar -**/tox.tar diff --git a/docker_squash/image.py b/docker_squash/image.py index 3039d50..53f29c7 100644 --- a/docker_squash/image.py +++ b/docker_squash/image.py @@ -10,7 +10,7 @@ import tarfile import tempfile import threading -from typing import List, Optional, Union +from typing import Iterable, List, Optional, Set, Union import docker as docker_library @@ -283,22 +283,23 @@ def load_squashed_image(self): % (self.image_name, self.image_tag) ) - def _files_in_layers(self, layers): + def _files_in_layers(self, layers: List[str]) -> Set[str]: """ Prepare a list of files in all layers """ - files = {} + files = set() for layer in layers: self.log.debug("Generating list of files in layer '%s'..." % layer) tar_file = self._extract_tar_name(layer) with tarfile.open(tar_file, "r", format=tarfile.PAX_FORMAT) as tar: - files[layer] = [self._normalize_path(x) for x in tar.getnames()] - self.log.debug("Done, found %s files" % len(files[layer])) + layer_files = [self._normalize_path(x) for x in tar.getnames()] + files.update(layer_files) + self.log.debug("Done, found %s files" % len(layer_files)) return files - def _prepare_tmp_directory(self, tmp_dir: str) -> str: + def _prepare_tmp_directory(self, tmp_dir: Optional[str]) -> str: """Creates temporary directory that is used to work on layers""" if tmp_dir: @@ -513,21 +514,26 @@ def _move_layers(self, layers, src: str, dest: str): self.log.debug("Moving unmodified layer '%s'..." % layer_id) shutil.move(os.path.join(src, layer_id), dest) - def _file_should_be_skipped(self, file_name, file_paths): - # file_paths is now array of array with files to be skipped. - # First level are layers, second are files in these layers. - layer_nb = 1 - - for layers in file_paths: - for file_path in layers: - if file_name == file_path or file_name.startswith(file_path + "/"): - return layer_nb + def _file_should_be_skipped( + self, file_name: str, files_to_skip: Set[str], directories_to_skip: Set[str] + ) -> bool: + if file_name in files_to_skip: + self.log.debug( + "Skipping file '%s' because it is marked to be skipped" % file_name + ) + return True - layer_nb += 1 + for parent in self._path_hierarchy(file_name): + if parent in files_to_skip or parent in directories_to_skip: + self.log.debug( + "Skipping file '%s' because its parent directory '%s' is marked to be skipped" + % (file_name, parent) + ) + return True - return 0 + return False - def _marker_files(self, tar, members): + def _marker_files(self, members: List[tarfile.TarInfo]) -> List[tarfile.TarInfo]: """ Searches for marker files in the specified archive. @@ -535,217 +541,22 @@ def _marker_files(self, tar, members): These files mark the corresponding file to be removed (hidden) when we start a container from the image. """ - marker_files = {} + marker_files = [] - self.log.debug("Searching for marker files in '%s' archive..." % tar.name) + self.log.debug("Searching for marker files") for member in members: if ".wh." in member.name: self.log.debug("Found '%s' marker file" % member.name) - marker_files[member] = tar.extractfile(member) + marker_files.append(member) - self.log.debug("Done, found %s files" % len(marker_files)) + self.log.debug("Found %s marker files" % len(marker_files)) return marker_files - def _add_markers(self, markers, tar, files_in_layers, added_symlinks): - """ - This method is responsible for adding back all markers that were not - added to the squashed layer AND files they refer to can be found in layers - we do not squash. - """ - - if markers: - self.log.debug("Marker files to add: %s" % [o.name for o in markers.keys()]) - else: - # No marker files to add - return - - # https://github.com/goldmann/docker-squash/issues/108 - # Some tar archives do have the filenames prefixed with './' - # which does not have any effect when we unpack the tar achive, - # but when processing tar content - we see this. - tar_files = [self._normalize_path(x) for x in tar.getnames()] - - for marker, marker_file in markers.items(): - actual_file = marker.name.replace(".wh.", "") - normalized_file = self._normalize_path(actual_file) - - should_be_added_back = False - - if self._file_should_be_skipped(normalized_file, added_symlinks): - self.log.debug( - "Skipping '%s' marker file, this file is on a symlink path" - % normalized_file - ) - continue - - if normalized_file in tar_files: - self.log.debug( - "Skipping '%s' marker file, this file was added earlier for some reason..." - % normalized_file - ) - continue - - if files_in_layers: - for files in files_in_layers.values(): - if normalized_file in files: - should_be_added_back = True - break - else: - # There are no previous layers, so we need to add it back - # In fact this shouldn't happen since having a marker file - # where there is no previous layer does not make sense. - should_be_added_back = True - - if should_be_added_back: - self.log.debug("Adding '%s' marker file back..." % marker.name) - # Marker files on AUFS are hardlinks, we need to create - # regular files, therefore we need to recreate the tarinfo - # object - tar.addfile(tarfile.TarInfo(name=marker.name), marker_file) - # Add the file name to the list too to avoid re-reading all files - # in tar archive - tar_files.append(normalized_file) - else: - self.log.debug("Skipping '%s' marker file..." % marker.name) - - def _normalize_path( - self, path: Union[str, pathlib.Path] - ) -> Union[str, pathlib.Path]: + def _normalize_path(self, path: str) -> str: return os.path.normpath(os.path.join("/", path)) - def _add_hardlinks(self, squashed_tar, squashed_files, to_skip, skipped_hard_links): - for layer, hardlinks_in_layer in enumerate(skipped_hard_links): - # We need to start from 1, that's why we bump it here - current_layer = layer + 1 - for member in hardlinks_in_layer.values(): - normalized_name = self._normalize_path(member.name) - normalized_linkname = self._normalize_path(member.linkname) - - # Find out if the name is on the list of files to skip - if it is - get the layer number - # where it was found - layer_skip_name = self._file_should_be_skipped(normalized_name, to_skip) - # Do the same for linkname - layer_skip_linkname = self._file_should_be_skipped( - normalized_linkname, to_skip - ) - - # We need to check if we should skip adding back the hard link - # This can happen in the following situations: - # 1. hard link is on the list of files to skip - # 2. hard link target is on the list of files to skip - # 3. hard link is already in squashed files - # 4. hard link target is NOT in already squashed files - if ( - layer_skip_name - and current_layer > layer_skip_name - or layer_skip_linkname - and current_layer > layer_skip_linkname - or normalized_name in squashed_files - or normalized_linkname not in squashed_files - ): - self.log.debug( - "Found a hard link '%s' to a file which is marked to be skipped: '%s', skipping link too" - % (normalized_name, normalized_linkname) - ) - else: - if self.debug: - self.log.debug( - "Adding hard link '%s' pointing to '%s' back..." - % (normalized_name, normalized_linkname) - ) - - squashed_files.append(normalized_name) - squashed_tar.addfile(member) - - def _add_file(self, member, content, squashed_tar, squashed_files, to_skip): - normalized_name = self._normalize_path(member.name) - - if normalized_name in squashed_files: - self.log.debug( - "Skipping file '%s' because it is already squashed" % normalized_name - ) - return - - if self._file_should_be_skipped(normalized_name, to_skip): - self.log.debug( - "Skipping '%s' file because it's on the list to skip files" - % normalized_name - ) - return - - if content: - squashed_tar.addfile(member, content) - else: - # Special case: other(?) files, we skip the file - # itself - squashed_tar.addfile(member) - - # We added a file to the squashed tar, so let's note it - squashed_files.append(normalized_name) - - def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links): - added_symlinks = [] - for layer, symlinks_in_layer in enumerate(skipped_sym_links): - # We need to start from 1, that's why we bump it here - current_layer = layer + 1 - for member in symlinks_in_layer.values(): - # Handling symlinks. This is similar to hard links with one - # difference. Sometimes we do want to have broken symlinks - # be added because these can point to locations - # that will become available after adding volumes for example. - normalized_name = self._normalize_path(member.name) - normalized_linkname = self._normalize_path(member.linkname) - - # File is already in squashed files, skipping - if normalized_name in squashed_files: - self.log.debug( - "Found a symbolic link '%s' which is already squashed, skipping" - % (normalized_name) - ) - continue - - if self._file_should_be_skipped(normalized_name, added_symlinks): - self.log.debug( - "Found a symbolic link '%s' which is on a path to previously squashed symlink, skipping" - % (normalized_name) - ) - continue - # Find out if the name is on the list of files to skip - if it is - get the layer number - # where it was found - layer_skip_name = self._file_should_be_skipped(normalized_name, to_skip) - # Do the same for linkname - layer_skip_linkname = self._file_should_be_skipped( - normalized_linkname, to_skip - ) - - # If name or linkname was found in the lists of files to be - # skipped or it's not found in the squashed files - if ( - layer_skip_name - and current_layer > layer_skip_name - or layer_skip_linkname - and current_layer > layer_skip_linkname - ): - self.log.debug( - "Found a symbolic link '%s' to a file which is marked to be skipped: '%s', skipping link too" - % (normalized_name, normalized_linkname) - ) - else: - if self.debug: - self.log.debug( - "Adding symbolic link '%s' pointing to '%s' back..." - % (normalized_name, normalized_linkname) - ) - - added_symlinks.append([normalized_name]) - - squashed_files.append(normalized_name) - squashed_tar.addfile(member) - - return added_symlinks - def _squash_layers(self, layers_to_squash: List[str], layers_to_move: List[str]): self.log.info(f"Starting squashing for {self.squashed_tar}...") @@ -754,21 +565,14 @@ def _squash_layers(self, layers_to_squash: List[str], layers_to_move: List[str]) layers_to_squash.reverse() # Find all files in layers that we don't squash - files_in_layers_to_move = self._files_in_layers(layers_to_move) + files_in_layers_to_move: Set[str] = self._files_in_layers(layers_to_move) with tarfile.open( self.squashed_tar, "w", format=tarfile.PAX_FORMAT ) as squashed_tar: - to_skip = [] - skipped_markers = {} - skipped_hard_links = [] - skipped_sym_links = [] - skipped_files = [] - # List of filenames in the squashed archive - squashed_files = [] - # List of opaque directories in the image - opaque_dirs = [] - reading_layers: List[tarfile.TarFile] = [] + files_to_skip: Set[str] = set() + squashed_files: Set[str] = set() + directories_to_skip: Set[str] = set() for layer_id in layers_to_squash: layer_tar_file = self._extract_tar_name(layer_id) @@ -778,90 +582,43 @@ def _squash_layers(self, layers_to_squash: List[str], layers_to_move: List[str]) layer_tar: tarfile.TarFile = tarfile.open( layer_tar_file, "r", format=tarfile.PAX_FORMAT ) - reading_layers.append(layer_tar) - # Find all marker files for all layers - # We need the list of marker files upfront, so we can - # skip unnecessary files - members = layer_tar.getmembers() - markers = self._marker_files(layer_tar, members) + members: List[tarfile.TarInfo] = layer_tar.getmembers() + markers: List[tarfile.TarInfo] = self._marker_files(members) - skipped_sym_link_files = {} - skipped_hard_link_files = {} - skipped_files_in_layer = {} - - files_to_skip = [] - # List of opaque directories found in this layer - layer_opaque_dirs = [] - - # Add it as early as possible, we will be populating - # 'skipped_sym_link_files' array later - skipped_sym_links.append(skipped_sym_link_files) - - # Add it as early as possible, we will be populating - # 'files_to_skip' array later - to_skip.append(files_to_skip) + # List of opaque directories found in this layer. + # We will add it to 'directories_to_skip' at the end of processing the layer + opaque_dirs: List[str] = [] # Iterate over marker files found for this particular # layer and if a file in the squashed layers file corresponding # to the marker file is found, then skip both files - for marker, marker_file in markers.items(): - # We have a opaque directory marker file + for marker in markers: + normalized_name = self._normalize_path(marker.name) + # We have an opaque directory marker file # https://github.com/opencontainers/image-spec/blob/master/layer.md#opaque-whiteout - if marker.name.endswith(".wh..wh..opq"): - opaque_dir = os.path.dirname(marker.name) - + if normalized_name.endswith(".wh..wh..opq"): + opaque_dir = os.path.dirname(normalized_name) self.log.debug("Found opaque directory: '%s'" % opaque_dir) - - layer_opaque_dirs.append(opaque_dir) + opaque_dirs.append(opaque_dir) else: - files_to_skip.append( - self._normalize_path(marker.name.replace(".wh.", "")) - ) - skipped_markers[marker] = marker_file + actual_file = normalized_name.replace(".wh.", "") + files_to_skip.add(actual_file) + if ( + actual_file in squashed_files + or actual_file not in files_in_layers_to_move + ): + self.log.debug( + "Skipping marker file '%s'" % normalized_name + ) + files_to_skip.add(normalized_name) # Copy all the files to the new tar for member in members: normalized_name = self._normalize_path(member.name) - if self._is_in_opaque_dir(member, opaque_dirs): - self.log.debug( - "Skipping file '%s' because it is in an opaque directory" - % normalized_name - ) - continue - - # Skip all symlinks, we'll investigate them later - if member.issym(): - skipped_sym_link_files[normalized_name] = member - continue - - if member in skipped_markers.keys(): - self.log.debug( - "Skipping '%s' marker file, at the end of squashing we'll see if it's necessary to add it back" - % normalized_name - ) - continue - - if self._file_should_be_skipped(normalized_name, skipped_sym_links): - self.log.debug( - "Skipping '%s' file because it's on a symlink path, at the end of squashing we'll see if it's necessary to add it back" - % normalized_name - ) - - if member.isfile(): - f = (member, layer_tar.extractfile(member)) - else: - f = (member, None) - - skipped_files_in_layer[normalized_name] = f - continue - - # Skip files that are marked to be skipped - if self._file_should_be_skipped(normalized_name, to_skip): - self.log.debug( - "Skipping '%s' file because it's on the list to skip files" - % normalized_name - ) + if self._file_should_be_skipped( + normalized_name, files_to_skip, directories_to_skip + ): continue # Check if file is already added to the archive @@ -871,137 +628,37 @@ def _squash_layers(self, layers_to_squash: List[str], layers_to_move: List[str]) # This is true because we do reverse squashing - from # newer to older layer self.log.debug( - "Skipping '%s' file because it's older than file already added to the archive" + "Skipping file '%s' because it is older than file already added to the archive" % normalized_name ) continue - # Hard links are processed after everything else - if member.islnk(): - skipped_hard_link_files[normalized_name] = member - continue + if not member.isdir(): + # https://github.com/goldmann/docker-squash/issues/253 + directories_to_skip.add(normalized_name) content = None if member.isfile(): content = layer_tar.extractfile(member) - self._add_file( - member, content, squashed_tar, squashed_files, to_skip - ) - - skipped_hard_links.append(skipped_hard_link_files) - skipped_files.append(skipped_files_in_layer) - opaque_dirs += layer_opaque_dirs - - self._add_hardlinks( - squashed_tar, squashed_files, to_skip, skipped_hard_links - ) - added_symlinks = self._add_symlinks( - squashed_tar, squashed_files, to_skip, skipped_sym_links - ) - - for layer in skipped_files: - for member, content in layer.values(): - self._add_file( - member, content, squashed_tar, squashed_files, added_symlinks - ) + # We convert hardlinks to regular files to avoid issues with deleting + # link's target file + if member.islnk(): + target = layer_tar.getmember(member.linkname) + content = layer_tar.extractfile(target) + member.type = target.type + member.size = target.size - if files_in_layers_to_move: - self._reduce(skipped_markers) + squashed_tar.addfile(member, content) + squashed_files.add(normalized_name) - self._add_markers( - skipped_markers, - squashed_tar, - files_in_layers_to_move, - added_symlinks, - ) + directories_to_skip.update(opaque_dirs) + layer_tar.close() - tar: tarfile.TarFile - for tar in reading_layers: - tar.close() self.log.info("Squashing finished!") - def _is_in_opaque_dir(self, member, dirs): - """ - If the member we investigate is an opaque directory - or if the member is located inside of the opaque directory, - we copy these files as-is. Any other layer that has content - on the opaque directory will be ignored! - """ - - for opaque_dir in dirs: - if member.name == opaque_dir or member.name.startswith("%s/" % opaque_dir): - self.log.debug( - "Member '%s' found to be part of opaque directory '%s'" - % (member.name, opaque_dir) - ) - return True - - return False - - def _reduce(self, markers): - """ - This function is responsible for reducing marker files - that are scheduled to be added at the end of squashing to - minimum. - - In some cases, one marker file will overlap - with others making others not necessary. - - This is not only about adding less marker files, but - if we try to add a marker file for a file or directory - deeper in the hierarchy of already marked directory, - the image will not be successfully loaded back into Docker - daemon. - - Passed dictionary containing markers is altered *in-place*. - - Args: - markers (dict): Dictionary of markers scheduled to be added. - """ - - self.log.debug("Reducing marker files to be added back...") - - # Prepare a list of files (or directories) based on the marker - # files scheduled to be added - marked_files = list( - map( - lambda x: self._normalize_path(x.name.replace(".wh.", "")), - markers.keys(), - ) - ) - - # List of markers that should be not added back to tar file - to_remove = [] - - for marker in markers.keys(): - self.log.debug("Investigating '{}' marker file".format(marker.name)) - - path = self._normalize_path(marker.name.replace(".wh.", "")) - # Iterate over the path hierarchy, but starting with the - # root directory. This will make it possible to remove - # marker files based on the highest possible directory level - for directory in self._path_hierarchy(path): - if directory in marked_files: - self.log.debug( - "Marker file '{}' is superseded by higher-level marker file: '{}'".format( - marker.name, directory - ) - ) - to_remove.append(marker) - break - - self.log.debug("Removing {} marker files".format(len(to_remove))) - - if to_remove: - for marker in to_remove: - self.log.debug("Removing '{}' marker file".format(marker.name)) - markers.pop(marker) - - self.log.debug("Marker files reduced") - - def _path_hierarchy(self, path): + def _path_hierarchy(self, path: Union[str, pathlib.PurePath]) -> Iterable[str]: """ Creates a full hierarchy of directories for a given path. diff --git a/tests/test_integ_squash.py b/tests/test_integ_squash.py index 930b581..c500269 100644 --- a/tests/test_integ_squash.py +++ b/tests/test_integ_squash.py @@ -297,6 +297,14 @@ def assertFileDoesNotExist(self, name): tar.getnames(), ) + def assertContentEquals(self, name, expected): + self.content.seek(0) # Rewind + with tarfile.open(fileobj=self.content, mode="r") as tar: + content = tar.extractfile(name) + assert content.read() == expected, ( + "File %s has different content than expected" % name + ) + class TestIntegSquash(IntegSquash): @pytest.fixture(autouse=True) @@ -328,7 +336,7 @@ def test_all_files_should_be_in_squashed_layer(self): FROM %s RUN touch /somefile_layer1 RUN touch /somefile_layer2 - RUN touch /somefile_layer3 + RUN echo text > /somefile_layer3 """ % TestIntegSquash.BUSYBOX_IMAGE ) @@ -346,6 +354,7 @@ def test_all_files_should_be_in_squashed_layer(self): container.assertFileExists("somefile_layer1") container.assertFileExists("somefile_layer2") container.assertFileExists("somefile_layer3") + container.assertContentEquals("somefile_layer3", b"text\n") # We should have two layers less in the image self.assertTrue(len(squashed_image.layers) == len(image.layers) - 2) @@ -415,6 +424,27 @@ def test_there_should_be_a_marker_file_in_the_squashed_layer(self): # We should have one layer less in the image self.assertEqual(len(squashed_image.layers), len(image.layers) - 1) + def test_there_should_not_be_a_marker_file(self): + dockerfile = ( + """ + FROM %s + RUN touch /somefile_layer1 + RUN rm /somefile_layer1 + RUN touch /somefile_layer3 + """ + % TestIntegSquash.BUSYBOX_IMAGE + ) + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 3) as squashed_image: + squashed_image.assertFileDoesNotExist("somefile_layer1") + squashed_image.assertFileExists("somefile_layer3") + squashed_image.assertFileDoesNotExist(".wh.somefile_layer1") + + with self.Container(squashed_image) as container: + container.assertFileExists("somefile_layer3") + container.assertFileDoesNotExist("somefile_layer1") + def test_there_should_be_a_marker_file_in_the_squashed_layer_even_more_complex( self, ): @@ -852,15 +882,35 @@ def test_should_not_fail_with_hard_links(self): dockerfile = ( """ FROM %s - RUN touch /file && ln file link + RUN echo text > /file && ln file link RUN rm file """ % TestIntegSquash.BUSYBOX_IMAGE ) with self.Image(dockerfile) as image: - with self.SquashedImage(image, None): - pass + with self.SquashedImage(image, None) as squashed_image: + with self.Container(squashed_image) as container: + container.assertFileExists("link") + container.assertContentEquals("link", b"text\n") + + def test_should_handle_multiple_hard_links(self): + dockerfile = ( + """ + FROM %s + RUN echo text > /file && ln file link1 && ln file link2 + RUN rm file + """ + % TestIntegSquash.BUSYBOX_IMAGE + ) + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 2) as squashed_image: + with self.Container(squashed_image) as container: + container.assertFileExists("link1") + container.assertFileExists("link2") + container.assertContentEquals("link1", b"text\n") + container.assertContentEquals("link2", b"text\n") # https://github.com/goldmann/docker-squash/issues/99 # TODO: try not to use centos:6.6 image - this slows down testsuite @@ -1111,6 +1161,58 @@ def test_should_handle_symlinks_to_directory(self): container.assertFileExists("tmp/dir") container.assertFileDoesNotExist("tmp/dir/file") + def test_should_handle_replacing_directory(self): + dockerfile = ( + """ + FROM %s + RUN mkdir /tmp/dir + RUN touch /tmp/dir/file + RUN rm -rf /tmp/dir ; touch /tmp/dir + """ + % TestIntegSquash.BUSYBOX_IMAGE + ) + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 3, numeric=True) as squashed_image: + with self.Container(squashed_image) as container: + container.assertFileExists("tmp/dir") + container.assertFileDoesNotExist("tmp/dir/file") + + def test_should_handle_replacing_directory_containing_markers(self): + dockerfile = ( + """ + FROM %s + RUN mkdir /tmp/dir + RUN touch /tmp/dir/file + RUN rm /tmp/dir/file + RUN rm -rf /tmp/dir ; touch /tmp/dir + """ + % TestIntegSquash.BUSYBOX_IMAGE + ) + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 2, numeric=True) as squashed_image: + squashed_image.assertFileDoesNotExist("tmp/dir/.wh.file") + with self.Container(squashed_image) as container: + container.assertFileExists("tmp/dir") + container.assertFileDoesNotExist("tmp/dir/file") + + def test_should_handle_replacing_link_target(self): + dockerfile = ( + """ + FROM %s + RUN touch file ; ln file link + RUN rm file ; mkdir file + """ + % TestIntegSquash.BUSYBOX_IMAGE + ) + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 2, numeric=True) as squashed_image: + with self.Container(squashed_image) as container: + container.assertFileExists("file") + container.assertFileExists("link") + # https://github.com/goldmann/docker-squash/issues/122 def test_should_not_add_duplicate_files(self): dockerfile = """ @@ -1131,9 +1233,7 @@ def test_should_not_add_duplicate_files(self): ) with self.Image(dockerfile) as image: - with self.SquashedImage( - image, 6, numeric=True, output_path="tox.tar" - ) as squashed_image: + with self.SquashedImage(image, 6, numeric=True) as squashed_image: with self.Container(squashed_image) as container: container.assertFileExists( "data-template/etc/systemd/system/container-ipa.target.wants" diff --git a/tests/test_unit_v1_image.py b/tests/test_unit_v1_image.py index 465eba8..d67c6db 100644 --- a/tests/test_unit_v1_image.py +++ b/tests/test_unit_v1_image.py @@ -1,6 +1,5 @@ import builtins import pathlib -import tarfile import unittest import mock @@ -19,33 +18,39 @@ def setUp(self): def test_should_skip_exact_files(self): ret = self.squash._file_should_be_skipped( - "/opt/webserver/something", [["/opt/eap", "/opt/webserver/something"]] + "/opt/webserver/something", {"/opt/eap", "/opt/webserver/something"}, set() ) - self.assertEqual(ret, 1) + self.assertTrue(ret) def test_should_not_skip_file_not_in_path_to_skip(self): ret = self.squash._file_should_be_skipped( - "/opt/webserver/tmp", [["/opt/eap", "/opt/webserver/something"]] + "/opt/webserver/tmp", {"/opt/eap", "/opt/webserver/something"}, set() ) - self.assertEqual(ret, 0) + self.assertFalse(ret) def test_should_not_skip_the_file_that_name_is_similar_to_skipped_path(self): ret = self.squash._file_should_be_skipped( - "/opt/webserver/tmp1234", [["/opt/eap", "/opt/webserver/tmp"]] + "/opt/webserver/tmp1234", {"/opt/eap", "/opt/webserver/tmp"}, set() ) - self.assertEqual(ret, 0) + self.assertFalse(ret) def test_should_skip_files_in_subdirectory(self): ret = self.squash._file_should_be_skipped( - "/opt/webserver/tmp/abc", [["/opt/eap", "/opt/webserver/tmp"]] + "/opt/webserver/tmp/abc", {"/opt/eap", "/opt/webserver/tmp"}, set() ) - self.assertEqual(ret, 1) + self.assertTrue(ret) - def test_should_skip_files_in_other_layer(self): + def test_should_skip_files_in_directory(self): ret = self.squash._file_should_be_skipped( - "/opt/webserver/tmp/abc", [["a"], ["b"], ["/opt/eap", "/opt/webserver/tmp"]] + "/opt/webserver/tmp/abc", {"/opt/eap"}, {"/opt/webserver/tmp"} ) - self.assertEqual(ret, 3) + self.assertTrue(ret) + + def test_should_not_skip_directory(self): + ret = self.squash._file_should_be_skipped( + "/opt/webserver/tmp/abc", {"/opt/eap"}, {"/opt/webserver/tmp/abc"} + ) + self.assertFalse(ret) class TestParseImageName(unittest.TestCase): @@ -205,16 +210,14 @@ def test_should_find_all_marker_files(self): for path in ["/opt/eap", "/opt/eap/one", "/opt/eap/.wh.to_skip"]: files.append(self._tar_member(path)) - tar = mock.Mock() - markers = self.squash._marker_files(tar, files) + markers = self.squash._marker_files(files) - self.assertTrue(len(markers) == 1) - self.assertTrue(list(markers)[0].name == "/opt/eap/.wh.to_skip") + self.assertEqual(len(markers), 1) + self.assertEqual(markers[0].name, "/opt/eap/.wh.to_skip") def test_should_return_empty_dict_when_no_files_are_in_the_tar(self): - tar = mock.Mock() - markers = self.squash._marker_files(tar, []) - self.assertTrue(markers == {}) + markers = self.squash._marker_files([]) + self.assertEqual(len(markers), 0) def test_should_return_empty_dict_when_no_marker_files_are_found(self): files = [] @@ -222,168 +225,9 @@ def test_should_return_empty_dict_when_no_marker_files_are_found(self): for path in ["/opt/eap", "/opt/eap/one"]: files.append(self._tar_member(path)) - tar = mock.Mock() - markers = self.squash._marker_files(tar, files) - - self.assertTrue(len(markers) == 0) - self.assertTrue(markers == {}) - - -class TestAddMarkers(unittest.TestCase): - def setUp(self): - self.docker_client = mock.Mock() - self.log = mock.Mock() - self.image = "whatever" - self.squash = Image(self.log, self.docker_client, self.image, None) - - def test_should_not_fail_with_empty_list_of_markers_to_add(self): - self.squash._add_markers({}, None, None, []) - - def test_should_add_all_marker_files_to_empty_tar(self): - tar = mock.Mock() - tar.getnames.return_value = [] - - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.marker_1") - - markers = {marker_1: "file"} - self.squash._add_markers(markers, tar, {}, []) - - self.assertTrue(len(tar.addfile.mock_calls) == 1) - tar_info, marker_file = tar.addfile.call_args[0] - self.assertIsInstance(tar_info, tarfile.TarInfo) - self.assertTrue(marker_file == "file") - self.assertTrue(tar_info.isfile()) - - def test_should_add_all_marker_files_to_empty_tar_besides_what_should_be_skipped( - self, - ): - tar = mock.Mock() - tar.getnames.return_value = [] - - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.marker_1") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock(return_value=".wh.marker_2") - - markers = {marker_1: "file1", marker_2: "file2"} - self.squash._add_markers( - markers, tar, {"1234layerdid": ["/marker_1", "/marker_2"]}, [["/marker_1"]] - ) - - self.assertEqual(len(tar.addfile.mock_calls), 1) - tar_info, marker_file = tar.addfile.call_args[0] - self.assertIsInstance(tar_info, tarfile.TarInfo) - self.assertTrue(marker_file == "file2") - self.assertTrue(tar_info.isfile()) - - def test_should_skip_a_marker_file_if_file_is_in_unsquashed_layers(self): - tar = mock.Mock() - # List of files in the squashed tar - tar.getnames.return_value = ["marker_1"] - - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.marker_1") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock(return_value=".wh.marker_2") - # List of marker files to add back - markers = {marker_1: "marker_1", marker_2: "marker_2"} - # List of files in all layers to be moved - files_in_moved_layers = {"1234layerdid": ["/some/file", "/marker_2"]} - self.squash._add_markers(markers, tar, files_in_moved_layers, []) - - self.assertEqual(len(tar.addfile.mock_calls), 1) - tar_info, marker_file = tar.addfile.call_args[0] - self.assertIsInstance(tar_info, tarfile.TarInfo) - self.assertTrue(marker_file == "marker_2") - self.assertTrue(tar_info.isfile()) - - def test_should_not_add_any_marker_files(self): - tar = mock.Mock() - tar.getnames.return_value = ["marker_1", "marker_2"] - - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.marker_1") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock(return_value=".wh.marker_2") - - markers = {marker_1: "file1", marker_2: "file2"} - self.squash._add_markers( - markers, tar, {"1234layerdid": ["some/file", "marker_1", "marker_2"]}, [] - ) - - self.assertTrue(len(tar.addfile.mock_calls) == 0) - - # https://github.com/goldmann/docker-squash/issues/108 - def test_should_add_marker_file_when_tar_has_prefixed_entries(self): - tar = mock.Mock() - # Files already in tar - tar.getnames.return_value = ["./abc", "./def"] - - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.some/file") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock(return_value=".wh.file2") - - markers = {marker_1: "filecontent1", marker_2: "filecontent2"} - - # List of layers to move (and files in these layers), already normalized - self.squash._add_markers( - markers, tar, {"1234layerdid": ["/some/file", "/other/file", "/stuff"]}, [] - ) - - self.assertEqual(len(tar.addfile.mock_calls), 1) - tar_info, marker_file = tar.addfile.call_args[0] - self.assertIsInstance(tar_info, tarfile.TarInfo) - # We need to add the marker file because we need to - # override the already existing file - self.assertEqual(marker_file, "filecontent1") - self.assertTrue(tar_info.isfile()) - - -class TestReduceMarkers(unittest.TestCase): - def setUp(self): - self.docker_client = mock.Mock() - self.log = mock.Mock() - self.image = "whatever" - self.squash = Image(self.log, self.docker_client, self.image, None) - - def test_should_not_reduce_any_marker_files(self): - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value=".wh.some/file") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock(return_value=".wh.file2") - - markers = {marker_1: "filecontent1", marker_2: "filecontent2"} - - self.squash._reduce(markers) - - assert len(markers) == 2 - assert markers[marker_1] == "filecontent1" - assert markers[marker_2] == "filecontent2" - - def test_should_reduce_marker_files(self): - marker_1 = mock.Mock() - type(marker_1).name = mock.PropertyMock(return_value="opt/.wh.testing") - marker_2 = mock.Mock() - type(marker_2).name = mock.PropertyMock( - return_value="opt/testing/something/.wh.file" - ) - marker_3 = mock.Mock() - type(marker_3).name = mock.PropertyMock( - return_value="opt/testing/something/.wh.other_file" - ) - - markers = { - marker_1: "filecontent1", - marker_2: "filecontent2", - marker_3: "filecontent3", - } - - self.squash._reduce(markers) + markers = self.squash._marker_files(files) - assert len(markers) == 1 - assert markers[marker_1] == "filecontent1" + self.assertEqual(len(markers), 0) class TestPathHierarchy(unittest.TestCase): diff --git a/tests/test_unit_v2_image.py b/tests/test_unit_v2_image.py index 5852280..d6a4d75 100644 --- a/tests/test_unit_v2_image.py +++ b/tests/test_unit_v2_image.py @@ -11,8 +11,7 @@ class TestReadingConfigFiles(unittest.TestCase): def setUp(self): self.docker_client = mock.Mock() self.log = mock.Mock() - self.image = "whatever" - self.image = V2Image(self.log, self.docker_client, self.image, None) + self.image = V2Image(self.log, self.docker_client, "whatever", None) def test_should_read_json_file(self): manifest_example = '[{"Config":"96bdd3be20fa51b22dc9aaf996b49d403a403adf96e35d7e8b98519267c21c21.json","RepoTags":["busybox-to-squash:squashed"],"Layers":["980a6c63f88351bea42851fc101e4e2f61b12e1bf70122aad1f25186a736a404/layer.tar","977b2156300ec11226ffc7f9382e2fe4ec10a9cdfe445e062542b430aa09d82d/layer.tar","8a646a2ab402ca2774063c602182ad22c09d4af236ed84bdddb6d1205309accf/layer.tar"]}]' @@ -47,8 +46,7 @@ class TestGeneratingMetadata(unittest.TestCase): def setUp(self): self.docker_client = mock.Mock() self.log = mock.Mock() - self.image = "whatever" - self.image = V2Image(self.log, self.docker_client, self.image, None) + self.image = V2Image(self.log, self.docker_client, "whatever", None) def test_generate_manifest(self): old_image_manifest = { @@ -238,8 +236,7 @@ class TestWritingMetadata(unittest.TestCase): def setUp(self): self.docker_client = mock.Mock() self.log = mock.Mock() - self.image = "whatever" - self.image = V2Image(self.log, self.docker_client, self.image, None) + self.image = V2Image(self.log, self.docker_client, "whatever", None) @mock.patch.object(V2Image, "_write_json_metadata") def test_write_image_metadata(self, mock_method):