aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_lib/dumpfile_delegate.py')
-rw-r--r--cvs2svn_lib/dumpfile_delegate.py510
1 files changed, 510 insertions, 0 deletions
diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py
new file mode 100644
index 0000000..092cfca
--- /dev/null
+++ b/cvs2svn_lib/dumpfile_delegate.py
@@ -0,0 +1,510 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2009 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""This module contains the DumpfileDelegate class, which writes a Subversion dumpfile."""
+
+
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import new as md5
+
+
+from cvs2svn_lib import config
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import InternalError
+from cvs2svn_lib.common import path_split
+from cvs2svn_lib.context import Ctx
+from cvs2svn_lib.cvs_file import CVSDirectory
+from cvs2svn_lib.cvs_file import CVSFile
+from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
+from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream
+
+
+# Things that can happen to a file (emitted as its "Node-action:").
+OP_ADD = 'add'
+OP_CHANGE = 'change'
+
+
class DumpfileDelegate(SVNRepositoryDelegate):
    """Create a Subversion dumpfile.

    Each callback of this delegate writes the corresponding revision
    or node record to the dumpfile that is opened by the
    constructor."""

    def __init__(self, revision_reader, dumpfile_path):
        """Return a new DumpfileDelegate instance, attached to a dumpfile
        DUMPFILE_PATH, using Ctx().cvs_filename_decoder().

        REVISION_READER is the object whose get_content_stream() method
        is used to obtain the contents of file revisions.  The dumpfile
        is opened for writing (truncating any existing file) and the
        dumpfile header is written to it immediately."""

        self._revision_reader = revision_reader
        self.dumpfile_path = dumpfile_path

        self.dumpfile = open(self.dumpfile_path, 'wb')
        self._write_dumpfile_header(self.dumpfile)

        # A set of the basic project infrastructure project directories
        # that have been created so far, as SVN paths.  (The root
        # directory is considered to be present at initialization.)  This
        # includes all of the LOD paths, and all of their parent
        # directories etc.
        self._basic_directories = set([''])

    def _write_dumpfile_header(self, dumpfile):
        """Write the standard dumpfile header to the file DUMPFILE."""

        # Since the CVS repository doesn't have a UUID, and the Subversion
        # repository will be created with one anyway, we don't specify a
        # UUID in the dumpfile.
        dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

    def _utf8_path(self, path):
        """Return a copy of PATH encoded in UTF-8.

        Raise FatalError if any component of PATH cannot be decoded by
        Ctx().cvs_filename_decoder() or encoded as UTF-8."""

        # Convert each path component separately (as they may each use
        # different encodings).
        try:
            return '/'.join([
                Ctx().cvs_filename_decoder(piece).encode('utf8')
                for piece in path.split('/')
                ])
        except UnicodeError:
            raise FatalError(
                "Unable to convert a path '%s' to internal encoding.\n"
                "Consider rerunning with one or more '--encoding' parameters or\n"
                "with '--fallback-encoding'."
                % (path,))

    def _string_for_prop(self, name, value):
        """Return a property in the form needed for the dumpfile."""

        return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)

    def _format_props(self, props):
        """Return the complete dumpfile property block for PROPS.

        PROPS maps property names to values.  The properties are
        emitted sorted by name, properties whose value is None are
        skipped, and the block is terminated by a 'PROPS-END' line."""

        prop_strings = [
            self._string_for_prop(name, props[name])
            for name in sorted(props.keys())
            if props[name] is not None
            ]
        return ''.join(prop_strings) + 'PROPS-END\n'

    def start_commit(self, revnum, revprops):
        """Emit the start of revision REVNUM.

        REVPROPS maps revision property names to their values;
        properties whose value is None are not written.  REVNUM is also
        remembered as self.revision."""

        self.revision = revnum

        # The start of a new commit typically looks like this:
        #
        #     Revision-number: 1
        #     Prop-content-length: 129
        #     Content-length: 129
        #
        #     K 7
        #     svn:log
        #     V 27
        #     Log message for revision 1.
        #     K 10
        #     svn:author
        #     V 7
        #     jrandom
        #     K 8
        #     svn:date
        #     V 27
        #     2003-04-22T22:57:58.132837Z
        #     PROPS-END
        #
        # Notice that the length headers count everything -- not just the
        # length of the data but also the lengths of the lengths, including
        # the 'K ' or 'V ' prefixes.
        #
        # The reason there are both Prop-content-length and Content-length
        # is that the former includes just props, while the latter includes
        # everything.  That's the generic header form for any entity in a
        # dumpfile.  But since revisions only have props, the two lengths
        # are always the same for revisions.

        # Calculate the output needed for the property definitions.
        all_prop_strings = self._format_props(revprops)
        total_len = len(all_prop_strings)

        # Print the revision header and revprops
        self.dumpfile.write(
            'Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            '\n'
            % (self.revision, total_len, total_len, all_prop_strings)
            )

    def end_commit(self):
        """Do nothing; a dumpfile revision needs no explicit terminator."""

        pass

    def _make_any_dir(self, path):
        """Emit the creation of directory PATH."""

        self.dumpfile.write(
            "Node-path: %s\n"
            "Node-kind: dir\n"
            "Node-action: add\n"
            "\n"
            "\n"
            % self._utf8_path(path)
            )

    def _register_basic_directory(self, path, create):
        """Register the creation of PATH if it is not already there.

        Create any parent directories that do not already exist.  If
        CREATE is set, also create PATH if it doesn't already exist.  This
        method should only be used for the LOD paths and the directories
        containing them, not for directories within an LOD path."""

        if path in self._basic_directories:
            return

        # Make sure that the parent directory is present.  The recursion
        # ends at the root directory '', which is registered from the
        # start.
        self._register_basic_directory(path_split(path)[0], True)
        if create:
            self._make_any_dir(path)
        self._basic_directories.add(path)

    def initialize_project(self, project):
        """Create any initial directories for the project.

        The trunk, tags, and branches directories are created the first
        time the project is seen.  Be sure not to create parent
        directories that already exist (e.g., because two directories
        share part of their paths either within or across projects)."""

        for path in project.get_initial_directories():
            self._register_basic_directory(path, True)

    def initialize_lod(self, lod):
        """Create the directory for LOD, and any missing parents.

        Nothing is done if the path of LOD is empty (i.e., it is the
        root directory)."""

        lod_path = lod.get_path()
        if lod_path:
            self._register_basic_directory(lod_path, True)

    def mkdir(self, lod, cvs_directory):
        """Emit the creation of CVS_DIRECTORY within LOD."""

        self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

    def _write_dir_props_change(self, dir_path, prop_contents):
        """Emit a property change of the directory DIR_PATH.

        PROP_CONTENTS is the complete property block, including the
        terminating 'PROPS-END' line.  Because a directory node has no
        text, Prop-content-length and Content-length are the same."""

        prop_len = len(prop_contents)

        # write headers, then props
        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), prop_len, prop_len, prop_contents)
            )

    def _ignore_props(self, cvsignore_text):
        """Return the property block that represents CVSIGNORE_TEXT.

        CVSIGNORE_TEXT is the contents of a .cvsignore file.  If it
        yields no patterns, the block contains no properties at all
        (just 'PROPS-END'); otherwise it sets svn:ignore to the
        patterns, one per line."""

        ignore_value = '\n'.join(generate_ignores(cvsignore_text))
        if not ignore_value:
            return 'PROPS-END\n'
        return (
            self._string_for_prop('svn:ignore', ignore_value + '\n')
            + 'PROPS-END\n'
            )

    def _open_content_stream(self, s_item):
        """Return a stream from which the contents of S_ITEM can be read.

        The stream obtained from the revision reader is wrapped in an
        AppleSingle decoding filter if Ctx().decode_apple_single is set,
        and in an LF_EOL_Filter if S_ITEM has a non-empty svn:eol-style
        property."""

        # If the file has keywords, we must prevent CVS/RCS from expanding
        # the keywords because they must be unexpanded in the repository,
        # or Subversion will get confused.
        stream = self._revision_reader.get_content_stream(
            s_item.cvs_rev,
            suppress_keyword_substitution=s_item.has_keywords()
            )

        if Ctx().decode_apple_single:
            # Insert a filter to decode any files that are in AppleSingle
            # format:
            stream = get_maybe_apple_single_stream(stream)

        # Insert a filter to canonicalize all EOLs if necessary:
        eol_style = s_item.svn_props.get('svn:eol-style', None)
        if eol_style:
            stream = LF_EOL_Filter(stream, eol_style)

        return stream

    def _add_or_change_path(self, s_item, op):
        """Emit the addition or change corresponding to S_ITEM.

        OP is either the constant OP_ADD or OP_CHANGE.

        If the file is named '.cvsignore', its contents are first
        written as the svn:ignore property of the containing directory;
        unless Ctx().keep_cvsignore is set, the file itself is then not
        written at all."""

        assert op in [OP_ADD, OP_CHANGE]

        # Convenience variables
        cvs_rev = s_item.cvs_rev

        # The property handling here takes advantage of an undocumented
        # but IMHO consistent feature of the Subversion dumpfile-loading
        # code.  When a node's properties aren't mentioned (that is, the
        # "Prop-content-length:" header is absent, no properties are
        # listed at all, and there is no "PROPS-END\n" line) then no
        # change is made to the node's properties.
        #
        # This is consistent with the way dumpfiles behave w.r.t. text
        # content changes, so I'm comfortable relying on it.  If you
        # commit a change to *just* the properties of some node that
        # already has text contents from a previous revision, then in the
        # dumpfile output for the prop change, no "Text-content-length:"
        # nor "Text-content-md5:" header will be present, and the text of
        # the file will not be given.  But this does not cause the file's
        # text to be erased!  It simply remains unchanged.
        #
        # This works out great for cvs2svn, due to lucky coincidences:
        #
        # For files, the only properties we ever set are set in the first
        # revision; all other revisions (including on branches) inherit
        # from that.  After the first revision, we never change file
        # properties, therefore, there is no need to remember the full set
        # of properties on a given file once we've set it.
        #
        # For directories, the only property we set is "svn:ignore", and
        # while we may change it after the first revision, we always do so
        # based on the contents of a ".cvsignore" file -- in other words,
        # CVS is doing the remembering for us, so we still don't have to
        # preserve the previous value of the property ourselves.

        # Calculate the (sorted-by-name) property string and length, if any.
        if s_item.svn_props_changed:
            prop_contents = self._format_props(s_item.svn_props)
            props_header = 'Prop-content-length: %d\n' % len(prop_contents)
        else:
            prop_contents = ''
            props_header = ''

        stream = self._open_content_stream(s_item)

        # The first chunk of file contents, if it has already been read:
        buf = None

        # treat .cvsignore as a directory property
        svn_path = cvs_rev.get_svn_path()
        dir_path, basename = path_split(svn_path)
        if basename == '.cvsignore':
            # The whole file is needed to compute the property value.  If
            # the file is kept, BUF is reused below as its contents.
            buf = stream.read()
            self._write_dir_props_change(dir_path, self._ignore_props(buf))
            if not Ctx().keep_cvsignore:
                stream.close()
                return

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: file\n'
            'Node-action: %s\n'
            '%s'  # no property header if no props
            % (self._utf8_path(svn_path), op, props_header)
            )

        # The length and checksum of the text are only known after it has
        # been copied, so write fixed-width placeholder headers now and
        # remember where they are so that they can be overwritten later.
        pos = self.dumpfile.tell()

        content_header_fmt = (
            'Text-content-length: %16d\n'
            'Text-content-md5: %32s\n'
            'Content-length: %16d\n'
            '\n'
            )

        self.dumpfile.write(content_header_fmt % (0, '', 0,))

        if prop_contents:
            self.dumpfile.write(prop_contents)

        # Insert the rev contents, calculating length and checksum as we go.
        checksum = md5()
        length = 0
        if buf is None:
            buf = stream.read(config.PIPE_READ_SIZE)
        # An empty read (of whatever string type) signals end of stream.
        while buf:
            checksum.update(buf)
            length += len(buf)
            self.dumpfile.write(buf)
            buf = stream.read(config.PIPE_READ_SIZE)

        stream.close()

        # Go back to overwrite the length and checksum headers with the
        # correct values.  The content length is the length of property
        # data, text data, and any metadata around/inside them:
        self.dumpfile.seek(pos, 0)
        self.dumpfile.write(
            content_header_fmt
            % (length, checksum.hexdigest(), length + len(prop_contents),)
            )

        # Jump back to the end of the stream
        self.dumpfile.seek(0, 2)

        # This record is done (write two newlines -- one to terminate
        # contents that weren't themselves newline-terminated, one to
        # provide a blank line for readability).
        self.dumpfile.write('\n\n')

    def add_path(self, s_item):
        """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, OP_ADD)

    def change_path(self, s_item):
        """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

        self._add_or_change_path(s_item, OP_CHANGE)

    def _write_delete(self, path):
        """Emit the deletion of the node at SVN path PATH."""

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-action: delete\n'
            '\n'
            % (self._utf8_path(path),)
            )

    def delete_lod(self, lod):
        """Emit the deletion of LOD."""

        lod_path = lod.get_path()
        self._write_delete(lod_path)
        # The directory has to be created afresh if the LOD reappears:
        self._basic_directories.remove(lod_path)

    def delete_path(self, lod, cvs_path):
        """Emit the deletion of CVS_PATH from LOD.

        If CVS_PATH is a '.cvsignore' file, all properties are removed
        from the containing directory; unless Ctx().keep_cvsignore is
        set, no deletion of the file itself is emitted."""

        # NOTE(review): the path is obtained via get_cvs_path() here but
        # via the cvs_path attribute below, exactly as before; they are
        # presumably equivalent -- confirm against CVSPath.
        dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
        if basename == '.cvsignore':
            # When a .cvsignore file is deleted, the directory's svn:ignore
            # property needs to be deleted.
            self._write_dir_props_change(dir_path, 'PROPS-END\n')
            if not Ctx().keep_cvsignore:
                return

        self._write_delete(lod.get_path(cvs_path.cvs_path))

    def _write_copy(self, dest_path, node_kind, src_revnum, src_path):
        """Emit the copy of SRC_PATH in revision SRC_REVNUM to DEST_PATH.

        NODE_KIND is 'file' or 'dir'."""

        self.dumpfile.write(
            'Node-path: %s\n'
            'Node-kind: %s\n'
            'Node-action: add\n'
            'Node-copyfrom-rev: %d\n'
            'Node-copyfrom-path: %s\n'
            '\n'
            % (
                self._utf8_path(dest_path),
                node_kind,
                src_revnum,
                self._utf8_path(src_path),
                )
            )

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Emit the copy of SRC_LOD in revision SRC_REVNUM to DEST_LOD."""

        dest_path = dest_lod.get_path()

        # Register the main LOD directory, and create parent directories
        # as needed.  The directory itself is not created here, because
        # the copy emitted below brings it into existence.
        self._register_basic_directory(dest_path, False)

        self._write_copy(dest_path, 'dir', src_revnum, src_lod.get_path())

    def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
        """Emit the copy of CVS_PATH from SRC_LOD to DEST_LOD.

        The source of the copy is CVS_PATH as of revision SRC_REVNUM.
        CVS_PATH must be a CVSFile or a CVSDirectory; otherwise
        InternalError is raised.  A '.cvsignore' file is not copied
        unless Ctx().keep_cvsignore is set."""

        if isinstance(cvs_path, CVSFile):
            node_kind = 'file'
            if cvs_path.basename == '.cvsignore':
                # FIXME: Here we have to adjust the containing directory's
                # svn:ignore property to reflect the addition of the
                # .cvsignore file to the LOD!  This is awkward because we
                # don't have the contents of the .cvsignore file available.
                if not Ctx().keep_cvsignore:
                    return
        elif isinstance(cvs_path, CVSDirectory):
            node_kind = 'dir'
        else:
            raise InternalError()

        self._write_copy(
            dest_lod.get_path(cvs_path.cvs_path),
            node_kind,
            src_revnum,
            src_lod.get_path(cvs_path.cvs_path),
            )

    def finish(self):
        """Perform any cleanup necessary after all revisions have been
        committed."""

        self.dumpfile.close()
+
+
def generate_ignores(raw_ignore_val):
    """Return the list of ignore patterns contained in RAW_IGNORE_VAL.

    RAW_IGNORE_VAL is the text of a .cvsignore file, in which patterns
    are separated by whitespace.  A pattern consisting of a lone '!'
    discards all of the patterns that precede it.  See
    http://cvsbook.red-bean.com/cvsbook.html#cvsignore"""

    patterns = raw_ignore_val.split()

    # Only the patterns that follow the last '!' survive, so look for it
    # starting from the end:
    for index in range(len(patterns) - 1, -1, -1):
        if patterns[index] == '!':
            return patterns[index + 1:]

    return patterns
+
+
class LF_EOL_Filter(object):
    """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
    into the appropriate canonical eol style.

    The wrapped stream must provide read(size) and close().  A carriage
    return at the very end of a chunk is held back until the next
    chunk has been read, so that a CRLF pair that straddles two reads
    is still converted to a single end-of-line marker."""

    # The end-of-line marker to emit for each svn:eol-style value.
    eol_style_replacements = {
        'LF' : '\n',
        'CR' : '\r',
        'CRLF' : '\r\n',
        'native' : '\n',
        }

    def __init__(self, stream, eol_style):
        """Wrap STREAM, converting its line endings to EOL_STYLE.

        EOL_STYLE must be one of the keys of eol_style_replacements;
        any other value raises KeyError."""

        self.stream = stream
        self.replacement = self.eol_style_replacements[eol_style]
        # True iff a trailing '\r' was withheld from the previous chunk:
        self.carry_cr = False
        # True iff the most recent read hit the end of the stream:
        self.eof = False

    def read(self, size=-1):
        """Read from the wrapped stream and return the converted text.

        SIZE is passed on to the wrapped stream; because line endings
        are rewritten, the length of the result may differ from SIZE.
        An empty result means that the end of the stream was reached
        (except when SIZE is 0, which always yields an empty result)."""

        if size == 0:
            # An empty read requested by the caller must not be mistaken
            # for end-of-file; otherwise a withheld '\r' would be flushed
            # too early and a CRLF split across reads would turn into two
            # line endings.
            return self.stream.read(0)

        while True:
            buf = self.stream.read(size)
            self.eof = len(buf) == 0
            if self.carry_cr:
                buf = '\r' + buf
                self.carry_cr = False
            if not self.eof and buf[-1] == '\r':
                # This might be the first half of a CRLF; hold it back
                # until we have seen what follows.
                self.carry_cr = True
                buf = buf[:-1]
            # First normalize everything to LF, then convert to the
            # requested style:
            buf = buf.replace('\r\n', '\n').replace('\r', '\n')
            if self.replacement != '\n':
                buf = buf.replace('\n', self.replacement)
            # If the chunk consisted of nothing but a withheld '\r', read
            # on rather than falsely signaling end-of-file.
            if buf or self.eof:
                return buf

    def close(self):
        """Close the wrapped stream.  Calling this again does nothing."""

        if self.stream is not None:
            self.stream.close()
            self.stream = None
+
+