diff options
Diffstat (limited to 'cvs2svn_lib/dumpfile_delegate.py')
-rw-r--r-- | cvs2svn_lib/dumpfile_delegate.py | 510 |
1 files changed, 510 insertions, 0 deletions
diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py new file mode 100644 index 0000000..092cfca --- /dev/null +++ b/cvs2svn_lib/dumpfile_delegate.py @@ -0,0 +1,510 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains database facilities used by cvs2svn.""" + + +try: + from hashlib import md5 +except ImportError: + from md5 import new as md5 + + +from cvs2svn_lib import config +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.common import path_split +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.cvs_file import CVSDirectory +from cvs2svn_lib.cvs_file import CVSFile +from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate +from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream + + +# Things that can happen to a file. +OP_ADD = 'add' +OP_CHANGE = 'change' + + +class DumpfileDelegate(SVNRepositoryDelegate): + """Create a Subversion dumpfile.""" + + def __init__(self, revision_reader, dumpfile_path): + """Return a new DumpfileDelegate instance, attached to a dumpfile + DUMPFILE_PATH, using Ctx().cvs_filename_decoder().""" + + self._revision_reader = revision_reader + self.dumpfile_path = dumpfile_path + + self.dumpfile = open(self.dumpfile_path, 'wb') + self._write_dumpfile_header(self.dumpfile) + + # A set of the basic project infrastructure project directories + # that have been created so far, as SVN paths. (The root + # directory is considered to be present at initialization.) This + # includes all of the LOD paths, and all of their parent + # directories etc. + self._basic_directories = set(['']) + + def _write_dumpfile_header(self, dumpfile): + # Initialize the dumpfile with the standard headers. + # + # Since the CVS repository doesn't have a UUID, and the Subversion + # repository will be created with one anyway, we don't specify a + # UUID in the dumpflie + dumpfile.write('SVN-fs-dump-format-version: 2\n\n') + + def _utf8_path(self, path): + """Return a copy of PATH encoded in UTF-8.""" + + # Convert each path component separately (as they may each use + # different encodings). + try: + return '/'.join([ + Ctx().cvs_filename_decoder(piece).encode('utf8') + for piece in path.split('/') + ]) + except UnicodeError: + raise FatalError( + "Unable to convert a path '%s' to internal encoding.\n" + "Consider rerunning with one or more '--encoding' parameters or\n" + "with '--fallback-encoding'." + % (path,)) + + def _string_for_prop(self, name, value): + """Return a property in the form needed for the dumpfile.""" + + return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value) + + def start_commit(self, revnum, revprops): + """Emit the start of SVN_COMMIT (an SVNCommit).""" + + self.revision = revnum + + # The start of a new commit typically looks like this: + # + # Revision-number: 1 + # Prop-content-length: 129 + # Content-length: 129 + # + # K 7 + # svn:log + # V 27 + # Log message for revision 1. + # K 10 + # svn:author + # V 7 + # jrandom + # K 8 + # svn:date + # V 27 + # 2003-04-22T22:57:58.132837Z + # PROPS-END + # + # Notice that the length headers count everything -- not just the + # length of the data but also the lengths of the lengths, including + # the 'K ' or 'V ' prefixes. + # + # The reason there are both Prop-content-length and Content-length + # is that the former includes just props, while the latter includes + # everything. That's the generic header form for any entity in a + # dumpfile. But since revisions only have props, the two lengths + # are always the same for revisions. + + # Calculate the output needed for the property definitions. + prop_names = revprops.keys() + prop_names.sort() + prop_strings = [] + for propname in prop_names: + if revprops[propname] is not None: + prop_strings.append( + self._string_for_prop(propname, revprops[propname])) + + all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n' + total_len = len(all_prop_strings) + + # Print the revision header and revprops + self.dumpfile.write( + 'Revision-number: %d\n' + 'Prop-content-length: %d\n' + 'Content-length: %d\n' + '\n' + '%s' + '\n' + % (self.revision, total_len, total_len, all_prop_strings) + ) + + def end_commit(self): + pass + + def _make_any_dir(self, path): + """Emit the creation of directory PATH.""" + + self.dumpfile.write( + "Node-path: %s\n" + "Node-kind: dir\n" + "Node-action: add\n" + "\n" + "\n" + % self._utf8_path(path) + ) + + def _register_basic_directory(self, path, create): + """Register the creation of PATH if it is not already there. + + Create any parent directories that do not already exist. If + CREATE is set, also create PATH if it doesn't already exist. This + method should only be used for the LOD paths and the directories + containing them, not for directories within an LOD path.""" + + if path not in self._basic_directories: + # Make sure that the parent directory is present: + self._register_basic_directory(path_split(path)[0], True) + if create: + self._make_any_dir(path) + self._basic_directories.add(path) + + def initialize_project(self, project): + """Create any initial directories for the project. + + The trunk, tags, and branches directories directories are created + the first time the project is seen. Be sure not to create parent + directories that already exist (e.g., because two directories + share part of their paths either within or across projects).""" + + for path in project.get_initial_directories(): + self._register_basic_directory(path, True) + + def initialize_lod(self, lod): + lod_path = lod.get_path() + if lod_path: + self._register_basic_directory(lod_path, True) + + def mkdir(self, lod, cvs_directory): + self._make_any_dir(lod.get_path(cvs_directory.cvs_path)) + + def _add_or_change_path(self, s_item, op): + """Emit the addition or change corresponding to S_ITEM. + + OP is either the constant OP_ADD or OP_CHANGE.""" + + assert op in [OP_ADD, OP_CHANGE] + + # Convenience variables + cvs_rev = s_item.cvs_rev + + # The property handling here takes advantage of an undocumented + # but IMHO consistent feature of the Subversion dumpfile-loading + # code. When a node's properties aren't mentioned (that is, the + # "Prop-content-length:" header is absent, no properties are + # listed at all, and there is no "PROPS-END\n" line) then no + # change is made to the node's properties. + # + # This is consistent with the way dumpfiles behave w.r.t. text + # content changes, so I'm comfortable relying on it. If you + # commit a change to *just* the properties of some node that + # already has text contents from a previous revision, then in the + # dumpfile output for the prop change, no "Text-content-length:" + # nor "Text-content-md5:" header will be present, and the text of + # the file will not be given. But this does not cause the file's + # text to be erased! It simply remains unchanged. + # + # This works out great for cvs2svn, due to lucky coincidences: + # + # For files, the only properties we ever set are set in the first + # revision; all other revisions (including on branches) inherit + # from that. After the first revision, we never change file + # properties, therefore, there is no need to remember the full set + # of properties on a given file once we've set it. + # + # For directories, the only property we set is "svn:ignore", and + # while we may change it after the first revision, we always do so + # based on the contents of a ".cvsignore" file -- in other words, + # CVS is doing the remembering for us, so we still don't have to + # preserve the previous value of the property ourselves. + + # Calculate the (sorted-by-name) property string and length, if any. + if s_item.svn_props_changed: + svn_props = s_item.svn_props + prop_contents = '' + prop_names = svn_props.keys() + prop_names.sort() + for pname in prop_names: + pvalue = svn_props[pname] + if pvalue is not None: + prop_contents += self._string_for_prop(pname, pvalue) + prop_contents += 'PROPS-END\n' + props_header = 'Prop-content-length: %d\n' % len(prop_contents) + else: + prop_contents = '' + props_header = '' + + # If the file has keywords, we must prevent CVS/RCS from expanding + # the keywords because they must be unexpanded in the repository, + # or Subversion will get confused. + stream = self._revision_reader.get_content_stream( + cvs_rev, suppress_keyword_substitution=s_item.has_keywords() + ) + + if Ctx().decode_apple_single: + # Insert a filter to decode any files that are in AppleSingle + # format: + stream = get_maybe_apple_single_stream(stream) + + # Insert a filter to convert all EOLs to LFs if neccessary + + eol_style = s_item.svn_props.get('svn:eol-style', None) + if eol_style: + stream = LF_EOL_Filter(stream, eol_style) + + buf = None + + # treat .cvsignore as a directory property + dir_path, basename = path_split(cvs_rev.get_svn_path()) + if basename == '.cvsignore': + buf = stream.read() + ignore_vals = generate_ignores(buf) + ignore_contents = '\n'.join(ignore_vals) + if ignore_contents: + ignore_contents += '\n' + ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \ + (len(ignore_contents), ignore_contents)) + ignore_contents += 'PROPS-END\n' + ignore_len = len(ignore_contents) + + # write headers, then props + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-kind: dir\n' + 'Node-action: change\n' + 'Prop-content-length: %d\n' + 'Content-length: %d\n' + '\n' + '%s' + % (self._utf8_path(dir_path), + ignore_len, ignore_len, ignore_contents) + ) + if not Ctx().keep_cvsignore: + stream.close() + return + + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-kind: file\n' + 'Node-action: %s\n' + '%s' # no property header if no props + % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header) + ) + + pos = self.dumpfile.tell() + + content_header_fmt = ( + 'Text-content-length: %16d\n' + 'Text-content-md5: %32s\n' + 'Content-length: %16d\n' + '\n' + ) + + self.dumpfile.write(content_header_fmt % (0, '', 0,)) + + if prop_contents: + self.dumpfile.write(prop_contents) + + # Insert the rev contents, calculating length and checksum as we go. + checksum = md5() + length = 0 + if buf is None: + buf = stream.read(config.PIPE_READ_SIZE) + while buf != '': + checksum.update(buf) + length += len(buf) + self.dumpfile.write(buf) + buf = stream.read(config.PIPE_READ_SIZE) + + stream.close() + + # Go back to overwrite the length and checksum headers with the + # correct values. The content length is the length of property + # data, text data, and any metadata around/inside around them: + self.dumpfile.seek(pos, 0) + self.dumpfile.write( + content_header_fmt + % (length, checksum.hexdigest(), length + len(prop_contents),) + ) + + # Jump back to the end of the stream + self.dumpfile.seek(0, 2) + + # This record is done (write two newlines -- one to terminate + # contents that weren't themselves newline-termination, one to + # provide a blank line for readability. + self.dumpfile.write('\n\n') + + def add_path(self, s_item): + """Emit the addition corresponding to S_ITEM, an SVNCommitItem.""" + + self._add_or_change_path(s_item, OP_ADD) + + def change_path(self, s_item): + """Emit the change corresponding to S_ITEM, an SVNCommitItem.""" + + self._add_or_change_path(s_item, OP_CHANGE) + + def delete_lod(self, lod): + """Emit the deletion of LOD.""" + + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-action: delete\n' + '\n' + % (self._utf8_path(lod.get_path()),) + ) + self._basic_directories.remove(lod.get_path()) + + def delete_path(self, lod, cvs_path): + dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path())) + if basename == '.cvsignore': + # When a .cvsignore file is deleted, the directory's svn:ignore + # property needs to be deleted. + ignore_contents = 'PROPS-END\n' + ignore_len = len(ignore_contents) + + # write headers, then props + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-kind: dir\n' + 'Node-action: change\n' + 'Prop-content-length: %d\n' + 'Content-length: %d\n' + '\n' + '%s' + % (self._utf8_path(dir_path), + ignore_len, ignore_len, ignore_contents) + ) + if not Ctx().keep_cvsignore: + return + + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-action: delete\n' + '\n' + % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),) + ) + + def copy_lod(self, src_lod, dest_lod, src_revnum): + # Register the main LOD directory, and create parent directories + # as needed: + self._register_basic_directory(dest_lod.get_path(), False) + + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-kind: dir\n' + 'Node-action: add\n' + 'Node-copyfrom-rev: %d\n' + 'Node-copyfrom-path: %s\n' + '\n' + % (self._utf8_path(dest_lod.get_path()), + src_revnum, self._utf8_path(src_lod.get_path())) + ) + + def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): + if isinstance(cvs_path, CVSFile): + node_kind = 'file' + if cvs_path.basename == '.cvsignore': + # FIXME: Here we have to adjust the containing directory's + # svn:ignore property to reflect the addition of the + # .cvsignore file to the LOD! This is awkward because we + # don't have the contents of the .cvsignore file available. + if not Ctx().keep_cvsignore: + return + elif isinstance(cvs_path, CVSDirectory): + node_kind = 'dir' + else: + raise InternalError() + + self.dumpfile.write( + 'Node-path: %s\n' + 'Node-kind: %s\n' + 'Node-action: add\n' + 'Node-copyfrom-rev: %d\n' + 'Node-copyfrom-path: %s\n' + '\n' + % ( + self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)), + node_kind, + src_revnum, + self._utf8_path(src_lod.get_path(cvs_path.cvs_path)) + ) + ) + + def finish(self): + """Perform any cleanup necessary after all revisions have been + committed.""" + + self.dumpfile.close() + + +def generate_ignores(raw_ignore_val): + ignore_vals = [ ] + for ignore in raw_ignore_val.split(): + # Reset the list if we encounter a '!' + # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore + if ignore == '!': + ignore_vals = [ ] + else: + ignore_vals.append(ignore) + return ignore_vals + + +class LF_EOL_Filter: + """Filter a stream and convert all end-of-line markers (CRLF, CR or LF) + into the appropriate canonical eol style.""" + + eol_style_replacements = { + 'LF' : '\n', + 'CR' : '\r', + 'CRLF' : '\r\n', + 'native' : '\n', + } + + def __init__(self, stream, eol_style): + self.stream = stream + self.replacement = self.eol_style_replacements[eol_style] + self.carry_cr = False + self.eof = False + + def read(self, size=-1): + while True: + buf = self.stream.read(size) + self.eof = len(buf) == 0 + if self.carry_cr: + buf = '\r' + buf + self.carry_cr = False + if not self.eof and buf[-1] == '\r': + self.carry_cr = True + buf = buf[:-1] + buf = buf.replace('\r\n', '\n') + buf = buf.replace('\r', '\n') + if self.replacement != '\n': + buf = buf.replace('\n', self.replacement) + if buf or self.eof: + return buf + + def close(self): + self.stream.close() + self.stream = None + + |