aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_lib/symbol_strategy.py')
-rw-r--r--cvs2svn_lib/symbol_strategy.py685
1 files changed, 685 insertions, 0 deletions
diff --git a/cvs2svn_lib/symbol_strategy.py b/cvs2svn_lib/symbol_strategy.py
new file mode 100644
index 0000000..9d562a8
--- /dev/null
+++ b/cvs2svn_lib/symbol_strategy.py
@@ -0,0 +1,685 @@
+# (Be in -*- python -*- mode.)
+#
+# ====================================================================
+# Copyright (c) 2000-2008 CollabNet. All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a
+# newer version instead, at your option.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://cvs2svn.tigris.org/.
+# ====================================================================
+
+"""SymbolStrategy classes determine how to convert symbols."""
+
+import re
+
+from cvs2svn_lib.common import FatalError
+from cvs2svn_lib.common import path_join
+from cvs2svn_lib.common import normalize_svn_path
+from cvs2svn_lib.log import Log
+from cvs2svn_lib.symbol import Trunk
+from cvs2svn_lib.symbol import TypedSymbol
+from cvs2svn_lib.symbol import Branch
+from cvs2svn_lib.symbol import Tag
+from cvs2svn_lib.symbol import ExcludedSymbol
+from cvs2svn_lib.symbol_statistics import SymbolPlanError
+
+
+class StrategyRule:
+ """A single rule that might determine how to convert a symbol."""
+
+ def start(self, symbol_statistics):
+ """This method is called once before get_symbol() is ever called.
+
+ The StrategyRule can override this method to do whatever it wants
+ to prepare itself for work. SYMBOL_STATISTICS is an instance of
+ SymbolStatistics containing the statistics for all symbols in all
+ projects."""
+
+ pass
+
+ def get_symbol(self, symbol, stats):
+ """Return an object describing what to do with the symbol in STATS.
+
+ SYMBOL holds a Trunk or Symbol object as it has been determined so
+ far. Hopefully one of these method calls will turn any naked
+ Symbol instances into TypedSymbols.
+
+ If this rule applies to the SYMBOL (whose statistics are collected
+ in STATS), then return a new or modified AbstractSymbol object.
+ If this rule doesn't apply, return SYMBOL unchanged."""
+
+ raise NotImplementedError()
+
+ def finish(self):
+ """This method is called once after get_symbol() is done being called.
+
+ The StrategyRule can override this method do whatever it wants to
+ release resources, etc."""
+
+ pass
+
+
+class _RegexpStrategyRule(StrategyRule):
+ """A Strategy rule that bases its decisions on regexp matches.
+
+ If self.regexp matches a symbol name, return self.action(symbol);
+ otherwise, return the symbol unchanged."""
+
+ def __init__(self, pattern, action):
+ """Initialize a _RegexpStrategyRule.
+
+ PATTERN is a string that will be treated as a regexp pattern.
+ PATTERN must match a full symbol name for the rule to apply (i.e.,
+ it is anchored at the beginning and end of the symbol name).
+
+ ACTION is the class representing how the symbol should be
+ converted. It should be one of the classes Branch, Tag, or
+ ExcludedSymbol.
+
+ If PATTERN matches a symbol name, then get_symbol() returns
+ ACTION(name, id); otherwise it returns SYMBOL unchanged."""
+
+ try:
+ self.regexp = re.compile('^' + pattern + '$')
+ except re.error:
+ raise FatalError("%r is not a valid regexp." % (pattern,))
+
+ self.action = action
+
+ def log(self, symbol):
+ raise NotImplementedError()
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif self.regexp.match(symbol.name):
+ self.log(symbol)
+ return self.action(symbol)
+ else:
+ return symbol
+
+
+class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
+ """Force symbols matching pattern to be branches."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, Branch)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Converting symbol %s as a branch because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
+ """Force symbols matching pattern to be tags."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, Tag)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Converting symbol %s as a tag because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
+ """Exclude symbols matching pattern."""
+
+ def __init__(self, pattern):
+ _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol)
+
+ def log(self, symbol):
+ Log().verbose(
+ 'Excluding symbol %s because it matches regexp "%s".'
+ % (symbol, self.regexp.pattern,)
+ )
+
+
+class ExcludeTrivialImportBranchRule(StrategyRule):
+ """If a symbol is a trivial import branch, exclude it.
+
+ A trivial import branch is defined to be a branch that only had a
+ single import on it (no other kinds of commits) in every file in
+ which it appeared. In most cases these branches are worthless."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ if stats.tag_create_count == 0 \
+ and stats.branch_create_count == stats.trivial_import_count:
+ Log().verbose(
+ 'Excluding branch %s because it is a trivial import branch.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+ else:
+ return symbol
+
+
+class ExcludeVendorBranchRule(StrategyRule):
+ """If a symbol is a pure vendor branch, exclude it.
+
+ A pure vendor branch is defined to be a branch that only had imports
+ on it (no other kinds of commits) in every file in which it
+ appeared."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ if stats.tag_create_count == 0 \
+ and stats.branch_create_count == stats.pure_ntdb_count:
+ Log().verbose(
+ 'Excluding branch %s because it is a pure vendor branch.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+ else:
+ return symbol
+
+
+class UnambiguousUsageRule(StrategyRule):
+ """If a symbol is used unambiguously as a tag/branch, convert it as such."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ is_tag = stats.tag_create_count > 0
+ is_branch = stats.branch_create_count > 0 or stats.branch_commit_count > 0
+ if is_tag and is_branch:
+ # Can't decide
+ return symbol
+ elif is_branch:
+ Log().verbose(
+ 'Converting symbol %s as a branch because it is always used '
+ 'as a branch.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+ elif is_tag:
+ Log().verbose(
+ 'Converting symbol %s as a tag because it is always used '
+ 'as a tag.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+ else:
+ # The symbol didn't appear at all:
+ return symbol
+
+
+class BranchIfCommitsRule(StrategyRule):
+ """If there was ever a commit on the symbol, convert it as a branch."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif stats.branch_commit_count > 0:
+ Log().verbose(
+ 'Converting symbol %s as a branch because there are commits on it.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+ else:
+ return symbol
+
+
+class HeuristicStrategyRule(StrategyRule):
+ """Convert symbol based on how often it was used as a branch/tag.
+
+ Whichever happened more often determines how the symbol is
+ converted."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ elif stats.tag_create_count >= stats.branch_create_count:
+ Log().verbose(
+ 'Converting symbol %s as a tag because it is more often used '
+ 'as a tag.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a branch because it is more often used '
+ 'as a branch.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+class AllBranchRule(StrategyRule):
+ """Convert all symbols as branches.
+
+ Usually this rule will appear after a list of more careful rules
+ (including a general rule like UnambiguousUsageRule) and will
+ therefore only apply to the symbols not handled earlier."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a branch because no other rules applied.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+class AllTagRule(StrategyRule):
+ """Convert all symbols as tags.
+
+ We don't worry about conflicts here; they will be caught later by
+ SymbolStatistics.check_consistency().
+
+ Usually this rule will appear after a list of more careful rules
+ (including a general rule like UnambiguousUsageRule) and will
+ therefore only apply to the symbols not handled earlier."""
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, (Trunk, TypedSymbol)):
+ return symbol
+ else:
+ Log().verbose(
+ 'Converting symbol %s as a tag because no other rules applied.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+
+
+class TrunkPathRule(StrategyRule):
+ """Set the base path for Trunk."""
+
+ def __init__(self, trunk_path):
+ self.trunk_path = trunk_path
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, Trunk) and symbol.base_path is None:
+ symbol.base_path = self.trunk_path
+
+ return symbol
+
+
+class SymbolPathRule(StrategyRule):
+ """Set the base paths for symbol LODs."""
+
+ def __init__(self, symbol_type, base_path):
+ self.symbol_type = symbol_type
+ self.base_path = base_path
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, self.symbol_type) and symbol.base_path is None:
+ symbol.base_path = path_join(self.base_path, symbol.name)
+
+ return symbol
+
+
+class BranchesPathRule(SymbolPathRule):
+ """Set the base paths for Branch LODs."""
+
+ def __init__(self, branch_path):
+ SymbolPathRule.__init__(self, Branch, branch_path)
+
+
+class TagsPathRule(SymbolPathRule):
+ """Set the base paths for Tag LODs."""
+
+ def __init__(self, tag_path):
+ SymbolPathRule.__init__(self, Tag, tag_path)
+
+
+class HeuristicPreferredParentRule(StrategyRule):
+ """Use a heuristic rule to pick preferred parents.
+
+ Pick the parent that should be preferred for any TypedSymbols. As
+ parent, use the symbol that appeared most often as a possible parent
+ of the symbol in question. If multiple symbols are tied, choose the
+ one that comes first according to the Symbol class's natural sort
+ order."""
+
+ def _get_preferred_parent(self, stats):
+ """Return the LODs that are most often possible parents in STATS.
+
+ Return the set of LinesOfDevelopment that appeared most often as
+ possible parents. The return value might contain multiple symbols
+ if multiple LinesOfDevelopment appeared the same number of times."""
+
+ best_count = -1
+ best_symbol = None
+ for (symbol, count) in stats.possible_parents.items():
+ if count > best_count or (count == best_count and symbol < best_symbol):
+ best_count = count
+ best_symbol = symbol
+
+ if best_symbol is None:
+ return None
+ else:
+ return best_symbol
+
+ def get_symbol(self, symbol, stats):
+ if isinstance(symbol, TypedSymbol) and symbol.preferred_parent_id is None:
+ preferred_parent = self._get_preferred_parent(stats)
+ if preferred_parent is None:
+ Log().verbose('%s has no preferred parent' % (symbol,))
+ else:
+ symbol.preferred_parent_id = preferred_parent.id
+ Log().verbose(
+ 'The preferred parent of %s is %s' % (symbol, preferred_parent,)
+ )
+
+ return symbol
+
+
+class ManualTrunkRule(StrategyRule):
+ """Change the SVN path of Trunk LODs.
+
+ Members:
+
+ project_id -- (int or None) The id of the project whose trunk
+ should be affected by this rule. If project_id is None, then
+ the rule is not project-specific.
+
+ svn_path -- (str) The SVN path that should be used as the base
+ directory for this trunk. This member must not be None,
+ though it may be the empty string for a single-project,
+ trunk-only conversion.
+
+ """
+
+ def __init__(self, project_id, svn_path):
+ self.project_id = project_id
+ self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
+
+ def get_symbol(self, symbol, stats):
+ if (self.project_id is not None
+ and self.project_id != stats.lod.project.id):
+ return symbol
+
+ if isinstance(symbol, Trunk):
+ symbol.base_path = self.svn_path
+
+ return symbol
+
+
+def convert_as_branch(symbol):
+ Log().verbose(
+ 'Converting symbol %s as a branch because of manual setting.'
+ % (symbol,)
+ )
+ return Branch(symbol)
+
+
+def convert_as_tag(symbol):
+ Log().verbose(
+ 'Converting symbol %s as a tag because of manual setting.'
+ % (symbol,)
+ )
+ return Tag(symbol)
+
+
+def exclude(symbol):
+ Log().verbose(
+ 'Excluding symbol %s because of manual setting.'
+ % (symbol,)
+ )
+ return ExcludedSymbol(symbol)
+
+
+class ManualSymbolRule(StrategyRule):
+ """Change how particular symbols are converted.
+
+ Members:
+
+ project_id -- (int or None) The id of the project whose trunk
+ should be affected by this rule. If project_id is None, then
+ the rule is not project-specific.
+
+ symbol_name -- (str) The name of the symbol that should be
+ affected by this rule.
+
+ conversion -- (callable or None) A callable that converts the
+ symbol to its preferred output type. This should normally be
+ one of (convert_as_branch, convert_as_tag, exclude). If this
+ member is None, then this rule does not affect the symbol's
+ output type.
+
+ svn_path -- (str) The SVN path that should be used as the base
+ directory for this trunk. This member must not be None,
+ though it may be the empty string for a single-project,
+ trunk-only conversion.
+
+ parent_lod_name -- (str or None) The name of the line of
+ development that should be preferred as the parent of this
+ symbol. (The preferred parent is the line of development from
+ which the symbol should sprout.) If this member is set to the
+ string '.trunk.', then the symbol will be set to sprout
+ directly from trunk. If this member is set to None, then this
+ rule won't affect the symbol's parent.
+
+ """
+
+ def __init__(
+ self, project_id, symbol_name, conversion, svn_path, parent_lod_name
+ ):
+ self.project_id = project_id
+ self.symbol_name = symbol_name
+ self.conversion = conversion
+ if svn_path is None:
+ self.svn_path = None
+ else:
+ self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
+ self.parent_lod_name = parent_lod_name
+
+ def _get_parent_by_id(self, parent_lod_name, stats):
+ """Return the LOD object for the parent with name PARENT_LOD_NAME.
+
+ STATS is the _Stats object describing a symbol whose parent needs
+ to be determined from its name. If none of its possible parents
+ has name PARENT_LOD_NAME, raise a SymbolPlanError."""
+
+ for pp in stats.possible_parents.keys():
+ if isinstance(pp, Trunk):
+ pass
+ elif pp.name == parent_lod_name:
+ return pp
+ else:
+ parent_counts = stats.possible_parents.items()
+ parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
+ lines = [
+ '%s is not a valid parent for %s;'
+ % (parent_lod_name, stats.lod,),
+ ' possible parents (with counts):'
+ ]
+ for (symbol, count) in parent_counts:
+ if isinstance(symbol, Trunk):
+ lines.append(' .trunk. : %d' % count)
+ else:
+ lines.append(' %s : %d' % (symbol.name, count))
+ raise SymbolPlanError('\n'.join(lines))
+
+ def get_symbol(self, symbol, stats):
+ if (self.project_id is not None
+ and self.project_id != stats.lod.project.id):
+ return symbol
+
+ elif isinstance(symbol, Trunk):
+ return symbol
+
+ elif self.symbol_name == stats.lod.name:
+ if self.conversion is not None:
+ symbol = self.conversion(symbol)
+
+ if self.parent_lod_name is None:
+ pass
+ elif self.parent_lod_name == '.trunk.':
+ symbol.preferred_parent_id = stats.lod.project.trunk_id
+ else:
+ symbol.preferred_parent_id = self._get_parent_by_id(
+ self.parent_lod_name, stats
+ ).id
+
+ if self.svn_path is not None:
+ symbol.base_path = self.svn_path
+
+ return symbol
+
+
+class SymbolHintsFileRule(StrategyRule):
+ """Use manual symbol configurations read from a file.
+
+ The input file is line-oriented with the following format:
+
+ <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]
+
+ Where the fields are separated by whitespace and
+
+ project-id -- the numerical id of the Project to which the
+ symbol belongs (numbered starting with 0). This field can
+ be '.' if the rule is not project-specific.
+
+ symbol-name -- the name of the symbol being specified, or
+ '.trunk.' if the rule should apply to trunk.
+
+ conversion -- how the symbol should be treated in the
+ conversion. This is one of the following values: 'branch',
+ 'tag', or 'exclude'. This field can be '.' if the rule
+ shouldn't affect how the symbol is treated in the
+ conversion.
+
+ svn-path -- the SVN path that should serve as the root path of
+ this LOD. The path should be expressed as a path relative
+ to the SVN root directory, with or without a leading '/'.
+ This field can be omitted or '.' if the rule shouldn't
+ affect the LOD's SVN path.
+
+ parent-lod-name -- the name of the LOD that should serve as this
+ symbol's parent. This field can be omitted or '.' if the
+ rule shouldn't affect the symbol's parent, or it can be
+ '.trunk.' to indicate that the symbol should sprout from the
+ project's trunk."""
+
+ comment_re = re.compile(r'^(\#|$)')
+
+ conversion_map = {
+ 'branch' : convert_as_branch,
+ 'tag' : convert_as_tag,
+ 'exclude' : exclude,
+ '.' : None,
+ }
+
+ def __init__(self, filename):
+ self.filename = filename
+
+ def start(self, symbol_statistics):
+ self._rules = []
+
+ f = open(self.filename, 'r')
+ for l in f:
+ l = l.rstrip()
+ s = l.lstrip()
+ if self.comment_re.match(s):
+ continue
+ fields = s.split()
+
+ if len(fields) < 3:
+ raise FatalError(
+ 'The following line in "%s" cannot be parsed:\n "%s"'
+ % (self.filename, l,)
+ )
+
+ project_id = fields.pop(0)
+ symbol_name = fields.pop(0)
+ conversion = fields.pop(0)
+
+ if fields:
+ svn_path = fields.pop(0)
+ if svn_path == '.':
+ svn_path = None
+ elif svn_path[0] == '/':
+ svn_path = svn_path[1:]
+ else:
+ svn_path = None
+
+ if fields:
+ parent_lod_name = fields.pop(0)
+ else:
+ parent_lod_name = '.'
+
+ if fields:
+ raise FatalError(
+ 'The following line in "%s" cannot be parsed:\n "%s"'
+ % (self.filename, l,)
+ )
+
+ if project_id == '.':
+ project_id = None
+ else:
+ try:
+ project_id = int(project_id)
+ except ValueError:
+ raise FatalError(
+ 'Illegal project_id in the following line:\n "%s"' % (l,)
+ )
+
+ if symbol_name == '.trunk.':
+ if conversion not in ['.', 'trunk']:
+ raise FatalError('Trunk cannot be converted as a different type')
+
+ if parent_lod_name != '.':
+ raise FatalError('Trunk\'s parent cannot be set')
+
+ if svn_path is None:
+ # This rule doesn't do anything:
+ pass
+ else:
+ self._rules.append(ManualTrunkRule(project_id, svn_path))
+
+ else:
+ try:
+ conversion = self.conversion_map[conversion]
+ except KeyError:
+ raise FatalError(
+ 'Illegal conversion in the following line:\n "%s"' % (l,)
+ )
+
+ if parent_lod_name == '.':
+ parent_lod_name = None
+
+ if conversion is None \
+ and svn_path is None \
+ and parent_lod_name is None:
+ # There is nothing to be done:
+ pass
+ else:
+ self._rules.append(
+ ManualSymbolRule(
+ project_id, symbol_name,
+ conversion, svn_path, parent_lod_name
+ )
+ )
+
+ for rule in self._rules:
+ rule.start(symbol_statistics)
+
+ def get_symbol(self, symbol, stats):
+ for rule in self._rules:
+ symbol = rule.get_symbol(symbol, stats)
+
+ return symbol
+
+ def finish(self):
+ for rule in self._rules:
+ rule.finish()
+
+ del self._rules
+
+