Mercurial > hg > silvermirror
changeset 0:abb358e2434c
initial commit of silvermirror, from http://my-svn.assembla.com/svn/arbez/silvermirror
author | k0s <k0scist@gmail.com> |
---|---|
date | Mon, 07 Sep 2009 15:39:06 -0400 |
parents | |
children | 9b139702a8f9 |
files | setup.py silvermirror-whitepaper.txt silvermirror/__init__.py silvermirror/unify.py silvermirror/utils.py |
diffstat | 5 files changed, 435 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/setup.py Mon Sep 07 15:39:06 2009 -0400
# Packaging script for silvermirror: declares the distribution metadata, its
# runtime requirements and the `silvermirror` console script.
from setuptools import setup, find_packages

version = '0.0'

setup(name='silvermirror',
      version=version,
      description="mirror files",
      long_description="""\
""",
      classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
      keywords='mirror unison',
      author='Jeff Hammel',
      author_email='k0scist@gmail.com',
      url='http://explosivedecompression.net',
      license='GPL',
      # every package found is shipped except the listed helper directories
      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
      include_package_data=True,
      zip_safe=False,
      install_requires=[
          # -*- Extra requirements: -*-
          'martINI',
          'netifaces'
      ],
      # extra location to search for the martINI requirement
      dependency_links=[
          'https://svn.openplans.org/svn/standalone/martINI#egg=martINI',
      ],
      # installs a `silvermirror` command that calls silvermirror.unify:unify
      entry_points="""
      # -*- Entry points: -*-
      [console_scripts]
      silvermirror = silvermirror.unify:unify
      """,
      )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/silvermirror-whitepaper.txt Mon Sep 07 15:39:06 2009 -0400 @@ -0,0 +1,228 @@ +SilverMirror Whitepaper + +It is necessary to maintain parallel directory structures of various +resources across an arbitrary number of computers. The traditional +approach is the central server model, where files live in one +canonical location and the network is used to give access to the data. +However, this model has deficiencies, chief among them that if the +server goes down or must be moved a considerable amount of effort must +be expended to set up a new central server. + +Distributed version control, often of nominal use (in the case where a +canonical trunk exists) is ideally suited to provide mirroring of +desired resources across computers. + +Implementation + +A front end to a DVCS - most likely mercurial but potentially bzr - +will be written to keep resources in sync across an arbitrary number +of computers. The front end, called SilverMirror, may be used to push +or pull changes to resources. Optionally, a daemon will monitor +changes to resources and push or pull changes at desired intervals. + +The use should be as natural as possible and require no interaction +for everyday tasks. A resource consists of a directory and all +subdirectories and their contents. Once a resource is denoted as +versioned, any change to the resource's directory structure should be +mirrored across machines without user intervention. Files matching a +pattern may be ignored, either globally or on a per resource basis, +for the purpose of versioning. + +Configuration + +SilverMirror is configured via an INI file containing a section for +each resource and a section for application configuration. + +The main section, denoted [::SilverMirror::], has the following options: + + * directory: base directory for SilverMirror. The SilverMirror +configuration is stored in ${directory}/.silvermirror . If omitted, +the user's home directory is used. 
+ + * ignore: global patterns of files and directories to ignore. Paths +matching these patterns will not be versioned. + +Each section has the following configuration options: + + * directory: path of the resource. If a relative path is used, it is +joined with the directory setting from the main section. If this +setting is not specified, the section name is used as a relative path. + + * ignore: paths not to version on a per resource basis. This is in +addition to the patterns specified by the ignore setting in the main +section. + + * conflict: handler to resolve conflict. + + * hosts: hosts to push/pull from + +In order to ensure coherency among resources, all relevant +configuration options must be synced prior to push/pull transactions. + +Default Configuration: + +[::SilverMirror::] +conflict = ClobberRemote + +Example of a more complex configuration: + +[::SilverMirror::] +conflict.push = ClobberRemote +conflict.pull = ClobberLocal + +Push + +Push changes to remote resources. When resources are pushed, first +changes are pulled from each remote host in turn, conflicts between +local and remote changes are resolved (see Behavior on Conflicts), +then local modifications are pushed. This is done to keep the +resources in sync. + +When new files are added to the resource they are automatically added +to the hg repository. When resource files are edited the changes are +pushed to the repository. When a conflict occurs between local +resources and remote resources, the conflict handler is used. + +Pull + +Get changes to the cloud filesystem resources. If no host is +specified, pull changes from all known + accessible hosts. + +Namespaced Resources + +It is possible to maintain versioning of a subdirectory within a +resource. + +Example: + +[docs] +directory = /path/to/docs + +[docs:private] + +This configuration snippet describes a resource, [docs:private], +namespaced within the [docs] resource. 
[docs:private] inherits +configuration and behavior from [docs] but may be dealt with +separately. For example, some computers in the cloud may not have +[docs:private] specified in their configuration and so will not get a +copy of it upon pulling. A common use case is specifying a +subdirectory to be omitted with the ignore option in the configuration +file, then, when this subdirectory needs to be shared between multiple +computers, removing it from the ignore values and including as a +namespaced resource. + +In the above example, because the directory option was not specified +in the [docs:private] section, the path to [docs:private] is taken +from its namespace (private) and the directory of its parent resource. +So its base directory is /path/to/docs/private . If a relative path +was specified in the directory option of the [docs:private] section, +it would be joined with the base directory of [docs]. + +Behavior on Conflicts + +Conflict handlers are set via setuptools entry points. Several +conflict handlers are provided with SilverMirror: + + * ClobberLocal: replace local changes with changes from remote files + + * ClobberRemote: replace remote file changes with changes from local +files + + * Edit: invoke an editor (default: $EDITOR) to interactively resolve +the conflicts + +The conflict handler may also be specified from the command line: + +silvermirror push -d ClobberRemote + +Command Line Usage + +silvermirror [push|pull] [resource] [options] + +In the simplest invocation, SilverMirror is used with no command line +arguments: + +silvermirror + +This pushes changes of the resource as determined by the current +working directory after pulling outstanding changes from all +applicable remote computers and invoking the conflict handler for +push. If the current working directory is not within a resource, all +resources will be pushed. 
+ +Finer control is obtained by specifying command line arguments: + +[push|pull] : whether to use the push method (which includes pulling +for changes; see above) or the pull method. If not specified, the +resource is pushed. + +[resource] : which resource to act upon. This can be the resource +name, as specified in the .ini file, or the path to the base directory +of the resource. Note that if a path is specified, it must be to the +base directory of the resource as SilverMirror has no notion of +disparate versioning within a resource. If the resource is not +specified, the resource that the current working directory is within +is used. If the current working directory is not in a resource path, +all resources are acted upon in turn. If the key word "--all" is used, +all resources will also be acted upon. + +[options] : several command line switches are available to the +silvermirror program: + + * -d <handler> : specify which conflict handler to use. <handler> +should be the name of the desired conflict handler. A list of all +conflict handlers is available with the "--conflict-handlers" option. + + * -H <host>, --host=<host> : pull and/or push only to specified +hosts. If this option is used more than once, the hosts specified will +be acted upon. + + * --conflict-handlers : list the name and description (if +specified) for all available conflict handlers. + + * --help : displays help and usage information + +Behavior Respecting Versioned Directories + +SilverMirror does not desire to duplicate versioning on directories +already under version control (svn, bzr, hg). These resources are +automatically ignored. In a future implementation, these resources +would optionally be checked out or updated upon a pull. + +Automatic Synchronization + +SilverMirror includes a script that will automatically invoke +synchronizing the resources in a specified period of time. 
This daemon, +called silvermirrord, is invoked from the command line with options +parallel to the silvermirror program. One additional option, -s, tells +how many seconds between syncs. Upon invocation, this program puts +itself in the background and performs the desired sync every number of +seconds specified. It is important that the conflict handler specified +is noninteractive, otherwise the daemon will hang forever. + +As an alternative, the silvermirror program may be invoked from a cron +job. + +Future Work + +SilverMirror implements a cloud filesystem which may be accessed +nearly transparently by an arbitrary number of computers. Several +improvements could extend SilverMirror to solve several deficiencies +of modern filesystems. + + * tagging: in most filesystems, a file has a canonical location. +However, it may be desirable to have the file accessible via multiple +paths. In practice, this is achieved via symbolic links. However, this +requires manual maintenance of links vs the canonical location. Noting +that this problem is identical to tagging, a solution minimizing +manual intervention could be added to SilverMirror. + + * update of web documents: modern computers deal heavily with +documents via URLs. It is noted that this includes files, the URL of a +file with path ${PATH} being file://${PATH} noted implicitly from +contexts. However, existing shells and operating systems have no +mechanism for indicating that a "file" is a web resource. Such +functionality could be added to SilverMirror so that up-to-date +versions of web resources could be maintained. This infrastructure +could also include notions for updating versioned resources (see +Behavior Respecting Versioned Directories) with parallel notation.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/silvermirror/__init__.py Mon Sep 07 15:39:06 2009 -0400 @@ -0,0 +1,1 @@ +#
#!/usr/bin/env python
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/silvermirror/unify.py Mon Sep 07 15:39:06 2009 -0400
"""
Command line front end that mirrors configured resources to remote hosts
by running `unison` over ssh.
"""

import getpass
import os
import pexpect
import socket
import subprocess
import sys

from martini.config import ConfigMunger
from optparse import OptionParser
from pprint import pprint
from utils import home
from utils import ip_addresses

# values of the `password` option (compared case-insensitively) that
# switch password prompting off
_FALSE_VALUES = ('', '0', 'false', 'no', 'off')


def make_config(filename):
    """Placeholder for creating a default configuration at `filename`.

    Raises:
        NotImplementedError: always; a config file must be written by hand.
    """
    # XXX needed?
    raise NotImplementedError('Need to specify a config file, like\n~/silvermirror/silvermirror.ini')


def read_config(filename):
    """Read the INI file `filename` and normalize its settings.

    Returns a dictionary with two keys:

    * 'main': the options of the [::SilverMirror::] section, with
      'basedir' (defaults to home()), 'ignore' and 'hosts' (lists split on
      whitespace), 'password' (boolean) and 'timeout' (float, default 10.)
    * 'resources': every other section keyed by its name, each with a
      'directory' and an 'ignore' list (main patterns + its own)
    """
    config = ConfigMunger(filename).dict()

    ### main configuration
    main = config.pop('::SilverMirror::', {})
    if not main.get('basedir'):
        main['basedir'] = home()
    main['ignore'] = main.get('ignore', '').split() # patterns to ignore
    main['hosts'] = main.get('hosts', '').split()
    # stored as a real boolean: the raw string 'false' is truthy, so
    # `password = false` could never disable the password prompt
    password = str(main.get('password', 'true')).strip().lower()
    main['password'] = password not in _FALSE_VALUES
    main['timeout'] = float(main.get('timeout', '10.'))

    ### resources
    for resource in config:
        settings = config[resource]

        # directory of resource
        directory = settings.get('directory', resource)
        if not os.path.isabs(directory):
            # XXX note: absolute directories will not work for now
            # XXX so....don't do this!
            directory = os.path.join(main['basedir'], directory)
        settings['directory'] = directory.rstrip(os.path.sep)

        # per-resource files to ignore
        # XXX regexps for now (see `man unison`)
        # - this is bad as whitespace patterns cannot be ignored
        ignore = main['ignore'][:]
        if 'ignore' in settings:
            ignore += settings['ignore'].split()
        settings['ignore'] = ignore

    ###
    return {'main': main, 'resources': config}


def _load_config(config_path):
    """Read `config_path` if given, else the first default config found."""
    if config_path:
        assert os.path.exists(config_path), \
            'config file not found: %s' % config_path
        return read_config(config_path)
    user_conf = os.path.join(home(), '.silvermirror')
    for candidate in (user_conf, '/etc/silvermirror'):
        if os.path.exists(candidate):
            return read_config(candidate)
    return make_config(user_conf)


def _is_reachable(host, timeout, port=22):
    """Return True if a TCP connection to `host`:`port` can be opened."""
    connection = socket.socket()
    connection.settimeout(timeout)
    try:
        connection.connect((host, port))
    except (socket.gaierror, socket.timeout, socket.error):
        return False
    finally:
        # close on failure too, so unreachable hosts do not leak sockets
        connection.close()
    return True


def _select_resources(resources, names, cwd):
    """Return the subset of `resources` to sync.

    'all' in `names` selects everything; otherwise the named resources are
    used, or, with no names, the resource whose directory contains `cwd`
    (all resources when `cwd` is in none of them).
    """
    if 'all' in names:
        return resources
    if names:
        return dict((key, value) for key, value in resources.items()
                    if key in names)
    for key, value in resources.items():
        directory = os.path.realpath(value['directory']) + os.sep
        if (cwd + os.sep).startswith(directory):
            return {key: value}
    return resources


def _unison_command(resource, host, ignore):
    """Return the unison command line syncing `resource` with `host`."""
    command = ['unison', '-auto', '-batch',
               resource, 'ssh://%s/%s' % (host, resource)]

    # XXX - to refactor?
    for pattern in ignore:
        command.extend(('-ignore', "'Name %s'" % pattern))
    return ' '.join(command)


def unify(args=sys.argv[1:]):
    """Sync resources with every reachable host; the console entry point.

    `args` are the command line arguments (without the program name):
    options plus optional resource names, or 'all' for every resource.

    Raises:
        AssertionError: the given config file does not exist, the base
            directory is not the home directory, or no host is reachable.
        NotImplementedError: no configuration file could be found.
    """

    ### command line options
    parser = OptionParser()
    parser.add_option('-c', '--config')
    parser.add_option('-H', '--host', dest='hosts',
                      action='append', default=None)
    parser.add_option('--no-password', dest='password',
                      action='store_false', default=True)
    parser.add_option('--test', dest='test',
                      action='store_true', default=False)
    # parse the arguments handed in rather than silently re-reading sys.argv
    (options, args) = parser.parse_args(args)

    ### configuration
    conf = _load_config(options.config)

    # XXX needed for now
    assert conf['main']['basedir'] == home(), \
        'basedir must be the home directory'

    ### determine hosts to sync with
    hosts = set(options.hosts or conf['main']['hosts'])
    hosts = hosts.difference(ip_addresses().values()) # don't sync with self
    reachable = []
    for host in hosts:
        if options.test:
            print('Resolving %s' % host)
        if _is_reachable(host, conf['main']['timeout']):
            reachable.append(host)
    hosts = reachable
    if options.test:
        print('Hosts:')
        for host in hosts:
            print(host)
    assert hosts, 'no reachable hosts to sync with'

    # passwords, keyed by host; stays empty when prompting is disabled
    pw = {}
    if options.password and conf['main']['password']:
        for host in hosts:
            pw[host] = getpass.getpass('Enter password for %s: ' % host)
    # TODO: ensure that the hosts are resolvable
    # XXX: hosts should actually be manageable on a per-resource basis

    ### determine resources to sync
    cwd = os.path.realpath(os.getcwd())
    resources = _select_resources(conf['resources'], args, cwd)
    if options.test:
        print('Resources:')
        pprint(resources)

    ### sync with hosts
    # resources are addressed relative to the base directory
    os.chdir(conf['main']['basedir'])
    try:
        for resource in resources:
            for host in hosts:
                command = _unison_command(resource, host,
                                          resources[resource]['ignore'])
                print(command) # XXX debug
                if not options.test:
                    child = pexpect.spawn(command, timeout=36000, maxread=1)
                    # only answer a password prompt when a password was
                    # collected; pw[host] does not exist otherwise
                    if host in pw:
                        child.expect('password: ')
                        child.sendline(pw[host])
                    print(child.read())
                    # subprocess.call(command)
    finally:
        # restore the caller's working directory even if a sync fails
        os.chdir(cwd)


if __name__ == '__main__':
    unify()
#!/usr/bin/env python
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/silvermirror/utils.py Mon Sep 07 15:39:06 2009 -0400
"""Helpers describing the local machine: home directory and ip addresses."""

import os

try:
    import netifaces
except ImportError:
    # home() needs nothing but the standard library, so keep the module
    # importable; ip_addresses() reports the missing dependency when called
    netifaces = None


def home():
    """Return the home directory of the current user.

    The HOME environment variable is returned unchanged when it is set and
    non-empty; otherwise the result of os.path.expanduser('~') is used
    instead of failing with a KeyError.
    """
    return os.environ.get('HOME') or os.path.expanduser('~')


def ip_addresses():
    """dictionary of ip4 addresses for the machine

    Maps each interface name to the first IPv4 address reported for it;
    interfaces without an IPv4 address are left out.

    Raises:
        ImportError: the netifaces package is not installed.
    """
    if netifaces is None:
        raise ImportError('netifaces is required to look up ip addresses')
    addresses = {}
    for interface in netifaces.interfaces():
        # query each interface once; AF_INET selects its IPv4 entries
        inet = netifaces.ifaddresses(interface).get(netifaces.AF_INET)
        if inet:
            addresses[interface] = inet[0]['addr']
    return addresses


if __name__ == '__main__':
    for name, value in sorted(ip_addresses().items()):
        print('%s : %s' % (name, value))