changeset 0:abb358e2434c

initial commit of silvermirror, from http://my-svn.assembla.com/svn/arbez/silvermirror
author k0s <k0scist@gmail.com>
date Mon, 07 Sep 2009 15:39:06 -0400
parents
children 9b139702a8f9
files setup.py silvermirror-whitepaper.txt silvermirror/__init__.py silvermirror/unify.py silvermirror/utils.py
diffstat 5 files changed, 435 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/setup.py	Mon Sep 07 15:39:06 2009 -0400
@@ -0,0 +1,32 @@
+# Packaging script for silvermirror (setuptools).
+# Declares the package metadata, the runtime requirements (martINI and
+# netifaces) and the `silvermirror` console script, which calls
+# silvermirror.unify:unify.
+from setuptools import setup, find_packages
+
+version = '0.0'
+
+setup(name='silvermirror',
+      version=version,
+      description="mirror files",
+      long_description="""\
+""",
+      classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+      keywords='mirror unison',
+      author='Jeff Hammel',
+      author_email='k0scist@gmail.com',
+      url='http://explosivedecompression.net',
+      license='GPL',
+      # every package found except the excluded helper/test directories
+      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
+      include_package_data=True,
+      zip_safe=False,
+      install_requires=[
+          # -*- Extra requirements: -*-
+        'martINI',
+        'netifaces'
+      ],
+      # extra location to search for the martINI requirement
+      dependency_links=[
+        'https://svn.openplans.org/svn/standalone/martINI#egg=martINI',
+        ],
+      entry_points="""
+      # -*- Entry points: -*-
+      [console_scripts]
+      silvermirror = silvermirror.unify:unify
+      """,
+      )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silvermirror-whitepaper.txt	Mon Sep 07 15:39:06 2009 -0400
@@ -0,0 +1,228 @@
+SilverMirror Whitepaper
+
+It is necessary to maintain parallel directory structures of various  
+resources across an arbitrary number of computers. The traditional  
+approach is the central server model, where files live in one  
+canonical location and the network is used to give access to the data.  
+However, this model has deficiencies, chief among them that if the  
+server goes down or must be moved, a considerable amount of effort must  
+be expended to set up a new central server.
+
+Distributed version control, often of nominal use (in the case where a  
+canonical trunk exists), is ideally suited to provide mirroring of  
+desired resources across computers.
+
+Implementation
+
+A front end to a DVCS - most likely mercurial but potentially bzr -  
+will be written to keep resources in sync across an arbitrary number  
+of computers. The front end, called SilverMirror, may be used to push  
+or pull changes to resources. Optionally, a daemon will monitor  
+changes to resources and push or pull changes at desired intervals.
+
+The use should be as natural as possible and require no interaction  
+for everyday tasks. A resource consists of a directory and all  
+subdirectories and their contents. Once a resource is denoted as  
+versioned, any change to the resource's directory structure should be  
+mirrored across machines without user intervention. Files matching a  
+pattern may be ignored, either globally or on a per resource basis,  
+for the purpose of versioning.
+
+Configuration
+
+SilverMirror is configured via an INI file containing a section for  
+each resource and a section for application configuration.
+
+The main section, denoted [::SilverMirror::], has the following options:
+
+  * directory: base directory for SilverMirror. The SilverMirror  
+configuration is stored in ${directory}/.silvermirror . If omitted,  
+the user's home directory is used.
+
+  * ignore: global patterns of files and directories to ignore. Paths  
+matching these patterns will not be versioned.
+
+Each section has the following configuration options:
+
+  * directory: path of the resource. If a relative path is used, it is  
+joined with the directory setting from the main section. If this  
+setting is not specified, the section name is used as a relative path.
+
+  * ignore: paths not to version on a per resource basis. This is in  
+addition to the patterns specified by the ignore setting in the main  
+section.
+
+  * conflict: handler to resolve conflict.
+
+  * hosts: hosts to push/pull from
+
+In order to ensure coherency among resources, all relevant  
+configuration options must be synced prior to push/pull transactions.
+
+Default Configuration:
+
+[::SilverMirror::]
+conflict = ClobberRemote
+
+Example of a more complex configuration:
+
+[::SilverMirror::]
+conflict.push = ClobberRemote
+conflict.pull = ClobberLocal
+
+Push
+
+Push changes to remote resources. When resources are pushed, first  
+changes are pulled from each remote host in turn, conflicts between  
+local and remote changes are resolved (see Behavior on Conflicts),  
+then local modifications are pushed. This is done to keep the  
+resources in sync.
+
+When new files are added to the resource they are automatically added  
+to the hg repository. When resource files are edited the changes are  
+pushed to the repository. When a conflict occurs between local  
+resources and remote resources, the conflict handler is used.
+
+Pull
+
+Get changes to the cloud filesystem resources. If no host is  
+specified, pull changes from all known and accessible hosts.
+
+Namespaced Resources
+
+It is possible to maintain versioning of a subdirectory within a  
+resource.
+
+Example:
+
+[docs]
+directory = /path/to/docs
+
+[docs:private]
+
+This configuration snippet describes a resource, [docs:private],  
+namespaced within the [docs] resource. [docs:private] inherits  
+configuration and behavior from [docs] but may be dealt with  
+separately. For example, some computers in the cloud may not have  
+[docs:private] specified in their configuration and so will not get a  
+copy of it upon pulling. A common use case is specifying a  
+subdirectory to be omitted with the ignore option in the configuration  
+file, then, when this subdirectory needs to be shared between multiple  
+computers, removing it from the ignore values and including as a  
+namespaced resource.
+
+In the above example, because the directory option was not specified  
+in the [docs:private] section, the path to [docs:private] is taken  
+from its namespace (private) and the directory of its parent resource.  
+So its base directory  is /path/to/docs/private . If a relative path  
+was specified in the directory option of the [docs:private] section,  
+it would be joined with the base directory of [docs].
+
+Behavior on Conflicts
+
+Conflict handlers are set via setuptools entry points. Several  
+conflict handlers are provided with SilverMirror:
+
+  * ClobberLocal: replace local changes with changes from remote files
+
+  * ClobberRemote: replace remote file changes with changes from local  
+files
+
+  * Edit: invoke an editor (default: $EDITOR) to interactively resolve  
+the conflicts
+
+The conflict handler may also be specified from the command line:
+
+silvermirror push -d ClobberRemote
+
+Command Line Usage
+
+silvermirror [push|pull] [resource] [options]
+
+In the simplest invocation, SilverMirror is used with no command line  
+arguments:
+
+silvermirror
+
+This pushes changes of the resource as determined by the current  
+working directory after pulling outstanding changes from all  
+applicable remote computers and invoking the conflict handler for  
+push. If the current working directory is not within a resource, all  
+resources will be pushed.
+
+Finer control is obtained by specifying command line arguments:
+
+[push|pull] :  whether to use the push method (which includes pulling  
+for changes; see above) or the pull method. If not specified, the  
+resource is pushed.
+
+[resource] :  which resource to act upon. This can be the resource  
+name, as specified in the .ini file, or the path to the base directory  
+of the resource. Note that if a path is specified, it must be to the  
+base directory of the resource as SilverMirror has no notion of  
+disparate versioning within a resource. If the resource is not  
+specified, the resource that the current working directory is within  
+is used. If the current working directory is not in a resource path,  
+all resources are acted upon in turn. If the key word "--all" is used,  
+all resources will also be acted upon.
+
+[options] : several command line switches are available to the  
+silvermirror program:
+
+  * -d <handler> : specify which conflict handler to use. <handler>  
+should be the name of the desired conflict handler. A list of all  
+conflict handlers is available with the "--conflict-handlers" option.
+
+  * -H <host>, --host=<host> : pull and/or push only to specified  
+hosts. If this option is used more than once, all of the hosts  
+specified will be acted upon.
+
+  * --conflict-handlers :  list the name and description (if  
+specified) for all available conflict handlers.
+
+  * --help : displays help and usage information
+
+Behavior Respecting Versioned Directories
+
+SilverMirror does not desire to duplicate versioning on directories  
+already under version control (svn, bzr, hg). These resources are  
+automatically ignored. In a future implementation, these resources  
+would optionally be checked out or updated upon a pull.
+
+Automatic Synchronization
+
+SilverMirror includes a script that will automatically synchronize  
+the resources at a specified interval. This daemon,  
+called silvermirrord, is invoked from the command line with options  
+parallel to the silvermirror program. One additional option, -s, tells  
+how many seconds between syncs. Upon invocation, this program puts  
+itself in the background and performs the desired sync every number of  
+seconds specified. It is important that the conflict handler specified  
+is noninteractive, otherwise the daemon will hang forever.
+
+As an alternative, the silvermirror program may be invoked from a cron  
+job.
+
+Future Work
+
+SilverMirror implements a cloud filesystem which may be accessed  
+nearly transparently by an arbitrary number of computers. Several  
+improvements could extend SilverMirror to solve several deficiencies  
+of modern filesystems.
+
+  * tagging: in most filesystems, a file has a canonical location.  
+However, it may be desirable to have the file accessible via multiple  
+paths. In practice, this is achieved via symbolic links. However, this  
+requires manual maintenance of links vs the canonical location. Noting  
+that this problem is identical to tagging, a solution minimizing  
+manual intervention could be added to SilverMirror.
+
+  * update of web documents: modern computers deal heavily with  
+documents via URLs. It is noted that this includes files, the URL of a  
+file with path ${PATH} being file://${PATH} noted implicitly from  
+contexts. However, existing shells and operating systems have no  
+mechanism for indicating that a "file" is a web resource. Such  
+functionality could be added to SilverMirror so that up-to-date
+versions of web resources could be maintained.  This infrastructure
+could also include notions for updating versioned resources (see
+Behavior Respecting Versioned Directories) with parallel notation.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silvermirror/__init__.py	Mon Sep 07 15:39:06 2009 -0400
@@ -0,0 +1,1 @@
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silvermirror/unify.py	Mon Sep 07 15:39:06 2009 -0400
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+import getpass
+import os
+import pexpect
+import socket
+import subprocess
+import sys
+
+from martini.config import ConfigMunger
+from optparse import OptionParser
+from pprint import pprint
+from utils import home
+from utils import ip_addresses
+
+def make_config(filename):
+    """
+    Stub for the case where no configuration file exists.
+
+    Presumably meant to create a default configuration at `filename`
+    one day; for now `filename` is unused and the function always raises
+    NotImplementedError asking the user to supply a config file.
+    """
+    # XXX needed?
+    raise NotImplementedError('Need to specify a config file, like\n~/silvermirror/silvermirror.ini')
+
+def read_config(filename):
+    """
+    Read the silvermirror INI file `filename` and normalize its contents.
+
+    Returns a dict with two keys:
+
+    * 'main': the options of the [::SilverMirror::] section with defaults
+      filled in -- 'basedir' (home() when missing or empty), 'ignore' and
+      'hosts' (whitespace-separated values split into lists), 'password'
+      (boolean, default True) and 'timeout' (float, default 10.0).
+    * 'resources': every other section keyed by its name.  Each gets a
+      'directory' (defaulting to the section name, joined onto 'basedir'
+      when relative, trailing separator removed) and an 'ignore' list made
+      of the global patterns followed by the section's own.
+    """
+    config = ConfigMunger(filename).dict()
+
+    ### main configuration
+    main = config.pop('::SilverMirror::', {})
+    if not main.get('basedir'):
+        main['basedir'] = home()
+    main['ignore'] = main.get('ignore', '').split() # global patterns, applied to every resource
+    main['hosts'] = main.get('hosts', '').split()
+    # Turn the flag into a real boolean: left as a string, a configured
+    # value such as 'false' would be truthy and password prompting could
+    # never be switched off from the config file.
+    password = str(main.get('password', 'true')).strip().lower()
+    main['password'] = password not in ('', '0', 'false', 'no', 'off')
+    main['timeout'] = float(main.get('timeout', '10.'))
+
+    ### resources
+    for resource in config:
+        section = config[resource]
+
+        # directory of resource; the section name doubles as a relative path
+        directory = section.get('directory', resource)
+        if not os.path.isabs(directory):
+            # XXX note: absolute directories will not work for now
+            # XXX so....don't do this!
+            directory = os.path.join(main['basedir'], directory)
+        section['directory'] = directory.rstrip(os.path.sep)
+
+        # per-resource files to ignore
+        # XXX  regexps for now (see `man unison`)
+        # - this is bad as whitespace patterns cannot be ignored
+        # (a new list is built so the global list is never mutated)
+        section['ignore'] = main['ignore'] + section.get('ignore', '').split()
+
+    ###
+    config = { 'main': main, 'resources': config }
+    return config
+
+def unify(args=sys.argv[1:]):
+    """
+    Command line entry point: sync resources with remote hosts via unison.
+
+    `args` is the list of command line arguments, without the program
+    name.  Positional arguments name the resources to sync and 'all'
+    selects every configured resource.  Without positional arguments the
+    resource containing the current working directory is synced, or every
+    resource when the cwd is not inside one.
+
+    Options:
+      -c, --config   configuration file to read (must exist); otherwise
+                     ~/.silvermirror, then /etc/silvermirror, is used
+      -H, --host     host to sync with (repeatable); replaces the hosts
+                     listed in the configuration
+      --no-password  do not prompt for, or send, ssh passwords
+      --test         print hosts, resources and commands; do not run unison
+
+    Raises AssertionError when the given config file is missing, when the
+    configured basedir is not the home directory, or when no host is
+    reachable on port 22.
+    """
+
+    # passwords, keyed by host
+    pw = {}
+
+    ### command line options
+    parser = OptionParser()
+    parser.add_option('-c', '--config')
+    parser.add_option('-H', '--host', dest='hosts',
+                      action='append', default=None)
+    parser.add_option('--no-password', dest='password',
+                      action='store_false', default=True)
+    parser.add_option('--test', dest='test',
+                      action='store_true', default=False)
+    # parse the arguments handed to this function, not always sys.argv
+    (options, args) = parser.parse_args(args)
+
+    ### configuration
+    user_conf = os.path.join(home(), '.silvermirror')
+    if options.config:
+        assert os.path.exists(options.config), \
+            'config file not found: %s' % options.config
+        conf = read_config(options.config)
+    else:
+        for i in user_conf, '/etc/silvermirror':
+            if os.path.exists(i):
+                conf = read_config(i)
+                break
+        else:
+            conf = make_config(user_conf)
+
+    # XXX needed for now
+    assert conf['main']['basedir'] == home(), \
+        'basedir must be the home directory for now'
+
+    ### determine hosts to sync with
+    hosts = set(options.hosts or conf['main']['hosts'])
+    addresses = ip_addresses().values()
+    hosts = hosts.difference(addresses) # don't sync with self
+    _hosts = []
+    for host in hosts:
+        if options.test:
+            print 'Resolving %s' % host
+        # probe the ssh port; only hosts that answer are kept
+        s = socket.socket()
+        s.settimeout(conf['main']['timeout'])
+        try:
+            s.connect((host, 22))
+        except (socket.gaierror, socket.timeout, socket.error):
+            pass # unreachable or unresolvable: leave this host out
+        else:
+            _hosts.append(host)
+        finally:
+            # close the probe socket on failure too, so it does not leak
+            s.close()
+    hosts = _hosts
+    if options.test:
+        print 'Hosts:'
+        for host in hosts:
+            print host
+    assert hosts, 'no reachable hosts to sync with'
+
+    if options.password and conf['main']['password']:
+        for host in hosts:
+            pw[host] = getpass.getpass('Enter password for %s: ' % host)
+    # TODO: ensure that the hosts are resolvable
+    # XXX: hosts should actually be manageable on a per-resource basis
+
+    ### determine resources to sync
+    cwd = os.path.realpath(os.getcwd())
+    resources = conf['resources']
+    _resources = args
+    if 'all' not in _resources:
+        if _resources:
+            # only the resources named on the command line
+            resources = dict((key, value) for key, value in resources.items()
+                             if key in _resources)
+        else:
+            # the resource whose directory contains the cwd, if any;
+            # the trailing separators keep /a/bc from matching /a/b
+            for key, value in resources.items():
+                directory = os.path.realpath(value['directory']) + os.sep
+                if (cwd + os.sep).startswith(directory):
+                    resources = { key: value }
+                    break
+    if options.test:
+        print 'Resources:'
+        pprint(resources)
+
+    ### sync with hosts
+    # unison is given the resource name as a path relative to basedir
+    os.chdir(conf['main']['basedir'])
+    try:
+        for resource in resources:
+            for host in hosts:
+                command = ['unison', '-auto', '-batch', resource,
+                           'ssh://%s/%s' % (host, resource)]
+
+                # XXX - to refactor?
+                for i in resources[resource]['ignore']:
+                    command.extend(('-ignore', "'Name %s'" % i))
+
+                command = ' '.join(command)
+                print command # XXX debug
+                if not options.test:
+                    child = pexpect.spawn(command, timeout=36000, maxread=1)
+                    # only answer a password prompt when a password was
+                    # collected; otherwise (--no-password) just let it run
+                    if host in pw:
+                        child.expect('password: ')
+                        child.sendline(pw[host])
+                    print child.read()
+    finally:
+        # restore the working directory even when a sync fails
+        os.chdir(cwd)
+
+# run the sync when this file is executed directly as a script
+if __name__ == '__main__':
+    unify()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/silvermirror/utils.py	Mon Sep 07 15:39:06 2009 -0400
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import netifaces
+import os
+
+def home():
+    """
+    Return the current user's home directory.
+
+    $HOME is returned unchanged when it is set and non-empty; otherwise
+    os.path.expanduser('~') is used, so a missing HOME variable no longer
+    raises KeyError.
+    """
+    return os.environ.get('HOME') or os.path.expanduser('~')
+
+def ip_addresses():
+    """dictionary of ip4 addresses for the machine
+
+    Maps each interface name to the first IPv4 address reported for it;
+    interfaces without an IPv4 entry are left out.
+    """
+    addresses = {}
+    for interface in netifaces.interfaces():
+        # query the interface once and use the named address family
+        # instead of the magic number 2
+        inet = netifaces.ifaddresses(interface).get(netifaces.AF_INET)
+        if inet:
+            addresses[interface] = inet[0]['addr']
+    return addresses
+
+# when run as a script, print each interface name with its address
+if __name__ == '__main__':
+    for name, value in sorted(ip_addresses().items()):
+        print '%s : %s' % (name, value)