view commitwatcher/agent.py @ 18:53533334469f

commitwatcher/agent.py
author Jeff Hammel <jhammel@mozilla.com>
date Sat, 28 Sep 2013 10:55:49 -0700
parents 9ec036da252e
children 091fd9f40b05
line wrap: on
line source

"""
agents to gather commits
"""

import feedparser
import os
from abc import abstractmethod
from pypatch import patch
from .commit import Commit
from .store import MemoryStore


class Agent(object):
    """base class shared by the commit-gathering agents"""

    def __init__(self, repository, store=None):
        """
        repository -- repo to monitor
        store -- store object to use; a new MemoryStore is created
                 when none is given
        """
        # only None selects the default; any other value is kept as passed
        if store is None:
            store = MemoryStore()
        self.store = store
        self.repository = repository


class LocalCheckoutAgent(object):
    """
    agent based on local checkouts

    Placeholder only: the class defines no attributes or methods yet.
    """
    # NOTE(review): derives from object rather than Agent, so it takes no
    # repository/store arguments -- confirm whether that is intended


class FeedAgent(Agent):
    """agent that gathers changesets by reading an RSS/Atom feed"""

    def feed(self):
        """URL of the feed: the repository URL followed by /atom-log"""
        base = self.repository.rstrip('/')
        return base + '/atom-log'

    # NOTE(review): @abstractmethod is only enforced under an ABCMeta
    # metaclass, which Agent does not appear to declare; as written the
    # base implementation simply returns None
    @abstractmethod
    def files(self, revision):
        """gets the files for the revision the given link points at"""

    def update(self):
        """parse the feed and look up the files of every entry"""

        entries = feedparser.parse(self.feed())['entries']
        for item in entries:
            changed = self.files(item['link'])
            # TODO: nothing is done with the result yet

            # TODO            commit = Commit()

class FeedAgentDiff(FeedAgent):
    """read files from diff"""

    @staticmethod
    def lsdiff(diff):
        """
        list the paths touched by a diff

        diff -- URL of a diff, path of a diff file on disk, or the text
                of the diff itself

        Returns a flat set of paths; added, modified and removed files are
        not distinguished in the result.

        Raises ValueError when the diff cannot be parsed and
        NotImplementedError for a patch whose source and target differ
        without either of them being /dev/null (e.g. a rename).
        """

        # A URL never spans several lines whereas diff text always does.
        # Checking this keeps a diff whose *content* merely mentions a URL
        # from being fetched as though it were one.
        single_line = '\n' not in diff.strip()
        if single_line and '://' in diff:
            factory = patch.fromurl
        elif os.path.exists(diff):
            factory = patch.fromfile
        else:
            factory = patch.fromstring
        patchset = factory(diff)

        # python-patch's factories return False instead of raising when the
        # input cannot be parsed; compare by identity because a PatchSet
        # without items may itself be falsy
        if patchset is False:
            raise ValueError("could not parse diff: %r" % (diff[:80],))

        files = {}
        for p in patchset.items:

            # before, after
            a_b = {}
            for side in ('source', 'target'):
                path = getattr(p, side)

                # strip a leading 'a/' or 'b/' from the front, just to make
                # sure; note this also shortens a path that genuinely lives
                # in a top-level directory called a/ or b/
                for prefix in ('a/', 'b/'):
                    if path.startswith(prefix):
                        path = path[len(prefix):]
                        break
                a_b[side] = path

            source = a_b['source']
            target = a_b['target']

            # added, modified, removed; renames are not handled yet
            if source == target:
                files.setdefault('modified', set()).add(source)
            elif source in ('/dev/null', 'dev/null'):
                files.setdefault('added', set()).add(target)
            elif target in ('/dev/null', 'dev/null'):
                files.setdefault('removed', set()).add(source)
            else:
                raise NotImplementedError("source: %s;  target: %s" % (source, target))

        # xxx flatten for simplicity for now and hope i don't regret this
        return set().union(*files.values())

    def diff_url(self, link):
        """
        returns diff_url from revision link; only the last '/rev/' in the
        link is replaced, and a link without '/rev/' is returned unchanged

        >>> agent.diff_url('http://hg.mozilla.org/mozilla-central/rev/4e1a3919e741')
        'http://hg.mozilla.org/mozilla-central/raw-rev/4e1a3919e741'
        """
        return '/raw-rev/'.join(link.rsplit('/rev/', 1))

    def files(self, revision):
        """
        revision -- revision link

        Prints the revision link followed by the sorted paths found in its
        diff, and returns those paths as a set.
        """

        raw_rev = self.diff_url(revision)

        # get paths from diff
        paths = self.lsdiff(raw_rev)
        listing = '\n'.join(['  %s' % path for path in sorted(paths)])

        # a single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function
        print('%s :\n%s\n' % (revision, listing))

        return paths