# Copyright 2005, 2006 by Simon 'corecode' Schubert <corecode@fs.ei.tum.de>
#
# This program is free software under the GNU GPL (>=v2)
# Read the file COPYING coming with the software for details.

import sys
import time
import pickle

from CVStools import CVSrepo

class changeset:
    """A group of file revisions that are treated as one commit.

    Attributes:
        author:  who made the commit.
        date:    date of the first revision in the set.
        enddate: date of the last revision in the set; falls back to
                 ``date`` when no (or a false) value is given.
        msg:     the log message.
        revs:    list of revision entries; element 0 of each entry is
                 what ``__str__`` lists as the file.
    """

    def __init__(self, author=None, date=None, msg=None, enddate=None,
                 revs=None):
        if not enddate:
            enddate = date
        if not revs:
            # always start from a fresh list so instances never share one
            revs = []
        self.author = author
        self.date = date
        self.enddate = enddate
        self.msg = msg
        self.revs = revs

    def __cmp__(self, other):
        """Order changesets by date, then enddate, then author, then msg."""
        for field in ('date', 'enddate', 'author', 'msg'):
            order = cmp(getattr(self, field), getattr(other, field))
            if order:
                return order
        return 0

    def __str__(self):
        paths = [entry[0] for entry in self.revs]
        return ("changeset by %s at %d-%d, files %s"
                % (self.author, self.date, self.enddate, paths))

def branchpoint(branch):
    """Return the revision a branch (or branch revision) forks off from.

    A number with an even count of components (x.y.z.t) loses its last
    two components; one with an odd count (x.y.z) loses only the last.
    """
    parts = branch.split('.')
    if len(parts) % 2 == 0:
        kept = parts[:-2]
    else:
        kept = parts[:-1]
    return '.'.join(kept)

def branchstart(f, rev):
    """Return the first revision on the branch that ``rev`` belongs to.

    Args:
        f:   object whose ``revs`` mapping goes from revision number to a
             revision record; element 4 of a record is the sequence of
             branch revisions starting at that revision.
        rev: a branch number (x.y.z) or a revision on a branch (x.y.z.t).

    Returns:
        The first entry in the branch point's branch list that lies on
        the same branch as ``rev``.

    Raises:
        ValueError:  ``rev`` has two components or fewer, i.e. it is not
                     on a branch.
        LookupError: the branch point lists no revision on that branch.
        KeyError:    the branch point is not in ``f.revs``.
    """
    r = rev.split('.')
    if len(r) <= 2:
        # string exceptions are not valid exceptions; raise a real one
        raise ValueError("%s: Not a branch!" % rev)
    if len(r) % 2:
        # revision is x.y.z
        # ->
        #   brprefix = x.y.z.
        #   brpoint = x.y
        brprefix = '.'.join(r) + '.'
        brpoint = '.'.join(r[:-1])
    else:
        # revision is x.y.z.t
        # ->
        #   brprefix = x.y.z.
        #   brpoint = x.y
        brprefix = '.'.join(r[:-1]) + '.'
        brpoint = '.'.join(r[:-2])
    brrev = f.revs[brpoint]
    # find first branch revision at branch point
    for br in brrev[4]:
        if br.startswith(brprefix):
            return br
    raise LookupError("Branch %s not found" % brprefix)

def branchdates(m, tag):
    """Return dates and revisions just before and just after a branch.

    Args:
        m:   object whose ``file()`` returns the file object, which must
             provide ``revs``, ``symbols`` and ``sym2rev()``.
        tag: symbolic name of the branch.

    Returns:
        ``(datebefore, beforerev, dateafter, firstrev)`` where
        ``firstrev`` is the first revision on the branch and
        ``beforerev`` the revision it forks off from.  All four are None
        when the file does not carry ``tag`` or the tag resolves to a
        revision with fewer than two dots (not a branch).

    Raises:
        ValueError: the branch point is dated after the first branch
            revision.  Errors from ``branchstart`` propagate as well.
    """
    f = m.file()
    revs = f.revs
    if tag not in f.symbols:
        return None, None, None, None
    lastrev = f.sym2rev(tag)
    if lastrev.count('.') < 2:
        return None, None, None, None
    firstrev = branchstart(f, lastrev)
    beforerev = branchpoint(firstrev)
    dateafter = revs[firstrev][1]
    datebefore = revs[beforerev][1]
    if datebefore > dateafter:
        # the original raised an undefined name here (NameError)
        raise ValueError(
            "tag %s: branch point %s is newer than first branch revision %s"
            % (tag, beforerev, firstrev))
    return datebefore, beforerev, dateafter, firstrev

def nextcvsrev(f, tag=None, tagdate=None):
    """Yield the revision records leading up to a file's tip, oldest first.

    Revision records are the values of ``f.revs``.  This function uses
    element 0 (revision number), 1 (date), 3 (state, ``'dead'`` for a
    removed file) and 5 (number of the next revision to follow).

    Args:
        f:       file object providing ``head``, ``branch``, ``revs``,
                 ``symbols`` and ``sym2rev()``.
        tag:     optional symbolic name; when the file carries it, the
                 walk starts from the revision it resolves to.
        tagdate: date of the tag, used to decide what to do with files
                 that do not carry ``tag``.

    Yields:
        Revision records sorted by date, then revision number.  Of a run
        of consecutive ``'dead'`` revisions only the first is yielded.
        Nothing is yielded for files judged not to belong to the tag.

    Raises:
        RuntimeError: the file does not carry the tag, its last revision
            is older than ``tagdate`` and it is not dead.
    """
    revs = f.revs
    # if the tip is on a branch, we need to track it there.
    # this will happen for unchanged vendor sources
    nextrev = f.branch or f.head
    tagok = True
    if tag is not None:
        if tag in f.symbols:
            nextrev = f.sym2rev(tag)
            print("TAG %s REV %s" % (tag, nextrev))
        else:
            # convert main revision trunk
            # XXX: this means we only know how to deal with branches
            # from the main trunk!
            tagok = False
            print("UNTAGGED REV %s" % (nextrev,))

    history = []
    while nextrev:
        while nextrev.count('.') > 1:
            # revision is in a branch
            # revisions in branches are stored with reverse
            # pointers: follow the branch from the start until we
            # get to the desired revision, storing revisions
            # as we go...
            branch = nextrev
            nextrev = branchstart(f, branch)
            onbranch = []
            while nextrev:
                rev = revs[nextrev]
                onbranch.append(rev)
                if nextrev == branch:
                    break
                nextrev = rev[5]
            # ...then record each revision in reverse order.
            onbranch.reverse()
            history.extend(onbranch)
            # continue from the revision the branch forks off from
            nextrev = branchpoint(branch)
        rev = revs[nextrev]
        history.append(rev)
        nextrev = rev[5]

    # oldest first; ties on the date are broken by revision number
    history.sort(key=lambda r: (r[1], r[0]))

    if not tagok:
        # tag not found on this file, this may mean:
        if not history:
            # no revisions at all: nothing to convert
            return
        if history[0][1] >= tagdate:
            # the file was added after the branch was tagged:
            # ignore this file.
            return
        elif history[-1][1] < tagdate:
            if history[-1][3] == 'dead':
                # the file was deleted before the branch
                # was tagged: keep the revision list as is.
                pass
            else:
                # last revision is older than branch date, but file
                # still exists: it's probably not in our branch,
                # unless it's a nested branch.
                raise RuntimeError("Unexpected case, tag %s tagdate %d"
                                   % (tag, tagdate))
        else:
            # First revision is older than tag date,
            # last revision is newer than tag date.
            for rev in history:
                print("REV %s" % (rev,))
            lastrev = None
            trimmed = []
            for r in history:
                if r[1] >= tagdate:
                    if lastrev and lastrev[3] == 'dead':
                        # file did not exist at branch time,
                        # return trimmed revision list
                        history = trimmed
                        break
                    # weird, file did exist at branch time:
                    # assume we got the tag date wrong, don't convert
                    # revisions for this file.
                    print("KILL")
                    return
                lastrev = r
                trimmed.append(r)
            else:
                raise RuntimeError("Unexpected case, tag %s tagdate %d"
                                   % (tag, tagdate))

    isdead = False
    for rev in history:
        if rev[3] == 'dead':
            if isdead:
                # Bug in old CVS repositories:
                # duplicate "dead" revision.
                # Drop it.
                continue
            isdead = True
        else:
            isdead = False
        yield rev

# Action codes attached to each file revision in a changeset:
# file added, file deleted, file modified.
CH_ADD, CH_DEL, CH_MOD = 1, 2, 3

class ChastAggregator:
    """Aggregate the per-file revisions of a CVS module into changesets.

    Attributes:
        repo:     the ``CVSrepo`` opened on ``cvsroot``.
        module:   module (path) walked inside the repository.
        files:    paths seen while walking the module.
        oldfiles: paths handed in by the caller as already known.
        addfiles, delfiles, modfiles: paths that had an add / delete /
                  modify action generated for them.
        progress: callable taking one status message string.
        since:    revisions dated before this are not reported, they
                  only serve as the previous state of a file.
    """

    def __init__(self, cvsroot, module, progress, files=None, since=0):
        self.repo = CVSrepo(cvsroot)
        self.module = module
        self.files = set()
        self.oldfiles = set(files or [])
        self.addfiles = set()
        self.delfiles = set()
        self.modfiles = set()
        self.progress = progress
        self.since = since

    def _changedfiles(self):
        """Walk the module, record every path, yield files not older than since."""
        self.progress("Scanning for files")
        for f in self.repo.walk(self.module):
            self.files.add(f.path)
            if f.time < self.since:
                continue
            yield f

    def _tagdate(self, tag):
        """Return the date of the earliest commit on branch ``tag``.

        Narrows a window across all files: the latest "before branch"
        date and the earliest "after branch" date.  Files whose dates
        would make the window inconsistent are skipped.  Returns None
        when no file contributes a usable pair of dates.
        """
        datebefore, dateafter = None, None
        for f in self.repo.walk(self.module):
            dbef, rbef, daft, raft = branchdates(f, tag)
            if not dbef or not daft:
                continue
            if dateafter and dbef > dateafter:
                continue
            if datebefore and datebefore > daft:
                continue
            if not datebefore or datebefore < dbef:
                datebefore = dbef
            if not dateafter or dateafter > daft:
                dateafter = daft
        if datebefore is not None and dateafter is not None:
            assert datebefore <= dateafter
        return dateafter

    def _changedfilerevs(self, m, tag=None, tagdate=None):
        """Yield ``(path, rev, date, author, action)`` for one file.

        Only revisions dated ``self.since`` or later are yielded; the
        last older revision is used as the state the first one is
        compared against.  Also records the path in ``addfiles``,
        ``delfiles`` or ``modfiles`` according to the action.
        """
        f = m.file()
        lastrev = None
        revs = []
        for rev in nextcvsrev(f, tag, tagdate):
            if rev[1] < self.since:
                lastrev = rev
            else:
                revs.append(rev)

        for rev in revs:
            if rev[3] == "dead":
                # It was born dead
                if not lastrev:
                    continue
                action = CH_DEL
                self.delfiles.add(m.path)
            elif not lastrev or lastrev[3] == "dead":
                action = CH_ADD
                self.addfiles.add(m.path)
                # a re-added file is no longer deleted
                self.delfiles.discard(m.path)
            else:
                self.modfiles.add(m.path)
                action = CH_MOD

            yield (m.path, rev[0], rev[1], rev[2], action)

            lastrev = rev

    def _aggrchanges(self, tag=None, tagdate=None):
        """Collect all file revisions and group them into changesets.

        Revisions by the same author with the same log message are put
        into one changeset as long as each one is dated at most 180
        after the changeset's previous revision (sliding window).
        Returns the changesets sorted.
        """
        sets = []
        revs = []
        for f in self._changedfiles():
            self.progress("Reading %s" % f.path)
            revs.extend(self._changedfilerevs(f, tag, tagdate))

        self.progress("Sorting")

        # entries are (path, rev, date, author, action):
        # sort by author, then date
        revs.sort(key=lambda r: (r[3], r[2]))

        # maximum date distance between two revisions of one commit
        window = 180

        def _maymatch(a, b):
            # same author and close enough in time:
            # potentially the same commit (modulo logmsg)
            return a[3] == b[3] and abs(a[2] - b[2]) <= window

        class fcache:
            """Bounded cache of ``createfunc(item)`` results."""

            def __init__(self, createfunc=None):
                self.func = createfunc
                self.cache = {}

            def __getitem__(self, item):
                if len(self.cache) > 2000:
                    self._flush()
                r = self.cache.get(item)
                if r is not None:
                    return r
                try:
                    r = self.func(item)
                except Exception:
                    # drop part of the cache and retry once; a second
                    # failure propagates to the caller
                    self._flush()
                    r = self.func(item)
                self.cache[item] = r
                return r

            def _flush(self):
                """Flush a part of the cache"""
                c = len(self.cache) // 2
                while c > 0:
                    self.cache.popitem()
                    c -= 1

        files = fcache(self.repo.file)

        # now we got revs sorted, concurrent checkins
        # by the same author grouped together.
        conc = []
        l = 0
        for c in revs:
            l += 1
            if l % 1000 == 1:
                self.progress("Aggregating %d revs (%d%%)"
                              % (len(revs), l * 100 // len(revs)))
            set_ = None
            logmsg = files[c[0]].getlog(c[1]).rstrip()  # strip trailing \n
            rmrevs = []
            for cc in conc:
                # compare against the last revision of the open set
                # so that we have a sliding window
                if not _maymatch(c, cc.revs[-1]):
                    rmrevs.append(cc)
                    continue
                # if we got here, we match potentially.
                # just have to check the log messages
                if cc.msg == logmsg:
                    set_ = cc
                    break

            # sets that fell out of the window can never match again
            for rr in rmrevs:
                conc.remove(rr)

            if set_ is None:
                # need to create a new set.
                set_ = changeset(author=c[3], msg=logmsg, date=c[2])
                conc.append(set_)
                sets.append(set_)
            else:
                # update end date
                set_.enddate = c[2]

            set_.revs.append(c)

        sets.sort()
        return sets

    def changes(self, tag=None):
        """Return the list of changesets since ``self.since``.

        With ``tag``, the tag's date is determined first and passed on
        to the revision walk.  When ``self.since`` is non-zero, a
        synthetic "repo surgery" changeset is put first for files that
        disappeared from, or appeared in, the repository without a
        matching delete/add revision.  Afterwards ``files``,
        ``oldfiles`` and ``since`` are updated for a subsequent call.
        """
        if tag:
            self.progress("Finding tag date for %s" % tag)
            tagdate = self._tagdate(tag)
            self.progress("Date for tag %s is %s"
                          % (tag, time.asctime(time.gmtime(tagdate))))
        else:
            tagdate = None
        sets = self._aggrchanges(tag, tagdate)
        if self.since != 0:
            c = changeset(author="repo surgery", msg="repo surgery",
                          date=self.since)
            for df in self.oldfiles - (self.files | self.delfiles):
                self.progress("Fixing up repo: %s disappeared" % df)
                c.revs.append((df, "none", c.date, c.author, CH_DEL))
            # All files which were modified or deleted, but were not
            # present before or added in the meanwhile, have to be repo
            # copied.
            for af in (self.modfiles | self.delfiles) - \
                            (self.oldfiles | self.addfiles):
                self.progress("Fixing up repo: %s appeared" % af)
                # placeholder shaped like a revision record (number,
                # date, author, state) so the state check below works
                # even if no revision older than since is found
                d = ("none", -1, None, None)
                for r in nextcvsrev(self.repo.file(af), tag, tagdate):
                    if r[1] < self.since and d[1] < r[1]:
                        d = r
                # If the file was deleted, don't add it at all
                if d[3] == "dead":
                    continue
                c.revs.append((af, d[0], c.date, c.author, CH_ADD))
            if c.revs:
                sets.insert(0, c)
        self.files -= self.delfiles
        # NOTE(review): oldfiles and files are the same set object from
        # here on -- confirm this is intended for repeated calls
        self.oldfiles = self.files
        if sets:
            self.since = sets[-1].date
        return sets


# Command line driver: aggregator.py [cvsroot module [since]]
#
# Prints all changesets of a module and remembers, in a pickle file in
# the current directory, the date of the last change and the set of
# files seen, so that the next run only reports newer changes.
if __name__ == "__main__":
    since = 0
    if len(sys.argv) > 2:
        repo = sys.argv[1]
        module = sys.argv[2]
        if len(sys.argv) > 3:
            since = int(sys.argv[3])
    elif len(sys.argv) == 2:
        # a cvsroot without a module is not usable
        sys.exit("usage: %s [cvsroot module [since]]" % sys.argv[0])
    else:
        repo = "test/repo"
        module = "test"

    files = None
    filesname = ((repo + "/" + module).replace("/", "-") + ".files").strip("-")

    # Saved state from a previous run overrides the command line.
    # NOTE: unpickling executes code from the file; the state file must
    # only ever come from a previous run of this script.
    try:
        statefile = open(filesname, 'rb')
    except (IOError, OSError):
        # no saved state yet: start from scratch
        statefile = None
    if statefile is not None:
        try:
            try:
                up = pickle.Unpickler(statefile)
                savedsince = up.load()
                savedfiles = up.load()
            except Exception:
                # unreadable state is not fatal, just start over
                print("can't read files data from %s" % filesname)
            else:
                since = savedsince
                files = savedfiles
        finally:
            statefile.close()

    def printfn(s):
        print(s)

    # since is the aggregator's start date, not a tag for changes()
    ca = ChastAggregator(repo, module, progress=printfn, files=files,
                         since=since)
    changes = ca.changes()
    for c in changes:
        print("Change at %s by %s:"
              % (time.asctime(time.gmtime(c.date)), c.author))
        # each entry is (path, rev, date, author, action)
        for f in c.revs:
            print("\t%s %s %s" % (f[4], f[0], f[1]))
    if changes:
        # date of the last revision of the last changeset
        lastchange = changes[-1].revs[-1][2]
    else:
        lastchange = since

    try:
        # protocol -1 is a binary format: write in binary mode
        statefile = open(filesname, 'wb')
        try:
            p = pickle.Pickler(statefile, -1)
            p.dump(lastchange)
            p.dump(ca.files)
        finally:
            statefile.close()
    except (IOError, OSError, pickle.PicklingError):
        print("can't write files data to %s" % filesname)

