cpython/Doc/tools/findacks

#!/usr/bin/env python
"""Script to locate email addresses in the CVS logs."""
__version__ = '$Revision$'

import os
import re
import sys
import UserDict

import cvsinfo


class Acknowledgements(UserDict.UserDict):
    """Mapping from an email address to a ``{path: name}`` dictionary.

    For each address, the inner dictionary records the name that was
    registered for it under each path passed to ``add()``.
    """

    def add(self, email, name, path):
        """Record that *email* was registered for *path* under *name*.

        A later call with the same *email* and *path* replaces the name
        stored earlier for that pair.
        """
        records = self.data
        if email not in records:
            records[email] = {}
        records[email][path] = name


def open_cvs_log(info, paths=None):
    """Start ``cvs log`` and return a readable pipe to its output.

    *info* must provide ``get_cvsroot()``; its result is passed to cvs via
    ``-d``.  If *paths* is non-empty, its items are appended to the command
    separated by single spaces.

    NOTE(review): the root and the paths are interpolated into the shell
    command without quoting -- confirm callers never pass values that
    contain spaces or shell metacharacters.
    """
    pieces = ["cvs -q -d%s log " % info.get_cvsroot()]
    if paths:
        pieces.append(" ".join(paths))
    command = "".join(pieces)
    return os.popen(command, "r")


# Matches an email address enclosed in angle brackets; group 1 is the
# address itself.  Matching is case-insensitive.
email_rx = re.compile("<([a-z][-a-z0-9._]*@[-a-z0-9.]+)>", re.IGNORECASE)

def find_acks(f, acks):
    """Scan log text from *f* and register each bracketed email in *acks*.

    Lines are fetched with ``f.readline()`` until it returns an empty
    string.  A line starting with "Working file: " sets the file name used
    for subsequent registrations (it is None until such a line is seen).
    For any other line containing ``<address>``, the contributor name is the
    trailing run of words -- taken from the previous line plus the text
    before the address -- that start with an uppercase letter or are one of
    the particles 'van' / 'von'.  Only the first address on a line is used.
    Each hit is reported as ``acks.add(lowercased_email, name, filename)``.
    """
    particles = ('van', 'von')
    previous = ''
    current_file = None
    for line in iter(f.readline, ''):
        if line.startswith("Working file: "):
            current_file = line.split(None, 2)[2].strip()
        else:
            match = email_rx.search(line)
            if match is not None:
                candidates = previous.split()
                candidates.extend(line[:match.start()].split())
                # Walk backwards from the word nearest the address and stop
                # at the first word that cannot be part of a name.
                name_parts = []
                for word in reversed(candidates):
                    if not (word[0].isupper() or word in particles):
                        break
                    name_parts.append(word)
                name_parts.reverse()
                acks.add(match.group(1).lower(),
                         " ".join(name_parts),
                         current_file)
        previous = line


def load_cvs_log_acks(acks, args):
    """Collect acknowledgements from the CVS logs selected by *args*.

    *args* (or ``[""]`` when it is empty) is handed to
    ``cvsinfo.get_repository_list()``, which yields ``(info, paths)`` pairs.
    For each pair the CVS root is reported on stderr, the log is opened
    with ``open_cvs_log()`` and scanned with ``find_acks()``.
    """
    repolist = cvsinfo.get_repository_list(args or [""])
    for info, paths in repolist:
        sys.stderr.write("Repository: %s\n" % (info.get_cvsroot(),))
        f = open_cvs_log(info, paths)
        try:
            find_acks(f, acks)
        finally:
            # Always release the pipe, even if parsing the log fails.
            f.close()


def load_tex_source_acks(acks, args):
    """Collect acknowledgements from the TeX sources named in *args*.

    Each entry is handled as a single file when ``os.path.isfile()`` says
    so, and otherwise as a directory to be searched.  An empty entry is
    treated as the current directory.
    """
    for entry in args:
        target = entry
        if not target:
            target = os.curdir
        if os.path.isfile(target):
            reader = read_acks_from_tex_file
        else:
            reader = read_acks_from_tex_dir
        reader(acks, target)


def read_acks_from_tex_file(acks, path):
    r"""Register every ``\sectionauthor{name}{email}`` line of one file.

    Only lines that start with ``\sectionauthor{`` are considered.  The
    name and the email are taken from the first and second brace groups as
    parsed by ``extract_tex_group()`` and passed to
    ``acks.add(email, name, path)``.  Lines on which either group cannot be
    parsed are skipped instead of aborting the scan.  The file is always
    closed before returning.
    """
    macro = r"\sectionauthor"
    f = open(path)
    try:
        for line in f:
            if not line.startswith(macro + "{"):
                continue
            # Drop the macro name so the text begins at the first group.
            parsed = extract_tex_group(line[len(macro):])
            if parsed is None:
                continue
            name, rest = parsed
            parsed = extract_tex_group(rest)
            if parsed is None:
                continue
            email = parsed[0]
            acks.add(email, name, path)
    finally:
        f.close()


def read_acks_from_tex_dir(acks, path):
    """Scan every ``.tex`` file found under the directory *path*.

    Directories are visited breadth-first: the files of a directory are
    processed as they are listed, while its subdirectories are queued and
    visited later, in the order they were found.
    """
    queue = [path]
    while queue:
        directory = queue.pop(0)
        for entry in os.listdir(directory):
            full = os.path.join(directory, entry)
            if os.path.isdir(full):
                queue.append(full)
                continue
            if os.path.normpath(full).endswith(".tex"):
                read_acks_from_tex_file(acks, full)


def extract_tex_group(s):
    """Split the first balanced ``{...}`` group off *s*.

    Returns ``(contents, rest)``: *contents* is the text between the first
    opening brace and its matching closing brace (nested groups are kept
    verbatim), and *rest* is everything after that closing brace.  Text in
    front of the opening brace, including a stray ``}``, is skipped.
    Returns None when *s* contains no complete group.
    """
    depth = 0
    start = None
    for i, ch in enumerate(s):
        if ch == '{':
            if depth == 0:
                # Remember where the group really begins; it is not
                # necessarily the first character of the string.
                start = i
            depth += 1
        elif ch == '}' and depth > 0:
            depth -= 1
            if depth == 0:
                return s[start + 1:i], s[i + 1:]
    return None


def print_acks(acks):
    """Print a detailed report: one paragraph per address, listing its files.

    *acks* maps each email address to a ``{file: name}`` dictionary.
    Paragraphs are separated by a blank line.  When every file records the
    same non-empty name, the heading is ``name <email>:`` and the files are
    listed bare; otherwise the heading is ``email:`` and each file whose
    name is non-empty is shown as ``file (as name)``.  An address with no
    files at all gets just its ``email:`` heading.
    """
    first = True
    for email, files in acks.items():
        if not first:
            sys.stdout.write("\n")
        first = False
        entries = sorted(files.items())
        names = [name for _, name in entries]
        # A preferred name exists only if all files agree on it.
        if names and names.count(names[0]) == len(names):
            prefname = names[0]
        else:
            prefname = ""
        if prefname:
            heading = "%s <%s>:" % (prefname, email)
        else:
            heading = email + ":"
        sys.stdout.write(heading + "\n")
        for filename, name in entries:
            if name == prefname:
                line = "    " + filename
            else:
                line = "    %s (as %s)" % (filename, name)
            sys.stdout.write(line + "\n")


def print_ack_names(acks):
    """Print one display name per email address, sorted by last word.

    *acks* maps each email address to a ``{file: name}`` dictionary.  For
    each address the display name is the first non-blank name, taking the
    files in sorted order; if there is none, the address itself is used.
    The names are printed one per line, ordered case-insensitively by
    their last whitespace-separated word and then by the whole name.
    """
    names = []
    for email, files in acks.items():
        chosen = ""
        for _, name in sorted(files.items()):
            # Blank names carry no information; keep looking.
            if name and name.strip():
                chosen = name
                break
        names.append(chosen or email)

    def sort_key(s):
        lowered = s.lower()
        words = lowered.split()
        # Guard against entries with no words at all (e.g. an empty
        # address used as a fallback), which previously broke sorting.
        if words:
            last = words[-1]
        else:
            last = ""
        return (last, lowered)

    names.sort(key=sort_key)
    for name in names:
        sys.stdout.write(name + "\n")


def main():
    """Gather acknowledgements for the command-line arguments and print them.

    The same argument list is given first to the CVS log loader and then to
    the TeX source loader; the collected names are printed afterwards.
    """
    acks = Acknowledgements()
    args = sys.argv[1:]
    for loader in (load_cvs_log_acks, load_tex_source_acks):
        loader(acks, args)
    print_ack_names(acks)


# Run the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()