cpython/Doc/tools/listmodules

#! /usr/bin/env python
#  -*- Python -*-
#
#  This script can be used to identify undocumented modules in the Python
#  standard library.  Use it like this:
#
#  .../Doc/tools/listmodules --ignore-from .../Doc/paper-<paper>/modlib.idx

"""%(program)s - list modules in the Python standard library

-a, --annotate    Annotate the module names with the subdirectory they
                  live in
-b, --built-in    Include the interpreter's built-in modules in the list
-c, --categorize  Group the modules by subdirectory
-h, --help        Print this usage message and exit
-i <file>,
--ignore-from <file>
                  Ignore the modules listed in <file>.  <file> may
                  contain a list of module names or a module index file
                  as produced when formatting the Python documentation
                  (.idx or .html flavor).

If neither -a nor -c are given, the modules are listed in alphabetical
order.

Note that -a and -c are mutually exclusive.

Limitation: Modules loadable as shared objects may not be listed,
though this script attempts to locate such modules.

"""

__version__ = '$Revision$'

import getopt
import glob
import os
import re
import string
import sys


# Directories (relative to Lib/) whose modules main() leaves out of the
# listing.
REMOVE_DIRS = [
    "dos-8x3",
    "encodings",
    "distutils",
    "lib-old",
    "lib-stdwin",
    "test",
]


def main():
    """Collect the names of standard library modules and print them.

    Options are read from sys.argv: -a/--annotate, -b/--built-in,
    -c/--categorize, -h/--help and -i/--ignore-from <file>.  Any
    positional argument, or -a combined with -c, prints the usage
    message and exits with status 2.

    The source tree root is taken to be two directories above this
    script, and the process changes into it.  Python modules are found
    by running the external ``find`` command over Lib/; extension
    modules are found by globbing Modules/*module.c.  Names present in
    an --ignore-from file are left out.

    Output goes to standard output: one name per line by default, name
    plus directory with -a, or names grouped by directory with -c.
    """
    args = sys.argv[1:]
    annotate = 0
    builtin = 0
    categorize = 0
    ignore_dict = {}
    try:
        opts, args = getopt.getopt(
            args, "abchi:",
            ["annotate", "built-in", "categorize", "help", "ignore-from="])
    except getopt.error:
        # sys.exc_info() fetches the exception without relying on the
        # version-specific "except E, name" / "except E as name" syntax.
        msg = sys.exc_info()[1]
        # usage() prints to sys.stdout, so point it at stderr first.
        sys.stdout = sys.stderr
        print(msg)
        print("")
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-a", "--annotate"):
            annotate = 1
        elif opt in ("-b", "--built-in"):
            builtin = 1
        elif opt in ("-c", "--categorize"):
            categorize = 1
        elif opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-i", "--ignore-from"):
            fp = open(arg)
            try:
                data = fp.read()
            finally:
                fp.close()
            # Data starting with a backslash is handed to the index-file
            # parser; anything else is treated as a plain list of names.
            if data[:1] == "\\":
                ignore_from_idx(data, ignore_dict)
            else:
                ignore_from_modulelist(data, ignore_dict)
    if args or (annotate and categorize):
        usage()
        sys.exit(2)
    #
    # Populate the database:
    #
    srcdir = os.path.normpath(os.path.join(
        os.path.dirname(sys.argv[0]), os.pardir, os.pardir))
    os.chdir(srcdir)
    modules_by_name = {}        # module name -> directory label
    modules_by_dir = {}         # directory label -> list of module names
    if builtin:
        names = []
        for name in sys.builtin_module_names:
            if name not in ignore_dict:
                modules_by_name[name] = "<built-in>"
                names.append(name)
        # Register the category only when it has members: the categorized
        # dump below reads names[0] for every category.
        # NOTE(review): the category key is spelled "<builtin>" while the
        # per-module label above is "<built-in>"; kept as in the original.
        if names:
            modules_by_dir["<builtin>"] = names
    rx = re.compile("Lib/plat-[a-zA-Z0-9]*/")
    fp = os.popen("find Lib -name \\*.py -print", "r")
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    for line in lines:
        m = rx.match(line)
        if m:
            # Fold every platform directory into a single "plat-*" label.
            line = "Lib/plat-*/" + line[m.end():]
        line = line[4:-4]                # strip off 'Lib/' and '.py\n'
        subdir, name = os.path.split(line)
        subdir = subdir or "<standard>"
        if name in ignore_dict:
            continue
        # NOTE(review): this is an exact match on the whole relative
        # directory, so nested directories such as "distutils/command"
        # are not excluded -- confirm whether that is intended.
        if subdir not in REMOVE_DIRS:
            modules_by_name[name] = subdir
            names = modules_by_dir.get(subdir, [])
            modules_by_dir[subdir] = names
            if name not in names:
                names.append(name)
    # load up extension modules:
    pwd = os.getcwd()
    try:
        os.chdir("Modules")
        subdir = "<extension>"
        for filename in glob.glob("*module.c"):
            name = filename[:-8]         # strip off 'module.c'
            if (name in ignore_dict or name in modules_by_name
                    or name == "xx"):
                continue
            modules_by_name[name] = subdir
            names = modules_by_dir.get(subdir, [])
            modules_by_dir[subdir] = names
            if name not in names:
                names.append(name)
    finally:
        os.chdir(pwd)
    #
    # Dump the results:
    #
    if annotate:
        modules = list(modules_by_name.items())
        modules.sort()
        # The leading [0] keeps max() from failing when nothing was found.
        width = max([0] + [len(name) for name in modules_by_name.keys()])
        fmt = "%%-%ds  %%s" % width
        for name, subdir in modules:
            # Pseudo-directories such as "<standard>" are not shown.
            if subdir and subdir[0] != "<":
                print(fmt % (name, subdir))
            else:
                print(name)
    elif categorize:
        modules = list(modules_by_dir.items())
        modules.sort()
        width = max([0] + [len(label) for label in modules_by_dir.keys()])
        fmt = "%%-%ds  %%s" % width
        for subdir, names in modules:
            names.sort()
            # The directory label appears only beside its first module.
            print(fmt % (subdir, names[0]))
            for name in names[1:]:
                print(fmt % ('', name))
            print("")
    else:
        modules = list(modules_by_name.keys())
        modules.sort()
        print("\n".join(modules))


def ignore_from_modulelist(data, ignore_dict):
    """Mark every whitespace-separated name in data as ignored.

    data -- text holding module names separated by any whitespace
    ignore_dict -- dictionary updated in place; each name is stored as
                   both key and value

    Returns None.
    """
    # str.split() replaces string.split(), which Python 3 no longer has.
    for name in data.split():
        ignore_dict[name] = name

def ignore_from_idx(data, ignore_dict):
    """Mark the modules named in index-file text as ignored.

    data -- contents of an index file made of "\\indexentry{name@...}"
            lines
    ignore_dict -- dictionary updated in place; each name is stored as
                   both key and value

    Only lines that begin with "\\indexentry" contribute a name: the text
    between the opening brace and the first "@".  Returns None.
    """
    # String methods replace string.replace()/string.split(), which
    # Python 3 no longer has.
    # Turn the underscore macro back into a literal "_" before matching.
    data = data.replace(r"\hackscore  {}", "_")
    rx = re.compile(r"\\indexentry\s*{([^@]*)@")
    for line in data.split("\n"):
        m = rx.match(line)
        if m:
            name = m.group(1)
            ignore_dict[name] = name


def usage():
    """Print the module docstring as the usage message.

    The "%(program)s" placeholder in the docstring is filled in with the
    base name of sys.argv[0].  Output goes to whatever sys.stdout is
    bound to at the time of the call.
    """
    # A dict literal avoids a local named "vars", which shadowed the
    # builtin; print(...) with one argument behaves the same on
    # Python 2 and Python 3.
    program = os.path.basename(sys.argv[0])
    print(__doc__ % {"program": program})


# Run the listing only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()