btrfs-progs/btrfs-debugfs

#!/usr/bin/env python3
#
# Simple python program to print out all the extents of a single file
# LGPLv2 license
# Copyright Facebook 2014

import sys, os, fcntl, ctypes, stat, argparse

# largest values that fit in an unsigned 64-bit and an unsigned 32-bit integer
maxu64 = 2 ** 64 - 1
maxu32 = 2 ** 32 - 1

# Key type values.  These are the numbers placed in min_type/max_type of a
# search and compared against the 'type' field of each returned header.
# The functions in this file only search for BTRFS_EXTENT_DATA_KEY and
# BTRFS_BLOCK_GROUP_ITEM_KEY; the remaining values are listed for reference.

# the inode (like from stat)
BTRFS_INODE_ITEM_KEY = 1
# backref to the directory
BTRFS_INODE_REF_KEY = 12
# backref to the directory v2
BTRFS_INODE_EXTREF_KEY = 13
# xattr items
BTRFS_XATTR_ITEM_KEY = 24
# orphans for list files
BTRFS_ORPHAN_ITEM_KEY = 48
# treelog items for dirs
BTRFS_DIR_LOG_ITEM_KEY = 60
BTRFS_DIR_LOG_INDEX_KEY = 72
# dir items and dir indexes both hold filenames
BTRFS_DIR_ITEM_KEY = 84
BTRFS_DIR_INDEX_KEY = 96
# these are the file extent pointers
BTRFS_EXTENT_DATA_KEY = 108
# csums
BTRFS_EXTENT_CSUM_KEY = 128
# root item for subvols and snapshots
BTRFS_ROOT_ITEM_KEY = 132
# root item backrefs
BTRFS_ROOT_BACKREF_KEY = 144
BTRFS_ROOT_REF_KEY = 156
# each allocated extent has an extent item
BTRFS_EXTENT_ITEM_KEY = 168
# optimized extents for metadata only
BTRFS_METADATA_ITEM_KEY = 169
# backrefs for extents
BTRFS_TREE_BLOCK_REF_KEY = 176
BTRFS_EXTENT_DATA_REF_KEY = 178
BTRFS_EXTENT_REF_V0_KEY = 180
BTRFS_SHARED_BLOCK_REF_KEY = 182
BTRFS_SHARED_DATA_REF_KEY = 184
# one of these for each block group
BTRFS_BLOCK_GROUP_ITEM_KEY = 192
# dev extents record which part of each device is allocated
BTRFS_DEV_EXTENT_KEY = 204
# dev items describe devs
BTRFS_DEV_ITEM_KEY = 216
# one for each chunk
BTRFS_CHUNK_ITEM_KEY = 228
# qgroup info
BTRFS_QGROUP_STATUS_KEY = 240
BTRFS_QGROUP_INFO_KEY = 242
BTRFS_QGROUP_LIMIT_KEY = 244
BTRFS_QGROUP_RELATION_KEY = 246
# records balance progress
BTRFS_BALANCE_ITEM_KEY = 248
# stats on device errors
BTRFS_DEV_STATS_KEY = 249
BTRFS_DEV_REPLACE_KEY = 250
BTRFS_STRING_ITEM_KEY = 253

# store information about which extents are in use, and reference counts
# (used as the tree_id of the block group search)
BTRFS_EXTENT_TREE_OBJECTID = 2

# bit tested against btrfs_block_group_item.flags to select the block groups
# that get reported
BTRFS_BLOCK_GROUP_DATA = (1 << 0)

# This is a flattened version of the kernel's btrfs_ioctl_search_args_v2:
# the btrfs_ioctl_search_key fields and the result buffer live in a single
# structure.  We're using a 64K buffer size.
#
args_buffer_size = 65536
class btrfs_ioctl_search_args(ctypes.Structure):
    """Argument block handed to the search ioctl.

    The leading fields describe the search (tree, min/max bounds, item
    count); 'buf' is the area the results are read back from and
    'buf_size' tells the kernel how large that area is.  The layout is
    packed, so the field order below is the exact byte order.
    """
    _pack_ = 1
    _fields_ = [
        # which tree to search and the bounds of the search
        ("tree_id", ctypes.c_ulonglong),
        ("min_objectid", ctypes.c_ulonglong),
        ("max_objectid", ctypes.c_ulonglong),
        ("min_offset", ctypes.c_ulonglong),
        ("max_offset", ctypes.c_ulonglong),
        ("min_transid", ctypes.c_ulonglong),
        ("max_transid", ctypes.c_ulonglong),
        ("min_type", ctypes.c_uint),
        ("max_type", ctypes.c_uint),
        # item count: set before the call, read back after it
        ("nr_items", ctypes.c_uint),
        # padding / reserved fields
        ("unused", ctypes.c_uint),
        ("unused1", ctypes.c_ulonglong),
        ("unused2", ctypes.c_ulonglong),
        ("unused3", ctypes.c_ulonglong),
        ("unused4", ctypes.c_ulonglong),
        # size of the result buffer, followed by the buffer itself
        ("buf_size", ctypes.c_ulonglong),
        ("buf", ctypes.c_ubyte * args_buffer_size),
    ]

# the search ioctl returns one header for each item
#
class btrfs_ioctl_search_header(ctypes.Structure):
    """Header that precedes every item in the search result buffer.

    'objectid', 'type' and 'offset' identify the item and 'len' is the
    number of item bytes that follow the header in the buffer.
    """
    _pack_ = 1
    _fields_ = [
        ("transid", ctypes.c_ulonglong),
        ("objectid", ctypes.c_ulonglong),
        ("offset", ctypes.c_ulonglong),
        ("type", ctypes.c_uint),
        ("len", ctypes.c_uint),
    ]

# the type field in btrfs_file_extent_item
# values of btrfs_file_extent_item.type
BTRFS_FILE_EXTENT_INLINE = 0
BTRFS_FILE_EXTENT_REG = 1
BTRFS_FILE_EXTENT_PREALLOC = 2

class btrfs_file_extent_item(ctypes.LittleEndianStructure):
    """Packed little-endian layout of one file extent item.

    The fields up to and including 'type' take 21 bytes; the four 64-bit
    fields after it describe where the extent lives on disk and which part
    of it the file uses.
    """
    _pack_ = 1
    _fields_ = [
        ("generation", ctypes.c_ulonglong),
        ("ram_bytes", ctypes.c_ulonglong),
        ("compression", ctypes.c_ubyte),
        ("encryption", ctypes.c_ubyte),
        ("other_encoding", ctypes.c_ubyte * 2),
        # one of the BTRFS_FILE_EXTENT_* values
        ("type", ctypes.c_ubyte),
        ("disk_bytenr", ctypes.c_ulonglong),
        ("disk_num_bytes", ctypes.c_ulonglong),
        ("offset", ctypes.c_ulonglong),
        ("num_bytes", ctypes.c_ulonglong),
    ]

class btrfs_block_group_item(ctypes.LittleEndianStructure):
    """Packed little-endian layout of one block group item.

    'used' is compared with the block group length to compute usage and
    'flags' is tested against the BTRFS_BLOCK_GROUP_* bits.
    """
    _pack_ = 1
    _fields_ = [
        ("used", ctypes.c_ulonglong),
        ("chunk_objectid", ctypes.c_ulonglong),
        ("flags", ctypes.c_ulonglong),
    ]

class btrfs_ioctl_search:
    """Holds the argument block and ioctl number for one tree search.

    A fresh instance matches everything: all minimums are zero and all
    maximums are at their largest value.  Callers narrow the range by
    assigning to the fields of 'args' before calling search().
    """

    def __init__(self):
        key = btrfs_ioctl_search_args()
        key.tree_id = 0
        key.nr_items = 0
        # widest possible range: every min at zero, every max at its limit
        key.min_objectid = key.min_offset = key.min_transid = key.min_type = 0
        key.max_objectid = key.max_offset = key.max_transid = maxu64
        key.max_type = maxu32
        key.buf_size = args_buffer_size
        self.args = key

        # magic encoded for x86_64 this is the v2 search ioctl
        # (same value as decimal 3228603409)
        self.ioctl_num = 0xC0709411

    def search(self, fd, nritems=65536):
        """Run the search ioctl on 'fd', asking for at most 'nritems' items.

        The argument block is passed as a mutable buffer, so afterwards
        callers read the item count from args.nr_items and the items from
        args.buf.  Errors from fcntl.ioctl() propagate to the caller.
        """
        self.args.nr_items = nritems
        fcntl.ioctl(fd, self.ioctl_num, self.args, True)

# this moves the search key forward by one.  If the end result is
# still a valid search key (all mins less than all maxes), we return
# True.  Otherwise False
#
def advance_search(search):
    """Step the minimum key of 'search' forward by exactly one.

    The minimum key is the triple (min_objectid, min_type, min_offset),
    with the offset as the least significant part.  When a part is already
    at its largest value the step carries into the next part and the lower
    parts restart at zero, so no keys in between are skipped.

    'search' is any object whose 'args' attribute carries the min_*/max_*
    fields (normally a btrfs_ioctl_search).

    Returns True if every minimum is still no larger than its maximum
    afterwards, False if the range is exhausted or the key cannot be
    advanced any further.
    """
    key = search.args

    if key.min_offset < maxu64:
        key.min_offset += 1
    elif key.min_type < 255:
        # offset exhausted: carry into the type and restart the offset,
        # otherwise the key would jump past every smaller offset of the
        # new type
        key.min_type += 1
        key.min_offset = 0
    elif key.min_objectid < maxu64:
        # type exhausted as well: carry into the objectid and restart both
        # lower parts
        key.min_objectid += 1
        key.min_type = 0
        key.min_offset = 0
    else:
        return False

    return (key.min_offset <= key.max_offset and
            key.min_type <= key.max_type and
            key.min_objectid <= key.max_objectid)

# given one search_header and one file_item, print the details.  This
# also tosses the [disk_bytenr,disk_num_bytes] into extent_hash to record
# which extents were used by this file
#
def print_one_extent(header, fi, extent_hash):
    """Print one line describing the extent in 'fi' and record its size.

    header      -- search header of the item; objectid, offset and len
                   are read from it
    fi          -- the btrfs_file_extent_item copied out of the results
    extent_hash -- dict updated in place: disk_bytenr -> disk_num_bytes
                   for extents that live on disk, and the key -1 -> inline
                   length for inline extents.  Holes are not recorded.
    """
    if fi.type == BTRFS_FILE_EXTENT_INLINE:
        # the inline data follows the 21 used bytes of the file item
        # header, so whatever remains of the item is the (possibly
        # compressed) inline payload
        inline_len = header.len - 21
        ram_bytes = fi.ram_bytes if fi.compression else inline_len
        print("(%Lu %Lu): ram %Lu disk 0 disk_size %Lu -- inline" %
              (header.objectid, header.offset, ram_bytes, inline_len))
        extent_hash[-1] = inline_len
        return

    disk_bytenr = fi.disk_bytenr
    # a zero disk_bytenr means nothing is allocated for this range
    tag = "" if disk_bytenr else " -- hole"
    print("(%Lu %Lu): ram %Lu disk %Lu disk_size %Lu%s" %
          (header.objectid, header.offset, fi.num_bytes, disk_bytenr,
           fi.disk_num_bytes, tag))

    if disk_bytenr:
        extent_hash[disk_bytenr] = fi.disk_num_bytes

# open 'filename' and run the search ioctl against it, printing all the extents
# we find
def print_file_extents(filename):
    """Print every extent of 'filename' followed by a one-line summary.

    The file is opened read-only and the search ioctl is run repeatedly
    against it, restricted to the file's inode number and to
    BTRFS_EXTENT_DATA_KEY items.  Each extent found is printed through
    print_one_extent(); the summary line reports the number of distinct
    extents, their total size on disk, the file size and the ratio of the
    two.

    Returns 0 on success or when 'filename' is not a regular file, and -1
    when the file cannot be opened or stat'ed or when the ioctl fails.
    Errors are reported on stderr.  The file descriptor is closed on every
    path.
    """
    extent_hash = {}

    s = btrfs_ioctl_search()
    s.args.min_type = BTRFS_EXTENT_DATA_KEY
    s.args.max_type = BTRFS_EXTENT_DATA_KEY

    try:
        fd = os.open(filename, os.O_RDONLY)
    except Exception as e:
        sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
        return -1

    # from here on we own the descriptor; the finally clause releases it
    # no matter how we leave
    try:
        try:
            st = os.fstat(fd)
        except Exception as e:
            sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
            return -1

        if not stat.S_ISREG(st.st_mode):
            sys.stderr.write("%s not a regular file\n" % filename)
            return 0

        s.args.min_objectid = st.st_ino
        s.args.max_objectid = st.st_ino

        # one header struct is reused for every item of every search
        header = btrfs_ioctl_search_header()
        header_size = ctypes.sizeof(header)
        h = ctypes.addressof(header)
        fi_size = ctypes.sizeof(btrfs_file_extent_item)

        while True:
            try:
                s.search(fd)
            except Exception as e:
                sys.stderr.write("Search ioctl failed for %s (%s)\n" % (filename, e))
                return -1

            if s.args.nr_items == 0:
                break

            # p walks the results buffer from the kernel, p_left counts
            # the bytes that remain after p
            p = ctypes.addressof(s.args.buf)
            p_left = args_buffer_size

            for _ in range(s.args.nr_items):
                # never copy a header that does not fit in what is left
                if p_left < header_size:
                    break

                # for each item, copy the header from the buffer into
                # our header struct.
                ctypes.memmove(h, p, header_size)
                p += header_size
                p_left -= header_size

                # this would be a kernel bug it shouldn't be sending
                # malformed items
                if p_left <= 0:
                    break

                if header.type == BTRFS_EXTENT_DATA_KEY:
                    # this would also be a kernel bug
                    if p_left < fi_size:
                        break

                    # Copy the file item out of the results buffer
                    fi = btrfs_file_extent_item()
                    ctypes.memmove(ctypes.addressof(fi), p, fi_size)
                    print_one_extent(header, fi, extent_hash)

                p += header.len
                p_left -= header.len

                # the item claims more bytes than the buffer holds
                if p_left < 0:
                    break

                # remember how far we got *before* the end-of-buffer
                # check: an item that ends exactly at the end of the
                # buffer has been handled and must not be fetched again
                s.args.min_offset = header.offset

                if p_left == 0:
                    break

            if not advance_search(s):
                break
    finally:
        os.close(fd)

    total_on_disk = sum(extent_hash.values())
    total_extents = len(extent_hash)

    # don't divide by zero
    if total_on_disk == 0:
        total_on_disk = 1

    print("file: %s extents %Lu disk size %Lu logical size %Lu ratio %.2f" % \
          (filename, total_extents, total_on_disk, st.st_size,
          float(st.st_size) / float(total_on_disk)))
    return 0

def print_block_groups(mountpoint):
    """Print the data block groups of the filesystem at 'mountpoint'.

    The search ioctl is run against BTRFS_EXTENT_TREE_OBJECTID.  For every
    BTRFS_BLOCK_GROUP_ITEM_KEY item whose flags include
    BTRFS_BLOCK_GROUP_DATA one line is printed; the header's objectid is
    reported as the block group offset and the header's offset as its
    length.  A summary line follows with the total free space and the
    least used block group, plus a hint when the rest of the free space
    could absorb that block group.

    Returns 0 on success and -1 when 'mountpoint' cannot be opened or
    stat'ed or when the ioctl fails.  Errors are reported on stderr.  The
    file descriptor is closed on every path.
    """
    s = btrfs_ioctl_search()

    s.args.min_type = BTRFS_BLOCK_GROUP_ITEM_KEY
    s.args.max_type = BTRFS_BLOCK_GROUP_ITEM_KEY
    s.args.tree_id = BTRFS_EXTENT_TREE_OBJECTID

    min_used = maxu64
    free_of_min_used = 0
    bg_of_min_used = 0
    total_free = 0

    try:
        fd = os.open(mountpoint, os.O_RDONLY)
    except Exception as e:
        sys.stderr.write("Failed to open %s (%s)\n" % (mountpoint, e))
        return -1

    # from here on we own the descriptor; the finally clause releases it
    # no matter how we leave
    try:
        try:
            os.fstat(fd)
        except Exception as e:
            sys.stderr.write("Failed to open %s (%s)\n" % (mountpoint, e))
            return -1

        # one header struct is reused for every item of every search
        header = btrfs_ioctl_search_header()
        header_size = ctypes.sizeof(header)
        h = ctypes.addressof(header)
        bg_size = ctypes.sizeof(btrfs_block_group_item)

        while True:
            try:
                s.search(fd)
            except Exception as e:
                sys.stderr.write("Search ioctl failed for %s (%s)\n" % (mountpoint, e))
                return -1

            if s.args.nr_items == 0:
                break

            # p walks the results buffer from kernel, p_left counts the
            # bytes that remain after p
            p = ctypes.addressof(s.args.buf)
            p_left = args_buffer_size

            # type of the last item fully consumed in this pass
            last_type = None

            for _ in range(s.args.nr_items):
                # never copy a header that does not fit in what is left
                if p_left < header_size:
                    break

                # for each item, copy the header from the buffer into
                # our header struct
                ctypes.memmove(h, p, header_size)
                p += header_size
                p_left -= header_size

                # this would be a kernel bug it shouldn't be sending
                # malformed items
                if p_left <= 0:
                    break

                if header.type == BTRFS_BLOCK_GROUP_ITEM_KEY:
                    # this would be a kernel bug
                    if p_left < bg_size:
                        break

                    bg = btrfs_block_group_item()
                    ctypes.memmove(ctypes.addressof(bg), p, bg_size)
                    if bg.flags & BTRFS_BLOCK_GROUP_DATA:
                        print("block group offset %s len %s used %s chunk_objectid %Lu flags %Lu usage %.2f" %\
                         ('{:>14}'.format(header.objectid),
                          '{:>10}'.format(header.offset),
                          '{:>10}'.format(bg.used),
                          bg.chunk_objectid,
                          bg.flags,
                          float(bg.used) / float(header.offset)))

                        free = header.offset - bg.used
                        total_free += free
                        if min_used >= bg.used:
                            min_used = bg.used
                            free_of_min_used = free
                            bg_of_min_used = header.objectid

                p += header.len
                p_left -= header.len

                # the item claims more bytes than the buffer holds
                if p_left < 0:
                    break

                # remember how far we got *before* the end-of-buffer
                # check: an item that ends exactly at the end of the
                # buffer has been handled and must not be counted again
                s.args.min_objectid = header.objectid
                last_type = header.type

                if p_left == 0:
                    break

            # The results also contain items of other types.  If this pass
            # ended on an item whose type sorts before the block group
            # item, the block group item of the same objectid has not been
            # seen yet.  Searching again from the same objectid (min_type
            # is still BTRFS_BLOCK_GROUP_ITEM_KEY) picks it up without
            # returning anything already handled.
            if last_type is not None and last_type < BTRFS_BLOCK_GROUP_ITEM_KEY:
                continue

            # nothing can follow the largest objectid
            if s.args.min_objectid >= maxu64:
                break
            s.args.min_objectid += 1
            if s.args.min_objectid > s.args.max_objectid:
                break
    finally:
        os.close(fd)

    print("total_free %Lu min_used %Lu free_of_min_used %Lu block_group_of_min_used %Lu" %\
     (total_free, min_used, free_of_min_used, bg_of_min_used))
    if (total_free - free_of_min_used) >= min_used:
        print("balance block group (%Lu) can reduce the number of data block group" % bg_of_min_used)

    return 0

# main: parse the command line and run the selected report on every path
parser = argparse.ArgumentParser()
parser.add_argument('path', nargs='+')
parser.add_argument(
    '-b', '--block-group', action='store_const', const=1,
    help='get block group information, use mountpoint as "path"')
parser.add_argument(
    '-f', '--file', action='store_const', const=1,
    help='get file mapping, use filepath')

args = parser.parse_args()

# -b wins when both flags are given; with neither flag nothing is printed
if args.block_group:
    for mountpoint in args.path:
        print_block_groups(mountpoint)
elif args.file:
    for filename in args.path:
        print_file_extents(filename)