mirror of
https://github.com/git/git.git
synced 2024-11-25 10:54:00 +08:00
e1273106f6
EWAH is a word-aligned compressed variant of a bitset (i.e. a data structure that acts as a 0-indexed boolean array for many entries).

It uses a 64-bit run-length encoding (RLE) compression scheme, trading some compression for better processing speed.

The goal of this word-aligned implementation is not to achieve the best compression, but rather to improve query processing time. As it stands right now, this EWAH implementation will always be more efficient storage-wise than its uncompressed alternative.

EWAH arrays will be used as the on-disk format to store reachability bitmaps for all objects in a repository while keeping reasonable sizes, in the same way that JGit does.

This EWAH implementation is a mostly straightforward port of the original `javaewah` library that JGit currently uses. The library is self-contained and has been embedded whole (4 files) inside the `ewah` folder to ease redistribution.

The library is re-licensed under the GPLv2 with the permission of Daniel Lemire, the original author.

The source code for the C version can be found on GitHub: https://github.com/vmg/libewok

The original Java implementation can also be found on GitHub: https://github.com/lemire/javaewah

[jc: stripped debug-only code per Peff's $gmane/239768]

Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
115 lines
3.1 KiB
C
115 lines
3.1 KiB
C
/**
|
|
* Copyright 2013, GitHub, Inc
|
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon,
|
|
* David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
#ifndef __EWOK_RLW_H__
|
|
#define __EWOK_RLW_H__
|
|
|
|
/*
 * Field widths and masks for a running-length word (RLW).
 *
 * As used by the accessors in this header, an RLW is laid out from the
 * least significant bit upwards as:
 *
 *   1 bit                  run bit
 *   RLW_RUNNING_BITS bits  running length
 *   RLW_LITERAL_BITS bits  literal word count
 */

/* Width of the running-length field: half the bits of an eword_t. */
#define RLW_RUNNING_BITS (sizeof(eword_t) * 4)

/* Width of the literal-count field: what is left after the run bit and the running length. */
#define RLW_LITERAL_BITS (sizeof(eword_t) * 8 - 1 - RLW_RUNNING_BITS)

/* Largest value that fits in the running-length field. */
#define RLW_LARGEST_RUNNING_COUNT (((eword_t)1 << RLW_RUNNING_BITS) - 1)

/* Largest value that fits in the literal-count field. */
#define RLW_LARGEST_LITERAL_COUNT (((eword_t)1 << RLW_LITERAL_BITS) - 1)

/* Mask of the running-length field in place, i.e. shifted past the run bit. */
#define RLW_LARGEST_RUNNING_COUNT_SHIFT (RLW_LARGEST_RUNNING_COUNT << 1)

/* Mask covering the run bit together with the running-length field. */
#define RLW_RUNNING_LEN_PLUS_BIT (((eword_t)1 << (RLW_RUNNING_BITS + 1)) - 1)
|
|
|
|
static int rlw_get_run_bit(const eword_t *word)
|
|
{
|
|
return *word & (eword_t)1;
|
|
}
|
|
|
|
/*
 * Force the run bit (bit 0) of *word to 1 when b is non-zero and to 0
 * when b is zero. All other bits of *word are left untouched.
 */
static inline void rlw_set_run_bit(eword_t *word, int b)
{
	const eword_t run_bit = (eword_t)1;

	if (!b) {
		*word &= ~run_bit;
		return;
	}

	*word |= run_bit;
}
|
|
|
|
/* Flip the run bit (bit 0) of *word; every other bit is preserved. */
static inline void rlw_xor_run_bit(eword_t *word)
{
	*word ^= (eword_t)1;
}
|
|
|
|
/*
 * Store l in the running-length field of *word.
 *
 * The run bit and the bits above the running-length field keep their
 * current values. Bits of l that do not fit in the field are dropped.
 */
static inline void rlw_set_running_len(eword_t *word, eword_t l)
{
	const eword_t field = RLW_LARGEST_RUNNING_COUNT_SHIFT;
	const eword_t kept = *word & ~field;

	/* The field sits one bit up, just above the run bit. */
	*word = kept | ((l << 1) & field);
}
|
|
|
|
static inline eword_t rlw_get_running_len(const eword_t *word)
|
|
{
|
|
return (*word >> 1) & RLW_LARGEST_RUNNING_COUNT;
|
|
}
|
|
|
|
static inline eword_t rlw_get_literal_words(const eword_t *word)
|
|
{
|
|
return *word >> (1 + RLW_RUNNING_BITS);
|
|
}
|
|
|
|
/*
 * Store l in the literal-count field of *word, leaving the run bit and
 * the running-length field (the low RLW_RUNNING_BITS + 1 bits) as they
 * are.
 */
static inline void rlw_set_literal_words(eword_t *word, eword_t l)
{
	const eword_t low_bits = *word & RLW_RUNNING_LEN_PLUS_BIT;

	*word = low_bits | (l << (RLW_RUNNING_BITS + 1));
}
|
|
|
|
/*
 * Sum of the running-length field and the literal-count field of the
 * running-length word at *self.
 */
static inline eword_t rlw_size(const eword_t *self)
{
	const eword_t run_words = rlw_get_running_len(self);
	const eword_t literal_words = rlw_get_literal_words(self);

	return run_words + literal_words;
}
|
|
|
|
struct rlw_iterator {
|
|
const eword_t *buffer;
|
|
size_t size;
|
|
size_t pointer;
|
|
size_t literal_word_start;
|
|
|
|
struct {
|
|
const eword_t *word;
|
|
int literal_words;
|
|
int running_len;
|
|
int literal_word_offset;
|
|
int running_bit;
|
|
} rlw;
|
|
};
|
|
|
|
/*
 * Iterator operations. Only the declarations are part of this header;
 * the definitions are provided elsewhere.
 * NOTE(review): behaviour is not visible from here; see the
 * implementation for the exact contract of each function.
 */
void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap);

void rlwit_discard_first_words(struct rlw_iterator *it, size_t x);

size_t rlwit_discharge(struct rlw_iterator *it,
		       struct ewah_bitmap *out,
		       size_t max,
		       int negate);

void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out);
|
|
|
|
static inline size_t rlwit_word_size(struct rlw_iterator *it)
|
|
{
|
|
return it->rlw.running_len + it->rlw.literal_words;
|
|
}
|
|
|
|
static inline size_t rlwit_literal_words(struct rlw_iterator *it)
|
|
{
|
|
return it->pointer - it->rlw.literal_words;
|
|
}
|
|
|
|
#endif
|