mirror of
https://github.com/git/git.git
synced 2024-11-25 19:04:18 +08:00
e1273106f6
EWAH is a word-aligned compressed variant of a bitset (i.e. a data structure that acts as a 0-indexed boolean array for many entries). It uses a 64-bit run-length encoding (RLE) compression scheme, trading some compression for better processing speed. The goal of this word-aligned implementation is not to achieve the best compression, but rather to improve query processing time. As it stands right now, this EWAH implementation will always be more efficient storage-wise than its uncompressed alternative. EWAH arrays will be used as the on-disk format to store reachability bitmaps for all objects in a repository while keeping reasonable sizes, in the same way that JGit does. This EWAH implementation is a mostly straightforward port of the original `javaewah` library that JGit currently uses. The library is self-contained and has been embedded whole (4 files) inside the `ewah` folder to ease redistribution. The library is re-licensed under the GPLv2 with the permission of Daniel Lemire, the original author. The source code for the C version can be found on GitHub: https://github.com/vmg/libewok The original Java implementation can also be found on GitHub: https://github.com/lemire/javaewah [jc: stripped debug-only code per Peff's $gmane/239768] Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk> Signed-off-by: Junio C Hamano <gitster@pobox.com>
116 lines
2.9 KiB
C
116 lines
2.9 KiB
C
/**
|
|
* Copyright 2013, GitHub, Inc
|
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon,
|
|
* David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
#include "git-compat-util.h"
|
|
#include "ewok.h"
|
|
#include "ewok_rlw.h"
|
|
|
|
static inline int next_word(struct rlw_iterator *it)
|
|
{
|
|
if (it->pointer >= it->size)
|
|
return 0;
|
|
|
|
it->rlw.word = &it->buffer[it->pointer];
|
|
it->pointer += rlw_get_literal_words(it->rlw.word) + 1;
|
|
|
|
it->rlw.literal_words = rlw_get_literal_words(it->rlw.word);
|
|
it->rlw.running_len = rlw_get_running_len(it->rlw.word);
|
|
it->rlw.running_bit = rlw_get_run_bit(it->rlw.word);
|
|
it->rlw.literal_word_offset = 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *from_ewah)
|
|
{
|
|
it->buffer = from_ewah->buffer;
|
|
it->size = from_ewah->buffer_size;
|
|
it->pointer = 0;
|
|
|
|
next_word(it);
|
|
|
|
it->literal_word_start = rlwit_literal_words(it) +
|
|
it->rlw.literal_word_offset;
|
|
}
|
|
|
|
void rlwit_discard_first_words(struct rlw_iterator *it, size_t x)
|
|
{
|
|
while (x > 0) {
|
|
size_t discard;
|
|
|
|
if (it->rlw.running_len > x) {
|
|
it->rlw.running_len -= x;
|
|
return;
|
|
}
|
|
|
|
x -= it->rlw.running_len;
|
|
it->rlw.running_len = 0;
|
|
|
|
discard = (x > it->rlw.literal_words) ? it->rlw.literal_words : x;
|
|
|
|
it->literal_word_start += discard;
|
|
it->rlw.literal_words -= discard;
|
|
x -= discard;
|
|
|
|
if (x > 0 || rlwit_word_size(it) == 0) {
|
|
if (!next_word(it))
|
|
break;
|
|
|
|
it->literal_word_start =
|
|
rlwit_literal_words(it) + it->rlw.literal_word_offset;
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t rlwit_discharge(
|
|
struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, int negate)
|
|
{
|
|
size_t index = 0;
|
|
|
|
while (index < max && rlwit_word_size(it) > 0) {
|
|
size_t pd, pl = it->rlw.running_len;
|
|
|
|
if (index + pl > max)
|
|
pl = max - index;
|
|
|
|
ewah_add_empty_words(out, it->rlw.running_bit ^ negate, pl);
|
|
index += pl;
|
|
|
|
pd = it->rlw.literal_words;
|
|
if (pd + index > max)
|
|
pd = max - index;
|
|
|
|
ewah_add_dirty_words(out,
|
|
it->buffer + it->literal_word_start, pd, negate);
|
|
|
|
rlwit_discard_first_words(it, pd + pl);
|
|
index += pd;
|
|
}
|
|
|
|
return index;
|
|
}
|
|
|
|
/*
 * Drain the iterator `it`: for every word it still holds, append the
 * same number of empty words (bit value 0) to `out` and discard them
 * from the iterator, until rlwit_word_size() reports nothing left.
 */
void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out)
{
	for (;;) {
		size_t pending = rlwit_word_size(it);

		if (!(pending > 0))
			break;

		ewah_add_empty_words(out, 0, pending);
		rlwit_discard_first_words(it, pending);
	}
}
|