2005-06-30 11:50:15 +08:00
|
|
|
#include "cache.h"
|
|
|
|
#include "pkt-line.h"
|
|
|
|
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:57 +08:00
|
|
|
/* Shared buffer that packet_read_line() reads into and returns a pointer to. */
char packet_buffer[LARGE_PACKET_MAX];
|
2011-04-03 15:06:54 +08:00
|
|
|
/* Program name printed in packet trace lines; replaced by packet_trace_identity(). */
static const char *packet_trace_prefix = "git";
|
2011-02-24 22:30:19 +08:00
|
|
|
/*
 * Key handed to trace_want()/trace_strbuf(); packet_trace() also passes it
 * to unsetenv(), so it doubles as an environment variable name.
 */
static const char trace_key[] = "GIT_TRACE_PACKET";
|
|
|
|
|
|
|
|
void packet_trace_identity(const char *prog)
|
|
|
|
{
|
|
|
|
packet_trace_prefix = xstrdup(prog);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void packet_trace(const char *buf, unsigned int len, int write)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct strbuf out;
|
|
|
|
|
|
|
|
if (!trace_want(trace_key))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* +32 is just a guess for header + quoting */
|
|
|
|
strbuf_init(&out, len+32);
|
|
|
|
|
|
|
|
strbuf_addf(&out, "packet: %12s%c ",
|
|
|
|
packet_trace_prefix, write ? '>' : '<');
|
|
|
|
|
|
|
|
if ((len >= 4 && !prefixcmp(buf, "PACK")) ||
|
|
|
|
(len >= 5 && !prefixcmp(buf+1, "PACK"))) {
|
|
|
|
strbuf_addstr(&out, "PACK ...");
|
|
|
|
unsetenv(trace_key);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* XXX we should really handle printable utf8 */
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
/* suppress newlines */
|
|
|
|
if (buf[i] == '\n')
|
|
|
|
continue;
|
|
|
|
if (buf[i] >= 0x20 && buf[i] <= 0x7e)
|
|
|
|
strbuf_addch(&out, buf[i]);
|
|
|
|
else
|
|
|
|
strbuf_addf(&out, "\\%o", buf[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addch(&out, '\n');
|
|
|
|
trace_strbuf(trace_key, &out);
|
|
|
|
strbuf_release(&out);
|
|
|
|
}
|
|
|
|
|
2005-06-30 11:50:15 +08:00
|
|
|
/*
 * Write a flush packet ("0000") to fd, tracing it as an outgoing packet.
 *
 * If we buffered things up above (we don't, but we should),
 * we'd flush it here
 */
void packet_flush(int fd)
{
	static const char flush_pkt[] = "0000";

	packet_trace(flush_pkt, 4, 1);
	write_or_die(fd, flush_pkt, 4);
}
|
|
|
|
|
2009-10-31 08:47:21 +08:00
|
|
|
/*
 * Append a flush packet ("0000") to the strbuf `buf`, tracing it as an
 * outgoing packet.
 */
void packet_buf_flush(struct strbuf *buf)
{
	static const char flush_pkt[] = "0000";

	packet_trace(flush_pkt, 4, 1);
	strbuf_add(buf, flush_pkt, 4);
}
|
|
|
|
|
2005-06-30 11:50:15 +08:00
|
|
|
#define hex(a) (hexchar[(a) & 15])
static char buffer[1000];

/*
 * Build a pkt-line in the file-scope `buffer`.
 *
 * The printf-style payload described by fmt/args is written after a
 * 4-byte header; the header is then filled with the total length (header
 * included) as four lowercase hex digits. Calls die() when the formatted
 * payload does not fit in the space after the header.
 *
 * Returns the total number of valid bytes in `buffer`.
 */
static unsigned format_packet(const char *fmt, va_list args)
{
	static char hexchar[] = "0123456789abcdef";
	char *payload = buffer + 4;
	unsigned total;

	total = vsnprintf(payload, sizeof(buffer) - 4, fmt, args);
	if (total >= sizeof(buffer) - 4)
		die("protocol error: impossibly long line");
	total += 4;

	/* Length header, least significant nibble in the last position. */
	buffer[3] = hex(total);
	buffer[2] = hex(total >> 4);
	buffer[1] = hex(total >> 8);
	buffer[0] = hex(total >> 12);

	packet_trace(payload, total - 4, 1);
	return total;
}
|
|
|
|
|
|
|
|
void packet_write(int fd, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned n;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
n = format_packet(fmt, args);
|
|
|
|
va_end(args);
|
2013-02-21 04:01:56 +08:00
|
|
|
write_or_die(fd, buffer, n);
|
2005-06-30 11:50:15 +08:00
|
|
|
}
|
|
|
|
|
2009-10-31 08:47:21 +08:00
|
|
|
void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
unsigned n;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
n = format_packet(fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
strbuf_add(buf, buffer, n);
|
|
|
|
}
|
|
|
|
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:10 +08:00
|
|
|
static int safe_read(int fd, void *buffer, unsigned size, int options)
|
2005-06-30 11:50:15 +08:00
|
|
|
{
|
2008-05-03 21:27:26 +08:00
|
|
|
ssize_t ret = read_in_full(fd, buffer, size);
|
|
|
|
if (ret < 0)
|
2009-06-27 23:58:46 +08:00
|
|
|
die_errno("read error");
|
2012-06-20 02:24:50 +08:00
|
|
|
else if (ret < size) {
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:10 +08:00
|
|
|
if (options & PACKET_READ_GENTLE_ON_EOF)
|
2012-06-20 02:24:50 +08:00
|
|
|
return -1;
|
|
|
|
|
2008-05-03 21:27:26 +08:00
|
|
|
die("The remote end hung up unexpectedly");
|
2012-06-20 02:24:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2005-06-30 11:50:15 +08:00
|
|
|
}
|
|
|
|
|
2009-10-31 08:47:21 +08:00
|
|
|
/*
 * Decode the 4-character hexadecimal length header at `linelen`.
 *
 * Both upper- and lower-case hex digits are accepted. Returns the decoded
 * value (0..0xffff), or -1 as soon as a non-hex character is seen.
 * Exactly four characters are examined; no NUL terminator is required.
 */
static int packet_length(const char *linelen)
{
	int value = 0;
	const char *p;

	for (p = linelen; p < linelen + 4; p++) {
		unsigned char ch = *p;
		int digit;

		if ('0' <= ch && ch <= '9')
			digit = ch - '0';
		else if ('a' <= ch && ch <= 'f')
			digit = ch - 'a' + 10;
		else if ('A' <= ch && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			return -1;

		value = (value << 4) | digit;
	}
	return value;
}
|
|
|
|
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:10 +08:00
|
|
|
int packet_read(int fd, char *buffer, unsigned size, int options)
|
2009-10-31 08:47:21 +08:00
|
|
|
{
|
2012-06-20 02:24:50 +08:00
|
|
|
int len, ret;
|
2009-10-31 08:47:21 +08:00
|
|
|
char linelen[4];
|
|
|
|
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:10 +08:00
|
|
|
ret = safe_read(fd, linelen, 4, options);
|
|
|
|
if (ret < 0)
|
2012-06-20 02:24:50 +08:00
|
|
|
return ret;
|
2009-10-31 08:47:21 +08:00
|
|
|
len = packet_length(linelen);
|
|
|
|
if (len < 0)
|
2009-10-31 08:47:22 +08:00
|
|
|
die("protocol error: bad line length character: %.4s", linelen);
|
2011-02-24 22:30:19 +08:00
|
|
|
if (!len) {
|
|
|
|
packet_trace("0000", 4, 0);
|
2005-06-30 11:50:15 +08:00
|
|
|
return 0;
|
2011-02-24 22:30:19 +08:00
|
|
|
}
|
2005-06-30 11:50:15 +08:00
|
|
|
len -= 4;
|
|
|
|
if (len >= size)
|
|
|
|
die("protocol error: bad line length %d", len);
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:10 +08:00
|
|
|
ret = safe_read(fd, buffer, len, options);
|
|
|
|
if (ret < 0)
|
2012-06-20 02:24:50 +08:00
|
|
|
return ret;
|
pkt-line: teach packet_read_line to chomp newlines
The packets sent during ref negotiation are all terminated
by newline; even though the code to chomp these newlines is
short, we end up doing it in a lot of places.
This patch teaches packet_read_line to auto-chomp the
trailing newline; this lets us get rid of a lot of inline
chomping code.
As a result, some call-sites which are not reading
line-oriented data (e.g., when reading chunks of packfiles
alongside sideband) transition away from packet_read_line to
the generic packet_read interface. This patch converts all
of the existing callsites.
Since the function signature of packet_read_line does not
change (but its behavior does), there is a possibility of
new callsites being introduced in later commits, silently
introducing an incompatibility. However, since a later
patch in this series will change the signature, such a
commit would have to be merged directly into this commit,
not to the tip of the series; we can therefore ignore the
issue.
This is an internal cleanup and should produce no change of
behavior in the normal case. However, there is one corner
case to note. Callers of packet_read_line have never been
able to tell the difference between a flush packet ("0000")
and an empty packet ("0004"), as both cause packet_read_line
to return a length of 0. Readers treat them identically,
even though Documentation/technical/protocol-common.txt says
we must not; it also says that implementations should not
send an empty pkt-line.
By stripping out the newline before the result gets to the
caller, we will now treat the newline-only packet ("0005\n")
the same as an empty packet, which in turn gets treated like
a flush packet. In practice this doesn't matter, as neither
empty nor newline-only packets are part of git's protocols
(at least not for the line-oriented bits, and readers who
are not expecting line-oriented packets will be calling
packet_read directly, anyway). But even if we do decide to
care about the distinction later, it is orthogonal to this
patch. The right place to tighten would be to stop treating
empty packets as flush packets, and this change does not
make doing so any harder.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:28 +08:00
|
|
|
|
|
|
|
if ((options & PACKET_READ_CHOMP_NEWLINE) &&
|
|
|
|
len && buffer[len-1] == '\n')
|
|
|
|
len--;
|
|
|
|
|
2005-06-30 11:50:15 +08:00
|
|
|
buffer[len] = 0;
|
2011-02-24 22:30:19 +08:00
|
|
|
packet_trace(buffer, len, 0);
|
2005-06-30 11:50:15 +08:00
|
|
|
return len;
|
|
|
|
}
|
2009-10-31 08:47:21 +08:00
|
|
|
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:57 +08:00
|
|
|
char *packet_read_line(int fd, int *len_p)
|
2012-06-20 02:24:50 +08:00
|
|
|
{
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:57 +08:00
|
|
|
int len = packet_read(fd, packet_buffer, sizeof(packet_buffer),
|
|
|
|
PACKET_READ_CHOMP_NEWLINE);
|
|
|
|
if (len_p)
|
|
|
|
*len_p = len;
|
|
|
|
return len ? packet_buffer : NULL;
|
2012-06-20 02:24:50 +08:00
|
|
|
}
|
|
|
|
|
2009-10-31 08:47:21 +08:00
|
|
|
int packet_get_line(struct strbuf *out,
|
|
|
|
char **src_buf, size_t *src_len)
|
|
|
|
{
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (*src_len < 4)
|
|
|
|
return -1;
|
|
|
|
len = packet_length(*src_buf);
|
|
|
|
if (len < 0)
|
|
|
|
return -1;
|
|
|
|
if (!len) {
|
|
|
|
*src_buf += 4;
|
|
|
|
*src_len -= 4;
|
2011-02-24 22:30:19 +08:00
|
|
|
packet_trace("0000", 4, 0);
|
2009-10-31 08:47:21 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (*src_len < len)
|
|
|
|
return -2;
|
|
|
|
|
|
|
|
*src_buf += 4;
|
|
|
|
*src_len -= 4;
|
|
|
|
len -= 4;
|
|
|
|
|
|
|
|
strbuf_add(out, *src_buf, len);
|
|
|
|
*src_buf += len;
|
|
|
|
*src_len -= len;
|
2011-02-24 22:30:19 +08:00
|
|
|
packet_trace(out->buf, out->len, 0);
|
2009-10-31 08:47:21 +08:00
|
|
|
return len;
|
|
|
|
}
|