diagnostics: Support for -finput-charset [PR93067]

Adds the logic to handle -finput-charset in layout_get_source_line(), so that
source lines are converted from their input encodings prior to being output by
diagnostics machinery. Also adds the ability to strip a UTF-8 BOM similarly.

gcc/c-family/ChangeLog:

	PR other/93067
	* c-opts.c (c_common_input_charset_cb): New function.
	(c_common_post_options): Call new function
	diagnostic_initialize_input_context().

gcc/d/ChangeLog:

	PR other/93067
	* d-lang.cc (d_input_charset_callback): New function.
	(d_init): Call new function
	diagnostic_initialize_input_context().

gcc/fortran/ChangeLog:

	PR other/93067
	* cpp.c (gfc_cpp_post_options): Call new function
	diagnostic_initialize_input_context().

gcc/ChangeLog:

	PR other/93067
	* coretypes.h (typedef diagnostic_input_charset_callback): Declare.
	* diagnostic.c (diagnostic_initialize_input_context): New function.
	* diagnostic.h (diagnostic_initialize_input_context): Declare.
	* input.c (default_charset_callback): New function.
	(file_cache::initialize_input_context): New function.
	(file_cache_slot::create): Added ability to convert the input
	according to the input context.
	(file_cache::file_cache): Initialize the new input context.
	(class file_cache_slot): Added new m_alloc_offset member.
	(file_cache_slot::file_cache_slot): Initialize the new member.
	(file_cache_slot::~file_cache_slot): Handle potentially offset buffer.
	(file_cache_slot::maybe_grow): Likewise.
	(file_cache_slot::needs_read_p): Handle NULL fp, which is now possible.
	(file_cache_slot::get_next_line): Likewise.
	* input.h (class file_cache): Added input context member.

libcpp/ChangeLog:

	PR other/93067
	* charset.c (init_iconv_desc): Adapt to permit PFILE argument to
	be NULL.
	(_cpp_convert_input): Likewise. Also move UTF-8 BOM logic to...
	(cpp_check_utf8_bom): ...here.  New function.
	(cpp_input_conversion_is_trivial): New function.
	* files.c (read_file_guts): Allow PFILE argument to be NULL.  Add
	INPUT_CHARSET argument as an alternate source of this information.
	(read_file): Pass the new argument to read_file_guts.
	(cpp_get_converted_source): New function.
	* include/cpplib.h (struct cpp_converted_source): Declare.
	(cpp_get_converted_source): Declare.
	(cpp_input_conversion_is_trivial): Declare.
	(cpp_check_utf8_bom): Declare.

gcc/testsuite/ChangeLog:

	PR other/93067
	* gcc.dg/diagnostic-input-charset-1.c: New test.
	* gcc.dg/diagnostic-input-utf8-bom.c: New test.
This commit is contained in:
Lewis Hyatt 2021-08-24 19:30:44 -04:00
parent 43a5d46fea
commit 3ac6b5cff1
13 changed files with 335 additions and 58 deletions

View File

@ -188,6 +188,14 @@ c_common_diagnostics_set_defaults (diagnostic_context *context)
context->opt_permissive = OPT_fpermissive;
}
/* Input charset configuration for diagnostics. */
static const char *
c_common_input_charset_cb (const char * /*filename*/)
{
const char *cs = cpp_opts->input_charset;
return cpp_input_conversion_is_trivial (cs) ? nullptr : cs;
}
/* Whether options from all C-family languages should be accepted
quietly. */
static bool accept_all_c_family_options = false;
@ -1136,6 +1144,11 @@ c_common_post_options (const char **pfilename)
cpp_post_options (parse_in);
init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
/* Let diagnostics infrastructure know how to convert input files the same
way libcpp will do it, namely using the configured input charset and
skipping a UTF-8 BOM if present. */
diagnostic_initialize_input_context (global_dc,
c_common_input_charset_cb, true);
input_location = UNKNOWN_LOCATION;
*pfilename = this_input_filename

View File

@ -154,6 +154,7 @@ struct cl_option_handlers;
struct diagnostic_context;
class pretty_printer;
class diagnostic_event_id_t;
typedef const char * (*diagnostic_input_charset_callback)(const char *);
template<typename T> struct array_traits;

View File

@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see
#include "output.h"
#include "print-tree.h"
#include "debug.h"
#include "input.h"
#include "d-tree.h"
#include "id.h"
@ -362,6 +363,19 @@ d_option_lang_mask (void)
return CL_D;
}
/* Implements input charset and BOM skipping configuration for
diagnostics. */
static const char *d_input_charset_callback (const char * /*filename*/)
{
/* TODO: The input charset is automatically determined by code in
dmd/dmodule.c based on the contents of the file. If this detection
logic were factored out and could be reused here, then we would be able
to return UTF-16 or UTF-32 as needed here. For now, we return always
NULL, which means no conversion is necessary, i.e. the input is assumed
to be UTF-8 when diagnostics read this file. */
return nullptr;
}
/* Implements the lang_hooks.init routine for language D. */
static bool
@ -373,6 +387,11 @@ d_init (void)
Expression::_init ();
Objc::_init ();
/* Diagnostics input init, to enable BOM skipping and
input charset conversion. */
diagnostic_initialize_input_context (global_dc,
d_input_charset_callback, true);
/* Back-end init. */
global_binding_level = ggc_cleared_alloc <binding_level> ();
current_binding_level = global_binding_level;

View File

@ -293,6 +293,17 @@ diagnostic_urls_init (diagnostic_context *context, int value /*= -1 */)
= determine_url_format ((diagnostic_url_rule_t) value);
}
/* Create the file_cache, if not already created, and tell it how to
translate files on input. */
void diagnostic_initialize_input_context (diagnostic_context *context,
diagnostic_input_charset_callback ccb,
bool should_skip_bom)
{
if (!context->m_file_cache)
context->m_file_cache = new file_cache;
context->m_file_cache->initialize_input_context (ccb, should_skip_bom);
}
/* Do any cleaning up required after the last diagnostic is emitted. */
void

View File

@ -446,6 +446,25 @@ extern void diagnostic_show_locus (diagnostic_context *,
diagnostic_t diagnostic_kind);
extern void diagnostic_show_any_path (diagnostic_context *, diagnostic_info *);
/* Because we read source files a second time after the frontend did it the
first time, we need to know how the frontend handled things like character
set conversion and UTF-8 BOM stripping, in order to make everything
consistent. This function needs to be called by each frontend that requires
non-default behavior, to inform the diagnostics infrastructure how input is
to be processed. The default behavior is to do no conversion and not to
strip a UTF-8 BOM.
The callback should return the input charset to be used to convert the given
file's contents to UTF-8, or it should return NULL if no conversion is needed
for this file. SHOULD_SKIP_BOM only applies in case no conversion was
performed, and if true, it will cause a UTF-8 BOM to be skipped at the
beginning of the file. (In case a conversion was performed, the BOM is
rather skipped as part of the conversion process.) */
void diagnostic_initialize_input_context (diagnostic_context *context,
diagnostic_input_charset_callback ccb,
bool should_skip_bom);
/* Force diagnostics controlled by OPTIDX to be kind KIND. */
extern diagnostic_t diagnostic_classify_diagnostic (diagnostic_context *,
int /* optidx */,

View File

@ -493,6 +493,12 @@ gfc_cpp_post_options (void)
cpp_post_options (cpp_in);
/* Let diagnostics infrastructure know how to convert input files the same
way libcpp will do it, namely, with no charset conversion but with
skipping of a UTF-8 BOM if present. */
diagnostic_initialize_input_context (global_dc, nullptr, true);
gfc_cpp_register_include_paths ();
}

View File

@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "intl.h"
#include "diagnostic.h"
#include "diagnostic-core.h"
#include "selftest.h"
#include "cpplib.h"
@ -30,6 +29,20 @@ along with GCC; see the file COPYING3. If not see
#define HAVE_ICONV 0
#endif
/* Input charset configuration. */
static const char *default_charset_callback (const char *)
{
return nullptr;
}
void
file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
bool should_skip_bom)
{
in_context.ccb = (ccb ? ccb : default_charset_callback);
in_context.should_skip_bom = should_skip_bom;
}
/* This is a cache used by get_next_line to store the content of a
file to be searched for file lines. */
class file_cache_slot
@ -51,7 +64,8 @@ public:
void inc_use_count () { m_use_count++; }
void create (const char *file_path, FILE *fp, unsigned highest_use_count);
bool create (const file_cache::input_context &in_context,
const char *file_path, FILE *fp, unsigned highest_use_count);
void evict ();
private:
@ -110,6 +124,10 @@ public:
far. */
char *m_data;
/* The allocated buffer to be freed may start a little earlier than DATA,
e.g. if a UTF8 BOM was skipped at the beginning. */
int m_alloc_offset;
/* The size of the DATA array above.*/
size_t m_size;
@ -147,6 +165,17 @@ public:
doesn't explode. We thus scale total_lines down to
line_record_size. */
vec<line_info, va_heap> m_line_record;
void offset_buffer (int offset)
{
gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
: (size_t) offset <= m_size);
gcc_assert (m_data);
m_alloc_offset += offset;
m_data += offset;
m_size -= offset;
}
};
/* Current position in real source file. */
@ -419,21 +448,25 @@ file_cache::add_file (const char *file_path)
unsigned highest_use_count = 0;
file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
r->create (file_path, fp, highest_use_count);
if (!r->create (in_context, file_path, fp, highest_use_count))
return NULL;
return r;
}
/* Populate this slot for use on FILE_PATH and FP, dropping any
existing cached content within it. */
void
file_cache_slot::create (const char *file_path, FILE *fp,
bool
file_cache_slot::create (const file_cache::input_context &in_context,
const char *file_path, FILE *fp,
unsigned highest_use_count)
{
m_file_path = file_path;
if (m_fp)
fclose (m_fp);
m_fp = fp;
if (m_alloc_offset)
offset_buffer (-m_alloc_offset);
m_nb_read = 0;
m_line_start_idx = 0;
m_line_num = 0;
@ -443,6 +476,36 @@ file_cache_slot::create (const char *file_path, FILE *fp,
m_use_count = ++highest_use_count;
m_total_lines = total_lines_num (file_path);
m_missing_trailing_newline = true;
/* Check the input configuration to determine if we need to do any
transformations, such as charset conversion or BOM skipping. */
if (const char *input_charset = in_context.ccb (file_path))
{
/* Need a full-blown conversion of the input charset. */
fclose (m_fp);
m_fp = NULL;
const cpp_converted_source cs
= cpp_get_converted_source (file_path, input_charset);
if (!cs.data)
return false;
if (m_data)
XDELETEVEC (m_data);
m_data = cs.data;
m_nb_read = m_size = cs.len;
m_alloc_offset = cs.data - cs.to_free;
}
else if (in_context.should_skip_bom)
{
if (read_data ())
{
const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
offset_buffer (offset);
m_nb_read -= offset;
}
}
return true;
}
/* file_cache's ctor. */
@ -450,6 +513,7 @@ file_cache_slot::create (const char *file_path, FILE *fp,
file_cache::file_cache ()
: m_file_slots (new file_cache_slot[num_file_slots])
{
initialize_input_context (nullptr, false);
}
/* file_cache's dtor. */
@ -478,8 +542,8 @@ file_cache::lookup_or_add_file (const char *file_path)
file_cache_slot::file_cache_slot ()
: m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
m_size (0), m_nb_read (0), m_line_start_idx (0), m_line_num (0),
m_total_lines (0), m_missing_trailing_newline (true)
m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
{
m_line_record.create (0);
}
@ -495,6 +559,7 @@ file_cache_slot::~file_cache_slot ()
}
if (m_data)
{
offset_buffer (-m_alloc_offset);
XDELETEVEC (m_data);
m_data = 0;
}
@ -509,7 +574,7 @@ file_cache_slot::~file_cache_slot ()
bool
file_cache_slot::needs_read_p () const
{
return (m_nb_read == 0
return m_fp && (m_nb_read == 0
|| m_nb_read == m_size
|| (m_line_start_idx >= m_nb_read - 1));
}
@ -531,9 +596,20 @@ file_cache_slot::maybe_grow ()
if (!needs_grow_p ())
return;
size_t size = m_size == 0 ? buffer_size : m_size * 2;
m_data = XRESIZEVEC (char, m_data, size);
m_size = size;
if (!m_data)
{
gcc_assert (m_size == 0 && m_alloc_offset == 0);
m_size = buffer_size;
m_data = XNEWVEC (char, m_size);
}
else
{
const int offset = m_alloc_offset;
offset_buffer (-offset);
m_size *= 2;
m_data = XRESIZEVEC (char, m_data, m_size);
offset_buffer (offset);
}
}
/* Read more data into the cache. Extends the cache if need be.
@ -632,7 +708,7 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
m_missing_trailing_newline = false;
}
if (ferror (m_fp))
if (m_fp && ferror (m_fp))
return false;
/* At this point, we've found the end of the of line. It either

View File

@ -111,6 +111,15 @@ class file_cache
file_cache_slot *lookup_or_add_file (const char *file_path);
void forcibly_evict_file (const char *file_path);
/* See comments in diagnostic.h about the input conversion context. */
struct input_context
{
diagnostic_input_charset_callback ccb;
bool should_skip_bom;
};
void initialize_input_context (diagnostic_input_charset_callback ccb,
bool should_skip_bom);
private:
file_cache_slot *evicted_cache_tab_entry (unsigned *highest_use_count);
file_cache_slot *add_file (const char *file_path);
@ -119,6 +128,7 @@ class file_cache
private:
static const size_t num_file_slots = 16;
file_cache_slot *m_file_slots;
input_context in_context;
};
extern expanded_location

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-require-iconv "CP850" } */
/* { dg-options "-finput-charset=CP850 -fdiagnostics-show-caret" } */
/* Test that diagnostics are converted to UTF-8; this file is encoded in
CP850. Why CP850? -finput-charset only supports encodings that are a
superset of ASCII. But encodings that look like latin-1 are automatically
converted by expect to UTF-8, and hence by the time dg sees them, it can't
verify they were actually output in UTF-8. So codepage 850 was chosen as one
that is hopefully available and meets the requirements of matching ASCII and
not matching latin-1. */
const char *section = "õ"
/* { dg-error "expected .* at end of input" "" { target *-*-*} .-1 } */
/* { dg-begin-multiline-output "" }
const char *section = "§"
^~~~~
{ dg-end-multiline-output "" } */

View File

@ -0,0 +1,14 @@
int 1;
/* { dg-do compile } */
/* { dg-options "-fdiagnostics-show-caret" } */
/* This file begins with a UTF-8 byte order mark. Verify that diagnostics
still point to the right place, since the stripping of the BOM happens twice,
once when libcpp reads the file, and once when diagnostics infrastucture
reads it. */
/* { dg-error "expected .* before numeric constant" "" { target *-*-*} 1 } */
/* { dg-begin-multiline-output "" }
int 1;
^
{ dg-end-multiline-output "" } */

View File

@ -630,7 +630,11 @@ static const struct cpp_conversion conversion_tab[] = {
cset_converter structure for conversion from FROM to TO. If
iconv_open() fails, issue an error and return an identity
converter. Silently return an identity converter if FROM and TO
are identical. */
are identical.
PFILE is only used for generating diagnostics; setting it to NULL
suppresses diagnostics. */
static struct cset_converter
init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
{
@ -672,25 +676,31 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
if (pfile)
{
if (errno == EINVAL)
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
}
ret.func = convert_no_conversion;
}
}
else
{
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
if (pfile)
{
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
}
ret.func = convert_no_conversion;
ret.cd = (iconv_t) -1;
ret.width = -1;
}
return ret;
}
@ -2122,6 +2132,25 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
buf, bufp - buf, HT_ALLOC));
}
/* Utility to strip a UTF-8 byte order marking from the beginning
of a buffer. Returns the number of bytes to skip, which currently
will be either 0 or 3. */
int
cpp_check_utf8_bom (const char *data, size_t data_length)
{
#if HOST_CHARSET == HOST_CHARSET_ASCII
const unsigned char *udata = (const unsigned char *) data;
if (data_length >= 3 && udata[0] == 0xef && udata[1] == 0xbb
&& udata[2] == 0xbf)
return 3;
#endif
return 0;
}
/* Convert an input buffer (containing the complete contents of one
source file) from INPUT_CHARSET to the source character set. INPUT
points to the input buffer, SIZE is its allocated size, and LEN is
@ -2135,7 +2164,11 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
INPUT is expected to have been allocated with xmalloc. This
function will either set *BUFFER_START to INPUT, or free it and set
*BUFFER_START to a pointer to another xmalloc-allocated block of
memory. */
memory.
PFILE is only used to generate diagnostics; setting it to NULL suppresses
diagnostics, and causes a return of NULL if there was any error instead. */
uchar *
_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
uchar *input, size_t size, size_t len,
@ -2158,17 +2191,27 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
to.text = XNEWVEC (uchar, to.asize);
to.len = 0;
if (!APPLY_CONVERSION (input_cset, input, len, &to))
cpp_error (pfile, CPP_DL_ERROR,
"failure to convert %s to %s",
CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
const bool ok = APPLY_CONVERSION (input_cset, input, len, &to);
free (input);
}
/* Clean up the mess. */
if (input_cset.func == convert_using_iconv)
iconv_close (input_cset.cd);
/* Clean up the mess. */
if (input_cset.func == convert_using_iconv)
iconv_close (input_cset.cd);
/* Handle conversion failure. */
if (!ok)
{
if (!pfile)
{
XDELETEVEC (to.text);
*buffer_start = NULL;
*st_size = 0;
return NULL;
}
cpp_error (pfile, CPP_DL_ERROR, "failure to convert %s to %s",
input_charset, SOURCE_CHARSET);
}
}
/* Resize buffer if we allocated substantially too much, or if we
haven't enough space for the \n-terminator or following
@ -2192,19 +2235,14 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
buffer = to.text;
*st_size = to.len;
#if HOST_CHARSET == HOST_CHARSET_ASCII
/* The HOST_CHARSET test just above ensures that the source charset
is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
glib'c UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
/* Ignore a UTF-8 BOM if we see one and the source charset is UTF-8. Note
that glib'c UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
BOM -- however, even if it did, we would still need this code due
to the 'convert_no_conversion' case. */
if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
&& to.text[2] == 0xbf)
{
*st_size -= 3;
buffer += 3;
}
#endif
const int bom_len = cpp_check_utf8_bom ((const char *) to.text, to.len);
*st_size -= bom_len;
buffer += bom_len;
*buffer_start = to.text;
return buffer;
@ -2244,6 +2282,13 @@ _cpp_default_encoding (void)
return current_encoding;
}
/* Check if the configured input charset requires no conversion, other than
possibly stripping a UTF-8 BOM. */
bool cpp_input_conversion_is_trivial (const char *input_charset)
{
return !strcasecmp (input_charset, SOURCE_CHARSET);
}
/* Implementation of class cpp_string_location_reader. */
/* Constructor for cpp_string_location_reader. */

View File

@ -173,7 +173,7 @@ static bool pch_open_file (cpp_reader *pfile, _cpp_file *file,
static bool find_file_in_dir (cpp_reader *pfile, _cpp_file *file,
bool *invalid_pch, location_t loc);
static bool read_file_guts (cpp_reader *pfile, _cpp_file *file,
location_t loc);
location_t loc, const char *input_charset);
static bool read_file (cpp_reader *pfile, _cpp_file *file,
location_t loc);
static struct cpp_dir *search_path_head (cpp_reader *, const char *fname,
@ -671,9 +671,12 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir,
Use LOC for any diagnostics.
PFILE may be NULL. In this case, no diagnostics are issued.
FIXME: Flush file cache and try again if we run out of memory. */
static bool
read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc,
const char *input_charset)
{
ssize_t size, total, count;
uchar *buf;
@ -681,8 +684,9 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
if (S_ISBLK (file->st.st_mode))
{
cpp_error_at (pfile, CPP_DL_ERROR, loc,
"%s is a block device", file->path);
if (pfile)
cpp_error_at (pfile, CPP_DL_ERROR, loc,
"%s is a block device", file->path);
return false;
}
@ -699,8 +703,9 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
does not bite us. */
if (file->st.st_size > INTTYPE_MAXIMUM (ssize_t))
{
cpp_error_at (pfile, CPP_DL_ERROR, loc,
"%s is too large", file->path);
if (pfile)
cpp_error_at (pfile, CPP_DL_ERROR, loc,
"%s is too large", file->path);
return false;
}
@ -733,29 +738,29 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc)
if (count < 0)
{
cpp_errno_filename (pfile, CPP_DL_ERROR, file->path, loc);
if (pfile)
cpp_errno_filename (pfile, CPP_DL_ERROR, file->path, loc);
free (buf);
return false;
}
if (regular && total != size && STAT_SIZE_RELIABLE (file->st))
if (pfile && regular && total != size && STAT_SIZE_RELIABLE (file->st))
cpp_error_at (pfile, CPP_DL_WARNING, loc,
"%s is shorter than expected", file->path);
file->buffer = _cpp_convert_input (pfile,
CPP_OPTION (pfile, input_charset),
input_charset,
buf, size + 16, total,
&file->buffer_start,
&file->st.st_size);
file->buffer_valid = true;
return true;
file->buffer_valid = file->buffer;
return file->buffer_valid;
}
/* Convenience wrapper around read_file_guts that opens the file if
necessary and closes the file descriptor after reading. FILE must
have been passed through find_file() at some stage. Use LOC for
any diagnostics. */
any diagnostics. Unlike read_file_guts(), PFILE may not be NULL. */
static bool
read_file (cpp_reader *pfile, _cpp_file *file, location_t loc)
{
@ -773,7 +778,8 @@ read_file (cpp_reader *pfile, _cpp_file *file, location_t loc)
return false;
}
file->dont_read = !read_file_guts (pfile, file, loc);
file->dont_read = !read_file_guts (pfile, file, loc,
CPP_OPTION (pfile, input_charset));
close (file->fd);
file->fd = -1;
@ -2145,3 +2151,25 @@ _cpp_has_header (cpp_reader *pfile, const char *fname, int angle_brackets,
return file->err_no != ENOENT;
}
/* Read a file and convert to input charset, the same as if it were being read
by a cpp_reader. */
cpp_converted_source
cpp_get_converted_source (const char *fname, const char *input_charset)
{
cpp_converted_source res = {};
_cpp_file file = {};
file.fd = -1;
file.name = lbasename (fname);
file.path = fname;
if (!open_file (&file))
return res;
const bool ok = read_file_guts (NULL, &file, 0, input_charset);
close (file.fd);
if (!ok)
return res;
res.to_free = (char *) file.buffer_start;
res.data = (char *) file.buffer;
res.len = file.st.st_size;
return res;
}

View File

@ -1379,6 +1379,20 @@ extern struct _cpp_file *cpp_get_file (cpp_buffer *);
extern cpp_buffer *cpp_get_prev (cpp_buffer *);
extern void cpp_clear_file_cache (cpp_reader *);
/* cpp_get_converted_source returns the contents of the given file, as it exists
after cpplib has read it and converted it from the input charset to the
source charset. Return struct will be zero-filled if the data could not be
read for any reason. The data starts at the DATA pointer, but the TO_FREE
pointer is what should be passed to free(), as there may be an offset. */
struct cpp_converted_source
{
char *to_free;
char *data;
size_t len;
};
cpp_converted_source cpp_get_converted_source (const char *fname,
const char *input_charset);
/* In pch.c */
struct save_macro_data;
extern int cpp_save_state (cpp_reader *, FILE *);
@ -1449,6 +1463,7 @@ class cpp_display_width_computation {
/* Convenience functions that are simple use cases for class
cpp_display_width_computation. Tab characters will be expanded to spaces
as determined by TABSTOP. */
int cpp_byte_column_to_display_column (const char *data, int data_length,
int column, int tabstop);
inline int cpp_display_width (const char *data, int data_length,
@ -1461,4 +1476,7 @@ int cpp_display_column_to_byte_column (const char *data, int data_length,
int display_col, int tabstop);
int cpp_wcwidth (cppchar_t c);
bool cpp_input_conversion_is_trivial (const char *input_charset);
int cpp_check_utf8_bom (const char *data, size_t data_length);
#endif /* ! LIBCPP_CPPLIB_H */