binutils-gdb/ld/ldexp.h
Alan Modra 9833b7757d PR28824, relro security issues
Background
==========
There are constraints on layout of binaries to meet demand paging and
memory protection requirements.  Demand paged binaries must have file
offset mod pagesize equal to vma mod pagesize.  Memory protection
(executable, read, write status) can only change at page boundaries.
The linker's MAXPAGESIZE variable gives the page size for these layout
constraints.

In a typical basic executable with two memory segments, text (RE) and
data (RW), the data segment must start on a different page to the
last text segment page.  For example, with 64k pages and a small
executable of 48k text and 1k data, the text segment might start at
address 0x10000 and data at 0x20000 for a total of two 64k memory
pages.  Demand paging would require the image on disk to be 64k+1k
in size.  We can do better than that.  If the data segment instead
starts at 0x2c000 (the end of the text segment plus one 64k page) then
there are still only two memory pages, but the disk image is now
smaller, 48k+1k in size.  This is why the linker normally starts the
data segment at the end of the text segment plus one page.  That
simple heuristic isn't ideal in all cases.  Changing our simple
example to one with 64k-1 text size, following that heuristic would
result in data starting at 0x2ffff.  Now we have two 64k memory data
pages for a data segment of 1k!  If the data segment instead started
at 0x30000 we'd get a single data segment page at the cost of 1 byte
extra in the disk image, which is likely a good trade-off.  So the
linker does adjust the simple heuristic.  Just how much disk image
size increase is allowed is controlled by the linker's COMMONPAGESIZE
variable.

A PT_GNU_RELRO segment overlays the initial part of the data segment,
saying that those pages should be made read-only after relocation by
the dynamic loader.  Page granularity for memory protection means that
the end of the relro segment must be at a page boundary.

The problem
===========
Unfortunately most targets currently only align the end of the relro
segment to COMMONPAGESIZE.  That results in only partial relro
protection if an executable is running with MAXPAGESIZE pages, since
any part of the relro segment past the last MAXPAGESIZE boundary can't
be made read-only without also affecting sections past the end of the
relro segment.  I believe this problem arose because x86 always runs
with 4k (COMMONPAGESIZE) memory pages, and therefore using a larger
MAXPAGESIZE on x86 is for reasons other than the demand paging and
memory page protection boundary requirements.

The solution
============
Always end the relro segment on a MAXPAGESIZE boundary, except for
x86.  Note that the relro segment, comprising sections at the start
of the data segment, is sized according to how those sections are laid
out.  That means the start of the relro segment is fixed relative to
its end.  Which also means the start of the data segment must be at a
fixed address mod MAXPAGESIZE.  So for relro the linker can't play
games with the start of the data segment to save disk space.  At
least, not without introducing gaps between the relro sections.  In
fact, because the linker was starting layout using its simple
heuristic of starting the data segment at the end of the text segment
plus one page, it was sometimes introducing page gaps for no reason.
See pr28743.

	PR 28824
	PR 28743
	* ldexp.c (fold_segment_align): When relro, don't adjust up by
	offset within page.  Set relropagesize.
	(fold_segment_relro_end): Align to relropagesize.
	* ldexp.h (seg_align_type): Rename pagesize to commonpagesize.
	Add relropagesize.  Comment.
	* ldlang.c (lang_size_segment): Adjust to suit field renaming.
	(lang_size_relro_segment_1): Align relro_end using relropagesize.
2022-02-13 14:00:56 +10:30

249 lines
6.4 KiB
C

/* ldexp.h -
Copyright (C) 1991-2022 Free Software Foundation, Inc.
This file is part of the GNU Binutils.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
#ifndef LDEXP_H
#define LDEXP_H
/* Outcome of evaluating an expression tree.  */
typedef struct
{
  bfd_vma value;
  char *str;
  asection *section;
  /* NOTE(review): presumably set when the fields above hold a usable
     result; confirm against the evaluator.  */
  bool valid_p;
} etree_value_type;
/* Kinds of expression tree node.  A value of this type is stored in
   the node_class field of node_type.  Enumerators keep their implicit
   values, counting up from zero in the order listed.  */
enum node_tree_enum
{
  etree_binary,
  etree_trinary,
  etree_unary,
  etree_name,
  etree_assign,
  etree_provide,
  etree_provided,
  etree_value,
  etree_assert,
  etree_rel
};
/* Header shared by every expression node: it is the first member of
   each variant of union etree_union.  */
typedef struct
{
  int node_code;
  /* NOTE(review): lineno and filename look like the script location
     the node came from; confirm against the parser.  */
  unsigned int lineno;
  const char *filename;
  /* Which kind of node this header belongs to.  */
  enum node_tree_enum node_class;
} node_type;
/* An expression tree node.  Every variant starts with a node_type
   header, matching the bare TYPE member declared first.  */
typedef union etree_union
{
  node_type type;

  /* Two operands.  */
  struct
  {
    node_type type;
    union etree_union *lhs;
    union etree_union *rhs;
  } binary;

  /* A condition plus two operands.  */
  struct
  {
    node_type type;
    union etree_union *cond;
    union etree_union *lhs;
    union etree_union *rhs;
  } trinary;

  /* A named destination and the expression assigned to it.  */
  struct
  {
    node_type type;
    const char *dst;
    union etree_union *src;
    bool hidden;
  } assign;

  /* A single operand.  */
  struct
  {
    node_type type;
    union etree_union *child;
  } unary;

  /* A name.  */
  struct
  {
    node_type type;
    const char *name;
  } name;

  /* A numeric value with an accompanying string.  */
  struct
  {
    node_type type;
    bfd_vma value;
    char *str;
  } value;

  /* A value paired with a section.  */
  struct
  {
    node_type type;
    asection *section;
    bfd_vma value;
  } rel;

  /* A child expression and a message string.  */
  struct
  {
    node_type type;
    union etree_union *child;
    const char *message;
  } assert_s;
} etree_type;
/* Phases that control how expressions are evaluated.  */
typedef enum
{
  /* The linker script is being parsed.  Only expressions that
     evaluate to a constant are reported as "valid".  */
  lang_first_phase_enum,

  /* Before sections are sized.  */
  lang_mark_phase_enum,

  /* While sections are being sized.  */
  lang_allocating_phase_enum,

  /* Symbol values are being assigned while relaxation is in
     progress.  */
  lang_assigning_phase_enum,

  /* Symbol values are being assigned for the last time.  */
  lang_final_phase_enum,

  /* Symbol values are already fixed; used for lang_map.  */
  lang_fixed_phase_enum
} lang_phase_type;
/* Forward declaration: the declarations below only need pointers to
   this union (relro_start_stat and relro_end_stat).  */
union lang_statement_union;
/* States recorded in the phase field of seg_align_type.  */
enum phase_enum
{
  /* The first four states are stepped through as the associated
     linker script tokens are seen.  */
  exp_seg_none,
  exp_seg_align_seen,
  exp_seg_relro_seen,
  exp_seg_end_seen,

  /* The remaining three states are final, and affect the value
     returned by XXX_SEGMENT_ALIGN.  */
  exp_seg_relro_adjust,
  exp_seg_adjust,
  exp_seg_done
};
/* Values taken by the relro field of seg_align_type.  */
enum relro_enum
{
  exp_seg_relro_none,
  exp_seg_relro_start,
  exp_seg_relro_end
};
/* Segment alignment state.  struct ldexp_control holds one of these
   as its dataseg member.  */
typedef struct
{
  enum phase_enum phase;

  bfd_vma base;
  bfd_vma relro_offset;
  bfd_vma relro_end;
  bfd_vma end;

  /* maxpagesize and commonpagesize hold MAXPAGESIZE and COMMONPAGESIZE
     as passed to DATA_SEGMENT_ALIGN.  relropagesize sets the alignment
     of the end of the relro segment.  */
  bfd_vma maxpagesize;
  bfd_vma commonpagesize;
  bfd_vma relropagesize;

  enum relro_enum relro;

  union lang_statement_union *relro_start_stat;
  union lang_statement_union *relro_end_stat;
} seg_align_type;
/* Control state and working results for expression evaluation.  */
struct ldexp_control
{
  /* Expression evaluation is modified according to this phase.  */
  lang_phase_type phase;

  /* Mainly of use for diagnostics.  */
  bool assigning_to_dot;

  /* Set when the current expression used "dot", SEGMENT_START or
     ORIGIN, but did not use ABSOLUTE or combine symbols in a way that
     forces an absolute result.  This tracks symbols assigned from dot
     outside of output section statements, so that they can later be
     converted from absolute.  */
  bool rel_from_abs;

  /* The destination, when an assignment is being evaluated.  Cleared
     if an etree_name NAME matches it, which signals a
     self-assignment.  Neither an etree_name DEFINED nor the false
     branch of a trinary expression clears this field.  */
  const char *assign_name;

  /* The source, when an assignment is being evaluated and the source
     is either an expression referencing a single etree_name NAME or a
     trinary expression whose true branch references a single
     etree_name NAME.  */
  struct bfd_link_hash_entry *assign_src;

  /* Working results.  */
  etree_value_type result;
  bfd_vma dot;

  /* The current dot and section, as passed to the ldexp folder.  */
  bfd_vma *dotp;
  asection *section;

  /* State machine and results for DATASEG.  */
  seg_align_type dataseg;
};

/* The single ldexp_control object; it is defined outside this
   header.  */
extern struct ldexp_control expld;
/* A mapping from a segment name to a base address.  */
typedef struct segment_struct
{
  /* Link to the next segment in the list.  */
  struct segment_struct *next;

  /* Name of the segment.  */
  const char *name;

  /* Base address of the segment.  */
  bfd_vma value;

  /* True once a SEGMENT_START directive corresponding to this segment
     has been seen.  */
  bool used;
} segment_type;

/* The segments the user specified on the command line.  */
extern segment_type *segments;
/* Incomplete type: the prototypes below only pass pointers to it.  */
typedef struct _fill_type fill_type;

/* Functions declared by this header.  Their definitions are not part
   of the header.  */
etree_type *exp_intop (bfd_vma);
etree_type *exp_bigintop (bfd_vma, char *);
etree_type *exp_relop (asection *, bfd_vma);
void exp_fold_tree (etree_type *, asection *, bfd_vma *);
void exp_fold_tree_no_dot (etree_type *);
etree_type *exp_binop (int, etree_type *, etree_type *);
etree_type *exp_trinop (int, etree_type *, etree_type *, etree_type *);
etree_type *exp_unop (int, etree_type *);
etree_type *exp_nameop (int, const char *);
etree_type *exp_assign (const char *, etree_type *, bool);
etree_type *exp_defsym (const char *, etree_type *);
etree_type *exp_provide (const char *, etree_type *, bool);
etree_type *exp_assert (etree_type *, const char *);
void exp_print_tree (etree_type *);
bfd_vma exp_get_vma (etree_type *, bfd_vma, char *);
int exp_get_power (etree_type *, char *);
fill_type *exp_get_fill (etree_type *, fill_type *, char *);
bfd_vma exp_get_abs_int (etree_type *, int, char *);
void ldexp_init (void);
void ldexp_finalize_syms (void);
bool ldexp_is_final_sym_absolute (const struct bfd_link_hash_entry *);
void ldexp_finish (void);
#endif