libbacktrace: don't get confused by overlapping address ranges

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/137. * dwarf.c (resolve_unit_addrs_overlap_walk): New static function. (resolve_unit_addrs_overlap): New static function. (build_dwarf_data): Call resolve_unit_addrs_overlap.
2024-11-26 21:33:59 +08:00 · 2024-10-18 13:02:21 -07:00 · 2024-10-18 13:02:21 -07:00 · f8687bceaa
commit f8687bceaa
parent aaa855fac0
1 changed files with 199 additions and 15 deletions
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@ -1276,6 +1276,194 @@ unit_addrs_search (const void *vkey, const void *ventry)
    return 0;
 }

+/* Fill in overlapping ranges as needed.  This is a subroutine of
+   resolve_unit_addrs_overlap.  */
+
+static int
+resolve_unit_addrs_overlap_walk (struct backtrace_state *state,
+				 size_t *pfrom, size_t *pto,
+				 struct unit_addrs *enclosing,
+				 struct unit_addrs_vector *old_vec,
+				 backtrace_error_callback error_callback,
+				 void *data,
+				 struct unit_addrs_vector *new_vec)
+{
+  struct unit_addrs *old_addrs;
+  size_t old_count;
+  struct unit_addrs *new_addrs;
+  size_t from;
+  size_t to;
+
+  old_addrs = (struct unit_addrs *) old_vec->vec.base;
+  old_count = old_vec->count;
+  new_addrs = (struct unit_addrs *) new_vec->vec.base;
+
+  for (from = *pfrom, to = *pto; from < old_count; from++, to++)
+    {
+      /* If we are in the scope of a larger range that can no longer
+	 cover any further ranges, return back to the caller.  */
+
+      if (enclosing != NULL
+	  && enclosing->high <= old_addrs[from].low)
+	{
+	  *pfrom = from;
+	  *pto = to;
+	  return 1;
+	}
+
+      new_addrs[to] = old_addrs[from];
+
+      /* If we are in scope of a larger range, fill in any gaps
+	 between this entry and the next one.
+
+	 There is an extra entry at the end of the vector, so it's
+	 always OK to refer to from + 1.  */
+
+      if (enclosing != NULL
+	  && enclosing->high > old_addrs[from].high
+	  && old_addrs[from].high < old_addrs[from + 1].low)
+	{
+	  void *grew;
+	  size_t new_high;
+
+	  grew = backtrace_vector_grow (state, sizeof (struct unit_addrs),
+					error_callback, data, &new_vec->vec);
+	  if (grew == NULL)
+	    return 0;
+	  new_addrs = (struct unit_addrs *) new_vec->vec.base;
+	  to++;
+	  new_addrs[to].low = old_addrs[from].high;
+	  new_high = old_addrs[from + 1].low;
+	  if (enclosing->high < new_high)
+	    new_high = enclosing->high;
+	  new_addrs[to].high = new_high;
+	  new_addrs[to].u = enclosing->u;
+	}
+
+      /* If this range has a larger scope than the next one, use it to
+	 fill in any gaps.  */
+
+      if (old_addrs[from].high > old_addrs[from + 1].high)
+	{
+	  *pfrom = from + 1;
+	  *pto = to + 1;
+	  if (!resolve_unit_addrs_overlap_walk (state, pfrom, pto,
+						&old_addrs[from], old_vec,
+						error_callback, data, new_vec))
+	    return 0;
+	  from = *pfrom;
+	  to = *pto;
+
+	  /* Undo the increment the loop is about to do.  */
+	  from--;
+	  to--;
+	}
+    }
+
+  if (enclosing == NULL)
+    {
+      struct unit_addrs *pa;
+
+      /* Add trailing entry.  */
+
+      pa = ((struct unit_addrs *)
+	    backtrace_vector_grow (state, sizeof (struct unit_addrs),
+				   error_callback, data, &new_vec->vec));
+      if (pa == NULL)
+	return 0;
+      pa->low = 0;
+      --pa->low;
+      pa->high = pa->low;
+      pa->u = NULL;
+
+      new_vec->count = to;
+    }
+
+  return 1;
+}
+
+/* It is possible for the unit_addrs list to contain overlaps, as in
+
+       10: low == 10, high == 20, unit 1
+       11: low == 12, high == 15, unit 2
+       12: low == 20, high == 30, unit 1
+
+   In such a case, for pc == 17, a search using units_addr_search will
+   return entry 11.  However, pc == 17 doesn't fit in that range.  We
+   actually want range 10.
+
+   It seems that in general we might have an arbitrary number of
+   ranges in between 10 and 12.
+
+   To handle this we look for cases where range R1 is followed by
+   range R2 such that R2 is a strict subset of R1.  In such cases we
+   insert a new range R3 following R2 that fills in the remainder of
+   the address space covered by R1.  That lets a relatively simple
+   search find the correct range.
+
+   These overlaps can occur because of the range merging we do in
+   add_unit_addr.  When the linker de-duplicates functions, it can
+   leave behind an address range that refers to the address range of
+   the retained duplicate.  If the retained duplicate address range is
+   merged with others, then after sorting we can see overlapping
+   address ranges.
+
+   See https://github.com/ianlancetaylor/libbacktrace/issues/137.  */
+
+static int
+resolve_unit_addrs_overlap (struct backtrace_state *state,
+			    backtrace_error_callback error_callback,
+			    void *data, struct unit_addrs_vector *addrs_vec)
+{
+  struct unit_addrs *addrs;
+  size_t count;
+  int found;
+  struct unit_addrs *entry;
+  size_t i;
+  struct unit_addrs_vector new_vec;
+  void *grew;
+  size_t from;
+  size_t to;
+
+  addrs = (struct unit_addrs *) addrs_vec->vec.base;
+  count = addrs_vec->count;
+
+  if (count == 0)
+    return 1;
+
+  /* Optimistically assume that overlaps are rare.  */
+  found = 0;
+  entry = addrs;
+  for (i = 0; i < count - 1; i++)
+    {
+      if (entry->low < (entry + 1)->low
+	  && entry->high > (entry + 1)->high)
+	{
+	  found = 1;
+	  break;
+	}
+      entry++;
+    }
+  if (!found)
+    return 1;
+
+  memset (&new_vec, 0, sizeof new_vec);
+  grew = backtrace_vector_grow (state,
+				count * sizeof (struct unit_addrs),
+				error_callback, data, &new_vec.vec);
+  if (grew == NULL)
+    return 0;
+
+  from = 0;
+  to = 0;
+  resolve_unit_addrs_overlap_walk (state, &from, &to, NULL, addrs_vec,
+				   error_callback, data, &new_vec);
+  backtrace_vector_free (state, &addrs_vec->vec, error_callback, data);
+  *addrs_vec = new_vec;
+
+  return 1;
+}
+
 /* Sort the line vector by PC.  We want a stable sort here to maintain
   the order of lines for the same PC values.  Since the sequence is
   being sorted in place, their addresses cannot be relied on to
@ -2980,7 +3168,7 @@ read_line_info (struct backtrace_state *state, struct dwarf_data *ddata,

  if (vec.count == 0)
    {
-      /* This is not a failure in the sense of a generating an error,
+      /* This is not a failure in the sense of generating an error,
 	 but it is a failure in that sense that we have no useful
 	 information.  */
      goto fail;
@ -3966,11 +4154,7 @@ build_dwarf_data (struct backtrace_state *state,
 		  void *data)
 {
  struct unit_addrs_vector addrs_vec;
-  struct unit_addrs *addrs;
-  size_t addrs_count;
  struct unit_vector units_vec;
-  struct unit **units;
-  size_t units_count;
  struct dwarf_data *fdata;

  if (!build_address_map (state, base_address, dwarf_sections, is_bigendian,
@ -3982,12 +4166,12 @@ build_dwarf_data (struct backtrace_state *state,
    return NULL;
  if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data))
    return NULL;
-  addrs = (struct unit_addrs *) addrs_vec.vec.base;
-  units = (struct unit **) units_vec.vec.base;
-  addrs_count = addrs_vec.count;
-  units_count = units_vec.count;
-  backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs),
-		   unit_addrs_compare);
+
+  backtrace_qsort ((struct unit_addrs *) addrs_vec.vec.base, addrs_vec.count,
+		   sizeof (struct unit_addrs), unit_addrs_compare);
+  if (!resolve_unit_addrs_overlap (state, error_callback, data, &addrs_vec))
+    return NULL;
+
  /* No qsort for units required, already sorted.  */

  fdata = ((struct dwarf_data *)
@ -3999,10 +4183,10 @@ build_dwarf_data (struct backtrace_state *state,
  fdata->next = NULL;
  fdata->altlink = altlink;
  fdata->base_address = base_address;
-  fdata->addrs = addrs;
-  fdata->addrs_count = addrs_count;
-  fdata->units = units;
-  fdata->units_count = units_count;
+  fdata->addrs = (struct unit_addrs *) addrs_vec.vec.base;
+  fdata->addrs_count = addrs_vec.count;
+  fdata->units = (struct unit **) units_vec.vec.base;
+  fdata->units_count = units_vec.count;
  fdata->dwarf_sections = *dwarf_sections;
  fdata->is_bigendian = is_bigendian;
  memset (&fdata->fvec, 0, sizeof fdata->fvec);