Patch to improve MIPS call stack unwind performance by caching the results
of code reading.
by Dan Howell <dahowell@directv.com>

diff -urN mpatrol-uclibc/src/stack.c mpatrol-unwindcache/src/stack.c
--- mpatrol-uclibc/src/stack.c	2006-06-22 15:39:04.000000000 -0700
+++ mpatrol-unwindcache/src/stack.c	2006-06-22 15:42:20.000000000 -0700
@@ -68,6 +68,7 @@
 #define ucontext asm_ucontext
 #include <asm/ucontext.h>
 #undef ucontext
+#include "heap.h"
 #endif /* ARCH */
 #endif /* SYSTEM */
 #endif /* TARGET */
@@ -280,6 +281,136 @@
 
 #if !MP_BUILTINSTACK_SUPPORT && !MP_LIBRARYSTACK_SUPPORT
 #if TARGET == TARGET_UNIX && ARCH == ARCH_MIPS
+/* Set up a tree to cache the results of code searching to determine the
+   location of the return address for each code point encountered. */
+
+/* An unwind node caches the call stack unwinding details that unwind()
+ * derived by code reading for one code address, and lives in a binary
+ * search tree (dtree) keyed by that address.  An internal index node
+ * instead records one memory block allocated for unwind node slots.
+ */
+typedef union unwindnode
+{
+    struct
+    {
+        treenode node;   /* tree node in itree, keyed by block address */
+        void *block;     /* pointer to block of memory */
+        size_t size;     /* size of block of memory */
+    }
+    index;
+    struct
+    {
+        treenode node;   /* tree node in dtree, keyed by code address */
+        long p;          /* return address offset in the stack */
+        long m;          /* frame pointer offset in stack */
+        long s;          /* stack pointer offset from previous frame */
+        unsigned long a; /* flags tested as RA_OFFSET, SP_OFFSET, SP_IN_FP */
+    }
+    data;
+}
+unwindnode;
+
+/* An unwindhead holds the table of unwind node slots as well as the
+ * internal tree of memory blocks allocated for unwind node slots.
+ */
+typedef struct unwindhead
+{
+    heaphead heap;       /* heap that supplies memory for the slots */
+    slottable table;     /* table of unwind node slots */
+    treeroot itree;      /* internal tree of memory blocks */
+    treeroot dtree;      /* unwind nodes keyed by code address */
+    size_t size;         /* memory used by internal blocks */
+    char init;           /* non-zero once the cache has been set up */
+}
+unwindhead;
+
+static unwindhead unwindcache;
+
+/* Initialise the unwind cache: set up its heap and slot table, initialise
+ * both trees, zero the block size total and set the initialisation flag.
+ */
+
+static
+void
+newunwindcache(void)
+{
+    struct { char x; unwindnode y; } z;
+    long n;
+
+    __mp_newheap(&unwindcache.heap);
+    /* Determine the minimum alignment for an unwind node on this
+     * system (the offset of y after the single char x in z) and force it
+     * to be a power of two.  This is used when initialising the slot table.
+     */
+    n = (char *) &z.y - &z.x;
+    __mp_newslots(&unwindcache.table, sizeof(unwindnode), __mp_poweroftwo(n));
+    __mp_newtree(&unwindcache.itree);
+    __mp_newtree(&unwindcache.dtree);
+    unwindcache.size = 0;
+    unwindcache.init = 1;
+}
+
+
+/* Forget all unwind information; registered with __mp_atexit() by unwind().
+ */
+
+static
+void
+deleteunwindcache(void)
+{
+    /* The heap manager releases every block at a lower level, so only the
+     * slot table, trees and counters that referred to them are reset here.
+     */
+    __mp_deleteheap(&unwindcache.heap);
+    unwindcache.table.free = NULL;
+    unwindcache.table.size = 0;
+    __mp_newtree(&unwindcache.itree);
+    __mp_newtree(&unwindcache.dtree);
+    unwindcache.size = 0;
+    unwindcache.init = 0;
+}
+
+
+/* Allocate a new unwind node slot; returns NULL if the heap allocation fails.
+ */
+
+static
+unwindnode *
+getunwindnode(void)
+{
+    unwindnode *n;
+    heapnode *p;
+
+    /* If we have no more unwind node slots left then we must allocate
+     * MP_ALLOCFACTOR more pages for them.  The first slot taken from the new
+     * block becomes its index node in itree; the next one is returned.
+     */
+    if ((n = (unwindnode *) __mp_getslot(&unwindcache.table)) == NULL)
+    {
+        if ((p = __mp_heapalloc(&unwindcache.heap, unwindcache.heap.memory.page * MP_ALLOCFACTOR,
+              unwindcache.table.entalign, 1)) == NULL)
+            return NULL;
+        __mp_initslots(&unwindcache.table, p->block, p->size);
+        n = (unwindnode *) __mp_getslot(&unwindcache.table);
+        __mp_treeinsert(&unwindcache.itree, &n->index.node, (unsigned long) p->block);
+        n->index.block = p->block;
+        n->index.size = p->size;
+        unwindcache.size += p->size;
+        n = (unwindnode *) __mp_getslot(&unwindcache.table);
+    }
+    return n;
+}
+
+/* Search dtree for the unwind node keyed by code address p.  The cast assumes
+ * __mp_search() yields the node member, which is first in unwindnode. */
+static
+unwindnode *
+findunwindnode(unsigned long p)
+{
+    return (unwindnode *) __mp_search(unwindcache.dtree.root, p);
+}
+
+
 /* Determine the stack pointer and return address of the previous stack frame
  * by performing code reading.
  */
@@ -289,8 +420,9 @@
 unwind(frameinfo *f)
 {
     long p, m, s;
-    unsigned long a, i, q, t, b, r;
+    unsigned long a, i, q, t, b, r, k;
     unsigned short l, u;
+    unwindnode *n = NULL;
 
     s = -1;
     p = m = 0;
@@ -322,7 +454,23 @@
 #endif
     /* Save initial code-reading starting point.
      */
-    r = f->ra;
+    r = k = f->ra;
+    /* Create the cache if not yet created.
+     */
+    if (!unwindcache.init)
+    {
+        newunwindcache();
+        __mp_atexit(deleteunwindcache);
+    }
+    if ((n = findunwindnode(f->ra)) != NULL)
+    {
+        /* We've been here before, so get the cached information.
+         */
+        p = n->data.p;
+        m = n->data.m;
+        s = n->data.s;
+        a = n->data.a;
+    }
     /* Search for the return address offset in the stack frame.
      */
     while (!((a & RA_OFFSET) && (a & SP_OFFSET)) && (f->ra < q))
@@ -478,6 +626,19 @@
         return 1;
     }
 #endif
+    if (n == NULL)
+    {
+        if ((n = getunwindnode()) != NULL)
+        {
+            /* Cache the information we just got in the tree.
+             */
+            n->data.p = p;
+            n->data.m = m;
+            n->data.s = s;
+            n->data.a = a;
+            __mp_treeinsert(&unwindcache.dtree, &n->data.node, k);
+        }
+    }
     if (a & SP_IN_FP)
         f->sp = f->fp;
     if (m > 0)