The 30_all_gcc34-arm-ldm-peephole.patch from Debian was conflicting 
with the newer 36_all_pr16201-fix.patch, so i cut out the hunk from 
it that was causing problems and grabbed an updated version from 
upstream cvs.

Index: gcc/config/arm/arm.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.432
retrieving revision 1.433
diff -u -r1.432 -r1.433
--- gcc-3.4.4/gcc/config/arm/arm.c	29 Mar 2005 03:00:23 -0000	1.432
+++ gcc-3.4.4/gcc/config/arm/arm.c	1 Apr 2005 11:02:22 -0000	1.433
@@ -5139,6 +5139,10 @@
 int
 adjacent_mem_locations (rtx a, rtx b)
 {
+  /* We don't guarantee to preserve the order of these memory refs.  */
+  if (volatile_refs_p (a) || volatile_refs_p (b))
+    return 0;
+
   if ((GET_CODE (XEXP (a, 0)) == REG
        || (GET_CODE (XEXP (a, 0)) == PLUS
 	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
@@ -5178,6 +5182,17 @@
 	return 0;
 
       val_diff = val1 - val0;
+
+      if (arm_ld_sched)
+	{
+	  /* If the target has load delay slots, then there's no benefit
+	     to using an ldm instruction unless the offset is zero and
+	     we are optimizing for size.  */
+	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+		  && (val_diff == 4 || val_diff == -4));
+	}
+
       return ((REGNO (reg0) == REGNO (reg1))
 	      && (val_diff == 4 || val_diff == -4));
     }