From c77c9625dd63138512ce0f67e07dd254771e566f Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 16 Nov 2012 01:15:28 +0100
Subject: [PATCH 2/2] Add aarch64 support from upstream

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 ChangeLog                |   27 ++
 Makefile.am              |    4 +
 Makefile.in              |   44 +-
 README                   |    3 +
 configure                |  185 +++++---
 configure.ac             |    5 +
 src/aarch64/ffi.c        | 1076 ++++++++++++++++++++++++++++++++++++++++++++++
 src/aarch64/ffitarget.h  |   59 +++
 src/aarch64/sysv.S       |  307 +++++++++++++
 testsuite/lib/libffi.exp |    4 +
 10 files changed, 1647 insertions(+), 67 deletions(-)
 create mode 100644 src/aarch64/ffi.c
 create mode 100644 src/aarch64/ffitarget.h
 create mode 100644 src/aarch64/sysv.S

diff --git a/ChangeLog b/ChangeLog
index 376edf7..4e8ea91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+2012-10-30  James Greenhalgh  <james.greenhalgh at arm.com>
+            Marcus Shawcroft  <marcus.shawcroft at arm.com>
+
+        * README: Add details of aarch64 port.
+        * src/aarch64/ffi.c: New.
+        * src/aarch64/ffitarget.h: Likewise.
+        * src/aarch64/sysv.S: Likewise.
+	* Makefile.am: Support aarch64.
+	* configure.ac: Support aarch64.
+	* Makefile.in, configure: Rebuilt.
+
+2012-10-30  James Greenhalgh  <james.greenhalgh at arm.com>
+            Marcus Shawcroft  <marcus.shawcroft at arm.com>
+
+        * testsuite/lib/libffi.exp: Add support for aarch64.
+        * testsuite/libffi.call/cls_struct_va1.c: New.
+        * testsuite/libffi.call/cls_uchar_va.c: Likewise.
+        * testsuite/libffi.call/cls_uint_va.c: Likewise.
+        * testsuite/libffi.call/cls_ulong_va.c: Liekwise.
+        * testsuite/libffi.call/cls_ushort_va.c: Likewise.
+        * testsuite/libffi.call/nested_struct11.c: Likewise.
+        * testsuite/libffi.call/uninitialized.c: Likewise.
+        * testsuite/libffi.call/va_1.c: Likewise.
+        * testsuite/libffi.call/va_struct1.c: Likewise.
+        * testsuite/libffi.call/va_struct2.c: Likewise.
+        * testsuite/libffi.call/va_struct3.c: Likewise.
+
 2012-04-23  Alexandre Keunecke I. de Mendonca <alexandre.keunecke@gmail.com>
 
 	* configure.ac: Add Blackfin/sysv support
diff --git a/Makefile.am b/Makefile.am
index 16f32a6..bd4d5c4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,6 +5,7 @@ AUTOMAKE_OPTIONS = foreign subdir-objects
 SUBDIRS = include testsuite man
 
 EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj configure.host \
+	src/aarch64/ffi.c src/aarch64/ffitarget.h \
 	src/alpha/ffi.c src/alpha/osf.S src/alpha/ffitarget.h \
 	src/arm/ffi.c src/arm/sysv.S src/arm/ffitarget.h \
 	src/avr32/ffi.c src/avr32/sysv.S src/avr32/ffitarget.h \
@@ -151,6 +152,9 @@ endif
 if POWERPC_FREEBSD
 nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
 endif
+if AARCH64
+nodist_libffi_la_SOURCES += src/aarch64/sysv.S src/aarch64/ffi.c
+endif
 if ARM
 nodist_libffi_la_SOURCES += src/arm/sysv.S src/arm/ffi.c
 if FFI_EXEC_TRAMPOLINE_TABLE
diff --git a/Makefile.in b/Makefile.in
index f5c10af..c4f4470 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -65,9 +65,10 @@ target_triplet = @target@
 @SH64_TRUE@am__append_27 = src/sh64/sysv.S src/sh64/ffi.c
 @PA_LINUX_TRUE@am__append_28 = src/pa/linux.S src/pa/ffi.c
 @PA_HPUX_TRUE@am__append_29 = src/pa/hpux32.S src/pa/ffi.c
+@AARCH64_TRUE@am__append_30 = src/aarch64/sysv.S src/aarch64/ffi.c
 # Build debug. Define FFI_DEBUG on the commandline so that, when building with
 # MSVC, it can link against the debug CRT.
-@FFI_DEBUG_TRUE@am__append_30 = -DFFI_DEBUG
+@FFI_DEBUG_TRUE@am__append_31 = -DFFI_DEBUG
 subdir = .
 DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
 	$(srcdir)/Makefile.in $(srcdir)/doc/stamp-vti \
@@ -156,6 +157,7 @@ am_libffi_la_OBJECTS = src/prep_cif.lo src/types.lo src/raw_api.lo \
 @SH64_TRUE@am__objects_27 = src/sh64/sysv.lo src/sh64/ffi.lo
 @PA_LINUX_TRUE@am__objects_28 = src/pa/linux.lo src/pa/ffi.lo
 @PA_HPUX_TRUE@am__objects_29 = src/pa/hpux32.lo src/pa/ffi.lo
+@AARCH64_TRUE@am__objects_29 = src/aarch64/sysv.lo src/aarch64/ffi.lo
 nodist_libffi_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
 	$(am__objects_3) $(am__objects_4) $(am__objects_5) \
 	$(am__objects_6) $(am__objects_7) $(am__objects_8) \
@@ -165,17 +167,18 @@ nodist_libffi_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
 	$(am__objects_18) $(am__objects_19) $(am__objects_20) \
 	$(am__objects_21) $(am__objects_22) $(am__objects_23) \
 	$(am__objects_24) $(am__objects_25) $(am__objects_26) \
-	$(am__objects_27) $(am__objects_28) $(am__objects_29)
+	$(am__objects_27) $(am__objects_28) $(am__objects_29) \
+	$(am__objects_30)
 libffi_la_OBJECTS = $(am_libffi_la_OBJECTS) \
 	$(nodist_libffi_la_OBJECTS)
 libffi_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
 	$(libffi_la_LDFLAGS) $(LDFLAGS) -o $@
 libffi_convenience_la_LIBADD =
-am__objects_30 = src/prep_cif.lo src/types.lo src/raw_api.lo \
+am__objects_31 = src/prep_cif.lo src/types.lo src/raw_api.lo \
 	src/java_raw_api.lo src/closures.lo
-am_libffi_convenience_la_OBJECTS = $(am__objects_30)
-am__objects_31 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \
+am_libffi_convenience_la_OBJECTS = $(am__objects_31)
+am__objects_32 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \
 	$(am__objects_4) $(am__objects_5) $(am__objects_6) \
 	$(am__objects_7) $(am__objects_8) $(am__objects_9) \
 	$(am__objects_10) $(am__objects_11) $(am__objects_12) \
@@ -185,7 +188,7 @@ am__objects_31 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \
 	$(am__objects_22) $(am__objects_23) $(am__objects_24) \
 	$(am__objects_25) $(am__objects_26) $(am__objects_27) \
 	$(am__objects_28) $(am__objects_29)
-nodist_libffi_convenience_la_OBJECTS = $(am__objects_31)
+nodist_libffi_convenience_la_OBJECTS = $(am__objects_32)
 libffi_convenience_la_OBJECTS = $(am_libffi_convenience_la_OBJECTS) \
 	$(nodist_libffi_convenience_la_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@
@@ -410,6 +413,7 @@ top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = foreign subdir-objects
 SUBDIRS = include testsuite man
 EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj configure.host \
+	src/aarch64/ffi.c src/aarch64/ffitarget.h \
 	src/alpha/ffi.c src/alpha/osf.S src/alpha/ffitarget.h \
 	src/arm/ffi.c src/arm/sysv.S src/arm/ffitarget.h \
 	src/avr32/ffi.c src/avr32/sysv.S src/avr32/ffitarget.h \
@@ -501,10 +505,11 @@ nodist_libffi_la_SOURCES = $(am__append_1) $(am__append_2) \
 	$(am__append_18) $(am__append_19) $(am__append_20) \
 	$(am__append_21) $(am__append_22) $(am__append_23) \
 	$(am__append_24) $(am__append_25) $(am__append_26) \
-	$(am__append_27) $(am__append_28) $(am__append_29)
+	$(am__append_27) $(am__append_28) $(am__append_29) \
+	$(am__append_30)
 libffi_convenience_la_SOURCES = $(libffi_la_SOURCES)
 nodist_libffi_convenience_la_SOURCES = $(nodist_libffi_la_SOURCES)
-AM_CFLAGS = -g $(am__append_30)
+AM_CFLAGS = -g $(am__append_31)
 libffi_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` $(LTLDFLAGS) $(AM_LTLDFLAGS)
 AM_CPPFLAGS = -I. -I$(top_srcdir)/include -Iinclude -I$(top_srcdir)/src -DFFI_BUILDING
 AM_CCASFLAGS = $(AM_CPPFLAGS) -g
@@ -640,6 +645,16 @@ src/bfin/ffi.lo: src/bfin/$(am__dirstamp) \
 	src/bfin/$(DEPDIR)/$(am__dirstamp)
 src/bfin/sysv.lo: src/bfin/$(am__dirstamp) \
 	src/bfin/$(DEPDIR)/$(am__dirstamp)
+src/aarch64/$(am__dirstamp):
+	@$(MKDIR_P) src/aarch64
+	@: > src/aarch64/$(am__dirstamp)
+src/aarch64/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) src/aarch64/$(DEPDIR)
+	@: > src/aarch64/$(DEPDIR)/$(am__dirstamp)
+src/aarch64/ffi.lo: src/aarch64/$(am__dirstamp) \
+	src/aarch64/$(DEPDIR)/$(am__dirstamp)
+src/aarch64/sysv.lo: src/aarch64/$(am__dirstamp) \
+	src/aarch64/$(DEPDIR)/$(am__dirstamp)
 src/x86/$(am__dirstamp):
 	@$(MKDIR_P) src/x86
 	@: > src/x86/$(am__dirstamp)
@@ -859,6 +874,10 @@ mostlyclean-compile:
 	-rm -f src/bfin/ffi.lo
 	-rm -f src/bfin/sysv.$(OBJEXT)
 	-rm -f src/bfin/sysv.lo
+	-rm -f src/aarch64/ffi.$(OBJEXT)
+	-rm -f src/aarch64/ffi.lo
+	-rm -f src/aarch64/sysv.$(OBJEXT)
+	-rm -f src/aarch64/sysv.lo
 	-rm -f src/closures.$(OBJEXT)
 	-rm -f src/closures.lo
 	-rm -f src/cris/ffi.$(OBJEXT)
@@ -973,6 +992,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/prep_cif.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/raw_api.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/types.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/aarch64/$(DEPDIR)/ffi.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@src/aarch64/$(DEPDIR)/sysv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/alpha/$(DEPDIR)/ffi.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/alpha/$(DEPDIR)/osf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@src/arm/$(DEPDIR)/ffi.Plo@am__quote@
@@ -1083,6 +1104,7 @@ mostlyclean-libtool:
 clean-libtool:
 	-rm -rf .libs _libs
 	-rm -rf src/.libs src/_libs
+	-rm -rf src/aarch64/.libs src/aarch64/_libs
 	-rm -rf src/alpha/.libs src/alpha/_libs
 	-rm -rf src/arm/.libs src/arm/_libs
 	-rm -rf src/avr32/.libs src/avr32/_libs
@@ -1635,6 +1657,8 @@ distclean-generic:
 	-rm -f doc/$(am__dirstamp)
 	-rm -f src/$(DEPDIR)/$(am__dirstamp)
 	-rm -f src/$(am__dirstamp)
+	-rm -f src/aarch64/$(DEPDIR)/$(am__dirstamp)
+	-rm -f src/aarch64/$(am__dirstamp)
 	-rm -f src/alpha/$(DEPDIR)/$(am__dirstamp)
 	-rm -f src/alpha/$(am__dirstamp)
 	-rm -f src/arm/$(DEPDIR)/$(am__dirstamp)
@@ -1682,7 +1706,7 @@ clean-am: clean-aminfo clean-generic clean-libLTLIBRARIES \
 
 distclean: distclean-recursive
 	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-	-rm -rf src/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/x86/$(DEPDIR)
+	-rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/x86/$(DEPDIR)
 	-rm -f Makefile
 distclean-am: clean-am distclean-compile distclean-generic \
 	distclean-hdr distclean-libtool distclean-tags
@@ -1802,7 +1826,7 @@ installcheck-am:
 maintainer-clean: maintainer-clean-recursive
 	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
 	-rm -rf $(top_srcdir)/autom4te.cache
-	-rm -rf src/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/x86/$(DEPDIR)
+	-rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/x86/$(DEPDIR)
 	-rm -f Makefile
 maintainer-clean-am: distclean-am maintainer-clean-aminfo \
 	maintainer-clean-generic maintainer-clean-vti
diff --git a/README b/README
index ec240a4..9aa99f4 100644
--- a/README
+++ b/README
@@ -51,6 +51,7 @@ tested:
 |--------------+------------------|
 | Architecture | Operating System |
 |--------------+------------------|
+| AArch64         | Linux            |
 | Alpha        | Linux            |
 | Alpha        | Tru64            |
 | ARM          | Linux            |
@@ -151,6 +152,7 @@ See the ChangeLog files for details.
 
 3.0.12 XXX-XX-XX
         Add Blackfin support.
+	Add AArch64 support.
 
 3.0.11 Apr-11-12
         Add support for variadic functions (ffi_prep_cif_var).
@@ -320,6 +322,7 @@ Thorup.
 Major processor architecture ports were contributed by the following
 developers:
 
+aarch64		Marcus Shawcroft, James Greenhalgh
 alpha		Richard Henderson
 arm		Raffaele Sena
 cris		Simon Posnjak, Hans-Peter Nilsson
diff --git a/configure b/configure
index 4ccba55..419275b 100755
--- a/configure
+++ b/configure
@@ -649,6 +649,8 @@ AVR32_FALSE
 AVR32_TRUE
 ARM_FALSE
 ARM_TRUE
+AARCH64_FALSE
+AARCH64_TRUE
 POWERPC_FREEBSD_FALSE
 POWERPC_FREEBSD_TRUE
 POWERPC_DARWIN_FALSE
@@ -1478,7 +1480,7 @@ Optional Features:
 Optional Packages:
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
   --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
-  --with-pic              try to use only PIC/non-PIC objects [default=use
+  --with-pic[=PKGS]       try to use only PIC/non-PIC objects [default=use
                           both]
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
   --with-sysroot=DIR Search for dependent libraries within DIR
@@ -5276,6 +5278,11 @@ else
     lt_cv_sys_max_cmd_len=196608
     ;;
 
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
   osf*)
     # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
     # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -5315,7 +5322,7 @@ else
       # If test is not a shell built-in, we'll probably end up computing a
       # maximum length that is only half of the actual maximum length, but
       # we can't tell.
-      while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
 	         = "X$teststring$teststring"; } >/dev/null 2>&1 &&
 	      test $i != 17 # 1/2 MB should be enough
       do
@@ -5744,7 +5751,7 @@ irix5* | irix6* | nonstopux*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-# This must be Linux ELF.
+# This must be glibc/ELF.
 linux* | k*bsd*-gnu | kopensolaris*-gnu)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -6538,6 +6545,7 @@ for ac_symprfx in "" "_"; do
     # which start with @ or ?.
     lt_cv_sys_global_symbol_pipe="$AWK '"\
 "     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
 "     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
 "     \$ 0!~/External *\|/{next};"\
 "     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -6926,7 +6934,7 @@ $as_echo "$lt_cv_cc_needs_belf" >&6; }
     CFLAGS="$SAVE_CFLAGS"
   fi
   ;;
-sparc*-*solaris*)
+*-*solaris*)
   # Find out which ABI we are using.
   echo 'int i;' > conftest.$ac_ext
   if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -6937,7 +6945,20 @@ sparc*-*solaris*)
     case `/usr/bin/file conftest.o` in
     *64-bit*)
       case $lt_cv_prog_gnu_ld in
-      yes*) LD="${LD-ld} -m elf64_sparc" ;;
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
       *)
 	if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
 	  LD="${LD-ld} -64"
@@ -7577,7 +7598,13 @@ else
 	$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
 	  -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
         _lt_result=$?
-	if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+	# If there is a non-empty error log, and "single_module"
+	# appears in it, assume the flag caused a linker warning
+        if test -s conftest.err && $GREP single_module conftest.err; then
+	  cat conftest.err >&5
+	# Otherwise, if the output was created with a 0 exit code from
+	# the compiler, it worked.
+	elif test -f libconftest.dylib && test $_lt_result -eq 0; then
 	  lt_cv_apple_cc_single_mod=yes
 	else
 	  cat conftest.err >&5
@@ -7588,6 +7615,7 @@ else
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
 $as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
 $as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
 if ${lt_cv_ld_exported_symbols_list+:} false; then :
@@ -7620,6 +7648,7 @@ rm -f core conftest.err conftest.$ac_objext \
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
 $as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
 $as_echo_n "checking for -force_load linker flag... " >&6; }
 if ${lt_cv_ld_force_load+:} false; then :
@@ -7641,7 +7670,9 @@ _LT_EOF
       echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
       $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
       _lt_result=$?
-      if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+      if test -s conftest.err && $GREP force_load conftest.err; then
+	cat conftest.err >&5
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
 	lt_cv_ld_force_load=yes
       else
 	cat conftest.err >&5
@@ -8046,7 +8077,22 @@ fi
 
 # Check whether --with-pic was given.
 if test "${with_pic+set}" = set; then :
-  withval=$with_pic; pic_mode="$withval"
+  withval=$with_pic; lt_p=${PACKAGE-default}
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)
+      pic_mode=default
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+	IFS="$lt_save_ifs"
+	if test "X$lt_pkg" = "X$lt_p"; then
+	  pic_mode=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
 else
   pic_mode=default
 fi
@@ -8124,6 +8170,10 @@ LIBTOOL='$(SHELL) $(top_builddir)/libtool'
 
 
 
+
+
+
+
 test -z "$LN_S" && LN_S="ln -s"
 
 
@@ -8579,7 +8629,9 @@ lt_prog_compiler_static=
     case $cc_basename in
     nvcc*) # Cuda Compiler Driver 2.2
       lt_prog_compiler_wl='-Xlinker '
-      lt_prog_compiler_pic='-Xcompiler -fPIC'
+      if test -n "$lt_prog_compiler_pic"; then
+        lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+      fi
       ;;
     esac
   else
@@ -8670,18 +8722,33 @@ lt_prog_compiler_static=
 	;;
       *)
 	case `$CC -V 2>&1 | sed 5q` in
-	*Sun\ F* | *Sun*Fortran*)
+	*Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
 	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
 	  lt_prog_compiler_pic='-KPIC'
 	  lt_prog_compiler_static='-Bstatic'
 	  lt_prog_compiler_wl=''
 	  ;;
+	*Sun\ F* | *Sun*Fortran*)
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl='-Qoption ld '
+	  ;;
 	*Sun\ C*)
 	  # Sun C 5.9
 	  lt_prog_compiler_pic='-KPIC'
 	  lt_prog_compiler_static='-Bstatic'
 	  lt_prog_compiler_wl='-Wl,'
 	  ;;
+        *Intel*\ [CF]*Compiler*)
+	  lt_prog_compiler_wl='-Wl,'
+	  lt_prog_compiler_pic='-fPIC'
+	  lt_prog_compiler_static='-static'
+	  ;;
+	*Portland\ Group*)
+	  lt_prog_compiler_wl='-Wl,'
+	  lt_prog_compiler_pic='-fpic'
+	  lt_prog_compiler_static='-Bstatic'
+	  ;;
 	esac
 	;;
       esac
@@ -9043,7 +9110,6 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
   hardcode_direct=no
   hardcode_direct_absolute=no
   hardcode_libdir_flag_spec=
-  hardcode_libdir_flag_spec_ld=
   hardcode_libdir_separator=
   hardcode_minus_L=no
   hardcode_shlibpath_var=unsupported
@@ -9293,8 +9359,7 @@ _LT_EOF
 	xlf* | bgf* | bgxlf* | mpixlf*)
 	  # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
 	  whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
-	  hardcode_libdir_flag_spec=
-	  hardcode_libdir_flag_spec_ld='-rpath $libdir'
+	  hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
 	  archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
 	  if test "x$supports_anon_versioning" = xyes; then
 	    archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
@@ -9673,6 +9738,7 @@ fi
 	# The linker will not automatically build a static lib if we build a DLL.
 	# _LT_TAGVAR(old_archive_from_new_cmds, )='true'
 	enable_shared_with_static_runtimes=yes
+	exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
 	export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
 	# Don't use ranlib
 	old_postinstall_cmds='chmod 644 $oldlib'
@@ -9718,6 +9784,7 @@ fi
   hardcode_shlibpath_var=unsupported
   if test "$lt_cv_ld_force_load" = "yes"; then
     whole_archive_flag_spec='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
   else
     whole_archive_flag_spec=''
   fi
@@ -9746,10 +9813,6 @@ fi
       hardcode_shlibpath_var=no
       ;;
 
-    freebsd1*)
-      ld_shlibs=no
-      ;;
-
     # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
     # support.  Future versions do this automatically, but an explicit c++rt0.o
     # does not break anything, and helps significantly (at the cost of a little
@@ -9762,7 +9825,7 @@ fi
       ;;
 
     # Unfortunately, older versions of FreeBSD 2 do not have this feature.
-    freebsd2*)
+    freebsd2.*)
       archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
       hardcode_direct=yes
       hardcode_minus_L=yes
@@ -9801,7 +9864,6 @@ fi
       fi
       if test "$with_gnu_ld" = no; then
 	hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
-	hardcode_libdir_flag_spec_ld='+b $libdir'
 	hardcode_libdir_separator=:
 	hardcode_direct=yes
 	hardcode_direct_absolute=yes
@@ -10425,11 +10487,6 @@ esac
 
 
 
-
-
-
-
-
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
 $as_echo_n "checking dynamic linker characteristics... " >&6; }
 
@@ -10519,7 +10576,7 @@ need_version=unknown
 
 case $host_os in
 aix3*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
   shlibpath_var=LIBPATH
 
@@ -10528,7 +10585,7 @@ aix3*)
   ;;
 
 aix[4-9]*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   hardcode_into_libs=yes
@@ -10593,7 +10650,7 @@ beos*)
   ;;
 
 bsdi[45]*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
   soname_spec='${libname}${release}${shared_ext}$major'
@@ -10732,7 +10789,7 @@ darwin* | rhapsody*)
   ;;
 
 dgux*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -10740,10 +10797,6 @@ dgux*)
   shlibpath_var=LD_LIBRARY_PATH
   ;;
 
-freebsd1*)
-  dynamic_linker=no
-  ;;
-
 freebsd* | dragonfly*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
@@ -10751,7 +10804,7 @@ freebsd* | dragonfly*)
     objformat=`/usr/bin/objformat`
   else
     case $host_os in
-    freebsd[123]*) objformat=aout ;;
+    freebsd[23].*) objformat=aout ;;
     *) objformat=elf ;;
     esac
   fi
@@ -10769,7 +10822,7 @@ freebsd* | dragonfly*)
   esac
   shlibpath_var=LD_LIBRARY_PATH
   case $host_os in
-  freebsd2*)
+  freebsd2.*)
     shlibpath_overrides_runpath=yes
     ;;
   freebsd3.[01]* | freebsdelf3.[01]*)
@@ -10789,17 +10842,18 @@ freebsd* | dragonfly*)
   ;;
 
 gnu*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
   soname_spec='${libname}${release}${shared_ext}$major'
   shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
   hardcode_into_libs=yes
   ;;
 
 haiku*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   dynamic_linker="$host_os runtime_loader"
@@ -10860,7 +10914,7 @@ hpux9* | hpux10* | hpux11*)
   ;;
 
 interix[3-9]*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -10876,7 +10930,7 @@ irix5* | irix6* | nonstopux*)
     nonstopux*) version_type=nonstopux ;;
     *)
 	if test "$lt_cv_prog_gnu_ld" = yes; then
-		version_type=linux
+		version_type=linux # correct to gnu/linux during the next big refactor
 	else
 		version_type=irix
 	fi ;;
@@ -10913,9 +10967,9 @@ linux*oldld* | linux*aout* | linux*coff*)
   dynamic_linker=no
   ;;
 
-# This must be Linux ELF.
+# This must be glibc/ELF.
 linux* | k*bsd*-gnu | kopensolaris*-gnu)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11001,7 +11055,7 @@ netbsd*)
   ;;
 
 newsos6)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
   shlibpath_var=LD_LIBRARY_PATH
   shlibpath_overrides_runpath=yes
@@ -11070,7 +11124,7 @@ rdos*)
   ;;
 
 solaris*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11095,7 +11149,7 @@ sunos4*)
   ;;
 
 sysv4 | sysv4.3*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
   soname_spec='${libname}${release}${shared_ext}$major'
   shlibpath_var=LD_LIBRARY_PATH
@@ -11119,7 +11173,7 @@ sysv4 | sysv4.3*)
 
 sysv4*MP*)
   if test -d /usr/nec ;then
-    version_type=linux
+    version_type=linux # correct to gnu/linux during the next big refactor
     library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
     soname_spec='$libname${shared_ext}.$major'
     shlibpath_var=LD_LIBRARY_PATH
@@ -11150,7 +11204,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
 
 tpf*)
   # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   need_lib_prefix=no
   need_version=no
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11160,7 +11214,7 @@ tpf*)
   ;;
 
 uts4*)
-  version_type=linux
+  version_type=linux # correct to gnu/linux during the next big refactor
   library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
   soname_spec='${libname}${release}${shared_ext}$major'
   shlibpath_var=LD_LIBRARY_PATH
@@ -11942,6 +11996,8 @@ CC="$lt_save_CC"
 
 
 
+
+
         ac_config_commands="$ac_config_commands libtool"
 
 
@@ -13132,6 +13188,10 @@ fi
 
 TARGETDIR="unknown"
 case "$host" in
+  aarch64*-*-*)
+	TARGET=AARCH64; TARGETDIR=aarch64
+	;;
+
   alpha*-*-*)
 	TARGET=ALPHA; TARGETDIR=alpha;
 	# Support 128-bit long double, changeable via command-line switch.
@@ -13431,6 +13491,14 @@ else
   POWERPC_FREEBSD_FALSE=
 fi
 
+ if test x$TARGET = xAARCH64; then
+  AARCH64_TRUE=
+  AARCH64_FALSE='#'
+else
+  AARCH64_TRUE='#'
+  AARCH64_FALSE=
+fi
+
  if test x$TARGET = xARM; then
   ARM_TRUE=
   ARM_FALSE='#'
@@ -14786,6 +14854,10 @@ if test -z "${POWERPC_FREEBSD_TRUE}" && test -z "${POWERPC_FREEBSD_FALSE}"; then
   as_fn_error $? "conditional \"POWERPC_FREEBSD\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${AARCH64_TRUE}" && test -z "${AARCH64_FALSE}"; then
+  as_fn_error $? "conditional \"AARCH64\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${ARM_TRUE}" && test -z "${ARM_FALSE}"; then
   as_fn_error $? "conditional \"ARM\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -15463,6 +15535,7 @@ pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
 enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
 SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
 ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
 host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
 host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
 host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
@@ -15545,7 +15618,6 @@ with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
 allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
 no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
 hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld='`$ECHO "$hardcode_libdir_flag_spec_ld" | $SED "$delay_single_quote_subst"`'
 hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
 hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
 hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
@@ -15601,6 +15673,7 @@ _LTECHO_EOF'
 # Quote evaled strings.
 for var in SHELL \
 ECHO \
+PATH_SEPARATOR \
 SED \
 GREP \
 EGREP \
@@ -15651,7 +15724,6 @@ with_gnu_ld \
 allow_undefined_flag \
 no_undefined_flag \
 hardcode_libdir_flag_spec \
-hardcode_libdir_flag_spec_ld \
 hardcode_libdir_separator \
 exclude_expsyms \
 include_expsyms \
@@ -16633,8 +16705,8 @@ $as_echo X"$file" |
 # NOTE: Changes made to this file will be lost: look at ltmain.sh.
 #
 #   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-#                 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-#                 Inc.
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
 #   Written by Gordon Matzigkeit, 1996
 #
 #   This file is part of GNU Libtool.
@@ -16688,6 +16760,9 @@ SHELL=$lt_SHELL
 # An echo program that protects backslashes.
 ECHO=$lt_ECHO
 
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
 # The host system.
 host_alias=$host_alias
 host=$host
@@ -16989,10 +17064,6 @@ no_undefined_flag=$lt_no_undefined_flag
 # This must work even if \$libdir does not exist
 hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
 
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking.  This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld
-
 # Whether we need a single "-rpath" flag with a separated argument.
 hardcode_libdir_separator=$lt_hardcode_libdir_separator
 
diff --git a/configure.ac b/configure.ac
index 9b946a2..8c3f40c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,10 @@ AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
 
 TARGETDIR="unknown"
 case "$host" in
+  aarch64*-*-*)
+	TARGET=AARCH64; TARGETDIR=aarch64
+	;;
+
   alpha*-*-*)
 	TARGET=ALPHA; TARGETDIR=alpha;
 	# Support 128-bit long double, changeable via command-line switch.
@@ -233,6 +237,7 @@ AM_CONDITIONAL(POWERPC, test x$TARGET = xPOWERPC)
 AM_CONDITIONAL(POWERPC_AIX, test x$TARGET = xPOWERPC_AIX)
 AM_CONDITIONAL(POWERPC_DARWIN, test x$TARGET = xPOWERPC_DARWIN)
 AM_CONDITIONAL(POWERPC_FREEBSD, test x$TARGET = xPOWERPC_FREEBSD)
+AM_CONDITIONAL(AARCH64, test x$TARGET = xAARCH64)
 AM_CONDITIONAL(ARM, test x$TARGET = xARM)
 AM_CONDITIONAL(AVR32, test x$TARGET = xAVR32)
 AM_CONDITIONAL(LIBFFI_CRIS, test x$TARGET = xLIBFFI_CRIS)
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
new file mode 100644
index 0000000..1405665
--- /dev/null
+++ b/src/aarch64/ffi.c
@@ -0,0 +1,1076 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#include <stdio.h>
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* Stack alignment requirement in bytes */
+#define AARCH64_STACK_ALIGN 16
+
+#define N_X_ARG_REG 8
+#define N_V_ARG_REG 8
+
+#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
+
+union _d
+{
+  UINT64 d;
+  UINT32 s[2];
+};
+
+struct call_context
+{
+  UINT64 x [AARCH64_N_XREG];
+  struct
+  {
+    union _d d[2];
+  } v [AARCH64_N_VREG];
+};
+
+static void *
+get_x_addr (struct call_context *context, unsigned n)
+{
+  return &context->x[n];
+}
+
+static void *
+get_s_addr (struct call_context *context, unsigned n)
+{
+#if defined __AARCH64EB__
+  return &context->v[n].d[1].s[1];
+#else
+  return &context->v[n].d[0].s[0];
+#endif
+}
+
+static void *
+get_d_addr (struct call_context *context, unsigned n)
+{
+#if defined __AARCH64EB__
+  return &context->v[n].d[1];
+#else
+  return &context->v[n].d[0];
+#endif
+}
+
+static void *
+get_v_addr (struct call_context *context, unsigned n)
+{
+  return &context->v[n];
+}
+
+/* Return the memory location at which a basic type would reside
+   were it to have been stored in register n.  */
+
+static void *
+get_basic_type_addr (unsigned short type, struct call_context *context,
+		     unsigned n)
+{
+  switch (type)
+    {
+    case FFI_TYPE_FLOAT:
+      return get_s_addr (context, n);
+    case FFI_TYPE_DOUBLE:
+      return get_d_addr (context, n);
+    case FFI_TYPE_LONGDOUBLE:
+      return get_v_addr (context, n);
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_INT:
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      return get_x_addr (context, n);
+    default:
+      FFI_ASSERT (0);
+      return NULL;
+    }
+}
+
+/* Return the alignment width for each of the basic types.  */
+
+static size_t
+get_basic_type_alignment (unsigned short type)
+{
+  switch (type)
+    {
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+      return sizeof (UINT64);
+    case FFI_TYPE_LONGDOUBLE:
+      return sizeof (long double);
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      return sizeof (UINT64);
+
+    default:
+      FFI_ASSERT (0);
+      return 0;
+    }
+}
+
+/* Return the size in bytes for each of the basic types.  */
+
+static size_t
+get_basic_type_size (unsigned short type)
+{
+  switch (type)
+    {
+    case FFI_TYPE_FLOAT:
+      return sizeof (UINT32);
+    case FFI_TYPE_DOUBLE:
+      return sizeof (UINT64);
+    case FFI_TYPE_LONGDOUBLE:
+      return sizeof (long double);
+    case FFI_TYPE_UINT8:
+      return sizeof (UINT8);
+    case FFI_TYPE_SINT8:
+      return sizeof (SINT8);
+    case FFI_TYPE_UINT16:
+      return sizeof (UINT16);
+    case FFI_TYPE_SINT16:
+      return sizeof (SINT16);
+    case FFI_TYPE_UINT32:
+      return sizeof (UINT32);
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      return sizeof (SINT32);
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_UINT64:
+      return sizeof (UINT64);
+    case FFI_TYPE_SINT64:
+      return sizeof (SINT64);
+
+    default:
+      FFI_ASSERT (0);
+      return 0;
+    }
+}
+
+extern void
+ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
+			    extended_cif *),
+               struct call_context *context,
+               extended_cif *,
+               unsigned,
+               void (*fn)(void));
+
+extern void
+ffi_closure_SYSV (ffi_closure *);
+
+/* Test for an FFI floating point representation.  */
+
+static unsigned
+is_floating_type (unsigned short type)
+{
+  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
+	  || type == FFI_TYPE_LONGDOUBLE);
+}
+
+/* Test for a homogeneous structure.  */
+
+static unsigned short
+get_homogeneous_type (ffi_type *ty)
+{
+  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
+    {
+      unsigned i;
+      unsigned short candidate_type
+	= get_homogeneous_type (ty->elements[0]);
+      for (i =1; ty->elements[i]; i++)
+	{
+	  unsigned short iteration_type = 0;
+	  /* If we have a nested struct, we must find its homogeneous type.
+	     If that fits with our candidate type, we are still
+	     homogeneous.  */
+	  if (ty->elements[i]->type == FFI_TYPE_STRUCT
+	      && ty->elements[i]->elements)
+	    {
+	      iteration_type = get_homogeneous_type (ty->elements[i]);
+	    }
+	  else
+	    {
+	      iteration_type = ty->elements[i]->type;
+	    }
+
+	  /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
+	  if (candidate_type != iteration_type)
+	    return FFI_TYPE_STRUCT;
+	}
+      return candidate_type;
+    }
+
+  /* Base case, we have no more levels of nesting, so we
+     are a basic type, and so, trivially homogeneous in that type.  */
+  return ty->type;
+}
+
+/* Determine the number of elements within a STRUCT.
+
+   Note, we must handle nested structs.
+
+   If ty is not a STRUCT this function will return 0.  */
+
+static unsigned
+element_count (ffi_type *ty)
+{
+  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
+    {
+      unsigned n;
+      unsigned elems = 0;
+      for (n = 0; ty->elements[n]; n++)
+	{
+	  if (ty->elements[n]->type == FFI_TYPE_STRUCT
+	      && ty->elements[n]->elements)
+	    elems += element_count (ty->elements[n]);
+	  else
+	    elems++;
+	}
+      return elems;
+    }
+  return 0;
+}
+
+/* Test for a homogeneous floating point aggregate.
+
+   A homogeneous floating point aggregate is a homogeneous aggregate of
+   a half- single- or double- precision floating point type with one
+   to four elements.  Note that this includes nested structs of the
+   basic type.  */
+
+static int
+is_hfa (ffi_type *ty)
+{
+  if (ty->type == FFI_TYPE_STRUCT
+      && ty->elements[0]
+      && is_floating_type (get_homogeneous_type (ty)))
+    {
+      unsigned n = element_count (ty);
+      return n >= 1 && n <= 4;
+    }
+  return 0;
+}
+
+/* Test if an ffi_type is a candidate for passing in a register.
+
+   This test does not check that sufficient registers of the
+   appropriate class are actually available, merely that IFF
+   sufficient registers are available then the argument will be passed
+   in register(s).
+
+   Note that an ffi_type that is deemed to be a register candidate
+   will always be returned in registers.
+
+   Returns 1 if a register candidate else 0.  */
+
+static int
+is_register_candidate (ffi_type *ty)
+{
+  switch (ty->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT64:
+      return 1;
+
+    case FFI_TYPE_STRUCT:
+      if (is_hfa (ty))
+        {
+          return 1;
+        }
+      else if (ty->size > 16)
+        {
+          /* Too large. Will be replaced with a pointer to memory. The
+             pointer MAY be passed in a register, but the value will
+             not. This test specifically fails since the argument will
+             never be passed by value in registers. */
+          return 0;
+        }
+      else
+        {
+          /* Might be passed in registers depending on the number of
+             registers required. */
+          return (ty->size + 7) / 8 < N_X_ARG_REG;
+        }
+      break;
+
+    default:
+      FFI_ASSERT (0);
+      break;
+    }
+
+  return 0;
+}
+
+/* Test if an ffi_type argument or result is a candidate for a vector
+   register.  */
+
+static int
+is_v_register_candidate (ffi_type *ty)
+{
+  return is_floating_type (ty->type)
+	   || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
+}
+
+/* Representation of the procedure call argument marshalling
+   state.
+
+   The terse state variable names match the names used in the AARCH64
+   PCS. */
+
+struct arg_state
+{
+  unsigned ngrn;                /* Next general-purpose register number. */
+  unsigned nsrn;                /* Next vector register number. */
+  unsigned nsaa;                /* Next stack offset. */
+};
+
+/* Initialize a procedure call argument marshalling state.  */
+static void
+arg_init (struct arg_state *state, unsigned call_frame_size)
+{
+  state->ngrn = 0;
+  state->nsrn = 0;
+  state->nsaa = 0;
+}
+
+/* Return the number of available consecutive core argument
+   registers.  */
+
+static unsigned
+available_x (struct arg_state *state)
+{
+  return N_X_ARG_REG - state->ngrn;
+}
+
+/* Return the number of available consecutive vector argument
+   registers.  */
+
+static unsigned
+available_v (struct arg_state *state)
+{
+  return N_V_ARG_REG - state->nsrn;
+}
+
+static void *
+allocate_to_x (struct call_context *context, struct arg_state *state)
+{
+  FFI_ASSERT (state->ngrn < N_X_ARG_REG)
+  return get_x_addr (context, (state->ngrn)++);
+}
+
+static void *
+allocate_to_s (struct call_context *context, struct arg_state *state)
+{
+  FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+  return get_s_addr (context, (state->nsrn)++);
+}
+
+static void *
+allocate_to_d (struct call_context *context, struct arg_state *state)
+{
+  FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+  return get_d_addr (context, (state->nsrn)++);
+}
+
+static void *
+allocate_to_v (struct call_context *context, struct arg_state *state)
+{
+  FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+  return get_v_addr (context, (state->nsrn)++);
+}
+
+/* Allocate an aligned slot on the stack and return a pointer to it.  */
+static void *
+allocate_to_stack (struct arg_state *state, void *stack, unsigned alignment,
+		   unsigned size)
+{
+  void *allocation;
+
+  /* Round up the NSAA to the larger of 8 or the natural
+     alignment of the argument's type.  */
+  state->nsaa = ALIGN (state->nsaa, alignment);
+  state->nsaa = ALIGN (state->nsaa, alignment);
+  state->nsaa = ALIGN (state->nsaa, 8);
+
+  allocation = stack + state->nsaa;
+
+  state->nsaa += size;
+  return allocation;
+}
+
+static void
+copy_basic_type (void *dest, void *source, unsigned short type)
+{
+  /* This is neccessary to ensure that basic types are copied
+     sign extended to 64-bits as libffi expects.  */
+  switch (type)
+    {
+    case FFI_TYPE_FLOAT:
+      *(float *) dest = *(float *) source;
+      break;
+    case FFI_TYPE_DOUBLE:
+      *(double *) dest = *(double *) source;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      *(long double *) dest = *(long double *) source;
+      break;
+    case FFI_TYPE_UINT8:
+      *(ffi_arg *) dest = *(UINT8 *) source;
+      break;
+    case FFI_TYPE_SINT8:
+      *(ffi_sarg *) dest = *(SINT8 *) source;
+      break;
+    case FFI_TYPE_UINT16:
+      *(ffi_arg *) dest = *(UINT16 *) source;
+      break;
+    case FFI_TYPE_SINT16:
+      *(ffi_sarg *) dest = *(SINT16 *) source;
+      break;
+    case FFI_TYPE_UINT32:
+      *(ffi_arg *) dest = *(UINT32 *) source;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      *(ffi_sarg *) dest = *(SINT32 *) source;
+      break;
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_UINT64:
+      *(ffi_arg *) dest = *(UINT64 *) source;
+      break;
+    case FFI_TYPE_SINT64:
+      *(ffi_sarg *) dest = *(SINT64 *) source;
+      break;
+
+    default:
+      FFI_ASSERT (0);
+    }
+}
+
+static void
+copy_hfa_to_reg_or_stack (void *memory,
+			  ffi_type *ty,
+			  struct call_context *context,
+			  unsigned char *stack,
+			  struct arg_state *state)
+{
+  unsigned elems = element_count (ty);
+  if (available_v (state) < elems)
+    {
+      /* There are insufficient V registers. Further V register allocations
+	 are prevented, the NSAA is adjusted (by allocate_to_stack ())
+	 and the argument is copied to memory at the adjusted NSAA.  */
+      state->nsrn = N_V_ARG_REG;
+      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
+	      memory,
+	      ty->size);
+    }
+  else
+    {
+      int i;
+      unsigned short type = get_homogeneous_type (ty);
+      unsigned elems = element_count (ty);
+      for (i = 0; i < elems; i++)
+	{
+	  void *reg = allocate_to_v (context, state);
+	  copy_basic_type (reg, memory, type);
+	  memory += get_basic_type_size (type);
+	}
+    }
+}
+
+/* Either allocate an appropriate register for the argument type, or if
+   none are available, allocate a stack slot and return a pointer
+   to the allocated space.  */
+
+static void *
+allocate_to_register_or_stack (struct call_context *context,
+			       unsigned char *stack,
+			       struct arg_state *state,
+			       unsigned short type)
+{
+  size_t alignment = get_basic_type_alignment (type);
+  size_t size = alignment;
+  switch (type)
+    {
+    case FFI_TYPE_FLOAT:
+      /* This is the only case for which the allocated stack size
+	 should not match the alignment of the type.  */
+      size = sizeof (UINT32);
+      /* Fall through.  */
+    case FFI_TYPE_DOUBLE:
+      if (state->nsrn < N_V_ARG_REG)
+	return allocate_to_d (context, state);
+      state->nsrn = N_V_ARG_REG;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      if (state->nsrn < N_V_ARG_REG)
+	return allocate_to_v (context, state);
+      state->nsrn = N_V_ARG_REG;
+      break;
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_INT:
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      if (state->ngrn < N_X_ARG_REG)
+	return allocate_to_x (context, state);
+      state->ngrn = N_X_ARG_REG;
+      break;
+    default:
+      FFI_ASSERT (0);
+    }
+
+    return allocate_to_stack (state, stack, alignment, size);
+}
+
+/* Copy a value to an appropriate register, or if none are
+   available, to the stack.  */
+
+static void
+copy_to_register_or_stack (struct call_context *context,
+			   unsigned char *stack,
+			   struct arg_state *state,
+			   void *value,
+			   unsigned short type)
+{
+  copy_basic_type (
+	  allocate_to_register_or_stack (context, stack, state, type),
+	  value,
+	  type);
+}
+
+/* Marshall the arguments from FFI representation to procedure call
+   context and stack.  */
+
+static unsigned
+aarch64_prep_args (struct call_context *context, unsigned char *stack,
+		   extended_cif *ecif)
+{
+  int i;
+  struct arg_state state;
+
+  arg_init (&state, ALIGN(ecif->cif->bytes, 16));
+
+  for (i = 0; i < ecif->cif->nargs; i++)
+    {
+      ffi_type *ty = ecif->cif->arg_types[i];
+      switch (ty->type)
+	{
+	case FFI_TYPE_VOID:
+	  FFI_ASSERT (0);
+	  break;
+
+	/* If the argument is a basic type the argument is allocated to an
+	   appropriate register, or if none are available, to the stack.  */
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  copy_to_register_or_stack (context, stack, &state,
+				     ecif->avalue[i], ty->type);
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if (is_hfa (ty))
+	    {
+	      copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
+					stack, &state);
+	    }
+	  else if (ty->size > 16)
+	    {
+	      /* If the argument is a composite type that is larger than 16
+		 bytes, then the argument has been copied to memory, and
+		 the argument is replaced by a pointer to the copy.  */
+
+	      copy_to_register_or_stack (context, stack, &state,
+					 &(ecif->avalue[i]), FFI_TYPE_POINTER);
+	    }
+	  else if (available_x (&state) >= (ty->size + 7) / 8)
+	    {
+	      /* If the argument is a composite type and the size in
+		 double-words is not more than the number of available
+		 X registers, then the argument is copied into consecutive
+		 X registers.  */
+	      int j;
+	      for (j = 0; j < (ty->size + 7) / 8; j++)
+		{
+		  memcpy (allocate_to_x (context, &state),
+			  &(((UINT64 *) ecif->avalue[i])[j]),
+			  sizeof (UINT64));
+		}
+	    }
+	  else
+	    {
+	      /* Otherwise, there are insufficient X registers. Further X
+		 register allocations are prevented, the NSAA is adjusted
+		 (by allocate_to_stack ()) and the argument is copied to
+		 memory at the adjusted NSAA.  */
+	      state.ngrn = N_X_ARG_REG;
+
+	      memcpy (allocate_to_stack (&state, stack, ty->alignment,
+					 ty->size), ecif->avalue + i, ty->size);
+	    }
+	  break;
+
+	default:
+	  FFI_ASSERT (0);
+	  break;
+	}
+    }
+
+  return ecif->cif->aarch64_flags;
+}
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  /* Round the stack up to a multiple of the stack alignment requirement. */
+  cif->bytes =
+    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);
+
+  /* Initialize our flags. We are interested if this CIF will touch a
+     vector register, if so we will enable context save and load to
+     those registers, otherwise not. This is intended to be friendly
+     to lazy float context switching in the kernel.  */
+  cif->aarch64_flags = 0;
+
+  if (is_v_register_candidate (cif->rtype))
+    {
+      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+    }
+  else
+    {
+      int i;
+      for (i = 0; i < cif->nargs; i++)
+        if (is_v_register_candidate (cif->arg_types[i]))
+          {
+            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+            break;
+          }
+    }
+
+  return FFI_OK;
+}
+
+/* Call a function with the provided arguments and capture the return
+   value.  */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  ecif.rvalue = rvalue;
+
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      {
+        struct call_context context;
+	unsigned stack_bytes;
+
+	/* Figure out the total amount of stack space we need, the
+	   above call frame space needs to be 16 bytes aligned to
+	   ensure correct alignment of the first object inserted in
+	   that space hence the ALIGN applied to cif->bytes.*/
+	stack_bytes = ALIGN(cif->bytes, 16);
+
+	memset (&context, 0, sizeof (context));
+        if (is_register_candidate (cif->rtype))
+          {
+            ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+            switch (cif->rtype->type)
+              {
+              case FFI_TYPE_VOID:
+              case FFI_TYPE_FLOAT:
+              case FFI_TYPE_DOUBLE:
+              case FFI_TYPE_LONGDOUBLE:
+              case FFI_TYPE_UINT8:
+              case FFI_TYPE_SINT8:
+              case FFI_TYPE_UINT16:
+              case FFI_TYPE_SINT16:
+              case FFI_TYPE_UINT32:
+              case FFI_TYPE_SINT32:
+              case FFI_TYPE_POINTER:
+              case FFI_TYPE_UINT64:
+              case FFI_TYPE_INT:
+              case FFI_TYPE_SINT64:
+		{
+		  void *addr = get_basic_type_addr (cif->rtype->type,
+						    &context, 0);
+		  copy_basic_type (rvalue, addr, cif->rtype->type);
+		  break;
+		}
+
+              case FFI_TYPE_STRUCT:
+                if (is_hfa (cif->rtype))
+		  {
+		    int j;
+		    unsigned short type = get_homogeneous_type (cif->rtype);
+		    unsigned elems = element_count (cif->rtype);
+		    for (j = 0; j < elems; j++)
+		      {
+			void *reg = get_basic_type_addr (type, &context, j);
+			copy_basic_type (rvalue, reg, type);
+			rvalue += get_basic_type_size (type);
+		      }
+		  }
+                else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
+                  {
+                    unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64));
+                    memcpy (rvalue, get_x_addr (&context, 0), size);
+                  }
+                else
+                  {
+                    FFI_ASSERT (0);
+                  }
+                break;
+
+              default:
+                FFI_ASSERT (0);
+                break;
+              }
+          }
+        else
+          {
+            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
+            ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
+			   stack_bytes, fn);
+          }
+        break;
+      }
+
+    default:
+      FFI_ASSERT (0);
+      break;
+    }
+}
+
+static unsigned char trampoline [] =
+{ 0x70, 0x00, 0x00, 0x58,	/* ldr	x16, 1f	*/
+  0x91, 0x00, 0x00, 0x10,	/* adr	x17, 2f	*/
+  0x00, 0x02, 0x1f, 0xd6	/* br	x16	*/
+};
+
+/* Build a trampoline.  */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)			\
+  ({unsigned char *__tramp = (unsigned char*)(TRAMP);			\
+    UINT64  __fun = (UINT64)(FUN);					\
+    UINT64  __ctx = (UINT64)(CTX);					\
+    UINT64  __flags = (UINT64)(FLAGS);					\
+    memcpy (__tramp, trampoline, sizeof (trampoline));			\
+    memcpy (__tramp + 12, &__fun, sizeof (__fun));			\
+    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));			\
+    memcpy (__tramp + 28, &__flags, sizeof (__flags));			\
+    __clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);		\
+  })
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
+		       cif->aarch64_flags);
+
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* Primary handler to setup and invoke a function within a closure.
+
+   A closure when invoked enters via the assembler wrapper
+   ffi_closure_SYSV(). The wrapper allocates a call context on the
+   stack, saves the interesting registers (from the perspective of
+   the calling convention) into the context then passes control to
+   ffi_closure_SYSV_inner() passing the saved context and a pointer to
+   the stack at the point ffi_closure_SYSV() was invoked.
+
+   On the return path the assembler wrapper will reload call context
+   regsiters.
+
+   ffi_closure_SYSV_inner() marshalls the call context into ffi value
+   desriptors, invokes the wrapped function, then marshalls the return
+   value back into the call context.  */
+
+void
+ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
+			void *stack)
+{
+  ffi_cif *cif = closure->cif;
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  void *rvalue = NULL;
+  int i;
+  struct arg_state state;
+
+  arg_init (&state, ALIGN(cif->bytes, 16));
+
+  for (i = 0; i < cif->nargs; i++)
+    {
+      ffi_type *ty = cif->arg_types[i];
+
+      switch (ty->type)
+	{
+	case FFI_TYPE_VOID:
+	  FFI_ASSERT (0);
+	  break;
+
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case  FFI_TYPE_FLOAT:
+	case  FFI_TYPE_DOUBLE:
+	case  FFI_TYPE_LONGDOUBLE:
+	  avalue[i] = allocate_to_register_or_stack (context, stack,
+						     &state, ty->type);
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if (is_hfa (ty))
+	    {
+	      unsigned n = element_count (ty);
+	      if (available_v (&state) < n)
+		{
+		  state.nsrn = N_V_ARG_REG;
+		  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
+						 ty->size);
+		}
+	      else
+		{
+		  switch (get_homogeneous_type (ty))
+		    {
+		    case FFI_TYPE_FLOAT:
+		      {
+			/* Eeek! We need a pointer to the structure,
+			   however the homogeneous float elements are
+			   being passed in individual S registers,
+			   therefore the structure is not represented as
+			   a contiguous sequence of bytes in our saved
+			   register context. We need to fake up a copy
+			   of the structure layed out in memory
+			   correctly. The fake can be tossed once the
+			   closure function has returned hence alloca()
+			   is sufficient. */
+			int j;
+			UINT32 *p = avalue[i] = alloca (ty->size);
+			for (j = 0; j < element_count (ty); j++)
+			  memcpy (&p[j],
+				  allocate_to_s (context, &state),
+				  sizeof (*p));
+			break;
+		      }
+
+		    case FFI_TYPE_DOUBLE:
+		      {
+			/* Eeek! We need a pointer to the structure,
+			   however the homogeneous float elements are
+			   being passed in individual S registers,
+			   therefore the structure is not represented as
+			   a contiguous sequence of bytes in our saved
+			   register context. We need to fake up a copy
+			   of the structure layed out in memory
+			   correctly. The fake can be tossed once the
+			   closure function has returned hence alloca()
+			   is sufficient. */
+			int j;
+			UINT64 *p = avalue[i] = alloca (ty->size);
+			for (j = 0; j < element_count (ty); j++)
+			  memcpy (&p[j],
+				  allocate_to_d (context, &state),
+				  sizeof (*p));
+			break;
+		      }
+
+		    case FFI_TYPE_LONGDOUBLE:
+			  memcpy (&avalue[i],
+				  allocate_to_v (context, &state),
+				  sizeof (*avalue));
+		      break;
+
+		    default:
+		      FFI_ASSERT (0);
+		      break;
+		    }
+		}
+	    }
+	  else if (ty->size > 16)
+	    {
+	      /* Replace Composite type of size greater than 16 with a
+		 pointer.  */
+	      memcpy (&avalue[i],
+		      allocate_to_register_or_stack (context, stack,
+						     &state, FFI_TYPE_POINTER),
+		      sizeof (avalue[i]));
+	    }
+	  else if (available_x (&state) >= (ty->size + 7) / 8)
+	    {
+	      avalue[i] = get_x_addr (context, state.ngrn);
+	      state.ngrn += (ty->size + 7) / 8;
+	    }
+	  else
+	    {
+	      state.ngrn = N_X_ARG_REG;
+
+	      avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
+					     ty->size);
+	    }
+	  break;
+
+	default:
+	  FFI_ASSERT (0);
+	  break;
+	}
+    }
+
+  /* Figure out where the return value will be passed, either in
+     registers or in a memory block allocated by the caller and passed
+     in x8.  */
+
+  if (is_register_candidate (cif->rtype))
+    {
+      /* Register candidates are *always* returned in registers. */
+
+      /* Allocate a scratchpad for the return value, we will let the
+         callee scrible the result into the scratch pad then move the
+         contents into the appropriate return value location for the
+         call convention.  */
+      rvalue = alloca (cif->rtype->size);
+      (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+      /* Copy the return value into the call context so that it is returned
+         as expected to our caller.  */
+      switch (cif->rtype->type)
+        {
+        case FFI_TYPE_VOID:
+          break;
+
+        case FFI_TYPE_UINT8:
+        case FFI_TYPE_UINT16:
+        case FFI_TYPE_UINT32:
+        case FFI_TYPE_POINTER:
+        case FFI_TYPE_UINT64:
+        case FFI_TYPE_SINT8:
+        case FFI_TYPE_SINT16:
+        case FFI_TYPE_INT:
+        case FFI_TYPE_SINT32:
+        case FFI_TYPE_SINT64:
+        case FFI_TYPE_FLOAT:
+        case FFI_TYPE_DOUBLE:
+        case FFI_TYPE_LONGDOUBLE:
+	  {
+	    void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
+	    copy_basic_type (addr, rvalue, cif->rtype->type);
+            break;
+	  }
+        case FFI_TYPE_STRUCT:
+          if (is_hfa (cif->rtype))
+	    {
+	      int i;
+	      unsigned short type = get_homogeneous_type (cif->rtype);
+	      unsigned elems = element_count (cif->rtype);
+	      for (i = 0; i < elems; i++)
+		{
+		  void *reg = get_basic_type_addr (type, context, i);
+		  copy_basic_type (reg, rvalue, type);
+		  rvalue += get_basic_type_size (type);
+		}
+	    }
+          else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
+            {
+              unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64)) ;
+              memcpy (get_x_addr (context, 0), rvalue, size);
+            }
+          else
+            {
+              FFI_ASSERT (0);
+            }
+          break;
+        default:
+          FFI_ASSERT (0);
+          break;
+        }
+    }
+  else
+    {
+      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
+      (closure->fun) (cif, rvalue, avalue, closure->user_data);
+    }
+}
+
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
new file mode 100644
index 0000000..6f1a348
--- /dev/null
+++ b/src/aarch64/ffitarget.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi
+  {
+    FFI_FIRST_ABI = 0,
+    FFI_SYSV,
+    FFI_LAST_ABI,
+    FFI_DEFAULT_ABI = FFI_SYSV
+  } ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TRAMPOLINE_SIZE 36
+#define FFI_NATIVE_RAW_API 0
+
+/* ---- Internal ---- */
+
+
+#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
+
+#define AARCH64_FFI_WITH_V_BIT 0
+
+#define AARCH64_N_XREG 32
+#define AARCH64_N_VREG 32
+#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
+
+#endif
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
new file mode 100644
index 0000000..b8cd421
--- /dev/null
+++ b/src/aarch64/sysv.S
@@ -0,0 +1,307 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#define cfi_restore(reg)		.cfi_restore reg
+#define cfi_def_cfa_register(reg)	.cfi_def_cfa_register reg
+
+        .text
+        .globl ffi_call_SYSV
+        .type ffi_call_SYSV, #function
+
+/* ffi_call_SYSV()
+
+   Create a stack frame, setup an argument context, call the callee
+   and extract the result.
+
+   The maximum required argument stack size is provided,
+   ffi_call_SYSV() allocates that stack space then calls the
+   prepare_fn to populate register context and stack.  The
+   argument passing registers are loaded from the register
+   context and the callee called, on return the register passing
+   register are saved back to the context.  Our caller will
+   extract the return value from the final state of the saved
+   register context.
+
+   Prototype:
+
+   extern unsigned
+   ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
+			   extended_cif *),
+                  struct call_context *context,
+                  extended_cif *,
+                  unsigned required_stack_size,
+                  void (*fn)(void));
+
+   Therefore on entry we have:
+
+   x0 prepare_fn
+   x1 &context
+   x2 &ecif
+   x3 bytes
+   x4 fn
+
+   This function uses the following stack frame layout:
+
+   ==
+                saved x30(lr)
+   x29(fp)->    saved x29(fp)
+                saved x24
+                saved x23
+                saved x22
+   sp'    ->    saved x21
+                ...
+   sp     ->    (constructed callee stack arguments)
+   ==
+
+   Voila! */
+
+#define ffi_call_SYSV_FS (8 * 4)
+
+        .cfi_startproc
+ffi_call_SYSV:
+        stp     x29, x30, [sp, #-16]!
+	cfi_adjust_cfa_offset (16)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+
+        mov     x29, sp
+	cfi_def_cfa_register (x29)
+        sub     sp, sp, #ffi_call_SYSV_FS
+
+        stp     x21, x22, [sp, 0]
+        cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
+        cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
+
+        stp     x23, x24, [sp, 16]
+        cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
+        cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
+
+        mov     x21, x1
+        mov     x22, x2
+        mov     x24, x4
+
+        /* Allocate the stack space for the actual arguments, many
+           arguments will be passed in registers, but we assume
+           worst case and allocate sufficient stack for ALL of
+           the arguments.  */
+        sub     sp, sp, x3
+
+        /* unsigned (*prepare_fn) (struct call_context *context,
+				   unsigned char *stack, extended_cif *ecif);
+	 */
+        mov     x23, x0
+        mov     x0, x1
+        mov     x1, sp
+        /* x2 already in place */
+        blr     x23
+
+        /* Preserve the flags returned.  */
+        mov     x23, x0
+
+        /* Figure out if we should touch the vector registers.  */
+        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+
+        /* Load the vector argument passing registers.  */
+        ldp     q0, q1, [x21, #8*32 +  0]
+        ldp     q2, q3, [x21, #8*32 + 32]
+        ldp     q4, q5, [x21, #8*32 + 64]
+        ldp     q6, q7, [x21, #8*32 + 96]
+1:
+        /* Load the core argument passing registers.  */
+        ldp     x0, x1, [x21,  #0]
+        ldp     x2, x3, [x21, #16]
+        ldp     x4, x5, [x21, #32]
+        ldp     x6, x7, [x21, #48]
+
+        /* Don't forget x8 which may be holding the address of a return buffer.
+	 */
+        ldr     x8,     [x21, #8*8]
+
+        blr     x24
+
+        /* Save the core argument passing registers.  */
+        stp     x0, x1, [x21,  #0]
+        stp     x2, x3, [x21, #16]
+        stp     x4, x5, [x21, #32]
+        stp     x6, x7, [x21, #48]
+
+        /* Note nothing useful ever comes back in x8!  */
+
+        /* Figure out if we should touch the vector registers.  */
+        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+
+        /* Save the vector argument passing registers.  */
+        stp     q0, q1, [x21, #8*32 + 0]
+        stp     q2, q3, [x21, #8*32 + 32]
+        stp     q4, q5, [x21, #8*32 + 64]
+        stp     q6, q7, [x21, #8*32 + 96]
+1:
+        /* All done, unwind our stack frame.  */
+        ldp     x21, x22, [x29,  # - ffi_call_SYSV_FS]
+        cfi_restore (x21)
+        cfi_restore (x22)
+
+        ldp     x23, x24, [x29,  # - ffi_call_SYSV_FS + 16]
+        cfi_restore (x23)
+        cfi_restore (x24)
+
+        mov     sp, x29
+	cfi_def_cfa_register (sp)
+
+        ldp     x29, x30, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+        cfi_restore (x29)
+        cfi_restore (x30)
+
+        ret
+
+        .cfi_endproc
+        .size ffi_call_SYSV, .-ffi_call_SYSV
+
+#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+
+/* ffi_closure_SYSV
+
+   Closure invocation glue. This is the low level code invoked directly by
+   the closure trampoline to setup and call a closure.
+
+   On entry x17 points to a struct trampoline_data, x16 has been clobbered
+   all other registers are preserved.
+
+   We allocate a call context and save the argument passing registers,
+   then invoked the generic C ffi_closure_SYSV_inner() function to do all
+   the real work, on return we load the result passing registers back from
+   the call context.
+
+   On entry
+
+   extern void
+   ffi_closure_SYSV (struct trampoline_data *);
+
+   struct trampoline_data
+   {
+        UINT64 *ffi_closure;
+        UINT64 flags;
+   };
+
+   This function uses the following stack frame layout:
+
+   ==
+                saved x30(lr)
+   x29(fp)->    saved x29(fp)
+                saved x22
+                saved x21
+                ...
+   sp     ->    call_context
+   ==
+
+   Voila!  */
+
+        .text
+        .globl ffi_closure_SYSV
+        .cfi_startproc
+ffi_closure_SYSV:
+        stp     x29, x30, [sp, #-16]!
+	cfi_adjust_cfa_offset (16)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+
+        mov     x29, sp
+
+        sub     sp, sp, #ffi_closure_SYSV_FS
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+
+        stp     x21, x22, [x29, #-16]
+        cfi_rel_offset (x21, 0)
+        cfi_rel_offset (x22, 8)
+
+        /* Load x21 with &call_context.  */
+        mov     x21, sp
+        /* Preserve our struct trampoline_data *  */
+        mov     x22, x17
+
+        /* Save the rest of the argument passing registers.  */
+        stp     x0, x1, [x21, #0]
+        stp     x2, x3, [x21, #16]
+        stp     x4, x5, [x21, #32]
+        stp     x6, x7, [x21, #48]
+        /* Don't forget we may have been given a result scratch pad address.
+	 */
+        str     x8,     [x21, #64]
+
+        /* Figure out if we should touch the vector registers.  */
+        ldr     x0, [x22, #8]
+        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+
+        /* Save the argument passing vector registers.  */
+        stp     q0, q1, [x21, #8*32 + 0]
+        stp     q2, q3, [x21, #8*32 + 32]
+        stp     q4, q5, [x21, #8*32 + 64]
+        stp     q6, q7, [x21, #8*32 + 96]
+1:
+        /* Load &ffi_closure..  */
+        ldr     x0, [x22, #0]
+        mov     x1, x21
+        /* Compute the location of the stack at the point that the
+           trampoline was called.  */
+        add     x2, x29, #16
+
+        bl      ffi_closure_SYSV_inner
+
+        /* Figure out if we should touch the vector registers.  */
+        ldr     x0, [x22, #8]
+        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+
+        /* Load the result passing vector registers.  */
+        ldp     q0, q1, [x21, #8*32 + 0]
+        ldp     q2, q3, [x21, #8*32 + 32]
+        ldp     q4, q5, [x21, #8*32 + 64]
+        ldp     q6, q7, [x21, #8*32 + 96]
+1:
+        /* Load the result passing core registers.  */
+        ldp     x0, x1, [x21,  #0]
+        ldp     x2, x3, [x21, #16]
+        ldp     x4, x5, [x21, #32]
+        ldp     x6, x7, [x21, #48]
+        /* Note nothing usefull is returned in x8.  */
+
+        /* We are done, unwind our frame.  */
+        ldp     x21, x22, [x29,  #-16]
+        cfi_restore (x21)
+        cfi_restore (x22)
+
+        mov     sp, x29
+	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+
+        ldp     x29, x30, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+        cfi_restore (x29)
+        cfi_restore (x30)
+
+        ret
+        .cfi_endproc
+        .size ffi_closure_SYSV, .-ffi_closure_SYSV
diff --git a/testsuite/lib/libffi.exp b/testsuite/lib/libffi.exp
index 4a65ed1..8ee3f15 100644
--- a/testsuite/lib/libffi.exp
+++ b/testsuite/lib/libffi.exp
@@ -203,6 +203,10 @@ proc libffi_target_compile { source dest type options } {
 
     lappend options "libs= -lffi"
 
+    if { [string match "aarch64*-*-linux*" $target_triplet] } {
+	lappend options "libs= -lpthread"
+    }
+
     verbose "options: $options"
     return [target_compile $source $dest $type $options]
 }
-- 
1.7.9.5