diff options
Diffstat (limited to 'target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c')
-rw-r--r-- | target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c | 343 |
1 files changed, 343 insertions, 0 deletions
diff --git a/target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c b/target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c new file mode 100644 index 000000000..d9c302ec6 --- /dev/null +++ b/target/linux/ubicom32/files/arch/ubicom32/lib/mem_ubicom32.c @@ -0,0 +1,343 @@ +/* + * arch/ubicom32/lib/mem_ubicom32.c + * String functions. + * + * (C) Copyright 2009, Ubicom, Inc. + * + * This file is part of the Ubicom32 Linux Kernel Port. + * + * The Ubicom32 Linux Kernel Port is free software: you can redistribute + * it and/or modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 2 of the + * License, or (at your option) any later version. + * + * The Ubicom32 Linux Kernel Port is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the Ubicom32 Linux Kernel Port. If not, + * see <http://www.gnu.org/licenses/>. + * + * Ubicom32 implementation derived from (with many thanks): + * arch/m68knommu + * arch/blackfin + * arch/parisc + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/compiler.h> + +#define LIKELY likely +#define UNLIKELY unlikely + +typedef u32_t addr_t; + +/* + * memcpy() + */ +void *memcpy(void *dest, const void *src, size_t n) +{ + void *dest_ret = dest; + + if (LIKELY((((addr_t)dest ^ (addr_t)src) & 3) == 0) && LIKELY(n > 6)) { + size_t m; + n -= (4 - (addr_t)dest) & 0x03; + m = n >> 2; + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + + " bfextu d15, %0, #2 \n\t" // d15 = (dest & 3) + " jmpne.w.f 100f \n\t" + " calli a3, 0(a3) \n\t" // 4-byte alignment + + "100: cmpi d15, #2 \n\t" + " jmpne.s.f 101f \n\t" + " move.2 (%0)2++, (%1)2++ \n\t" + " calli a3, 0(a3) \n\t" // 2-byte alignment + + "101: move.1 (%0)1++, (%1)1++ \n\t" + " jmpgt.s.f 102f \n\t" // 3-byte alignment + " move.2 (%0)2++, (%1)2++ \n\t" // 1-byte alignment + "102: calli a3, 0(a3) \n\t" + + "200: cmpi %3, #2 \n\t" + " jmplt.s.f 201f \n\t" + " move.2 (%0)2++, (%1)2++ \n\t" + " jmpeq.s.t 2f \n\t" + "201: move.1 (%0)1++, (%1)1++ \n\t" + " jmpt.w.t 2f \n\t" + + "1: .rept 25 \n\t" + " movea (%0)4++, (%1)4++ \n\t" + " .endr \n\t" + " .rept 7 \n\t" + " move.4 (%0)4++, (%1)4++ \n\t" + " .endr \n\t" + " add.4 %2, #-32, %2 \n\t" + " jmpgt.w.f 1b \n\t" + + " and.4 %3, #3, %3 \n\t" // check n + " jmpne.w.f 200b \n\t" + "2: \n\t" + : "+a" (dest), "+a" (src), "+d" (m), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + + return dest_ret; + } + + if (LIKELY((((addr_t)dest ^ (addr_t)src) & 1) == 0) && LIKELY(n > 2)) { + size_t m; + n -= (addr_t)dest & 0x01; + m = n >> 1; + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + + " btst %0, #0 \n\t" // check bit 0 + " jmpne.w.f 100f \n\t" + " calli a3, 0(a3) \n\t" // 4-byte alignment + + "100: move.1 (%0)1++, (%1)1++ \n\t" + " calli a3, 0(a3) \n\t" + + "200: move.1 (%0)1++, (%1)1++ \n\t" + " jmpt.w.t 2f \n\t" + + "1: .rept 32 \n\t" + " move.2 (%0)2++, (%1)2++ \n\t" + " .endr \n\t" + " add.4 %2, #-32, %2 \n\t" + " jmpgt.w.f 1b \n\t" + + " and.4 %3, #1, %3 \n\t" // check n + " jmpne.w.f 200b \n\t" + "2: \n\t" + + : "+a" (dest), "+a" (src), "+d" (m), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + + return dest_ret; + } + + asm volatile ( + " sub.4 d15, #0, %2 \n\t" + " jmpeq.w.f 2f \n\t" + " and.4 d15, #(16-1), d15 \n\t" // d15 = (-n) & (16 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 16 \n\t" + " move.1 (%0)1++, (%1)1++ \n\t" + " .endr \n\t" + " add.4 %2, #-16, %2 \n\t" + " jmpgt.w.f 1b \n\t" + "2: \n\t" + + : "+a" (dest), "+a" (src), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + + return dest_ret; +} + +/* + * memset() + */ +void *memset(void *s, int c, size_t n) +{ + void *s_ret = s; + + if (LIKELY(n > 6)) { + size_t m; + n -= (4 - (addr_t)s) & 0x03; + m = n >> 2; + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1) + " shmrg.1 %1, %1, %1 \n\t" + " shmrg.2 %1, %1, %1 \n\t" // %1 = (c<<24)|(c<<16)|(c<<8)|c + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + + " bfextu d15, %0, #2 \n\t" // d15 = (s & 3) + " jmpne.w.f 100f \n\t" + " calli a3, 0(a3) \n\t" // 4-byte alignment + + "100: cmpi d15, #2 \n\t" + " jmpne.s.f 101f \n\t" + " move.2 (%0)2++, %1 \n\t" + " calli a3, 0(a3) \n\t" // 2-byte alignment + + "101: move.1 (%0)1++, %1 \n\t" + " jmpgt.s.f 102f \n\t" // 3-byte alignment + " move.2 (%0)2++, %1 \n\t" // 1-byte alignment + "102: calli a3, 0(a3) \n\t" + + "200: cmpi %3, #2 \n\t" + " jmplt.s.f 201f \n\t" + " move.2 (%0)2++, %1 \n\t" + " jmpeq.s.t 2f \n\t" + "201: move.1 (%0)1++, %1 \n\t" + " jmpt.w.t 2f \n\t" + + "1: .rept 25 \n\t" + " movea (%0)4++, %1 \n\t" + " .endr \n\t" + " .rept 7 \n\t" + " move.4 (%0)4++, %1 \n\t" + " .endr \n\t" + " add.4 %2, #-32, %2 \n\t" + " jmpgt.w.f 1b \n\t" + + " and.4 %3, #3, %3 \n\t" // test bit 1 of n + " jmpne.w.f 200b \n\t" + "2: \n\t" + + : "+a" (s), "+d" (c), "+d" (m), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + + return s_ret; + } + + asm volatile ( + " sub.4 d15, #0, %2 \n\t" + " jmpeq.w.f 2f \n\t" + " and.4 d15, #(8-1), d15 \n\t" // d15 = (-%2) & (16 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 8 \n\t" + " move.1 (%0)1++, %1 \n\t" + " .endr \n\t" + "2: \n\t" + + : "+a" (s), "+d" (c), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + + return s_ret; +} + +void *memmove(void *dest, const void *src, size_t n) +{ + char *tmp; + const char *s; + + if (n == 0) + return dest; + + tmp = dest; + s = src; + + /* + * Will perform 16-bit move if possible + */ + if (likely((((u32)dest | (u32)src | n) & 1) == 0)) { + if (dest <= src) { + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.2 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 16 \n\t" + " move.2 (%0)2++, (%1)2++ \n\t" + " .endr \n\t" + " add.4 %2, #-32, %2 \n\t" + " jmpgt.w.f 1b \n\t" + + : "+a" (tmp), "+a" (s), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + } else { + tmp += n; + s += n; + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.2 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 16 \n\t" + " move.2 -2(%0)++, -2(%1)++ \n\t" + " .endr \n\t" + " add.4 %2, #-32, %2 \n\t" + " jmpgt.w.f 1b \n\t" + + : "+a" (tmp), "+a" (s), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + } + return dest; + } + + if (dest <= src) { + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 16 \n\t" + " move.1 (%0)1++, (%1)1++ \n\t" + " .endr \n\t" + " add.4 %2, #-16, %2 \n\t" + " jmpgt.w.f 1b \n\t" + : "+a" (tmp), "+a" (s), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + } else { + tmp += n; + s += n; + asm volatile ( + " sub.4 d15, #0, %2 \n\t" // set up for jump table + " and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1) + " moveai a3, #%%hi(1f) \n\t" + " lea.1 a3, %%lo(1f)(a3) \n\t" + " lea.4 a3, (a3,d15) \n\t" + " calli a3, 0(a3) \n\t" + + "1: .rept 16 \n\t" + " move.1 -1(%0)++, -1(%1)++ \n\t" + " .endr \n\t" + " add.4 %2, #-16, %2 \n\t" + " jmpgt.w.f 1b \n\t" + : "+a" (tmp), "+a" (s), "+d" (n) + : + : "d15", "a3", "memory", "cc" + ); + } + return dest; +} |