/*
* arch/ubicom32/lib/checksum.c
* Optimized checksum utilities for IP.
*
* (C) Copyright 2009, Ubicom, Inc.
*
* This file is part of the Ubicom32 Linux Kernel Port.
*
* The Ubicom32 Linux Kernel Port is free software: you can redistribute
* it and/or modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 2 of the
* License, or (at your option) any later version.
*
* The Ubicom32 Linux Kernel Port is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with the Ubicom32 Linux Kernel Port. If not,
 * see <http://www.gnu.org/licenses/>.
*
* Ubicom32 implementation derived from (with many thanks):
* arch/m68knommu
* arch/blackfin
* arch/parisc
*/
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IP/TCP/UDP checksumming routines
*
* Authors: Jorge Cwik,
* Arnt Gulbrandsen,
* Tom May,
* Andreas Schwab,
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek:
* Fixed some nasty bugs, causing some horrible crashes.
* A: At some points, the sum (%0) was used as
* length-counter instead of the length counter
* (%1). Thanks to Roman Hodek for pointing this out.
* B: GCC seems to mess up if one uses too many
* data-registers to hold input values and one tries to
* specify d0 and d1 as scratch registers. Letting gcc choose these
* registers itself solves the problem.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
of the assembly has to go. */
#include
#include
/*
 * do_csum - sum a byte buffer as 16-bit words and fold the result
 * @buff: first byte of the data
 * @len:  number of bytes to sum; a value <= 0 yields 0
 *
 * Returns the sum folded to 16 bits.  The value is NOT complemented here;
 * ip_fast_csum() and ip_compute_csum() invert it themselves.
 *
 * Buffers of 7 bytes or more are handled by the hand-written assembly
 * below, shorter ones by the plain C loop at the end of the function.
 */
static unsigned long do_csum(const unsigned char * buff, int len)
{
	int count;
	unsigned long result = 0;

	/*
	 * The following optimized assembly code cannot handle data length less than 7 bytes!
	 */
	if (likely(len >= 7)) {
		/* Leave out the bytes in front of the next 4-byte boundary (0 if aligned). */
		len -= (4 - (int)buff) & 3;
		/* Number of whole 32-bit words in what remains. */
		count = len >> 2;

		/*
		 * Operands: %0 = buff, %1 = result, %2 = count, %3 = len.
		 *
		 * Label 1 is a block of 32 word additions; execution enters it
		 * at an offset derived from count (the "table jump") and loops
		 * back while count stays positive.  Label 100 is the entry for
		 * a buffer that does not start on a 4-byte boundary, label 200
		 * adds the trailing partial word (len & 3 bytes), label 2 folds
		 * the sum to 16 bits and label 300 swaps the two result bytes
		 * when the "odd start address" test just before label 3 fires.
		 */
		asm (
		" sub.4 d15, #0, %2 \n\t" // set up for jump table
		" and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
		" bfextu d14, %0, #2 \n\t" // test 2 LSB of buff
		" jmpne.w.f 100f \n\t"
		" add.4 %1, #0, %1 \n\t" // clear C
		" moveai a3, #%%hi(1f) \n\t" // table jump
		" lea.1 a3, %%lo(1f)(a3) \n\t"
		" lea.4 a3, (a3,d15) \n\t"
		" calli a3, 0(a3) \n\t"
		"100: sub.4 %0, %0, d14 \n\t"
		" sub.4 d14, #4, d14 \n\t"
		" lsl.4 d14, d14, #3 \n\t"
		" add.4 %1, #0, %1 \n\t" // clear C
		" moveai a3, #%%hi(1f) \n\t" // table jump
		" lea.1 a3, %%lo(1f)(a3) \n\t"
		" lea.4 a3, (a3,d15) \n\t"
		" bfextu %1, (%0)4++, d14 \n\t" // read first partial word
		" calli a3, 0(a3) \n\t"
#if 1
		"200: lsl.4 %3, %3, #3 \n\t"
		" bfrvrs d15, (%0), #0 \n\t" // read last word (partial)
		" bfextu d15, d15, %3 \n\t"
		" bfrvrs d15, d15, #0 \n\t"
		" add.4 %1, d15, %1 \n\t"
		" addc %1, #0, %1 \n\t" // sample C again
		" jmpt.w.t 2f \n\t"
#else
		"200: move.1 d15, 0(%0) \n\t"
		" lsl.4 d15, d15, #8 \n\t"
		" add.4 %1, d15, %1 \n\t"
		" addc %1, #0, %1 \n\t" // sample C again
		" add.4 %3, #-1, %3 \n\t"
		" jmpeq.w.t 2f \n\t"
		" move.1 d15, 1(%0) \n\t"
		" add.4 %1, d15, %1 \n\t"
		" addc %1, #0, %1 \n\t" // sample C again
		" add.4 %3, #-1, %3 \n\t"
		" jmpeq.w.t 2f \n\t"
		" move.1 d15, 2(%0) \n\t"
		" lsl.4 d15, d15, #8 \n\t"
		" add.4 %1, d15, %1 \n\t"
		" addc %1, #0, %1 \n\t" // sample C again
		" jmpt.w.t 2f \n\t"
#endif
#if defined(IP7000) || defined(IP7000_REV2)
		"300: swapb.2 %1, %1 \n\t"
#else
		"300: shmrg.2 %1, %1, %1 \n\t"
		" lsr.4 %1, %1, #8 \n\t"
		" bfextu %1, %1, #16 \n\t"
#endif
		" jmpt.w.t 3f \n\t"
		"1: add.4 %1, (%0)4++, %1 \n\t" // first add without C
		" .rept 31 \n\t"
		" addc %1, (%0)4++, %1 \n\t"
		" .endr \n\t"
		" addc %1, #0, %1 \n\t" // sample C again
		" add.4 %2, #-32, %2 \n\t"
		" jmpgt.w.t 1b \n\t"
		" and.4 %3, #3, %3 \n\t" // check n
		" jmpne.w.f 200b \n\t"
		"2: .rept 2 \n\t"
		" lsr.4 d15, %1, #16 \n\t"
		" bfextu %1, %1, #16 \n\t"
		" add.4 %1, d15, %1 \n\t"
		" .endr \n\t"
		" btst d14, #3 \n\t" // start from odd address (<< 3)?
		" jmpne.w.f 300b \n\t"
		"3: \n\t"
		: "+a"(buff), "+d"(result), "+d"(count), "+d"(len)
		:
		/*
		 * "memory": the code above loads from the buffer through
		 * %0, which the operand list does not describe.  Without
		 * this clobber the compiler is free to move or discard
		 * earlier stores to the buffer (for example the memcpy()
		 * in the copy-and-checksum helpers) across this statement.
		 */
		: "d15", "d14", "a3", "cc", "memory"
		);
		return result;
	}

	/*
	 * handle a few bytes and fold result into 16-bit
	 *
	 * Bytes at even offsets land in the high byte of a 16-bit word,
	 * bytes at odd offsets in the low byte.
	 */
	while (len-- > 0) {
		result += (*buff++ << 8);
		if (len) {
			result += *buff++;
			len--;
		}
	}

	/*
	 * Two rounds of "upper half + lower half"; d15 is scratch.  This
	 * statement works on registers only, so no memory clobber is needed.
	 */
	asm (
	" .rept 2 \n\t"
	" lsr.4 d15, %0, #16 \n\t"
	" bfextu %0, %0, #16 \n\t"
	" add.4 %0, d15, %0 \n\t"
	" .endr \n\t"
	: "+d" (result)
	:
	: "d15", "cc"
	);
	return result;
}
/*
 * ip_fast_csum - checksum an IP header
 * @iph: start of the header
 * @ihl: header length in 32-bit words
 *
 * IP headers always checksum on 4 octet boundaries, so the length is
 * simply scaled to bytes before being summed.  Returns the complement
 * of the folded sum.
 */
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	unsigned long folded = do_csum(iph, ihl * 4);

	return (__force __sum16)~folded;
}
/*
 * csum_partial - checksum a memory block and accumulate into a running sum
 * @buff: start of the block (32-bit alignment is preferable)
 * @len:  length of the block in bytes
 * @sum:  32-bit sum carried over from earlier fragments
 *
 * Returns a 32-bit value that can be fed back into this function or
 * passed on to csum_tcpudp_magic.
 *
 * Every fragment except the last must have an even length; only the
 * final fragment may be odd.
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	u32 prev = (__force u32)sum;
	unsigned int total = do_csum(buff, len);

	total += prev;
	/* a total below the old sum means the addition wrapped: add the carry */
	if (total < prev)
		total++;

	return (__force __wsum)total;
}
EXPORT_SYMBOL(csum_partial);
/*
 * ip_compute_csum - general-purpose IP-style checksum
 * @buff: start of the data
 * @len:  length of the data in bytes
 *
 * Used for miscellaneous IP-like checksums, mainly in icmp.c.
 * Returns the complement of the folded sum.
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
	unsigned long folded = do_csum(buff, len);

	return (__force __sum16)~folded;
}
/*
 * csum_partial_copy_from_user - copy from fs while checksumming
 * @src:      source buffer (user pointer)
 * @dst:      destination buffer
 * @len:      number of bytes to copy and sum
 * @sum:      running 32-bit sum to accumulate into
 * @csum_err: optional error slot; cleared to 0 when supplied
 *
 * The data is moved with a plain memcpy() and the checksum is then
 * taken over the destination copy, otherwise like csum_partial.
 */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
			    int len, __wsum sum, int *csum_err)
{
	const void *from = (__force const void *)src;

	if (csum_err) {
		*csum_err = 0;
	}

	memcpy(dst, from, len);

	return csum_partial(dst, len, sum);
}
/*
 * csum_partial_copy_nocheck - copy from ds while checksumming
 * @src: source buffer
 * @dst: destination buffer
 * @len: number of bytes to copy and sum
 * @sum: running 32-bit sum to accumulate into
 *
 * Copies the data first, then sums the destination copy; otherwise
 * like csum_partial.
 */
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
{
	__wsum updated;

	memcpy(dst, src, len);
	updated = csum_partial(dst, len, sum);

	return updated;
}