aboutsummaryrefslogtreecommitdiffstats
path: root/code/unix/linux_common.c
diff options
context:
space:
mode:
Diffstat (limited to 'code/unix/linux_common.c')
-rwxr-xr-xcode/unix/linux_common.c344
1 files changed, 344 insertions, 0 deletions
diff --git a/code/unix/linux_common.c b/code/unix/linux_common.c
new file mode 100755
index 0000000..512d425
--- /dev/null
+++ b/code/unix/linux_common.c
@@ -0,0 +1,344 @@
+/*
+===========================================================================
+Copyright (C) 1999-2005 Id Software, Inc.
+
+This file is part of Quake III Arena source code.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+*/
+/**
+ * GAS syntax equivalents of the MSVC asm memory calls in common.c
+ *
+ * The following changes have been made to the asm:
+ * 1. Registers are loaded by the inline asm arguments when possible
+ * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining
+ *
+ * HISTORY:
+ * AH - Created on 08 Dec 2000
+ */
+
+#include <unistd.h> // AH - for size_t
+#include <string.h>
+
+// bk001207 - we need something under Linux, too. Mac?
+#if 1 // defined(C_ONLY) // bk010102 - dedicated?
+/*
+ * Com_Memcpy - portable fallback built on the C library.
+ *
+ * Copies count bytes from src to dest by forwarding to memcpy(), so the
+ * usual memcpy() rules apply (the two regions must not overlap).
+ */
+void Com_Memcpy (void* dest, const void* src, const size_t count)
+{
+ (void) memcpy(dest, src, count);
+}
+
+/*
+ * Com_Memset - portable fallback built on the C library.
+ *
+ * Fills the first count bytes at dest with val by forwarding to memset();
+ * as with memset(), val is converted to unsigned char before being stored.
+ */
+void Com_Memset (void* dest, const int val, const size_t count)
+{
+ (void) memset(dest, val, count);
+}
+
+#else
+
+/*
+ * Hint describing how the caller is going to use the buffer that is
+ * handed to Com_Prefetch.
+ */
+typedef enum {
+ PRE_READ = 0, /* buffer will only be read */
+ PRE_WRITE = 1, /* buffer will only be written */
+ PRE_READ_WRITE = 2 /* buffer will be both read and written */
+} e_prefetch;
+
+/* Forward declaration; the definition follows later in this file. */
+void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
+/*
+ * _copyDWord
+ *
+ * Stores the 32-bit value `constant` into `count` consecutive dwords
+ * starting at `dest`, using 64-bit MMX stores (the value is duplicated
+ * into both halves of mm0 with movd + punpckldq).
+ *
+ * Steps performed by the asm:
+ *  - If (dest & 7) != 0, one dword is written first, dest is advanced by
+ *    4 and count is decremented. This yields a qword-aligned pointer only
+ *    when dest was already dword aligned.
+ *  - The main loop (label 1) writes 16 dwords / 64 bytes per iteration
+ *    for the (count & ~15) part.
+ *  - The remaining (count & 15) dwords are written in steps of 8, 4, 2
+ *    and finally 1 dword.
+ *  - emms is executed on exit.
+ *
+ * Register binding on entry: eax = constant, ecx = count, edx = dest.
+ * Declared clobbers: ebx, edi, esi, condition codes, memory.
+ *
+ * NOTE(review): the asm changes ecx and edx although they are bound as
+ * input-only operands, and mm0 is not in the clobber list - check this
+ * against the GCC extended-asm rules before enabling the code.
+ * NOTE(review): the asm text relies on backslash-newline continuation,
+ * so the string appears to contain no newline or ';' between
+ * instructions - confirm that it actually assembles.
+ * NOTE(review): count == 0 with a misaligned dest would take the
+ * alignment store and wrap the counter - presumably callers never pass
+ * 0; verify.
+ */
+void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
+ // MMX version not used on standard Pentium MMX
+ // because the dword version is faster (with
+ // proper destination prefetching)
+ __asm__ __volatile__ (" \
+ //mov eax,constant // eax = val \
+ //mov edx,dest // dest \
+ //mov ecx,count \
+ movd %%eax, %%mm0 \
+ punpckldq %%mm0, %%mm0 \
+\
+ // ensure that destination is qword aligned \
+\
+ testl $7, %%edx // qword padding?\
+ jz 0f \
+ movl %%eax, (%%edx) \
+ decl %%ecx \
+ addl $4, %%edx \
+\
+0: movl %%ecx, %%ebx \
+ andl $0xfffffff0, %%ecx \
+ jz 2f \
+ jmp 1f \
+ .align 16 \
+\
+ // funny ordering here to avoid commands \
+ // that cross 32-byte boundaries (the \
+ // [edx+0] version has a special 3-byte opcode... \
+1: movq %%mm0, 8(%%edx) \
+ movq %%mm0, 16(%%edx) \
+ movq %%mm0, 24(%%edx) \
+ movq %%mm0, 32(%%edx) \
+ movq %%mm0, 40(%%edx) \
+ movq %%mm0, 48(%%edx) \
+ movq %%mm0, 56(%%edx) \
+ movq %%mm0, (%%edx)\
+ addl $64, %%edx \
+ subl $16, %%ecx \
+ jnz 1b \
+2: \
+ movl %%ebx, %%ecx // ebx = cnt \
+ andl $0xfffffff0, %%ecx // ecx = cnt&~15 \
+ subl %%ecx, %%ebx \
+ jz 6f \
+ cmpl $8, %%ebx \
+ jl 3f \
+\
+ movq %%mm0, (%%edx) \
+ movq %%mm0, 8(%%edx) \
+ movq %%mm0, 16(%%edx) \
+ movq %%mm0, 24(%%edx) \
+ addl $32, %%edx \
+ subl $8, %%ebx \
+ jz 6f \
+\
+3: cmpl $4, %%ebx \
+ jl 4f \
+ \
+ movq %%mm0, (%%edx) \
+ movq %%mm0, 8(%%edx) \
+ addl $16, %%edx \
+ subl $4, %%ebx \
+\
+4: cmpl $2, %%ebx \
+ jl 5f \
+ movq %%mm0, (%%edx) \
+ addl $8, %%edx \
+ subl $2, %%ebx \
+\
+5: cmpl $1, %%ebx \
+ jl 6f \
+ movl %%eax, (%%edx) \
+6: \
+ emms \
+ "
+ : : "a" (constant), "c" (count), "d" (dest)
+ : "%ebx", "%edi", "%esi", "cc", "memory");
+}
+
+/*
+ * Com_Memcpy (x86 inline-assembly variant)
+ *
+ * Copies count bytes from src to dest; the regions are expected not to
+ * overlap. Com_Prefetch is called on the source buffer first.
+ *
+ * Steps performed by the asm:
+ *  - count == 0 returns immediately.
+ *  - For count >= 32, the (count & ~31) leading bytes are copied in
+ *    32-byte blocks using paired dword loads/stores through eax/esi.
+ *    edi is the block offset and runs from (count & ~31) - 32 down to 0.
+ *  - Both pointers are then advanced by (count & ~31) and the remaining
+ *    0-31 bytes are copied in 16, 8, 4, 2 and 1 byte steps.
+ *
+ * The block mask has to be 0xffffffe0 (count & ~31). With a wider mask
+ * such as ~511 the block loop would start at offset -32 for counts
+ * between 32 and 511 and the pointers would never be advanced past the
+ * blocks.
+ *
+ * Register binding on entry: ecx = count, edx = dest; src is a memory
+ * operand loaded into ebx.
+ *
+ * NOTE(review): %0 (src) is read after two pushl instructions; if the
+ * compiler addresses it relative to esp the load is off by 8 bytes -
+ * confirm, or pass src in a register instead.
+ * NOTE(review): ecx and edx are modified although bound as input-only
+ * operands, and the asm text has no newline or ';' between instructions
+ * because of the backslash-newline continuation - confirm both before
+ * enabling this branch.
+ */
+void Com_Memcpy (void* dest, const void* src, const size_t count) {
+ Com_Prefetch (src, count, PRE_READ);
+ __asm__ __volatile__ (" \
+ pushl %%edi \
+ pushl %%esi \
+ //mov ecx,count \
+ cmpl $0, %%ecx // count = 0 check (just to be on the safe side) \
+ je 6f \
+ //mov edx,dest \
+ movl %0, %%ebx \
+ cmpl $32, %%ecx // padding only? \
+ jl 1f \
+\
+ movl %%ecx, %%edi \
+ andl $0xffffffe0, %%edi // edi = count&~31 \
+ subl $32, %%edi \
+\
+ .align 16 \
+0: \
+ movl (%%ebx, %%edi, 1), %%eax \
+ movl 4(%%ebx, %%edi, 1), %%esi \
+ movl %%eax, (%%edx, %%edi, 1) \
+ movl %%esi, 4(%%edx, %%edi, 1) \
+ movl 8(%%ebx, %%edi, 1), %%eax \
+ movl 12(%%ebx, %%edi, 1), %%esi \
+ movl %%eax, 8(%%edx, %%edi, 1) \
+ movl %%esi, 12(%%edx, %%edi, 1) \
+ movl 16(%%ebx, %%edi, 1), %%eax \
+ movl 20(%%ebx, %%edi, 1), %%esi \
+ movl %%eax, 16(%%edx, %%edi, 1) \
+ movl %%esi, 20(%%edx, %%edi, 1) \
+ movl 24(%%ebx, %%edi, 1), %%eax \
+ movl 28(%%ebx, %%edi, 1), %%esi \
+ movl %%eax, 24(%%edx, %%edi, 1) \
+ movl %%esi, 28(%%edx, %%edi, 1) \
+ subl $32, %%edi \
+ jge 0b \
+ \
+ movl %%ecx, %%edi \
+ andl $0xffffffe0, %%edi \
+ addl %%edi, %%ebx // increase src pointer \
+ addl %%edi, %%edx // increase dst pointer \
+ andl $31, %%ecx // new count \
+ jz 6f // if count = 0, get outta here \
+\
+1: \
+ cmpl $16, %%ecx \
+ jl 2f \
+ movl (%%ebx), %%eax \
+ movl %%eax, (%%edx) \
+ movl 4(%%ebx), %%eax \
+ movl %%eax, 4(%%edx) \
+ movl 8(%%ebx), %%eax \
+ movl %%eax, 8(%%edx) \
+ movl 12(%%ebx), %%eax \
+ movl %%eax, 12(%%edx) \
+ subl $16, %%ecx \
+ addl $16, %%ebx \
+ addl $16, %%edx \
+2: \
+ cmpl $8, %%ecx \
+ jl 3f \
+ movl (%%ebx), %%eax \
+ movl %%eax, (%%edx) \
+ movl 4(%%ebx), %%eax \
+ subl $8, %%ecx \
+ movl %%eax, 4(%%edx) \
+ addl $8, %%ebx \
+ addl $8, %%edx \
+3: \
+ cmpl $4, %%ecx \
+ jl 4f \
+ movl (%%ebx), %%eax // here 4-7 bytes \
+ addl $4, %%ebx \
+ subl $4, %%ecx \
+ movl %%eax, (%%edx) \
+ addl $4, %%edx \
+4: // 0-3 remaining bytes \
+ cmpl $2, %%ecx \
+ jl 5f \
+ movw (%%ebx), %%ax // two bytes \
+ cmpl $3, %%ecx // less than 3? \
+ movw %%ax, (%%edx) \
+ jl 6f \
+ movb 2(%%ebx), %%al // last byte \
+ movb %%al, 2(%%edx) \
+ jmp 6f \
+5: \
+ cmpl $1, %%ecx \
+ jl 6f \
+ movb (%%ebx), %%al \
+ movb %%al, (%%edx) \
+6: \
+ popl %%esi \
+ popl %%edi \
+ "
+ : : "m" (src), "d" (dest), "c" (count)
+ : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
+}
+
+/*
+ * Com_Memset (x86 inline-assembly variant)
+ *
+ * Fills the first count bytes at dest with the low byte of val.
+ *
+ *  - count < 8: a single asm block replicates the low byte (al) of val
+ *    across eax and writes one optional dword, one optional word and
+ *    one optional byte.
+ *  - count >= 8: the byte is replicated into a 32-bit pattern, the
+ *    count/4 whole dwords are written by _copyDWord, and a second asm
+ *    block writes the trailing (count & 3) bytes at dest + (count & ~3).
+ *
+ * Only the low 8 bits of val are used on both paths, so values outside
+ * 0..255 behave the same regardless of count.
+ *
+ * The tail offset mask has to be 0xfffffffc (count & ~3); a wider mask
+ * would place the trailing bytes at the wrong address.
+ *
+ * NOTE(review): both asm blocks modify ecx/edx (and eax in the first
+ * block) although they are bound as input-only operands, and the asm
+ * text has no newline or ';' between instructions because of the
+ * backslash-newline continuation - confirm before enabling this branch.
+ */
+void Com_Memset (void* dest, const int val, const size_t count)
+{
+ unsigned int fillval;
+
+ if (count < 8)
+ {
+ __asm__ __volatile__ (" \
+ //mov edx,dest \
+ //mov eax, val \
+ movb %%al, %%ah \
+ movl %%eax, %%ebx \
+ andl $0xffff, %%ebx \
+ shll $16, %%eax \
+ addl %%ebx, %%eax // eax now contains pattern \
+ //mov ecx,count \
+ cmpl $4, %%ecx \
+ jl 0f \
+ movl %%eax, (%%edx) // copy first dword \
+ addl $4, %%edx \
+ subl $4, %%ecx \
+ 0: cmpl $2, %%ecx \
+ jl 1f \
+ movw %%ax, (%%edx) // copy 2 bytes \
+ addl $2, %%edx \
+ subl $2, %%ecx \
+ 1: cmpl $0, %%ecx \
+ je 2f \
+ movb %%al, (%%edx) // copy single byte \
+ 2: \
+ "
+ : : "d" (dest), "a" (val), "c" (count)
+ : "%ebx", "%edi", "%esi", "cc", "memory");
+
+ return;
+ }
+
+ // keep only the low byte so the pattern matches the count < 8 path
+ fillval = (unsigned char) val;
+
+ fillval = fillval|(fillval<<8);
+ fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern
+
+ _copyDWord ((unsigned int*)(dest),fillval, count/4);
+
+ __asm__ __volatile__ (" // padding of 0-3 bytes \
+ //mov ecx,count \
+ movl %%ecx, %%eax \
+ andl $3, %%ecx \
+ jz 1f \
+ andl $0xfffffffc, %%eax \
+ //mov ebx,dest \
+ addl %%eax, %%edx \
+ movl %0, %%eax \
+ cmpl $2, %%ecx \
+ jl 0f \
+ movw %%ax, (%%edx) \
+ cmpl $2, %%ecx \
+ je 1f \
+ movb %%al, 2(%%edx) \
+ jmp 1f \
+0: \
+ cmpl $0, %%ecx\
+ je 1f\
+ movb %%al, (%%edx)\
+1: \
+ "
+ : : "m" (fillval), "c" (count), "d" (dest)
+ : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
+}
+/*
+ * Com_Prefetch
+ *
+ * Touches the buffer at s by reading one byte every 32 bytes (the asm
+ * comment treats 32 bytes as one cache line).
+ *
+ *   s     - start of the buffer (bound to edx)
+ *   bytes - buffer length; values above 4096 are clamped to 4096
+ *           (bound to ecx)
+ *   type  - PRE_WRITE does nothing; PRE_READ and PRE_READ_WRITE run the
+ *           touch loop
+ *
+ * The loop runs (bytes + 31) >> 5 times and only executes testb, so no
+ * memory is written.
+ *
+ * NOTE(review): the clamp uses jle, a signed comparison, so a bytes
+ * value with the top bit set looks like it would not be clamped -
+ * confirm whether that can occur.
+ * NOTE(review): ecx and edx are modified although bound as input-only
+ * operands, and the asm text has no newline or ';' between instructions
+ * because of the backslash-newline continuation - confirm before
+ * enabling this branch.
+ */
+void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
+{
+ // write buffer prefetching is performed only if
+ // the processor benefits from it. Read and read/write
+ // prefetching is always performed.
+
+ switch (type)
+ {
+ case PRE_WRITE : break;
+ case PRE_READ:
+ case PRE_READ_WRITE:
+
+ __asm__ __volatile__ ("\
+ //mov ebx,s\
+ //mov ecx,bytes\
+ cmpl $4096, %%ecx // clamp to 4kB\
+ jle 0f\
+ movl $4096, %%ecx\
+ 0:\
+ addl $0x1f, %%ecx\
+ shrl $5, %%ecx // number of cache lines\
+ jz 2f\
+ jmp 1f\
+\
+ .align 16\
+ 1: testb %%al, (%%edx)\
+ addl $32, %%edx\
+ decl %%ecx\
+ jnz 1b\
+ 2:\
+ "
+ : : "d" (s), "c" (bytes)
+ : "%eax", "%ebx", "%edi", "%esi", "memory", "cc");
+
+ break;
+ }
+}
+
+#endif