aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--code/qcommon/common.c310
-rw-r--r--code/qcommon/md4.c5
-rw-r--r--code/qcommon/q_shared.h5
-rw-r--r--code/unix/Makefile40
-rw-r--r--code/unix/linux_common.c346
5 files changed, 22 insertions, 684 deletions
diff --git a/code/qcommon/common.c b/code/qcommon/common.c
index b9de067..74f75ea 100644
--- a/code/qcommon/common.c
+++ b/code/qcommon/common.c
@@ -2857,316 +2857,6 @@ void Com_Shutdown (void) {
}
-#if I_WANT_A_CUSTOM_MEMCPY && !defined(_WIN32)
-void Com_Memcpy (void* dest, const void* src, const size_t count)
-{
- memcpy(dest, src, count);
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
- memset(dest, val, count);
-}
-
-#elif I_WANT_A_CUSTOM_MEMCPY && defined(_WIN32)
-
-typedef enum
-{
- PRE_READ, // prefetch assuming that buffer is used for reading only
- PRE_WRITE, // prefetch assuming that buffer is used for writing only
- PRE_READ_WRITE // prefetch assuming that buffer is used for both reading and writing
-} e_prefetch;
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
-
-#define EMMS_INSTRUCTION __asm emms
-
-void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
- __asm
- {
- mov edx,dest
- mov eax,constant
- mov ecx,count
- and ecx,~7
- jz padding
- sub ecx,8
- jmp loopu
- align 16
-loopu:
- test [edx+ecx*4 + 28],ebx // fetch next block destination to L1 cache
- mov [edx+ecx*4 + 0],eax
- mov [edx+ecx*4 + 4],eax
- mov [edx+ecx*4 + 8],eax
- mov [edx+ecx*4 + 12],eax
- mov [edx+ecx*4 + 16],eax
- mov [edx+ecx*4 + 20],eax
- mov [edx+ecx*4 + 24],eax
- mov [edx+ecx*4 + 28],eax
- sub ecx,8
- jge loopu
-padding: mov ecx,count
- mov ebx,ecx
- and ecx,7
- jz outta
- and ebx,~7
- lea edx,[edx+ebx*4] // advance dest pointer
- test [edx+0],eax // fetch destination to L1 cache
- cmp ecx,4
- jl skip4
- mov [edx+0],eax
- mov [edx+4],eax
- mov [edx+8],eax
- mov [edx+12],eax
- add edx,16
- sub ecx,4
-skip4: cmp ecx,2
- jl skip2
- mov [edx+0],eax
- mov [edx+4],eax
- add edx,8
- sub ecx,2
-skip2: cmp ecx,1
- jl outta
- mov [edx+0],eax
-outta:
- }
-}
-
-// optimized memory copy routine that handles all alignment
-// cases and block sizes efficiently
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
- Com_Prefetch (src, count, PRE_READ);
- __asm
- {
- push edi
- push esi
- mov ecx,count
- cmp ecx,0 // count = 0 check (just to be on the safe side)
- je outta
- mov edx,dest
- mov ebx,src
- cmp ecx,32 // padding only?
- jl padding
-
- mov edi,ecx
- and edi,~31 // edi = count&~31
- sub edi,32
-
- align 16
-loopMisAligned:
- mov eax,[ebx + edi + 0 + 0*8]
- mov esi,[ebx + edi + 4 + 0*8]
- mov [edx+edi+0 + 0*8],eax
- mov [edx+edi+4 + 0*8],esi
- mov eax,[ebx + edi + 0 + 1*8]
- mov esi,[ebx + edi + 4 + 1*8]
- mov [edx+edi+0 + 1*8],eax
- mov [edx+edi+4 + 1*8],esi
- mov eax,[ebx + edi + 0 + 2*8]
- mov esi,[ebx + edi + 4 + 2*8]
- mov [edx+edi+0 + 2*8],eax
- mov [edx+edi+4 + 2*8],esi
- mov eax,[ebx + edi + 0 + 3*8]
- mov esi,[ebx + edi + 4 + 3*8]
- mov [edx+edi+0 + 3*8],eax
- mov [edx+edi+4 + 3*8],esi
- sub edi,32
- jge loopMisAligned
-
- mov edi,ecx
- and edi,~31
- add ebx,edi // increase src pointer
- add edx,edi // increase dst pointer
- and ecx,31 // new count
- jz outta // if count = 0, get outta here
-
-padding:
- cmp ecx,16
- jl skip16
- mov eax,dword ptr [ebx]
- mov dword ptr [edx],eax
- mov eax,dword ptr [ebx+4]
- mov dword ptr [edx+4],eax
- mov eax,dword ptr [ebx+8]
- mov dword ptr [edx+8],eax
- mov eax,dword ptr [ebx+12]
- mov dword ptr [edx+12],eax
- sub ecx,16
- add ebx,16
- add edx,16
-skip16:
- cmp ecx,8
- jl skip8
- mov eax,dword ptr [ebx]
- mov dword ptr [edx],eax
- mov eax,dword ptr [ebx+4]
- sub ecx,8
- mov dword ptr [edx+4],eax
- add ebx,8
- add edx,8
-skip8:
- cmp ecx,4
- jl skip4
- mov eax,dword ptr [ebx] // here 4-7 bytes
- add ebx,4
- sub ecx,4
- mov dword ptr [edx],eax
- add edx,4
-skip4: // 0-3 remaining bytes
- cmp ecx,2
- jl skip2
- mov ax,word ptr [ebx] // two bytes
- cmp ecx,3 // less than 3?
- mov word ptr [edx],ax
- jl outta
- mov al,byte ptr [ebx+2] // last byte
- mov byte ptr [edx+2],al
- jmp outta
-skip2:
- cmp ecx,1
- jl outta
- mov al,byte ptr [ebx]
- mov byte ptr [edx],al
-outta:
- pop esi
- pop edi
- }
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
- unsigned int fillval;
-
- if (count < 8)
- {
- __asm
- {
- mov edx,dest
- mov eax, val
- mov ah,al
- mov ebx,eax
- and ebx, 0xffff
- shl eax,16
- add eax,ebx // eax now contains pattern
- mov ecx,count
- cmp ecx,4
- jl skip4
- mov [edx],eax // copy first dword
- add edx,4
- sub ecx,4
- skip4: cmp ecx,2
- jl skip2
- mov word ptr [edx],ax // copy 2 bytes
- add edx,2
- sub ecx,2
- skip2: cmp ecx,0
- je skip1
- mov byte ptr [edx],al // copy single byte
- skip1:
- }
- return;
- }
-
- fillval = val;
-
- fillval = fillval|(fillval<<8);
- fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern
-
- _copyDWord ((unsigned int*)(dest),fillval, count/4);
-
- __asm // padding of 0-3 bytes
- {
- mov ecx,count
- mov eax,ecx
- and ecx,3
- jz skipA
- and eax,~3
- mov ebx,dest
- add ebx,eax
- mov eax,fillval
- cmp ecx,2
- jl skipB
- mov word ptr [ebx],ax
- cmp ecx,2
- je skipA
- mov byte ptr [ebx+2],al
- jmp skipA
-skipB:
- cmp ecx,0
- je skipA
- mov byte ptr [ebx],al
-skipA:
- }
-}
-
-qboolean Com_Memcmp (const void *src0, const void *src1, const unsigned int count)
-{
- unsigned int i;
- // MMX version anyone?
-
- if (count >= 16)
- {
- unsigned int *dw = (unsigned int*)(src0);
- unsigned int *sw = (unsigned int*)(src1);
-
- unsigned int nm2 = count/16;
- for (i = 0; i < nm2; i+=4)
- {
- unsigned int tmp = (dw[i+0]-sw[i+0])|(dw[i+1]-sw[i+1])|
- (dw[i+2]-sw[i+2])|(dw[i+3]-sw[i+3]);
- if (tmp)
- return qfalse;
- }
- }
- if (count & 15)
- {
- byte *d = (byte*)src0;
- byte *s = (byte*)src1;
- for (i = count & 0xfffffff0; i < count; i++)
- if (d[i]!=s[i])
- return qfalse;
- }
-
- return qtrue;
-}
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
-{
- // write buffer prefetching is performed only if
- // the processor benefits from it. Read and read/write
- // prefetching is always performed.
-
- switch (type)
- {
- case PRE_WRITE : break;
- case PRE_READ:
- case PRE_READ_WRITE:
-
- __asm
- {
- mov ebx,s
- mov ecx,bytes
- cmp ecx,4096 // clamp to 4kB
- jle skipClamp
- mov ecx,4096
-skipClamp:
- add ecx,0x1f
- shr ecx,5 // number of cache lines
- jz skip
- jmp loopie
-
- align 16
- loopie: test byte ptr [ebx],al
- add ebx,32
- dec ecx
- jnz loopie
- skip:
- }
-
- break;
- }
-}
-#endif
//------------------------------------------------------------------------
diff --git a/code/qcommon/md4.c b/code/qcommon/md4.c
index 24b7961..82c4b0d 100644
--- a/code/qcommon/md4.c
+++ b/code/qcommon/md4.c
@@ -38,13 +38,8 @@ void MD4Init (MD4_CTX *);
void MD4Update (MD4_CTX *, const unsigned char *, unsigned int);
void MD4Final (unsigned char [16], MD4_CTX *);
-#if I_WANT_A_CUSTOM_MEMCPY
-void Com_Memset (void* dest, const int val, const size_t count);
-void Com_Memcpy (void* dest, const void* src, const size_t count);
-#else
#define Com_Memset memset
#define Com_Memcpy memcpy
-#endif
/* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm */
/* Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h
index d6c69dc..a9388e7 100644
--- a/code/qcommon/q_shared.h
+++ b/code/qcommon/q_shared.h
@@ -243,13 +243,8 @@ void Snd_Memset (void* dest, const int val, const size_t count);
#define Snd_Memset Com_Memset
#endif
-#if I_WANT_A_CUSTOM_MEMCPY
-void Com_Memset (void* dest, const int val, const size_t count);
-void Com_Memcpy (void* dest, const void* src, const size_t count);
-#else
#define Com_Memset memset
#define Com_Memcpy memcpy
-#endif
#define CIN_system 1
#define CIN_loop 2
diff --git a/code/unix/Makefile b/code/unix/Makefile
index 3aaff03..82bbe0a 100644
--- a/code/unix/Makefile
+++ b/code/unix/Makefile
@@ -350,6 +350,9 @@ ifeq ($(PLATFORM),mingw32)
LDFLAGS+=-m32
endif
+ BUILD_SERVER = 0
+ BUILD_CLIENT_SMP = 0
+
else # ifeq mingw32
#############################################################################
@@ -823,9 +826,19 @@ Q3OBJ = \
ifeq ($(ARCH),i386)
Q3OBJ += $(B)/client/vm_x86.o
+ Q3OBJ += \
+ $(B)/client/snd_mixa.o \
+ $(B)/client/matha.o \
+ $(B)/client/ftola.o \
+ $(B)/client/snapvectora.o
endif
ifeq ($(ARCH),x86)
Q3OBJ += $(B)/client/vm_x86.o
+ Q3OBJ += \
+ $(B)/client/snd_mixa.o \
+ $(B)/client/matha.o \
+ $(B)/client/ftola.o \
+ $(B)/client/snapvectora.o
endif
ifeq ($(ARCH),x86_64)
Q3OBJ += $(B)/client/vm_x86_64.o
@@ -837,21 +850,6 @@ ifeq ($(ARCH),ppc)
endif
endif
-Q3OBJ += \
- $(B)/client/linux_common.o \
- \
- $(B)/client/snd_mixa.o \
- $(B)/client/matha.o \
- $(B)/client/ftola.o \
- $(B)/client/snapvectora.o \
- \
- $(B)/client/unix_main.o \
- $(B)/client/unix_net.o \
- $(B)/client/unix_shared.o \
- $(B)/client/linux_signals.o \
- $(B)/client/linux_qgl.o \
- $(B)/client/linux_snd.o \
- $(B)/client/sdl_snd.o
ifeq ($(PLATFORM),mingw32)
Q3OBJ += \
@@ -867,6 +865,15 @@ ifeq ($(PLATFORM),mingw32)
$(B)/client/win_wndproc.o \
$(B)/client/win_resource.o
else
+ Q3OBJ += \
+ $(B)/client/unix_main.o \
+ $(B)/client/unix_net.o \
+ $(B)/client/unix_shared.o \
+ $(B)/client/linux_signals.o \
+ $(B)/client/linux_qgl.o \
+ $(B)/client/linux_snd.o \
+ $(B)/client/sdl_snd.o
+
ifeq ($(PLATFORM),linux)
Q3OBJ += $(B)/client/linux_joystick.o
endif
@@ -1051,7 +1058,6 @@ $(B)/client/irix_glimp_smp.o : $(UDIR)/irix_glimp.c; $(DO_SMP_CC)
$(B)/client/irix_snd.o : $(UDIR)/irix_snd.c; $(DO_CC)
$(B)/client/irix_input.o : $(UDIR)/irix_input.c; $(DO_CC)
$(B)/client/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_CC) $(GL_CFLAGS)
-$(B)/client/linux_common.o : $(UDIR)/linux_common.c; $(DO_CC)
$(B)/client/linux_glimp.o : $(UDIR)/linux_glimp.c; $(DO_CC) $(GL_CFLAGS)
$(B)/client/sdl_glimp.o : $(UDIR)/sdl_glimp.c; $(DO_CC) $(GL_CFLAGS)
$(B)/client/linux_glimp_smp.o : $(UDIR)/linux_glimp.c; $(DO_SMP_CC) $(GL_CFLAGS)
@@ -1154,7 +1160,6 @@ Q3DOBJ = \
$(B)/ded/l_struct.o \
\
$(B)/ded/linux_signals.o \
- $(B)/ded/linux_common.o \
$(B)/ded/unix_main.o \
$(B)/ded/unix_net.o \
$(B)/ded/unix_shared.o \
@@ -1236,7 +1241,6 @@ $(B)/ded/l_script.o : $(BLIBDIR)/l_script.c; $(DO_BOT_CC)
$(B)/ded/l_struct.o : $(BLIBDIR)/l_struct.c; $(DO_BOT_CC)
$(B)/ded/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_DED_CC)
-$(B)/ded/linux_common.o : $(UDIR)/linux_common.c; $(DO_DED_CC)
$(B)/ded/unix_main.o : $(UDIR)/unix_main.c; $(DO_DED_CC)
$(B)/ded/unix_net.o : $(UDIR)/unix_net.c; $(DO_DED_CC)
$(B)/ded/unix_shared.o : $(UDIR)/unix_shared.c; $(DO_DED_CC)
diff --git a/code/unix/linux_common.c b/code/unix/linux_common.c
deleted file mode 100644
index ab1eafa..0000000
--- a/code/unix/linux_common.c
+++ /dev/null
@@ -1,346 +0,0 @@
-#if 0 // not used anymore
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-===========================================================================
-*/
-/**
- * GAS syntax equivalents of the MSVC asm memory calls in common.c
- *
- * The following changes have been made to the asm:
- * 1. Registers are loaded by the inline asm arguments when possible
- * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining
- *
- * HISTORY:
- * AH - Created on 08 Dec 2000
- */
-
-#include <unistd.h> // AH - for size_t
-#include <string.h>
-
-// bk001207 - we need something under Linux, too. Mac?
-#if 1 // defined(C_ONLY) // bk010102 - dedicated?
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
- memcpy(dest, src, count);
-}
-
-void Com_Memset (void* dest, const int val, const size_t count) {
- memset(dest, val, count);
-}
-
-#else
-
-typedef enum {
- PRE_READ, // prefetch assuming that buffer is used for reading only
- PRE_WRITE, // prefetch assuming that buffer is used for writing only
- PRE_READ_WRITE // prefetch assuming that buffer is used for both reading and writing
-} e_prefetch;
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
-
-void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
- // MMX version not used on standard Pentium MMX
- // because the dword version is faster (with
- // proper destination prefetching)
- __asm__ __volatile__ (" \
- //mov eax,constant // eax = val \
- //mov edx,dest // dest \
- //mov ecx,count \
- movd %%eax, %%mm0 \
- punpckldq %%mm0, %%mm0 \
-\
- // ensure that destination is qword aligned \
-\
- testl $7, %%edx // qword padding?\
- jz 0f \
- movl %%eax, (%%edx) \
- decl %%ecx \
- addl $4, %%edx \
-\
-0: movl %%ecx, %%ebx \
- andl $0xfffffff0, %%ecx \
- jz 2f \
- jmp 1f \
- .align 16 \
-\
- // funny ordering here to avoid commands \
- // that cross 32-byte boundaries (the \
- // [edx+0] version has a special 3-byte opcode... \
-1: movq %%mm0, 8(%%edx) \
- movq %%mm0, 16(%%edx) \
- movq %%mm0, 24(%%edx) \
- movq %%mm0, 32(%%edx) \
- movq %%mm0, 40(%%edx) \
- movq %%mm0, 48(%%edx) \
- movq %%mm0, 56(%%edx) \
- movq %%mm0, (%%edx)\
- addl $64, %%edx \
- subl $16, %%ecx \
- jnz 1b \
-2: \
- movl %%ebx, %%ecx // ebx = cnt \
- andl $0xfffffff0, %%ecx // ecx = cnt&~15 \
- subl %%ecx, %%ebx \
- jz 6f \
- cmpl $8, %%ebx \
- jl 3f \
-\
- movq %%mm0, (%%edx) \
- movq %%mm0, 8(%%edx) \
- movq %%mm0, 16(%%edx) \
- movq %%mm0, 24(%%edx) \
- addl $32, %%edx \
- subl $8, %%ebx \
- jz 6f \
-\
-3: cmpl $4, %%ebx \
- jl 4f \
- \
- movq %%mm0, (%%edx) \
- movq %%mm0, 8(%%edx) \
- addl $16, %%edx \
- subl $4, %%ebx \
-\
-4: cmpl $2, %%ebx \
- jl 5f \
- movq %%mm0, (%%edx) \
- addl $8, %%edx \
- subl $2, %%ebx \
-\
-5: cmpl $1, %%ebx \
- jl 6f \
- movl %%eax, (%%edx) \
-6: \
- emms \
- "
- : : "a" (constant), "c" (count), "d" (dest)
- : "%ebx", "%edi", "%esi", "cc", "memory");
-}
-
-// optimized memory copy routine that handles all alignment
-// cases and block sizes efficiently
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
- Com_Prefetch (src, count, PRE_READ);
- __asm__ __volatile__ (" \
- pushl %%edi \
- pushl %%esi \
- //mov ecx,count \
- cmpl $0, %%ecx // count = 0 check (just to be on the safe side) \
- je 6f \
- //mov edx,dest \
- movl %0, %%ebx \
- cmpl $32, %%ecx // padding only? \
- jl 1f \
-\
- movl %%ecx, %%edi \
- andl $0xfffffe00, %%edi // edi = count&~31 \
- subl $32, %%edi \
-\
- .align 16 \
-0: \
- movl (%%ebx, %%edi, 1), %%eax \
- movl 4(%%ebx, %%edi, 1), %%esi \
- movl %%eax, (%%edx, %%edi, 1) \
- movl %%esi, 4(%%edx, %%edi, 1) \
- movl 8(%%ebx, %%edi, 1), %%eax \
- movl 12(%%ebx, %%edi, 1), %%esi \
- movl %%eax, 8(%%edx, %%edi, 1) \
- movl %%esi, 12(%%edx, %%edi, 1) \
- movl 16(%%ebx, %%edi, 1), %%eax \
- movl 20(%%ebx, %%edi, 1), %%esi \
- movl %%eax, 16(%%edx, %%edi, 1) \
- movl %%esi, 20(%%edx, %%edi, 1) \
- movl 24(%%ebx, %%edi, 1), %%eax \
- movl 28(%%ebx, %%edi, 1), %%esi \
- movl %%eax, 24(%%edx, %%edi, 1) \
- movl %%esi, 28(%%edx, %%edi, 1) \
- subl $32, %%edi \
- jge 0b \
- \
- movl %%ecx, %%edi \
- andl $0xfffffe00, %%edi \
- addl %%edi, %%ebx // increase src pointer \
- addl %%edi, %%edx // increase dst pointer \
- andl $31, %%ecx // new count \
- jz 6f // if count = 0, get outta here \
-\
-1: \
- cmpl $16, %%ecx \
- jl 2f \
- movl (%%ebx), %%eax \
- movl %%eax, (%%edx) \
- movl 4(%%ebx), %%eax \
- movl %%eax, 4(%%edx) \
- movl 8(%%ebx), %%eax \
- movl %%eax, 8(%%edx) \
- movl 12(%%ebx), %%eax \
- movl %%eax, 12(%%edx) \
- subl $16, %%ecx \
- addl $16, %%ebx \
- addl $16, %%edx \
-2: \
- cmpl $8, %%ecx \
- jl 3f \
- movl (%%ebx), %%eax \
- movl %%eax, (%%edx) \
- movl 4(%%ebx), %%eax \
- subl $8, %%ecx \
- movl %%eax, 4(%%edx) \
- addl $8, %%ebx \
- addl $8, %%edx \
-3: \
- cmpl $4, %%ecx \
- jl 4f \
- movl (%%ebx), %%eax // here 4-7 bytes \
- addl $4, %%ebx \
- subl $4, %%ecx \
- movl %%eax, (%%edx) \
- addl $4, %%edx \
-4: // 0-3 remaining bytes \
- cmpl $2, %%ecx \
- jl 5f \
- movw (%%ebx), %%ax // two bytes \
- cmpl $3, %%ecx // less than 3? \
- movw %%ax, (%%edx) \
- jl 6f \
- movb 2(%%ebx), %%al // last byte \
- movb %%al, 2(%%edx) \
- jmp 6f \
-5: \
- cmpl $1, %%ecx \
- jl 6f \
- movb (%%ebx), %%al \
- movb %%al, (%%edx) \
-6: \
- popl %%esi \
- popl %%edi \
- "
- : : "m" (src), "d" (dest), "c" (count)
- : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
- unsigned int fillval;
-
- if (count < 8)
- {
- __asm__ __volatile__ (" \
- //mov edx,dest \
- //mov eax, val \
- movb %%al, %%ah \
- movl %%eax, %%ebx \
- andl $0xffff, %%ebx \
- shll $16, %%eax \
- addl %%ebx, %%eax // eax now contains pattern \
- //mov ecx,count \
- cmpl $4, %%ecx \
- jl 0f \
- movl %%eax, (%%edx) // copy first dword \
- addl $4, %%edx \
- subl $4, %%ecx \
- 0: cmpl $2, %%ecx \
- jl 1f \
- movw %%ax, (%%edx) // copy 2 bytes \
- addl $2, %%edx \
- subl $2, %%ecx \
- 1: cmpl $0, %%ecx \
- je 2f \
- movb %%al, (%%edx) // copy single byte \
- 2: \
- "
- : : "d" (dest), "a" (val), "c" (count)
- : "%ebx", "%edi", "%esi", "cc", "memory");
-
- return;
- }
-
- fillval = val;
-
- fillval = fillval|(fillval<<8);
- fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern
-
- _copyDWord ((unsigned int*)(dest),fillval, count/4);
-
- __asm__ __volatile__ (" // padding of 0-3 bytes \
- //mov ecx,count \
- movl %%ecx, %%eax \
- andl $3, %%ecx \
- jz 1f \
- andl $0xffffff00, %%eax \
- //mov ebx,dest \
- addl %%eax, %%edx \
- movl %0, %%eax \
- cmpl $2, %%ecx \
- jl 0f \
- movw %%ax, (%%edx) \
- cmpl $2, %%ecx \
- je 1f \
- movb %%al, 2(%%edx) \
- jmp 1f \
-0: \
- cmpl $0, %%ecx\
- je 1f\
- movb %%al, (%%edx)\
-1: \
- "
- : : "m" (fillval), "c" (count), "d" (dest)
- : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
-}
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
-{
- // write buffer prefetching is performed only if
- // the processor benefits from it. Read and read/write
- // prefetching is always performed.
-
- switch (type)
- {
- case PRE_WRITE : break;
- case PRE_READ:
- case PRE_READ_WRITE:
-
- __asm__ __volatile__ ("\
- //mov ebx,s\
- //mov ecx,bytes\
- cmpl $4096, %%ecx // clamp to 4kB\
- jle 0f\
- movl $4096, %%ecx\
- 0:\
- addl $0x1f, %%ecx\
- shrl $5, %%ecx // number of cache lines\
- jz 2f\
- jmp 1f\
-\
- .align 16\
- 1: testb %%al, (%%edx)\
- addl $32, %%edx\
- decl %%ecx\
- jnz 1b\
- 2:\
- "
- : : "d" (s), "c" (bytes)
- : "%eax", "%ebx", "%edi", "%esi", "memory", "cc");
-
- break;
- }
-}
-
-#endif
-#endif