$52 GRAYBYTE WORDPRESS FILE MANAGER $80

SERVER : vnpttt-amd7f72-h1.vietnix.vn #1 SMP Fri May 24 12:42:50 UTC 2024
SERVER IP : 103.200.23.149 | ADMIN IP 216.73.216.22
OPTIONS : CRL = ON | WGT = ON | SDO = OFF | PKEX = OFF
DEACTIVATED : NONE

/lib/golang/src/internal/bytealg/

HOME
Current File : /lib/golang/src/internal/bytealg//count_amd64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"

// Count entry point for a byte slice.
// Frame layout used here: b_base+0, b_len+8, c+24, ret+32 (40 bytes of args+results).
// Loads the arguments into the registers countbody<> expects and tail-jumps to it.
// Unless hasPOPCNT is defined at build time, the CPU feature flag is checked
// first and the call is handed to countGeneric when POPCNT is not reported.
TEXT ·Count(SB),NOSPLIT,$0-40
#ifndef hasPOPCNT
	CMPB	internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
	JNE	nopopcnt
#endif
	MOVB	c+24(FP), AL		// AL = byte sought
	MOVQ	b_len+8(FP), BX		// BX = data len
	MOVQ	b_base+0(FP), SI	// SI = data
	LEAQ	ret+32(FP), R8		// R8 = address of the result slot
	JMP	countbody<>(SB)
#ifndef hasPOPCNT
nopopcnt:
	// POPCNT flag not set: tail-jump to the generic implementation.
	JMP	·countGeneric(SB)
#endif

// CountString entry point for a string.
// Frame layout used here: s_base+0, s_len+8, c+16, ret+24 (32 bytes of args+results).
// Loads the arguments into the registers countbody<> expects and tail-jumps to it.
// Unless hasPOPCNT is defined at build time, the CPU feature flag is checked
// first and the call is handed to countGenericString when POPCNT is not reported.
TEXT ·CountString(SB),NOSPLIT,$0-32
#ifndef hasPOPCNT
	CMPB	internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
	JNE	nopopcnt
#endif
	MOVB	c+16(FP), AL		// AL = byte sought
	MOVQ	s_len+8(FP), BX		// BX = data len
	MOVQ	s_base+0(FP), SI	// SI = data
	LEAQ	ret+24(FP), R8		// R8 = address of the result slot
	JMP	countbody<>(SB)
#ifndef hasPOPCNT
nopopcnt:
	// POPCNT flag not set: tail-jump to the generic implementation.
	JMP	·countGenericString(SB)
#endif

// countbody counts the bytes equal to AL among the BX bytes starting at SI
// and stores the count as a 64-bit value at the address held in R8.
//
// input:
//   SI: data
//   BX: data len
//   AL: byte sought
//   R8: address to put result
// may overwrite:
//   AX, BX, CX, DX, DI, R10, R11, R12, R13, X0, X1, Y1-Y5, flags
// This function requires the POPCNT instruction.
//
// Paths:
//   len == 0        -> endzero
//   0 < len < 16    -> small / endofpage (one masked 16-byte load)
//   16 <= len < 64  -> sse loop (also used for len >= 64 when AVX2 is absent)
//   len >= 64       -> avx2 loop, when AVX2 is available
TEXT countbody<>(SB),NOSPLIT,$0
	// Shuffle X0 around so that each byte contains
	// the character we're looking for.
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0

	CMPQ BX, $16
	JLT small

	MOVQ $0, R12 // Accumulator

	MOVQ SI, DI // DI = cursor into the data

	CMPQ BX, $64
	JAE avx2
sse:
	LEAQ	-16(SI)(BX*1), AX	// AX = address of last 16 bytes
	JMP	sseloopentry

	PCALIGN $16
sseloop:
	// Move the next 16-byte chunk of the data into X1.
	MOVOU	(DI), X1
	// Compare bytes in X0 to X1.
	PCMPEQB	X0, X1
	// Take the top bit of each byte in X1 and put the result in DX.
	PMOVMSKB X1, DX
	// Count number of matching bytes
	POPCNTL DX, DX
	// Accumulate into R12
	ADDQ DX, R12
	// Advance to next block.
	ADDQ	$16, DI
sseloopentry:
	// Unsigned compare: DI and AX are addresses.
	CMPQ	DI, AX
	JBE	sseloop

	// Get the number of bytes to consider in the last 16 bytes
	ANDQ $15, BX
	JZ end

	// Create mask to ignore overlap between previous 16 byte block
	// and the next. With r = len%16, the mask keeps bits [16-r, 15],
	// i.e. the last r bytes of the final 16-byte window.
	MOVQ $16,CX
	SUBQ BX, CX
	MOVQ $0xFFFF, R10
	SARQ CL, R10
	SALQ CL, R10

	// Process the last 16-byte chunk. This chunk may overlap with the
	// chunks we've already searched so we need to mask part of it.
	MOVOU	(AX), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	POPCNTL DX, DX
	ADDQ DX, R12
end:
	MOVQ R12, (R8)
	RET

// handle for lengths < 16
small:
	TESTQ	BX, BX
	JEQ	endzero

	// Check if we'll load across a page boundary.
	LEAQ	16(SI), AX
	TESTW	$0xff0, AX
	JEQ	endofpage

	// We must ignore high bytes as they aren't part of our slice.
	// Create mask: (1 << len) - 1 keeps the low len bits.
	MOVB BX, CX
	MOVQ $1, R10
	SALQ CL, R10
	SUBQ $1, R10

	// Load data
	MOVOU	(SI), X1
	// Compare target byte with each byte in data.
	PCMPEQB	X0, X1
	// Move result bits to integer register.
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	POPCNTL DX, DX
	// Directly return DX, we don't need to accumulate
	// since we have <16 bytes.
	MOVQ	DX, (R8)
	RET
endzero:
	MOVQ $0, (R8)
	RET

endofpage:
	// We must ignore low bytes as they aren't part of our slice.
	// The mask keeps bits [16-len, 15].
	MOVQ $16,CX
	SUBQ BX, CX
	MOVQ $0xFFFF, R10
	SARQ CL, R10
	SALQ CL, R10

	// Load data into the high end of X1: the 16-byte window ends
	// exactly at SI+len, so it never reads past the end of the data.
	MOVOU	-16(SI)(BX*1), X1
	// Compare target byte with each byte in data.
	PCMPEQB	X0, X1
	// Move result bits to integer register.
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	// Directly return DX, we don't need to accumulate
	// since we have <16 bytes.
	POPCNTL DX, DX
	MOVQ	DX, (R8)
	RET

avx2:
#ifndef hasAVX2
	CMPB   internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JNE sse
#endif
	// X0 already holds the sought byte in every lane (set up at function
	// entry), so it can be broadcast directly without reloading from AX.
	LEAQ -64(SI)(BX*1), R11	// R11 = address of last 64 bytes
	LEAQ (SI)(BX*1), R13	// R13 = address one past the end of the data
	VPBROADCASTB  X0, Y1
	PCALIGN $32
avx2_loop:
	VMOVDQU (DI), Y2
	VMOVDQU 32(DI), Y4
	VPCMPEQB Y1, Y2, Y3
	VPCMPEQB Y1, Y4, Y5
	VPMOVMSKB Y3, DX
	VPMOVMSKB Y5, CX
	POPCNTL DX, DX
	POPCNTL CX, CX
	ADDQ DX, R12
	ADDQ CX, R12
	ADDQ $64, DI
	// Unsigned compare: DI and R11 are addresses (matches the SSE loop).
	CMPQ DI, R11
	JBE avx2_loop

	// If last block is already processed,
	// skip to the end.
	//
	// This check is NOT an optimization; if the input length is a
	// multiple of 64, we must not go through the last leg of the
	// function because the bit shift count passed to SALQ below would
	// be 64, which is outside of the 0-63 range supported by those
	// instructions.
	//
	// Tests in the bytes and strings packages with input lengths that
	// are multiples of 64 will break if this condition were removed.
	CMPQ DI, R13
	JEQ endavx

	// Load address of the last 64 bytes.
	// There is an overlap with the previous block.
	MOVQ R11, DI
	VMOVDQU (DI), Y2
	VMOVDQU 32(DI), Y4
	VPCMPEQB Y1, Y2, Y3
	VPCMPEQB Y1, Y4, Y5
	VPMOVMSKB Y3, DX
	VPMOVMSKB Y5, CX
	// Exit AVX mode.
	VZEROUPPER
	// Combine the two 32-bit match masks into one 64-bit mask in DX.
	SALQ $32, CX
	ORQ CX, DX

	// Create mask to ignore overlap between previous 64 byte block
	// and the next. With r = len%64 (non-zero here), the mask keeps
	// the top r bits, i.e. the last r bytes of the final 64-byte window.
	ANDQ $63, BX
	MOVQ $64, CX
	SUBQ BX, CX
	MOVQ $0xFFFFFFFFFFFFFFFF, R10
	SALQ CL, R10
	// Apply mask
	ANDQ R10, DX
	POPCNTQ DX, DX
	ADDQ DX, R12
	MOVQ R12, (R8)
	RET
endavx:
	// Exit AVX mode.
	VZEROUPPER
	MOVQ R12, (R8)
	RET

Current_dir [ NOT WRITEABLE ] Document_root [ WRITEABLE ]


[ Back ]
NAME
SIZE
LAST TOUCH
USER
CAN-I?
FUNCTIONS
..
--
16 Dec 2025 9.30 PM
root / root
0755
bytealg.go
3.186 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_386.s
2.628 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_amd64.s
4.295 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_arm.s
1.753 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_arm64.s
2.129 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_generic.go
1.49 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_loong64.s
5.86 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_mips64x.s
1.662 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_mipsx.s
1.393 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_native.go
0.709 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_ppc64x.s
6.686 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_riscv64.s
3.778 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_s390x.s
1.186 KB
4 Dec 2025 6.06 PM
root / root
0644
compare_wasm.s
1.436 KB
4 Dec 2025 6.06 PM
root / root
0644
count_amd64.s
4.673 KB
4 Dec 2025 6.06 PM
root / root
0644
count_arm.s
0.896 KB
4 Dec 2025 6.06 PM
root / root
0644
count_arm64.s
2.088 KB
4 Dec 2025 6.06 PM
root / root
0644
count_generic.go
0.512 KB
4 Dec 2025 6.06 PM
root / root
0644
count_loong64.s
4.01 KB
4 Dec 2025 6.06 PM
root / root
0644
count_mips64x.s
0.86 KB
4 Dec 2025 6.06 PM
root / root
0644
count_native.go
0.66 KB
4 Dec 2025 6.06 PM
root / root
0644
count_ppc64x.s
3.636 KB
4 Dec 2025 6.06 PM
root / root
0644
count_riscv64.s
0.685 KB
4 Dec 2025 6.06 PM
root / root
0644
count_s390x.s
5.385 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_386.s
2.129 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_amd64.s
2.775 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_arm.s
1.814 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_arm64.s
2.413 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_generic.go
0.613 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_loong64.s
4.726 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_mips64x.s
1.995 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_mipsx.s
1.069 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_native.go
0.771 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_ppc64x.s
4.873 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_riscv64.s
2.274 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_s390x.s
1.767 KB
4 Dec 2025 6.06 PM
root / root
0644
equal_wasm.s
1.099 KB
4 Dec 2025 6.06 PM
root / root
0644
index_amd64.go
0.603 KB
4 Dec 2025 6.06 PM
root / root
0644
index_amd64.s
5.053 KB
4 Dec 2025 6.06 PM
root / root
0644
index_arm64.go
0.689 KB
4 Dec 2025 6.06 PM
root / root
0644
index_arm64.s
3.96 KB
4 Dec 2025 6.06 PM
root / root
0644
index_generic.go
0.88 KB
4 Dec 2025 6.06 PM
root / root
0644
index_native.go
0.576 KB
4 Dec 2025 6.06 PM
root / root
0644
index_ppc64x.go
0.622 KB
4 Dec 2025 6.06 PM
root / root
0644
index_ppc64x.s
31.564 KB
4 Dec 2025 6.06 PM
root / root
0644
index_s390x.go
0.987 KB
4 Dec 2025 6.06 PM
root / root
0644
index_s390x.s
5.496 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_386.s
0.632 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_amd64.s
3.138 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_arm.s
0.929 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_arm64.s
3.313 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_generic.go
0.758 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_loong64.s
4.135 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_mips64x.s
0.962 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_mipsx.s
0.988 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_native.go
0.428 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_ppc64x.s
6.267 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_riscv64.s
2.786 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_s390x.s
2.489 KB
4 Dec 2025 6.06 PM
root / root
0644
indexbyte_wasm.s
2.539 KB
4 Dec 2025 6.06 PM
root / root
0644
lastindexbyte_generic.go
0.43 KB
4 Dec 2025 6.06 PM
root / root
0644

GRAYBYTE WORDPRESS FILE MANAGER @ 2026 CONTACT ME
Static GIF