diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 764ffe8f647973..5c5299678c66e0 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
 #define __HAVE_ARCH_MEMMOVE
 extern asmlinkage void *memmove(void *, const void *, size_t);
 extern asmlinkage void *__memmove(void *, const void *, size_t);
 
 #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
+#define __HAVE_ARCH_MEMCMP
+extern asmlinkage int memcmp(const void *, const void *, size_t);
+
 #define __HAVE_ARCH_STRCMP
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 6f767b2a349d76..b529e1be18b22a 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y			+= delay.o
 lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= memmove.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
+lib-y			+= memcmp.o
 lib-y			+= strcmp.o
 lib-y			+= strlen.o
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
new file mode 100644
index 00000000000000..a531e481cfd073
--- /dev/null
+++ b/arch/riscv/lib/memcmp.S
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/* int memcmp(const void *cs, const void *ct, size_t n) */
+SYM_FUNC_START(memcmp)
+
+	__ALTERNATIVE_CFG("nop", "j memcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+/*
+ * Parameters
+ *	a0 - Pointer to first memory block (cs), also return value
+ *	a1 - Pointer to second memory block (ct)
+ *	a2 - Number of bytes to compare (n), transformed to end pointer (a0 + n)
+ *
+ * Returns
+ *	a0 - 0 if equal, positive if cs > ct, negative if cs < ct
+ *
+ * Clobbers
+ *	t0, t1
+ */
+	beqz	a2, 2f
+	add	a2, a0, a2
+1:
+	lbu	t0, 0(a0)
+	lbu	t1, 0(a1)
+	bne	t0, t1, 3f
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	a0, a2, 1b
+2:
+	li	a0, 0
+	ret
+3:
+	sub	a0, t0, t1
+	ret
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+memcmp_zbb:
+
+.option push
+.option arch,+zbb
+/*
+ * Parameters
+ *	a0 - Pointer to first memory block (cs), also return value
+ *	a1 - Pointer to second memory block (ct)
+ *	a2 - Number of bytes to compare (n); the end pointer (a0 + n) lives in t3
+ *
+ * Returns
+ *	a0 - 0 if equal, positive if cs > ct, negative if cs < ct
+ *
+ * Clobbers
+ *	t0, t1, t2, t3, t4
+ */
+	add	t3, a0, a2
+	/* Word compares need both pointers SZREG-aligned; else go bytewise. */
+	or	t0, a0, a1
+	andi	t0, t0, (SZREG - 1)
+	bnez	t0, 5f
+
+	/* t4 = last address at which a full word still fits. */
+	addi	t4, t3, -SZREG
+	bltu	t4, a0, 7f
+
+1:
+	REG_L	t1, 0(a0)
+	REG_L	t2, 0(a1)
+	bne	t1, t2, 2f
+	addi	a0, a0, SZREG
+	addi	a1, a1, SZREG
+	bleu	a0, t4, 1b
+
+7:
+	beq	a0, t3, 4f
+	/*
+	 * Fewer than SZREG bytes remain. Both pointers are SZREG-aligned, so
+	 * the full-word loads below stay inside one aligned word; the bytes
+	 * past the end are shifted out before the comparison.
+	 */
+	REG_L	t1, 0(a0)
+	REG_L	t2, 0(a1)
+
+	sub	t0, t3, a0
+	li	t4, SZREG
+	sub	t0, t4, t0
+	slli	t0, t0, 3
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	t1, t1
+	rev8	t2, t2
+#endif
+	srl	t1, t1, t0
+	srl	t2, t2, t0
+
+	bne	t1, t2, 8f
+	li	a0, 0
+	ret
+5:
+	beq	a0, t3, 4f
+6:
+	lbu	t1, 0(a0)
+	lbu	t2, 0(a1)
+	bne	t1, t2, 3f
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	a0, t3, 6b
+
+4:	li	a0, 0
+	ret
+2:
+	/* Make the lowest-addressed byte the most significant one. */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	t1, t1
+	rev8	t2, t2
+#endif
+8:
+	sltu	a0, t2, t1
+	sltu	t0, t1, t2
+	sub	a0, a0, t0
+	ret
+
+3:
+	sub	a0, t1, t2
+	ret
+
+.option pop
+#endif
+SYM_FUNC_END(memcmp)
+SYM_FUNC_ALIAS(__pi_memcmp, memcmp)
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index b0358a78f11a83..456929971da7c6 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
-purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
+purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o memcmp.o
 endif
@@ -41,6 +41,9 @@ $(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
 $(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE
 	$(call if_changed_rule,as_o_S)
 
+$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
+	$(call if_changed_rule,as_o_S)
+
 CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
 CFLAGS_string.o := -D__DISABLE_EXPORTS
 CFLAGS_ctype.o := -D__DISABLE_EXPORTS
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 0819ace5b02757..95d65c25b2e417 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -881,6 +881,129 @@ static void string_bench_strrchr(struct kunit *test)
 	STRING_BENCH_BUF(test, buf, len, strrchr, buf, '\0');
 }
 
+/*
+ * Compare short buffers at every combination of the two start offsets and
+ * every length up to max_len, checking equality, the sign of the result in
+ * both directions, and that a byte just past the range is ignored.
+ */
+static void string_test_memcmp(struct kunit *test)
+{
+	const unsigned int max_offset = 16;
+	const unsigned int max_len = 32;
+	const unsigned int buf_size = max_offset + max_len + 32;
+	u8 *buf1, *buf2;
+	unsigned int i, j, len, k;
+	int res;
+
+	buf1 = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+	buf2 = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf1);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf2);
+
+	for (i = 0; i < max_offset; i++) {
+		for (j = 0; j < max_offset; j++) {
+			for (len = 0; len <= max_len; len++) {
+				memset(buf1, 'A', buf_size);
+				memset(buf2, 'A', buf_size);
+				/* A mismatch just past the range must be ignored. */
+				buf2[j + len] = 'B';
+				KUNIT_EXPECT_EQ_MSG(test, memcmp(buf1 + i, buf2 + j, len), 0,
+					"Should be equal: i:%u j:%u len:%u", i, j, len);
+				for (k = 0; k < len; k++) {
+					memset(buf1, 'A', buf_size);
+					memset(buf2, 'A', buf_size);
+					buf2[j + k] = 'B';
+					res = memcmp(buf1 + i, buf2 + j, len);
+					KUNIT_EXPECT_NE_MSG(test, res, 0,
+						"Should detect difference at k:%u (i:%u j:%u len:%u)",
+						k, i, j, len);
+					KUNIT_EXPECT_LT(test, res, 0);
+					/* Swap the operands to cover the positive result. */
+					res = memcmp(buf2 + j, buf1 + i, len);
+					KUNIT_EXPECT_GT(test, res, 0);
+				}
+			}
+		}
+	}
+}
+
+static void do_string_bench_memcmp(struct kunit *test)
+{
+	char *buf1 = NULL;
+	char *buf2 = NULL;
+	const u64 lengths[] = { 1, 7, 8, 16, 32, 64, 128, 512, 1024, 4096 };
+	const int offsets[] = { 0, 1, 3, 7 };
+	const u64 max_len = 4096 + 64;
+	unsigned int w, o, i;
+	unsigned int off;
+	u64 len;
+	char *p1;
+	char *p2;
+	u64 iterations;
+	u64 elapsed;
+	u64 ns_per_call;
+	u64 mbps;
+	u64 j;
+
+	buf1 = vmalloc(max_len);
+	buf2 = vmalloc(max_len);
+
+	if (!buf1 || !buf2) {
+		vfree(buf1);
+		vfree(buf2);
+		kunit_err(test, "vmalloc failed\n");
+		return;
+	}
+
+	memset(buf1, 'A', max_len);
+	memset(buf2, 'A', max_len);
+
+	for (w = 0; w < 100000U; w++)
+		(void)memcmp(buf1, buf2, 4096);
+
+	for (o = 0; o < ARRAY_SIZE(offsets); o++) {
+		off = offsets[o];
+
+		for (i = 0; i < ARRAY_SIZE(lengths); i++) {
+			len = lengths[i];
+			p1 = buf1;
+			p2 = buf2 + off;
+			iterations = (len < 512) ? 100000ULL : 10000ULL;
+
+			for (j = 0; j < iterations; j++) {
+				(void)memcmp(p1, p2, len);
+				barrier();
+			}
+
+			elapsed = STRING_BENCH(iterations, memcmp, p1, p2, len);
+			ns_per_call = div_u64(elapsed, iterations);
+			/* elapsed is u64: too wide for div_u64()'s u32 divisor, and may be 0. */
+			mbps = elapsed ?
+				div64_u64(iterations * len * (NSEC_PER_SEC / MEGA), elapsed) : 0;
+
+			if (off == 0) {
+				kunit_info(test, "bench_memcmp_aligned: len=%-4llu: %llu MB/s (%llu ns/call)\n",
+					len, mbps, ns_per_call);
+			} else {
+				kunit_info(test, "bench_memcmp_unaligned(off=%u): len=%-4llu: %llu MB/s (%llu ns/call)\n",
+					off, len, mbps, ns_per_call);
+			}
+		}
+	}
+
+	vfree(buf1);
+	vfree(buf2);
+}
+
+static void string_bench_memcmp(struct kunit *test)
+{
+	if (!IS_ENABLED(CONFIG_STRING_KUNIT_BENCH)) {
+		kunit_skip(test, "CONFIG_STRING_KUNIT_BENCH not enabled");
+		return;
+	}
+	do_string_bench_memcmp(test);
+}
+
 static struct kunit_case string_test_cases[] = {
 	KUNIT_CASE(string_test_memset16),
 	KUNIT_CASE(string_test_memset32),
@@ -910,6 +1033,8 @@ static struct kunit_case string_test_cases[] = {
 	KUNIT_CASE(string_bench_strnlen),
 	KUNIT_CASE(string_bench_strchr),
 	KUNIT_CASE(string_bench_strrchr),
+	KUNIT_CASE(string_test_memcmp),
+	KUNIT_CASE_SLOW(string_bench_memcmp),
 	{}
 };
 