Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, name, parameters=''):
"""
super().__init__(name, parameters)

self._bin_name = 'streamZen3.exe'
self._bin_name = 'stream'
self.__cpu_arch = ['other', 'zen3', 'zen4', 'neo2']

def add_parser_arguments(self):
Expand All @@ -32,7 +32,7 @@ def add_parser_arguments(self):
self._parser.add_argument(
'--cpu_arch',
type=str,
default='zen4',
default='other',
required=False,
help='The targeted cpu architectures to run \
STREAM. Default is zen4. Possible values are {}.'.format(' '.join(self.__cpu_arch))
Expand Down Expand Up @@ -76,17 +76,15 @@ def _preprocess(self):
envar = 'OMP_SCHEDULE=static && OMP_DYNAMIC=false && OMP_MAX_ACTIVE_LEVELS=1 && OMP_STACKSIZE=256M && \
OMP_PROC_BIND=true && OMP_NUM_THREADS={} && OMP_PLACES={}'.format(len(self._args.cores), omp_places)

# set the binary name based on cpu architecture
if self._args.cpu_arch == 'zen3':
exe = 'streamZen3.exe'
self._bin_name = 'streamZen3'
elif self._args.cpu_arch == 'zen4':
exe = 'streamZen4.exe'
self._bin_name = 'streamZen4'
elif self._args.cpu_arch == 'neo2':
exe = 'streamNeo2.exe'
else:
exe = 'streamx86.exe'
self._bin_name = 'streamNeo2'

command = envar + ' ' + os.path.join(self._args.bin_dir, exe)
self._bin_name = exe
command = envar + ' ' + os.path.join(self._args.bin_dir, self._bin_name)

if not self._set_binary_path():
logger.error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/streamZen3.exe'])
cls.createMockFiles(cls, ['bin/stream'])
cls.createMockFiles(cls, ['bin/streamZen3'])
return True

@decorator.load_data('tests/data/streamResult.log')
Expand Down
8 changes: 4 additions & 4 deletions third_party/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ all: cuda rocm
cuda_with_msccl: cuda cuda_msccl
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed nvbandwidth
rocm: common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed apex_rocm
cpu: common cpu_perftest cpu_stream
common: fio
cpu: common cpu_perftest
common: fio cpu_stream

# non aarch64 specific targets
ifneq ($(shell uname -m), aarch64)
common: fio cpu_hpl
common: cpu_hpl
Comment thread
abuccts marked this conversation as resolved.
directx_amd: directx_amf_encoding_latency
endif

Expand Down Expand Up @@ -184,7 +184,7 @@ ifneq (,$(wildcard stream-tests/Makefile))
cd ./stream-tests && \
wget https://www.cs.virginia.edu/stream/FTP/Code/stream.c && \
make all
cp -v ./stream-tests/stream*.exe $(SB_MICRO_PATH)/bin/
cp -v ./stream-tests/stream* $(SB_MICRO_PATH)/bin/
endif

# Build AMD Encoder Latency Test
Expand Down
48 changes: 27 additions & 21 deletions third_party/stream-tests/Makefile
Original file line number Diff line number Diff line change
@@ -1,41 +1,47 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

GENFLAGS := -DSTREAM_ARRAY_SIZE=400000000
GENFLAGS := -DSTREAM_ARRAY_SIZE=120000000
ZEN3FLAGS := -DSTREAM_ARRAY_SIZE=400000000 -march=znver3
ZEN4FLAGS := -DSTREAM_ARRAY_SIZE=800000000 -march=znver4
NEO2FLAGS := -DSTREAM_ARRAY_SIZE=120000000 -mcpu=neoverse-v2

GEN_OUTPUT := streamx86.exe
ZEN3_OUTPUT := streamZen3.exe
ZEN4_OUTPUT := streamZen4.exe
NEO2_OUTPUT := streamNeo2.exe
GEN_OUTPUT := stream
ZEN3_OUTPUT := streamZen3
ZEN4_OUTPUT := streamZen4
NEO2_OUTPUT := streamNeo2

ALL_TARGETS := GEN

ARCH := $(shell uname -m)

# ARM64 build gets NEO2 added
ifeq ($(ARCH), aarch64)
CFLAGS := -Ofast -fopenmp -DNTIMES=200
CC := gcc
all: NEO2
else
CC := /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS := -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp -fnt-store=aggressive -DNTIMES=10
all: ZEN3 ZEN4 X86
CC := gcc
CFLAGS := -Ofast -fopenmp -DNTIMES=200
ALL_TARGETS += NEO2
endif

# If the AMD AOCC 4.0.0 clang is installed, switch CC/CFLAGS to it and add the ZEN3 and ZEN4 targets
ifneq ("$(wildcard /opt/AMD/aocc-compiler-4.0.0/bin/clang)","")
CC := /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS := -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp \
-fnt-store=aggressive -DNTIMES=10
ALL_TARGETS += ZEN3 ZEN4
endif

ZEN3: stream.c
# Single definition of `all`: builds every target collected in ALL_TARGETS above
all: $(ALL_TARGETS)

GEN:
$(CC) $(CFLAGS) $(GENFLAGS) stream.c -o $(GEN_OUTPUT)
ZEN3:
$(CC) $(CFLAGS) $(ZEN3FLAGS) stream.c -o $(ZEN3_OUTPUT)
ZEN4:
$(CC) $(CFLAGS) $(ZEN4FLAGS) stream.c -o $(ZEN4_OUTPUT)
X86:
$(CC) $(CFLAGS) $(GENFLAGS) stream.c -o $(GEN_OUTPUT)
NEO2:
$(CC) $(CFLAGS) $(NEO2FLAGS) stream.c -o $(NEO2_OUTPUT)

ifeq ($(ARCH), aarch64)
# clean up the generated files
clean:
rm $(NEO2_OUTPUT)
else
clean:
rm $(GEN_OUTPUT) $(ZEN3_OUTPUT) $(ZEN4_OUTPUT)
endif
rm -f $(GEN_OUTPUT) $(ZEN3_OUTPUT) $(ZEN4_OUTPUT) $(NEO2_OUTPUT)
Loading