Skip to content

Commit

Permalink
Merge branch 'release-2.4.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Aug 29, 2022
2 parents c134996 + c262d2f commit a9d653e
Show file tree
Hide file tree
Showing 22 changed files with 717 additions and 268 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ exclude: '^tools/(build_utils/fypp)'
fail_fast: false
repos:
- repo: https://github.com/ambv/black
rev: 22.3.0
rev: 22.6.0
hooks:
- id: black
name: Reformat Python files with the black code formatter
files: '^.*(/PACKAGE)|(\.py)$'
- repo: https://gitlab.com/pycqa/flake8
rev: 4.0.1
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
exclude: >-
(?x)^(
.cp2k/.*|
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
rev: v4.3.0
hooks:
- id: check-ast
- id: check-yaml
Expand Down
4 changes: 2 additions & 2 deletions VERSION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
MAJOR = 2
MINOR = 3
MINOR = 4
PATCH = 0
# A specific DATE (YYYY-MM-DD) fixes an official release, otherwise
# it is considered Development version.
DATE = 2022-06-26
DATE = 2022-08-29


2 changes: 1 addition & 1 deletion docs/guide/2-user-guide/1-installation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ make
-DWITH_CUDA_PROFILING=<OFF|ON>
-DWITH_C_API=<ON|OFF>
-DWITH_EXAMPLES=<ON|OFF>
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100>
-DWITH_GPU=<P100|K20X|K40|K80|V100|Mi50|Mi100|Mi250>
-DCMAKE_BUILD_TYPE=<Release|Debug|Coverage>
-DBUILD_TESTING=<ON|OFF>
-DTEST_MPI_RANKS=<auto,N>
Expand Down
27 changes: 19 additions & 8 deletions src/acc/acc_bench_smm.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@
# else
# define ACC_BENCH_USEOMP(FUNC) (FUNC)
# endif
# if LIBXSMM_VERSION4(1, 17, 0, 2776) <= LIBXSMM_VERSION_NUMBER
# define ACC_BENCH_GEMM_BATCH(IPREC, OPREC, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, STRIDE_A, B, LDB, STRIDE_B, BETA, C, LDC, \
STRIDE_C, INDEX_STRIDE, INDEX_BASE, BATCHSIZE) \
ACC_BENCH_USEOMP(libxsmm_gemm_batch) \
(IPREC, OPREC, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, STRIDE_A, B, LDB, STRIDE_B, BETA, C, LDC, STRIDE_C, INDEX_STRIDE, \
INDEX_BASE, BATCHSIZE, 0 /*batchcheck*/)
# else
# define ACC_BENCH_GEMM_BATCH(IPREC, OPREC, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, STRIDE_A, B, LDB, STRIDE_B, BETA, C, LDC, \
STRIDE_C, INDEX_STRIDE, INDEX_BASE, BATCHSIZE) \
ACC_BENCH_USEOMP(libxsmm_gemm_batch) \
((libxsmm_gemm_precision)(IPREC), (libxsmm_gemm_precision)(OPREC), TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, \
LDC, INDEX_BASE, INDEX_STRIDE, STRIDE_A, STRIDE_B, STRIDE_C, BATCHSIZE)
# endif
# define PRINTF(...) \
do { \
const size_t print_buffer_size = sizeof(print_buffer) - print_offset; \
Expand Down Expand Up @@ -420,19 +433,17 @@ int main(int argc, char* argv[]) {
# endif
memset(gold_hst, 0, sizeof(ELEM_TYPE) * mn * nc);
for (r = 0; r < warmup; ++r) {
ACC_BENCH_USEOMP(libxsmm_gemm_batch)
(LIBXSMM_DATATYPE(ELEM_TYPE), LIBXSMM_DATATYPE(ELEM_TYPE), &transa, &transb, m, n, k, &alpha, amat_hst, &m /*lda*/,
bmat_hst, &k /*ldb*/, &beta, gold_hst, &m /*ldc*/, 1 /*index_base*/, sizeof(int) * 3, stack_hst + 0, stack_hst + 1,
stack_hst + 2, stack_size);
ACC_BENCH_GEMM_BATCH(LIBXSMM_DATATYPE(ELEM_TYPE), LIBXSMM_DATATYPE(ELEM_TYPE), &transa, &transb, m, n, k, &alpha,
amat_hst, &m /*lda*/, stack_hst + 0 /*stride_a*/, bmat_hst, &k /*ldb*/, stack_hst + 1 /*stride_b*/, &beta, gold_hst,
&m /*ldc*/, stack_hst + 2 /*stride_c*/, sizeof(int) * 3, 1 /*index_base*/, stack_size);
}
memset(gold_hst, 0, sizeof(ELEM_TYPE) * mn * nc);
start = libxsmm_timer_tick();
/* CPU-kernel operates on data that is not initialized in NUMA-aware fashion */
for (r = 0; r < (nrepeat * smm_nrepeat); ++r) {
ACC_BENCH_USEOMP(libxsmm_gemm_batch)
(LIBXSMM_DATATYPE(ELEM_TYPE), LIBXSMM_DATATYPE(ELEM_TYPE), &transa, &transb, m, n, k, &alpha, amat_hst, &m /*lda*/,
bmat_hst, &k /*ldb*/, &beta, gold_hst, &m /*ldc*/, 1 /*index_base*/, sizeof(int) * 3, stack_hst + 0, stack_hst + 1,
stack_hst + 2, stack_size);
ACC_BENCH_GEMM_BATCH(LIBXSMM_DATATYPE(ELEM_TYPE), LIBXSMM_DATATYPE(ELEM_TYPE), &transa, &transb, m, n, k, &alpha,
amat_hst, &m /*lda*/, stack_hst + 0 /*stride_a*/, bmat_hst, &k /*ldb*/, stack_hst + 1 /*stride_b*/, &beta, gold_hst,
&m /*ldc*/, stack_hst + 2 /*stride_c*/, sizeof(int) * 3, 1 /*index_base*/, stack_size);
}
duration = libxsmm_timer_duration(start, libxsmm_timer_tick());
PRINTF("host: %.2g ms %.1f GFLOPS/s\n", 1000.0 * duration / (nrepeat * smm_nrepeat),
Expand Down
81 changes: 36 additions & 45 deletions src/acc/libsmm_acc/generate_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@
# SPDX-License-Identifier: GPL-2.0+ #
####################################################################################################

from __future__ import print_function

import os
from os import path
import re
import argparse
from pathlib import Path

# ===============================================================================
# Helper variables
Expand All @@ -29,66 +26,63 @@
commented_line = r"\s*(//|/\*.*/*/)"
open_comment = r"\s*/\*"
close_comment = r".*\*/"
smm_acc_header = (
"/*------------------------------------------------------------------------------------------------*\n"
+ " * Copyright (C) by the DBCSR developers group - All rights reserved *\n"
+ " * This file is part of the DBCSR library. *\n"
+ " * *\n"
+ " * For information on the license, see the LICENSE file. *\n"
+ " * For further information please visit https://dbcsr.cp2k.org *\n"
+ " * SPDX-License-Identifier: GPL-2.0+ *\n"
+ " *------------------------------------------------------------------------------------------------*/\n"
+ "\n"
+ "/*****************************************************************************\n"
+ " * FILE GENERATED BY SCRIPT 'generate_kernels.py' DO NOT EDIT *\n"
+ " *****************************************************************************/\n"
+ "\n"
+ "#ifndef SMM_ACC_H\n"
+ "#define SMM_ACC_H\n"
+ "#include <string>\n"
)
smm_acc_header = """\
/*------------------------------------------------------------------------------------------------*
* Copyright (C) by the DBCSR developers group - All rights reserved *
* This file is part of the DBCSR library. *
* *
* For information on the license, see the LICENSE file. *
* For further information please visit https://dbcsr.cp2k.org *
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/
/*****************************************************************************
* FILE GENERATED BY SCRIPT 'generate_kernels.py' DO NOT EDIT *
*****************************************************************************/
#ifndef SMM_ACC_H
#define SMM_ACC_H
#include <string>
"""


# ===============================================================================
def main(kernels_folder):
def main(kernels_folder: Path):
"""
Find files corresponding to CUDA/HIP kernels and write them as strings into a
C++ header file to be read for JIT-ing
"""
# Find all files containing "smm_acc" kernels in the "kernel" subfolder
kernels_folder_files = os.listdir(kernels_folder)
kernels_folder_files = kernels_folder.iterdir()
kernel_files = list()
for f in kernels_folder_files:
if f[:8] == "smm_acc_" and f[-2:] == ".h":
kernel_files.append(os.path.join(kernels_folder, f))
print("Found {} kernel files:".format(len(kernel_files)))
print(*("<- {}".format(kf) for kf in kernel_files), sep="\n")
for kfile in kernels_folder_files:
if kfile.name.startswith("smm_acc_") and kfile.suffix == ".h":
kernel_files.append(kfile)
print(f"Found {len(kernel_files)} kernel files:")
print(*(f"<- {kf}" for kf in kernel_files), sep="\n")

# Read
kernels_h = (
dict()
) # key: path to kernel file (string), value: file content (list of string)
for kernel_file in kernel_files:
with open(kernel_file) as f:
kernels_h[kernel_file] = f.read().splitlines()
kernels_h[kernel_file] = kernel_file.read_text().splitlines()

# Construct file containing the kernels as strings
print("Re-write kernels as strings...")
file_h = smm_acc_header
for kernel_file, kernel in kernels_h.items():
kernel_name, _ = path.splitext(
path.basename(kernel_file)
) # use the filename as name for the kernel
file_h += "\n" + separator + cpp_function_to_string(kernel, kernel_name) + "\n"
kernel_name = kernel_file.stem # use the filename as name for the kernel
file_h += f"\n{separator}{cpp_function_to_string(kernel, kernel_name)}\n"
file_h += "#endif // SMM_ACC_H\n"
file_h += "//EOF"
file_h += "\n\n"

# Write
file_h_path = "smm_acc_kernels.h"
with open(file_h_path, "w") as f:
f.write(file_h)
print("Wrote kernel string to file\n-> {}".format(file_h_path))
with open(file_h_path, "w") as fhandle:
fhandle.write(file_h)
print(f"Wrote kernel string to file\n-> {file_h_path}")


# ===============================================================================
Expand All @@ -104,7 +98,7 @@ def cpp_function_to_string(cpp_file, kernel_name):
r"^[a-zA-Z]\w*", kernel_name
), "kernel_name must be a valid C/C++ variable name"

out = variable_declaration.format(var_name=kernel_name) + "\n"
out = f"{variable_declaration.format(var_name=kernel_name)}\n"
in_comment = False
for line in cpp_file:
if not in_comment:
Expand All @@ -124,11 +118,8 @@ def cpp_function_to_string(cpp_file, kernel_name):
)
+ "\n"
)
else: # in_comment == True
if re.match(close_comment, line) is not None:
in_comment = False
else:
pass
elif re.match(close_comment, line): # in_comment == True
in_comment = False

return out + end_string

Expand All @@ -138,7 +129,7 @@ def cpp_function_to_string(cpp_file, kernel_name):
parser.add_argument(
"kernels_folder",
metavar="KERNELS_FOLDER",
type=str,
type=Path,
nargs="?",
default="./kernels",
help="directory with the kernel header files. Default: %(default)s",
Expand Down
46 changes: 22 additions & 24 deletions src/acc/libsmm_acc/generate_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,47 @@
# SPDX-License-Identifier: GPL-2.0+ #
####################################################################################################

from __future__ import print_function

import json
import argparse
from os import path
from pathlib import Path

from kernels.smm_acc import params_dict_to_kernel, gpu_architectures


# ===============================================================================
def main(gpu_version, base_dir):
def main(gpu_version: str, base_dir: Path):
param_fn = base_dir / f"parameters_{gpu_version}.json"

try: # Read existing parameters
param_fn = path.join(base_dir, "parameters_{}.json".format(gpu_version))
with open(param_fn) as f:
print("GPU version: {}".format(gpu_version))
all_kernels = [params_dict_to_kernel(**params) for params in json.load(f)]
print(
"About to process {:,} kernels from file {}".format(
len(all_kernels), param_fn
)
)
with param_fn.open("r") as fhandle:
print(f"GPU version: {gpu_version}")
all_kernels = [
params_dict_to_kernel(**params) for params in json.load(fhandle)
]
print(f"About to process {len(all_kernels):,} kernels from file {param_fn}")
except: # noqa: E722
all_kernels = []
pass

try: # Read GPU properties (warp size)
gpu_props_fn = path.join(base_dir, "../kernels/gpu_properties.json")
arch_code = gpu_architectures[path.basename(param_fn)]
with open(gpu_props_fn) as f:
gpu_warp_size = json.load(f)[arch_code]["Threads_/_Warp"]
gpu_props_fn = base_dir / "../kernels/gpu_properties.json"
arch_code = gpu_architectures[param_fn.name]
with gpu_props_fn.open("r") as fhandle:
gpu_warp_size = json.load(fhandle)[arch_code]["Threads_/_Warp"]
except: # noqa: E722
gpu_warp_size = 32
pass
print("GPU warp size: {}".format(gpu_warp_size))

print(f"GPU warp size: {gpu_warp_size}")

# Construct output
out, all_pars = write_parameters_file(all_kernels, gpu_warp_size)
out = write_parameters_file(all_kernels, gpu_warp_size)

# Write to c++ header-file
file_h = "parameters.h"
if all_kernels:
print("Found {:,} kernels in file {}".format(len(all_kernels), param_fn))
print("Printing them to file {}".format(file_h))
print(f"Found {len(all_kernels):,} kernels in file {param_fn}")
print(f"Printing them to file {file_h}")
with open(file_h, "w") as f:
f.write(out)

Expand Down Expand Up @@ -103,7 +101,7 @@ def write_parameters_file(all_pars, gpu_warp_size):
"""

# Warp size
out += "extern const int warp_size = {};\n\n".format(gpu_warp_size)
out += f"extern const int warp_size = {gpu_warp_size};\n\n"

# Map of kernel parameters
out += """\
Expand All @@ -125,10 +123,9 @@ def write_parameters_file(all_pars, gpu_warp_size):
#endif
//EOF
\n\n
"""

return out, all_pars
return out


# ===============================================================================
Expand All @@ -149,6 +146,7 @@ def write_parameters_file(all_pars, gpu_warp_size):
"--base_dir",
metavar="BASE_DIR",
default="parameters/",
type=Path,
help="Set the base directory to look for the parameter files. Default: %(default)s",
)
args = parser.parse_args()
Expand Down
20 changes: 12 additions & 8 deletions src/acc/libsmm_acc/kernels/smm_acc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"parameters_K80.json": "sm_37",
"parameters_P100.json": "sm_60",
"parameters_V100.json": "sm_70",
"parameters_A100.json": "sm_80",
"parameters_Vega10.json": "gfx900",
"parameters_Mi50.json": "gfx906",
"parameters_Mi100.json": "gfx908",
Expand All @@ -82,7 +83,7 @@ def compatible_mnk(algo, m, n, k):
compatible = False
else:
if algo != "medium":
assert False, "Cannot identify algorithm:" + str(algo)
assert False, f"Cannot identify algorithm:{str(algo)}"

return compatible

Expand Down Expand Up @@ -122,15 +123,18 @@ def descr_to_kernel(kernel_descr, source="autotuned"):
r"Kernel_dnt_(\w+)(\(.*\)) , # (\d+(?:\.\d+)?) GFlop/s"
)
kernel_descr_matched = re_kernel_descr.search(kernel_descr)
assert kernel_descr_matched is not None, (
'Could not match kernel description in "' + kernel_descr + '"'
)
assert (
kernel_descr_matched is not None
), f'Could not match kernel description in "{kernel_descr}"'
match = kernel_descr_matched.groups()
algo = match[0]
m = match[1].replace("=", "':")
m = m.replace(", ", ", '")
m = m.replace("(", "{'")
m = m.replace(")", "}")
m = (
match[1]
.replace("=", "':")
.replace(", ", ", '")
.replace("(", "{'")
.replace(")", "}")
)
params = dict(literal_eval(m))
params["perf"] = float(match[2])
params["source"] = source
Expand Down
Loading

0 comments on commit a9d653e

Please sign in to comment.