| """Repository rule for NCCL configuration.
 | |
| 
 | |
| `nccl_configure` depends on the following environment variables:
 | |
| 
 | |
|   * `TF_NCCL_VERSION`: Installed NCCL version or empty to build from source.
 | |
|   * `NCCL_INSTALL_PATH` (deprecated): The installation path of the NCCL library.
 | |
|   * `NCCL_HDR_PATH` (deprecated): The installation path of the NCCL header 
 | |
|     files.
 | |
|   * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
 | |
|     `/usr/local/cuda,usr/`.
 | |
| 
 | |
| """
 | |
| 
 | |
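# Example (illustrative values, not documented defaults): building against a
# preinstalled NCCL 2.x could be driven by, e.g.,
#   TF_NCCL_VERSION=2 TF_CUDA_PATHS=/usr/local/cuda,/usr
# in the environment, while an empty TF_NCCL_VERSION builds NCCL from source
# via @nccl_archive.
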
load(
    "//third_party/gpus:cuda_configure.bzl",
    "enable_cuda",
    "find_cuda_config",
)
load(
    "//third_party/remote_config:common.bzl",
    "config_repo_label",
    "get_cpu_value",
    "get_host_environ",
)

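# Names of the environment variables consulted by this rule.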
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_NCCL_HDR_PATH = "NCCL_HDR_PATH"
_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NEED_CUDA = "TF_NEED_CUDA"

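# Prefixes of the version macros defined in NCCL's nccl.h header.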
_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR"
_DEFINE_NCCL_MINOR = "#define NCCL_MINOR"
_DEFINE_NCCL_PATCH = "#define NCCL_PATCH"

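# BUILD content used when NCCL is not configured (CUDA disabled or an
# unsupported platform); the empty targets keep dependent labels resolvable.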
_NCCL_DUMMY_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  visibility = ["//visibility:public"],
)

cc_library(
  name = "nccl",
  visibility = ["//visibility:public"],
)
"""

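# BUILD content that aliases :nccl to the open-source build in @nccl_archive.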
_NCCL_ARCHIVE_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  data = ["@nccl_archive//:LICENSE.txt"],
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl",
  actual = "@nccl_archive//:nccl",
  visibility = ["//visibility:public"],
)
"""

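# Returns a label inside //third_party/nccl for the given file name.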
def _label(file):
    return Label("//third_party/nccl:{}".format(file))

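# Configures NCCL on the local machine: if TF_NCCL_VERSION is empty, :nccl is
# aliased to the open-source @nccl_archive build; otherwise a BUILD file is
# generated that points at the locally installed NCCL headers and library.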
def _create_local_nccl_repository(repository_ctx):
    # Resolve all labels before doing any real work. Resolving causes the
    # function to be restarted with all previous state being lost. This
    # can easily lead to an O(n^2) runtime in the number of labels.
    # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778
    find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64"))

    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version:
        nccl_version = nccl_version.split(".")[0]

    cuda_config = find_cuda_config(repository_ctx, find_cuda_config_path, ["cuda"])
    cuda_version = cuda_config["cuda_version"].split(".")

    if nccl_version == "":
        # Alias to open source build from @nccl_archive.
        repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)

        repository_ctx.template(
            "build_defs.bzl",
            _label("build_defs.bzl.tpl"),
            {"%{cuda_version}": "(%s, %s)" % tuple(cuda_version)},
        )
    else:
        # Create target for locally installed NCCL.
        config = find_cuda_config(repository_ctx, find_cuda_config_path, ["nccl"])
        config_wrap = {
            "%{nccl_version}": config["nccl_version"],
            "%{nccl_header_dir}": config["nccl_include_dir"],
            "%{nccl_library_dir}": config["nccl_library_dir"],
        }
        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), config_wrap)

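# Instantiates the repository from a pre-generated remote configuration
# repository (TF_NCCL_CONFIG_REPO), copying its BUILD file and, for
# build-from-source setups, its build_defs.bzl.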
def _create_remote_nccl_repository(repository_ctx, remote_config_repo):
    repository_ctx.template(
        "BUILD",
        config_repo_label(remote_config_repo, ":BUILD"),
        {},
    )

    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version == "":
        repository_ctx.template(
            "build_defs.bzl",
            config_repo_label(remote_config_repo, ":build_defs.bzl"),
            {},
        )

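# Repository rule implementation: writes a dummy BUILD file when CUDA is
# disabled or the platform is unsupported, otherwise configures NCCL from the
# remote config repo (TF_NCCL_CONFIG_REPO) if set, or from the local machine.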
def _nccl_autoconf_impl(repository_ctx):
    if (not enable_cuda(repository_ctx) or
        get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")):
        # Add a dummy build file to make bazel query happy.
        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
    elif get_host_environ(repository_ctx, "TF_NCCL_CONFIG_REPO") != None:
        _create_remote_nccl_repository(repository_ctx, get_host_environ(repository_ctx, "TF_NCCL_CONFIG_REPO"))
    else:
        _create_local_nccl_repository(repository_ctx)

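# Environment variables whose changes invalidate and re-run this rule.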
_ENVIRONS = [
    _CUDA_TOOLKIT_PATH,
    _NCCL_HDR_PATH,
    _NCCL_INSTALL_PATH,
    _TF_NCCL_VERSION,
    _TF_CUDA_COMPUTE_CAPABILITIES,
    _TF_NEED_CUDA,
    "TF_CUDA_PATHS",
]

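# Remotable variant of nccl_configure; the environment may also be supplied
# explicitly through the "environ" attribute.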
remote_nccl_configure = repository_rule(
    implementation = _create_local_nccl_repository,
    environ = _ENVIRONS,
    remotable = True,
    attrs = {
        "environ": attr.string_dict(),
    },
)

nccl_configure = repository_rule(
    implementation = _nccl_autoconf_impl,
    environ = _ENVIRONS,
)
"""Detects and configures the NCCL configuration.

Add the following to your WORKSPACE file:

```python
nccl_configure(name = "local_config_nccl")
```

Args:
  name: A unique name for this workspace rule.
"""