[arvados] created: 2.1.0-3154-g044654c3f
git repository hosting
git at public.arvados.org
Tue Dec 6 17:16:54 UTC 2022
at 044654c3fc20e8cd98cbea88f1681ee394347b8c (commit)
commit 044654c3fc20e8cd98cbea88f1681ee394347b8c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Dec 6 12:15:11 2022 -0500
19847: Add calculation for choosing keep disk cache size.
Add KeepCacheTypeRequirement to control which cache to use, to
facilitate performance comparison.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 550ecba1c..9135ff674 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -280,6 +280,7 @@ def add_arv_hints():
"http://commonwl.org/cwltool#CUDARequirement",
"http://arvados.org/cwl#UsePreemptible",
"http://arvados.org/cwl#OutputCollectionProperties",
+ "http://arvados.org/cwl#KeepCacheTypeRequirement",
])
def exit_signal_handler(sigcode, frame):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index 54e0fc512..fc370eb81 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -420,3 +420,39 @@ $graph:
jsonldPredicate:
mapSubject: propertyName
mapPredicate: propertyValue
+
+
+- name: KeepCacheType
+ type: enum
+ symbols:
+ - ram_cache
+ - disk_cache
+ doc:
+ - |
+ ram_cache: Keep blocks will be cached in RAM only.
+ - |
+ disk_cache: Keep blocks will be cached to disk and
+ memory-mapped. The disk cache leverages the kernel's virtual
+ memory system so "hot" data will generally still be kept in
+ RAM.
+
+- name: KeepCacheTypeRequirement
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Choose keep cache strategy.
+ fields:
+ - name: class
+ type: string
+ doc: "'arv:KeepCacheTypeRequirement'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ - name: keepCacheType
+ type: KeepCacheType?
+ doc: |
+ Whether Keep blocks loaded by arv-mount should be kept in RAM
+ only or written to disk and memory-mapped. The disk cache
+ leverages the kernel's virtual memory system so "hot" data will
+ generally still be kept in RAM.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index b60d0ab1c..69c0ed6cf 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -363,3 +363,39 @@ $graph:
jsonldPredicate:
mapSubject: propertyName
mapPredicate: propertyValue
+
+
+- name: KeepCacheType
+ type: enum
+ symbols:
+ - ram_cache
+ - disk_cache
+ doc:
+ - |
+ ram_cache: Keep blocks will be cached in RAM only.
+ - |
+ disk_cache: Keep blocks will be cached to disk and
+ memory-mapped. The disk cache leverages the kernel's virtual
+ memory system so "hot" data will generally still be kept in
+ RAM.
+
+- name: KeepCacheTypeRequirement
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Choose keep cache strategy.
+ fields:
+ - name: class
+ type: string
+ doc: "'arv:KeepCacheTypeRequirement'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ - name: keepCacheType
+ type: KeepCacheType?
+ doc: |
+ Whether Keep blocks loaded by arv-mount should be kept in RAM
+ only or written to disk and memory-mapped. The disk cache
+ leverages the kernel's virtual memory system so "hot" data will
+ generally still be kept in RAM.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index 2769244a5..86cd06eff 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -365,3 +365,39 @@ $graph:
jsonldPredicate:
mapSubject: propertyName
mapPredicate: propertyValue
+
+
+- name: KeepCacheType
+ type: enum
+ symbols:
+ - ram_cache
+ - disk_cache
+ doc:
+ - |
+ ram_cache: Keep blocks will be cached in RAM only.
+ - |
+ disk_cache: Keep blocks will be cached to disk and
+ memory-mapped. The disk cache leverages the kernel's virtual
+ memory system so "hot" data will generally still be kept in
+ RAM.
+
+- name: KeepCacheTypeRequirement
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Choose keep cache strategy.
+ fields:
+ - name: class
+ type: string
+ doc: "'arv:KeepCacheTypeRequirement'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ - name: keepCacheType
+ type: KeepCacheType?
+ doc: |
+ Whether Keep blocks loaded by arv-mount should be kept in RAM
+ only or written to disk and memory-mapped. The disk cache
+ leverages the kernel's virtual memory system so "hot" data will
+ generally still be kept in RAM.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 6fcf366e0..fde9db384 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -264,10 +264,15 @@ class ArvadosContainer(JobBase):
if api_req:
runtime_constraints["API"] = True
+ use_disk_cache = (self.arvrunner.api.config()["Containers"].get("DefaultKeepCacheDisk", 0) > 0)
+
runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
if runtime_req:
+ if "keepCacheType" in runtime_req:
+ if cache_type == "ram_cache":
+ use_disk_cache = False
if "keep_cache" in runtime_req:
- if self.arvrunner.api.config()["Containers"].get("DefaultKeepCacheDisk", 0) > 0:
+ if use_disk_cache:
# If DefaultKeepCacheDisk is non-zero it means we should use disk cache.
runtime_constraints["keep_cache_disk"] = math.ceil(runtime_req["keep_cache"] * 2**20)
else:
@@ -282,6 +287,13 @@ class ArvadosContainer(JobBase):
"writable": True
}
+ if use_disk_cache and "keep_cache_disk" not in runtime_constraints:
+ # Cache size wasn't explicitly set so calculate a default
+ # based on 2x RAM request or 1 GB per core, whichever is
+ # smaller. This is to avoid requesting 100s of GB of disk
+ # cache when requesting a node with a huge amount of RAM.
+ runtime_constraints["keep_cache_disk"] = min(runtime_constraints["ram"] * 2, runtime_constraints["vcpus"] * (1024*1024*1024))
+
partition_req, _ = self.get_requirement("http://arvados.org/cwl#PartitionRequirement")
if partition_req:
scheduling_parameters["partitions"] = aslist(partition_req["partition"])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list