It is tricky but possible.
Problem
First, it is not possible to programmatically set both memory growth and the memory limit as below, because the memory growth setting does not take effect.
from typing import (
    Optional,
    List
)

import tensorflow as tf


def set_both_growth_and_limit(
        memory_limit: Optional[int] = None,
):
    gpus: List[tf.config.PhysicalDevice] = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    _current: Optional[tf.config.PhysicalDevice] = None
    try:
        for index, gpu in enumerate(gpus):
            _current = gpu
            # Set memory growth control.
            # Currently, memory growth needs to be the same across GPUs.
            print(f"setting memory_growth: index:[{index}] gpu:{gpu}")
            tf.config.experimental.set_memory_growth(gpu, True)

            # Set memory limit.
            print(f"setting memory_limit: index:[{index}] gpu:{gpu}")
            tf.config.set_logical_device_configuration(
                device=gpu,
                logical_devices=[
                    tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)
                ]
            )
        logical_gpus = tf.config.list_logical_devices('GPU')
    except RuntimeError as err:
        print(f"Memory growth must be set before GPU [{_current}] has been initialized")
        raise err
    except ValueError as err:
        print(f"Invalid GPU device [{_current}]")
        raise err
set_both_growth_and_limit(1024)
-----
setting memory_growth: index:[0] gpu:PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
setting memory_limit: index:[0] gpu:PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
Before creating a Tensor, check the GPU usage.
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 405738 C /home/user/venv/ml/bin/python3 80MiB |
+---------------------------------------------------------------------------------------+
Create a Tensor. Memory growth should prevent TensorFlow from allocating GPU memory all the way up to the 1 GB limit, but it does not.
x = tf.random.uniform([3, 3])
print("Is the Tensor on GPU #0: ")
print(x.device.endswith('GPU:0'))
Check the GPU usage. The memory growth setting did not take effect, and memory has been allocated up to the limit.
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 405738 C /home/user/venv/ml/bin/python3 -----> 1106MiB |
+---------------------------------------------------------------------------------------+
Setting memory growth and the memory limit in separate steps does not work either, because listing the logical devices in between initializes the runtime.
from typing import (
    Optional,
    List
)

import tensorflow as tf


def list_logical_devices_both_set_control_and_limit(
        memory_limit: Optional[int] = None,
):
    gpus: List[tf.config.PhysicalDevice] = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    _current: Optional[tf.config.PhysicalDevice] = None
    try:
        for index, gpu in enumerate(gpus):
            _current = gpu
            # Set memory growth control.
            # Currently, memory growth needs to be the same across GPUs.
            print(f"setting memory_growth: index:[{index}] gpu:{gpu}")
            tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.list_logical_devices('GPU')

            # Set memory limit.
            # Calling list_logical_devices above initializes the runtime and
            # prevents further configuration here via set_logical_device_configuration.
            print(f"setting memory_limit: index:[{index}] gpu:{gpu}")
            tf.config.set_logical_device_configuration(
                device=gpu,
                logical_devices=[
                    tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)
                ]
            )
        logical_gpus = tf.config.list_logical_devices('GPU')
    except RuntimeError as err:
        print(f"Memory growth must be set before GPU [{_current}] has been initialized")
        raise err
    except ValueError as err:
        print(f"Invalid GPU device [{_current}]")
        raise err
list_logical_devices_both_set_control_and_limit(1024)
-----
setting memory_growth: index:[0] gpu:PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
setting memory_limit: index:[0] gpu:PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
Memory growth must be set before GPU [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] has been initialized
Solution
Hence, use the environment variable TF_FORCE_GPU_ALLOW_GROWTH=true for memory growth, and use set_logical_device_configuration for the memory limit.
Set TF_FORCE_GPU_ALLOW_GROWTH
Shell:
$ export TF_FORCE_GPU_ALLOW_GROWTH=true
$ echo ${TF_FORCE_GPU_ALLOW_GROWTH}
-----
true
Or in a Jupyter notebook:
%env TF_FORCE_GPU_ALLOW_GROWTH=true
%env TF_FORCE_GPU_ALLOW_GROWTH
-----
env: TF_FORCE_GPU_ALLOW_GROWTH=true
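Or set it from Python itself. A minimal sketch; the variable must be set before TensorFlow initializes the GPU devices, so place the assignment before importing tensorflow to be safe.
import os
# TF reads this variable when it initializes the GPU devices,
# so set it before importing tensorflow.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf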
Set memory limit
Make sure TF_FORCE_GPU_ALLOW_GROWTH has been set.
import os
print(os.environ['TF_FORCE_GPU_ALLOW_GROWTH'])
-----
true
Then call set_logical_device_configuration to set the memory limit.
from typing import (
    Optional,
    List
)

import tensorflow as tf


def set_memory_limit(
        memory_limit: Optional[int] = None,
):
    gpus: List[tf.config.PhysicalDevice] = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    _current: Optional[tf.config.PhysicalDevice] = None
    try:
        for index, gpu in enumerate(gpus):
            _current = gpu
            # Set memory limit.
            print(f"setting memory_limit: index:[{index}] gpu:{gpu}")
            tf.config.set_logical_device_configuration(
                device=gpu,
                logical_devices=[
                    tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)
                ]
            )
        logical_gpus = tf.config.list_logical_devices('GPU')
    except RuntimeError as err:
        print(f"Logical device configuration must be set before GPU [{_current}] has been initialized")
        raise err
    except ValueError as err:
        print(f"Invalid GPU device [{_current}]")
        raise err
Verify the growth control and memory limit
Create a tensor. With memory growth in effect, TensorFlow should not pre-allocate GPU memory up to the 1 GB limit.
set_memory_limit(1024)
x = tf.random.uniform([3, 3])
print("Is the Tensor on GPU #0: ")
print(x.device.endswith('GPU:0'))
-----
Is the Tensor on GPU #0:
True
Check the GPU usage. The memory growth setting has taken effect and only a small amount of memory is allocated.
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 563664 C /home/user/venv/ml/bin/python3 ---> 84MiB |
+---------------------------------------------------------------------------------------+
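The allocation can also be checked from inside the process. A sketch using tf.config.experimental.get_memory_info, available in recent TF 2.x releases:
import tensorflow as tf

# 'current' is the live allocation, 'peak' the high-water mark,
# both in bytes held by the TensorFlow allocator on the device.
info = tf.config.experimental.get_memory_info('GPU:0')
print(f"current: {info['current']} bytes, peak: {info['peak']} bytes")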
Create a tensor larger than the 1 GB limit (e.g. 2 GB). It fails because the allocator runs out of memory.
GIGA = tf.pow(1024, 3)
# 2 GiB of int8 values: (1024**3) * 1 * 2 elements, one byte each.
x = tf.ones(shape=(GIGA, tf.int8.size, 2), dtype=tf.int8)
-----
2023-11-25 22:08:51.910042: W tensorflow/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.00GiB (rounded to 2147483648)requested by op Fill
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation.
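If the process should keep running after hitting the limit, the allocation can be guarded. A sketch, assuming the over-limit allocation surfaces as tf.errors.ResourceExhaustedError in eager mode:
import tensorflow as tf

GIGA = 1024 ** 3
try:
    # 2 GiB of int8 values, above the 1 GiB limit set earlier.
    x = tf.ones(shape=(GIGA, 1, 2), dtype=tf.int8)
except tf.errors.ResourceExhaustedError as err:
    print(f"allocation exceeded the memory limit: {err}")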
Create a 500 MB tensor, which is below the limit.
MEGA = tf.pow(1024, 2)
# 500 MiB of int8 values: (1024**2) * 1 * 500 elements, one byte each.
x = tf.ones(shape=(MEGA, tf.int8.size, 500), dtype=tf.int8)
tf.shape(x)
-----
<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1048576, 1, 500], dtype=int32)>
Check the GPU usage. The process now holds roughly the 500 MiB tensor plus runtime overhead.
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 563664 C /home/user/venv/ml/bin/python3 ---> 596MiB |
+---------------------------------------------------------------------------------------+
References
In some cases it is desirable for the process to only allocate a subset of the available memory, or to only grow the memory usage as is needed by the process. TensorFlow provides two methods to control this.
The first option is to turn on memory growth by calling tf.config.experimental.set_memory_growth, which attempts to allocate only as much GPU memory as needed for the runtime allocations: it starts out allocating very little memory, and as the program gets run and more GPU memory is needed, the GPU memory region is extended for the TensorFlow process. Memory is not released since it can lead to memory fragmentation. Another way to enable this option is to set the environment variable TF_FORCE_GPU_ALLOW_GROWTH to true. This configuration is platform specific.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
The second method is to configure a virtual GPU device with tf.config.set_logical_device_configuration and set a hard limit on the total memory to allocate on the GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)
If memory growth is enabled for a PhysicalDevice, the runtime initialization will not allocate all memory on the device.
Set the logical device configuration for a tf.config.PhysicalDevice. A visible tf.config.PhysicalDevice will by default have a single tf.config.LogicalDevice associated with it once the runtime is initialized. Specifying a list of tf.config.LogicalDeviceConfiguration objects allows multiple devices to be created on the same tf.config.PhysicalDevice.
Logical device configurations can be modified by calling this function as long as the runtime is uninitialized. After the runtime is initialized calling this function raises a RuntimeError.
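For example, a sketch based on the description above, splitting the first physical GPU into two logical devices (the memory_limit values are illustrative):
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Two logical devices carved out of the first physical GPU;
        # must run before the runtime is initialized.
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=512),
             tf.config.LogicalDeviceConfiguration(memory_limit=512)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)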
Return a list of logical devices created by runtime. Calling tf.config.list_logical_devices triggers the runtime to configure any tf.config.PhysicalDevice visible to the runtime, thereby preventing further configuration.