use {
    core::fmt::{self, Debug},
    gpu_alloc_types::{MemoryPropertyFlags, MemoryType},
};

bitflags::bitflags! {
    /// Memory usage type.
    /// Bits set define intended usage for requested memory.
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    pub struct UsageFlags: u8 {
        /// Hints the allocator to find memory with faster device access.
        /// If no flags are specified then `FAST_DEVICE_ACCESS` is implied.
        const FAST_DEVICE_ACCESS = 0x01;

        /// Memory will be accessed from the host.
        /// This flag guarantees that host memory operations will be available.
        /// Otherwise the implementation is encouraged to use non-host-accessible memory.
        const HOST_ACCESS = 0x02;

        /// Hints the allocator that memory will be used for data downloading.
        /// The allocator will strongly prefer host-cached memory.
        /// Implies the `HOST_ACCESS` flag.
        const DOWNLOAD = 0x04;

        /// Hints the allocator that memory will be used for data uploading.
        /// If the `DOWNLOAD` flag is not set, the allocator will assume that
        /// the host will access memory in a write-only manner and may
        /// pick memory that is not host-cached.
        /// Implies the `HOST_ACCESS` flag.
        const UPLOAD = 0x08;

        /// Hints the allocator that memory will be used for a short duration,
        /// allowing it to use a faster algorithm with less memory overhead.
        /// If the user holds the returned memory block for too long, the
        /// effective memory overhead increases instead.
        /// The best use case is a staging buffer for a single batch of operations.
        const TRANSIENT = 0x10;

        /// Requests memory that can be addressed with `u64`.
        /// Allows fetching the device address for resources bound to that memory.
        const DEVICE_ADDRESS = 0x20;
    }
}

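// Illustrative sketch (not from the original source): `UsageFlags` values
// combine like ordinary bitflags, so a short-lived staging buffer that the
// host writes once can be described by OR-ing hints together.
#[cfg(test)]
#[test]
fn usage_flags_combine() {
    let staging = UsageFlags::UPLOAD | UsageFlags::TRANSIENT;
    assert!(staging.contains(UsageFlags::UPLOAD));
    // `HOST_ACCESS` need not be set explicitly; the selection logic below
    // treats `UPLOAD` and `DOWNLOAD` as requiring host-visible memory.
    assert!(!staging.contains(UsageFlags::HOST_ACCESS));
}
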
#[derive(Clone, Copy, Debug)]
struct MemoryForOneUsage {
    mask: u32,
    types: [u32; 32],
    types_count: u32,
}

pub(crate) struct MemoryForUsage {
    usages: [MemoryForOneUsage; 64],
}

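// Why 64 entries: `UsageFlags` defines six bits, so there are 2^6 = 64
// possible combinations, and `usages` is indexed directly by
// `UsageFlags::bits()`. A compile-time sanity check of that relationship
// (a sketch, assuming the `const` APIs of bitflags 2.x):
const _: () = assert!(UsageFlags::all().bits() as usize + 1 == 64);
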
impl Debug for MemoryForUsage {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.debug_struct("MemoryForUsage")
            .field("usages", &&self.usages[..])
            .finish()
    }
}

impl MemoryForUsage {
    pub fn new(memory_types: &[MemoryType]) -> Self {
        assert!(
            memory_types.len() <= 32,
            "Only up to 32 memory types supported"
        );

        let mut mfu = MemoryForUsage {
            usages: [MemoryForOneUsage {
                mask: 0,
                types: [0; 32],
                types_count: 0,
            }; 64],
        };

        for usage in 0..64 {
            mfu.usages[usage as usize] =
                one_usage(UsageFlags::from_bits_truncate(usage), memory_types);
        }

        mfu
    }

    /// Returns a mask with bits set for memory type indices that support the
    /// usage.
    pub fn mask(&self, usage: UsageFlags) -> u32 {
        self.usages[usage.bits() as usize].mask
    }

    /// Returns a slice of memory type indices that support the usage.
    /// Earlier memory types have priority over later ones.
    pub fn types(&self, usage: UsageFlags) -> &[u32] {
        let usage = &self.usages[usage.bits() as usize];
        &usage.types[..usage.types_count as usize]
    }
}

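// Hedged usage sketch: exercises the precomputed table against a
// hypothetical two-type device. Field names `heap` and `props` are assumed
// from `gpu_alloc_types::MemoryType`.
#[cfg(test)]
#[test]
fn upload_selects_host_visible_types() {
    let memory_types = [
        MemoryType {
            heap: 0,
            props: MemoryPropertyFlags::DEVICE_LOCAL,
        },
        MemoryType {
            heap: 1,
            props: MemoryPropertyFlags::HOST_VISIBLE | MemoryPropertyFlags::HOST_COHERENT,
        },
    ];
    let mfu = MemoryForUsage::new(&memory_types);

    // `UPLOAD` requires host-visible memory, so only type index 1 qualifies.
    assert_eq!(mfu.types(UsageFlags::UPLOAD), &[1]);
    assert_eq!(mfu.mask(UsageFlags::UPLOAD), 0b10);
}
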
fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage {
    let mut types = [0; 32];
    let mut types_count = 0;

    for (index, mt) in memory_types.iter().enumerate() {
        if compatible(usage, mt.props) {
            types[types_count as usize] = index as u32;
            types_count += 1;
        }
    }

    types[..types_count as usize]
        .sort_unstable_by_key(|&index| reverse_priority(usage, memory_types[index as usize].props));

    let mask = types[..types_count as usize]
        .iter()
        .fold(0u32, |mask, index| mask | 1u32 << index);

    MemoryForOneUsage {
        mask,
        types,
        types_count,
    }
}

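// Worked example of the fold above (hedged sketch): if indices 1 and 2 are
// the compatible types, the resulting mask is (1 << 1) | (1 << 2) = 0b110,
// matching what `MemoryForUsage::mask` hands out. `MemoryType` field names
// are assumed from `gpu-alloc-types`.
#[cfg(test)]
#[test]
fn one_usage_folds_indices_into_mask() {
    let memory_types = [
        MemoryType {
            heap: 0,
            props: MemoryPropertyFlags::DEVICE_LOCAL,
        },
        MemoryType {
            heap: 0,
            props: MemoryPropertyFlags::HOST_VISIBLE,
        },
        MemoryType {
            heap: 0,
            props: MemoryPropertyFlags::DEVICE_LOCAL | MemoryPropertyFlags::HOST_VISIBLE,
        },
    ];
    let one = one_usage(UsageFlags::HOST_ACCESS, &memory_types);
    assert_eq!(one.types_count, 2);
    assert_eq!(one.mask, 0b110);
}
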
fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool {
    type Flags = MemoryPropertyFlags;
    if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) {
        // Unsupported
        false
    } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
    {
        // Requires HOST_VISIBLE
        flags.contains(Flags::HOST_VISIBLE)
    } else {
        true
    }
}

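// Hedged checks of the rules above: lazily-allocated memory is never
// offered, and any host-side usage demands `HOST_VISIBLE`.
#[cfg(test)]
#[test]
fn compatibility_rules() {
    type Flags = MemoryPropertyFlags;
    assert!(!compatible(UsageFlags::empty(), Flags::LAZILY_ALLOCATED));
    assert!(!compatible(UsageFlags::HOST_ACCESS, Flags::DEVICE_LOCAL));
    assert!(compatible(UsageFlags::HOST_ACCESS, Flags::HOST_VISIBLE));
    assert!(compatible(UsageFlags::empty(), Flags::DEVICE_LOCAL));
}
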
/// Returns reversed priority of memory with specified flags for specified usage.
/// Lesser value returned = more prioritized.
fn reverse_priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 {
    type Flags = MemoryPropertyFlags;

    // Highly prefer device-local memory when the `FAST_DEVICE_ACCESS` usage
    // is specified or usage is empty.
    let device_local: bool = flags.contains(Flags::DEVICE_LOCAL)
        ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS));

    assert!(
        flags.contains(Flags::HOST_VISIBLE)
            || !usage
                .intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
    );

    // Prefer non-host-visible memory when host access is not required.
    let host_visible: bool = flags.contains(Flags::HOST_VISIBLE)
        ^ usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD);

    // Prefer cached memory for downloads,
    // or non-cached if downloads are not expected.
    let host_cached: bool =
        flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD);

    // Prefer coherent memory for both uploads and downloads.
    // Prefer non-coherent if neither flag is set.
    let host_coherent: bool = flags.contains(Flags::HOST_COHERENT)
        ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD));

    // Each boolean is false if the flags are preferred.
    device_local as u32 * 8
        + host_visible as u32 * 4
        + host_cached as u32 * 2
        + host_coherent as u32
}

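// Hedged illustration of the weighting above: for `DOWNLOAD`, host-cached
// memory scores lower (better) than an otherwise identical uncached type.
#[cfg(test)]
#[test]
fn download_prefers_host_cached() {
    type Flags = MemoryPropertyFlags;
    let cached = Flags::HOST_VISIBLE | Flags::HOST_CACHED | Flags::HOST_COHERENT;
    let uncached = Flags::HOST_VISIBLE | Flags::HOST_COHERENT;
    assert!(
        reverse_priority(UsageFlags::DOWNLOAD, cached)
            < reverse_priority(UsageFlags::DOWNLOAD, uncached)
    );
}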