// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
#include <linux/seqlock.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"

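/*
 * Note on large mappings: page sizes beyond the default 4K/2M/1G set are
 * expressed by setting a PTE's next-level field to 7 and encoding the page
 * size in the low-order address bits, with the entry replicated across
 * every page-table slot it spans at its level. For example, a 32K mapping
 * occupies eight consecutive level-0 PTEs, all carrying identical level-7
 * contents.
 */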
/*
 * Helper function to get the first pte of a large mapping
 */
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
                         unsigned long *count)
{
        unsigned long pte_mask, pg_size, cnt;
        u64 *fpte;

        pg_size  = PTE_PAGE_SIZE(*pte);
        cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
        /* Each PTE is 8 bytes; mask down to the first entry of the group. */
        pte_mask = ~((cnt << 3) - 1);
        fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

        if (page_size)
                *page_size = pg_size;

        if (count)
                *count = cnt;

        return fpte;
}

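/*
 * Recursively collect the page-table pages below @pt onto @freelist. Pages
 * are only queued here; the caller frees them once the hardware can no
 * longer reference them.
 */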
static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
{
        u64 *p;
        int i;

        for (i = 0; i < 512; ++i) {
                /* PTE present? */
                if (!IOMMU_PTE_PRESENT(pt[i]))
                        continue;

                /* Large PTE? */
                if (PM_PTE_LEVEL(pt[i]) == 0 ||
                    PM_PTE_LEVEL(pt[i]) == 7)
                        continue;

                /*
                 * Free the next level. No need to look at l1 tables here since
                 * they can only contain leaf PTEs; just free them directly.
                 */
                p = IOMMU_PTE_PAGE(pt[i]);
                if (lvl > 2)
                        free_pt_lvl(p, freelist, lvl - 1);
                else
                        iommu_pages_list_add(freelist, p);
        }

        iommu_pages_list_add(freelist, pt);
}

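/*
 * Queue the page-table pages rooted at @root for freeing. A mode of
 * PAGE_MODE_NONE or PAGE_MODE_7_LEVEL carries no table pages of its own,
 * a 1-level table is a single page, and deeper tables are walked
 * recursively.
 */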
static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
{
        switch (mode) {
        case PAGE_MODE_NONE:
        case PAGE_MODE_7_LEVEL:
                break;
        case PAGE_MODE_1_LEVEL:
                iommu_pages_list_add(freelist, root);
                break;
        case PAGE_MODE_2_LEVEL:
        case PAGE_MODE_3_LEVEL:
        case PAGE_MODE_4_LEVEL:
        case PAGE_MODE_5_LEVEL:
        case PAGE_MODE_6_LEVEL:
                free_pt_lvl(root, freelist, mode);
                break;
        default:
                BUG();
        }
}

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
static bool increase_address_space(struct amd_io_pgtable *pgtable,
                                   unsigned long address,
                                   unsigned int page_size_level,
                                   gfp_t gfp)
{
        struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
        struct protection_domain *domain =
                container_of(pgtable, struct protection_domain, iop);
        unsigned long flags;
        bool ret = true;
        u64 *pte;

        pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K);
        if (!pte)
                return false;

        spin_lock_irqsave(&domain->lock, flags);

        /* Someone else may already have grown the table far enough. */
        if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
            pgtable->mode - 1 >= page_size_level)
                goto out;

        ret = false;
        if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level))
                goto out;

        *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));

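        /*
         * Publish the new root and mode under the write side of the
         * seqcount so that lock-free walkers (alloc_pte()/fetch_pte())
         * never see a root pointer paired with the wrong mode.
         */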
        write_seqcount_begin(&pgtable->seqcount);
        pgtable->root  = pte;
        pgtable->mode += 1;
        write_seqcount_end(&pgtable->seqcount);

        amd_iommu_update_and_flush_device_table(domain);

        pte = NULL;
        ret = true;

out:
        spin_unlock_irqrestore(&domain->lock, flags);
        iommu_free_pages(pte);

        return ret;
}

static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
                      unsigned long address,
                      unsigned long page_size,
                      u64 **pte_page,
                      gfp_t gfp,
                      bool *updated)
{
        unsigned long last_addr = address + (page_size - 1);
        struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
        unsigned int seqcount;
        int level, end_lvl;
        u64 *pte, *page;

        BUG_ON(!is_power_of_2(page_size));

        while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
               pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
                /*
                 * Return an error if there is no memory to update the
                 * page-table.
                 */
                if (!increase_address_space(pgtable, last_addr,
                                            PAGE_SIZE_LEVEL(page_size), gfp))
                        return NULL;
        }

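        /*
         * Snapshot root and mode together; retry if a concurrent
         * increase_address_space() replaced them while we were reading.
         */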
        do {
                seqcount = read_seqcount_begin(&pgtable->seqcount);

                level   = pgtable->mode - 1;
                pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
        } while (read_seqcount_retry(&pgtable->seqcount, seqcount));

        address = PAGE_SIZE_ALIGN(address, page_size);
        end_lvl = PAGE_SIZE_LEVEL(page_size);

        while (level > end_lvl) {
                u64 __pte, __npte;
                int pte_level;

                __pte     = *pte;
                pte_level = PM_PTE_LEVEL(__pte);

                /*
                 * If we replace a series of large PTEs, we need
                 * to tear down all of them.
                 */
                if (IOMMU_PTE_PRESENT(__pte) &&
                    pte_level == PAGE_MODE_7_LEVEL) {
                        unsigned long count, i;
                        u64 *lpte;

                        lpte = first_pte_l7(pte, NULL, &count);

                        /*
                         * Unmap the replicated PTEs that still match the
                         * original large mapping
                         */
                        for (i = 0; i < count; ++i)
                                cmpxchg64(&lpte[i], __pte, 0ULL);

                        *updated = true;
                        continue;
                }

                if (!IOMMU_PTE_PRESENT(__pte) ||
                    pte_level == PAGE_MODE_NONE) {
                        page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp,
                                                         SZ_4K);

                        if (!page)
                                return NULL;

                        __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

                        /*
                         * The pte could have been changed somewhere else;
                         * if the cmpxchg fails, free our page and retry
                         * with the value now in *pte.
                         */
                        if (!try_cmpxchg64(pte, &__pte, __npte))
                                iommu_free_pages(page);
                        else if (IOMMU_PTE_PRESENT(__pte))
                                *updated = true;

                        continue;
                }

                /* No level skipping support yet */
                if (pte_level != level)
                        return NULL;

                level -= 1;

                pte = IOMMU_PTE_PAGE(__pte);

                if (pte_page && level == end_lvl)
                        *pte_page = pte;

                pte = &pte[PM_LEVEL_INDEX(level, address)];
        }

        return pte;
}

/*
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
                      unsigned long address,
                      unsigned long *page_size)
{
        int level;
        unsigned int seqcount;
        u64 *pte;

        *page_size = 0;

        if (address > PM_LEVEL_SIZE(pgtable->mode))
                return NULL;

        do {
                seqcount = read_seqcount_begin(&pgtable->seqcount);
                level = pgtable->mode - 1;
                pte   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
        } while (read_seqcount_retry(&pgtable->seqcount, seqcount));

        *page_size = PTE_LEVEL_PAGE_SIZE(level);

        while (level > 0) {

                /* Not Present */
                if (!IOMMU_PTE_PRESENT(*pte))
                        return NULL;

                /* Large PTE */
                if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
                    PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
                        break;

                /* No level skipping support yet */
                if (PM_PTE_LEVEL(*pte) != level)
                        return NULL;

                level -= 1;

                /* Walk to the next level */
                pte = IOMMU_PTE_PAGE(*pte);
                pte = &pte[PM_LEVEL_INDEX(level, address)];
                *page_size = PTE_LEVEL_PAGE_SIZE(level);
        }

        /*
         * If we have a series of large PTEs, make
         * sure to return a pointer to the first one.
         */
        if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
                pte = first_pte_l7(pte, page_size, NULL);

        return pte;
}

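/*
 * Atomically clear *pte (retrying if it changes under us) and, if it
 * pointed to a lower-level table, queue that table's pages on @freelist
 * for freeing after the TLB flush.
 */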
static void free_clear_pte(u64 *pte, u64 pteval,
                           struct iommu_pages_list *freelist)
{
        u64 *pt;
        int mode;

        while (!try_cmpxchg64(pte, &pteval, 0))
                pr_warn("IOMMU pte changed since we read it\n");

        if (!IOMMU_PTE_PRESENT(pteval))
                return;

        pt   = IOMMU_PTE_PAGE(pteval);
        mode = IOMMU_PTE_MODE(pteval);

        free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
                              int prot, gfp_t gfp, size_t *mapped)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
        bool updated = false;
        u64 __pte, *pte;
        int ret, i, count;
        size_t size = pgcount << __ffs(pgsize);
        unsigned long o_iova = iova;

        BUG_ON(!IS_ALIGNED(iova, pgsize));
        BUG_ON(!IS_ALIGNED(paddr, pgsize));

        ret = -EINVAL;
        if (!(prot & IOMMU_PROT_MASK))
                goto out;

        while (pgcount > 0) {
                count = PAGE_SIZE_PTE_COUNT(pgsize);
                pte   = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);

                ret = -ENOMEM;
                if (!pte)
                        goto out;

                /* Tear down any mappings the new PTEs replace. */
                for (i = 0; i < count; ++i)
                        free_clear_pte(&pte[i], pte[i], &freelist);

                if (!iommu_pages_list_empty(&freelist))
                        updated = true;

                if (count > 1) {
                        /* Replicated large mapping: encode size, mark level 7 */
                        __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
                        __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
                } else {
                        __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
                }

                if (prot & IOMMU_PROT_IR)
                        __pte |= IOMMU_PTE_IR;
                if (prot & IOMMU_PROT_IW)
                        __pte |= IOMMU_PTE_IW;

                for (i = 0; i < count; ++i)
                        pte[i] = __pte;

                iova  += pgsize;
                paddr += pgsize;
                pgcount--;
                if (mapped)
                        *mapped += pgsize;
        }

        ret = 0;

out:
        if (updated) {
                struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
                unsigned long flags;

                spin_lock_irqsave(&dom->lock, flags);
                /*
                 * Flush domain TLB(s) and wait for completion. Any Device-Table
                 * Updates and flushing already happened in
                 * increase_address_space().
                 */
                amd_iommu_domain_flush_pages(dom, o_iova, size);
                spin_unlock_irqrestore(&dom->lock, flags);
        }

        /* Everything flushed out, free pages now */
        iommu_put_pages_list(&freelist);

        return ret;
}

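/*
 * Clear the PTEs for up to @pgcount pages starting at @iova and return
 * the number of bytes actually unmapped. The walk stops early if it hits
 * a hole where nothing is mapped.
 */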
static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
                                          unsigned long iova,
                                          size_t pgsize, size_t pgcount,
                                          struct iommu_iotlb_gather *gather)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long long unmapped;
        unsigned long unmap_size;
        u64 *pte;
        size_t size = pgcount << __ffs(pgsize);

        BUG_ON(!is_power_of_2(pgsize));

        unmapped = 0;

        while (unmapped < size) {
                pte = fetch_pte(pgtable, iova, &unmap_size);
                if (pte) {
                        int i, count;

                        count = PAGE_SIZE_PTE_COUNT(unmap_size);
                        for (i = 0; i < count; i++)
                                pte[i] = 0ULL;
                } else {
                        return unmapped;
                }

                iova = (iova & ~(unmap_size - 1)) + unmap_size;
                unmapped += unmap_size;
        }

        return unmapped;
}

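/*
 * Translate an IOVA to the physical address it maps to, combining the
 * PTE's page base with the offset bits below the mapping's page size.
 */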
static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long offset_mask, pte_pgsize;
        u64 *pte, __pte;

        pte = fetch_pte(pgtable, iova, &pte_pgsize);

        if (!pte || !IOMMU_PTE_PRESENT(*pte))
                return 0;

        offset_mask = pte_pgsize - 1;
        __pte       = __sme_clr(*pte & PM_ADDR_MASK);

        return (__pte & ~offset_mask) | (iova & offset_mask);
}

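/*
 * Test (and, unless IOMMU_DIRTY_NO_CLEAR is set, clear) the Dirty bit of
 * every replicated PTE backing a mapping. With IOMMU_DIRTY_NO_CLEAR the
 * scan can stop at the first dirty entry, since nothing is written back.
 */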
static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
                                     unsigned long flags)
{
        bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
        bool dirty = false;
        int i, count;

        /*
         * 2.2.3.2 Host Dirty Support
         * When a non-default page size is used, software must OR the
         * Dirty bits in all of the replicated host PTEs used to map
         * the page. The IOMMU does not guarantee the Dirty bits are
         * set in all of the replicated PTEs. Any portion of the page
         * may have been written even if the Dirty bit is set in only
         * one of the replicated PTEs.
         */
        count = PAGE_SIZE_PTE_COUNT(size);
        for (i = 0; i < count && test_only; i++) {
                if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
                        dirty = true;
                        break;
                }
        }

        for (i = 0; i < count && !test_only; i++) {
                if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
                                       (unsigned long *)&ptep[i])) {
                        dirty = true;
                }
        }

        return dirty;
}

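/*
 * Walk the IOVA range one mapping at a time, advancing by the page size
 * fetch_pte() reports (or by 4K across unmapped gaps), and record any
 * dirty mappings in @dirty.
 */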
static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
                                         unsigned long iova, size_t size,
                                         unsigned long flags,
                                         struct iommu_dirty_bitmap *dirty)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long end = iova + size - 1;

        do {
                unsigned long pgsize = 0;
                u64 *ptep, pte;

                ptep = fetch_pte(pgtable, iova, &pgsize);
                if (ptep)
                        pte = READ_ONCE(*ptep);
                if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
                        pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
                        iova += pgsize;
                        continue;
                }

                /*
                 * Mark the whole IOVA range as dirty even if only one of
                 * the replicated PTEs was marked dirty.
                 */
                if (pte_test_and_clear_dirty(ptep, pgsize, flags))
                        iommu_dirty_bitmap_record(dirty, iova, pgsize);
                iova += pgsize;
        } while (iova < end);

        return 0;
}

/*
 * ----------------------------------------------------
 */
static void v1_free_pgtable(struct io_pgtable *iop)
{
        struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
        struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);

        if (pgtable->mode == PAGE_MODE_NONE)
                return;

        /* Page-table is not visible to IOMMU anymore, so free it */
        BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
               pgtable->mode > amd_iommu_hpt_level);

        free_sub_pt(pgtable->root, pgtable->mode, &freelist);
        iommu_put_pages_list(&freelist);
}

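/*
 * Allocate a fresh v1 page table. Tables start out at three levels and
 * grow on demand via increase_address_space() as larger IOVAs or page
 * sizes are mapped.
 */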
static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

        pgtable->root =
                iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
        if (!pgtable->root)
                return NULL;
        pgtable->mode = PAGE_MODE_3_LEVEL;
        seqcount_init(&pgtable->seqcount);

        cfg->pgsize_bitmap  = amd_iommu_pgsize_bitmap;
        cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
        cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE;

        pgtable->pgtbl.ops.map_pages    = iommu_v1_map_pages;
        pgtable->pgtbl.ops.unmap_pages  = iommu_v1_unmap_pages;
        pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
        pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;

        return &pgtable->pgtbl;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
        .alloc  = v1_alloc_pgtable,
        .free   = v1_free_pgtable,
};