| 1 | // SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 2 | /* | 
|---|
| 3 | * RDMA resource limiting controller for cgroups. | 
|---|
| 4 | * | 
|---|
| 5 | * Used to allow a cgroup hierarchy to stop processes from consuming | 
|---|
| 6 | * additional RDMA resources after a certain limit is reached. | 
|---|
| 7 | * | 
|---|
| 8 | * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> | 
|---|
| 9 | */ | 
|---|
| 10 |  | 
|---|
| 11 | #include <linux/bitops.h> | 
|---|
| 12 | #include <linux/slab.h> | 
|---|
| 13 | #include <linux/seq_file.h> | 
|---|
| 14 | #include <linux/cgroup.h> | 
|---|
| 15 | #include <linux/parser.h> | 
|---|
| 16 | #include <linux/cgroup_rdma.h> | 
|---|
| 17 |  | 
|---|
/*
 * Textual form of an unlimited resource. It is accepted on write and
 * emitted on read in place of the numeric value S32_MAX.
 */
#define RDMACG_MAX_STR "max"

/*
 * rdmacg_mutex serializes every access to the per-cgroup resource pool
 * lists and to rdmacg_devices, the list of registered rdmacg devices.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);
|---|
| 26 |  | 
|---|
/*
 * Identifies which view of a resource pool a control file exposes. The
 * value is stored in cftype->private and read back when the file is shown.
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX,	/* configured limits */
	RDMACG_RESOURCE_TYPE_STAT,	/* current usage */
};
|---|
| 31 |  | 
|---|
| 32 | /* | 
|---|
| 33 | * resource table definition as to be seen by the user. | 
|---|
| 34 | * Need to add entries to it when more resources are | 
|---|
| 35 | * added/defined at IB verb/core layer. | 
|---|
| 36 | */ | 
|---|
| 37 | static char const *rdmacg_resource_names[] = { | 
|---|
| 38 | [RDMACG_RESOURCE_HCA_HANDLE]	= "hca_handle", | 
|---|
| 39 | [RDMACG_RESOURCE_HCA_OBJECT]	= "hca_object", | 
|---|
| 40 | }; | 
|---|
| 41 |  | 
|---|
/*
 * Accounting state for a single resource type inside a resource pool.
 */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX means unlimited */
	int usage;	/* number of charges currently held */
};
|---|
| 47 |  | 
|---|
| 48 | /* | 
|---|
| 49 | * resource pool object which represents per cgroup, per device | 
|---|
| 50 | * resources. There are multiple instances of this object per cgroup, | 
|---|
| 51 | * therefore it cannot be embedded within rdma_cgroup structure. It | 
|---|
| 52 | * is maintained as list. | 
|---|
| 53 | */ | 
|---|
| 54 | struct rdmacg_resource_pool { | 
|---|
| 55 | struct rdmacg_device	*device; | 
|---|
| 56 | struct rdmacg_resource	resources[RDMACG_RESOURCE_MAX]; | 
|---|
| 57 |  | 
|---|
| 58 | struct list_head	cg_node; | 
|---|
| 59 | struct list_head	dev_node; | 
|---|
| 60 |  | 
|---|
| 61 | /* count active user tasks of this pool */ | 
|---|
| 62 | u64			usage_sum; | 
|---|
| 63 | /* total number counts which are set to max */ | 
|---|
| 64 | int			num_max_cnt; | 
|---|
| 65 | }; | 
|---|
| 66 |  | 
|---|
| 67 | static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) | 
|---|
| 68 | { | 
|---|
| 69 | return container_of(css, struct rdma_cgroup, css); | 
|---|
| 70 | } | 
|---|
| 71 |  | 
|---|
| 72 | static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) | 
|---|
| 73 | { | 
|---|
| 74 | return css_rdmacg(css: cg->css.parent); | 
|---|
| 75 | } | 
|---|
| 76 |  | 
|---|
| 77 | static inline struct rdma_cgroup *get_current_rdmacg(void) | 
|---|
| 78 | { | 
|---|
| 79 | return css_rdmacg(css: task_get_css(current, subsys_id: rdma_cgrp_id)); | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | static void set_resource_limit(struct rdmacg_resource_pool *rpool, | 
|---|
| 83 | int index, int new_max) | 
|---|
| 84 | { | 
|---|
| 85 | if (new_max == S32_MAX) { | 
|---|
| 86 | if (rpool->resources[index].max != S32_MAX) | 
|---|
| 87 | rpool->num_max_cnt++; | 
|---|
| 88 | } else { | 
|---|
| 89 | if (rpool->resources[index].max == S32_MAX) | 
|---|
| 90 | rpool->num_max_cnt--; | 
|---|
| 91 | } | 
|---|
| 92 | rpool->resources[index].max = new_max; | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) | 
|---|
| 96 | { | 
|---|
| 97 | int i; | 
|---|
| 98 |  | 
|---|
| 99 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) | 
|---|
| 100 | set_resource_limit(rpool, index: i, S32_MAX); | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) | 
|---|
| 104 | { | 
|---|
| 105 | lockdep_assert_held(&rdmacg_mutex); | 
|---|
| 106 |  | 
|---|
| 107 | list_del(entry: &rpool->cg_node); | 
|---|
| 108 | list_del(entry: &rpool->dev_node); | 
|---|
| 109 | kfree(objp: rpool); | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | static struct rdmacg_resource_pool * | 
|---|
| 113 | find_cg_rpool_locked(struct rdma_cgroup *cg, | 
|---|
| 114 | struct rdmacg_device *device) | 
|---|
| 115 |  | 
|---|
| 116 | { | 
|---|
| 117 | struct rdmacg_resource_pool *pool; | 
|---|
| 118 |  | 
|---|
| 119 | lockdep_assert_held(&rdmacg_mutex); | 
|---|
| 120 |  | 
|---|
| 121 | list_for_each_entry(pool, &cg->rpools, cg_node) | 
|---|
| 122 | if (pool->device == device) | 
|---|
| 123 | return pool; | 
|---|
| 124 |  | 
|---|
| 125 | return NULL; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | static struct rdmacg_resource_pool * | 
|---|
| 129 | get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) | 
|---|
| 130 | { | 
|---|
| 131 | struct rdmacg_resource_pool *rpool; | 
|---|
| 132 |  | 
|---|
| 133 | rpool = find_cg_rpool_locked(cg, device); | 
|---|
| 134 | if (rpool) | 
|---|
| 135 | return rpool; | 
|---|
| 136 |  | 
|---|
| 137 | rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); | 
|---|
| 138 | if (!rpool) | 
|---|
| 139 | return ERR_PTR(error: -ENOMEM); | 
|---|
| 140 |  | 
|---|
| 141 | rpool->device = device; | 
|---|
| 142 | set_all_resource_max_limit(rpool); | 
|---|
| 143 |  | 
|---|
| 144 | INIT_LIST_HEAD(list: &rpool->cg_node); | 
|---|
| 145 | INIT_LIST_HEAD(list: &rpool->dev_node); | 
|---|
| 146 | list_add_tail(new: &rpool->cg_node, head: &cg->rpools); | 
|---|
| 147 | list_add_tail(new: &rpool->dev_node, head: &device->rpools); | 
|---|
| 148 | return rpool; | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | /** | 
|---|
| 152 | * uncharge_cg_locked - uncharge resource for rdma cgroup | 
|---|
| 153 | * @cg: pointer to cg to uncharge and all parents in hierarchy | 
|---|
| 154 | * @device: pointer to rdmacg device | 
|---|
| 155 | * @index: index of the resource to uncharge in cg (resource pool) | 
|---|
| 156 | * | 
|---|
| 157 | * It also frees the resource pool which was created as part of | 
|---|
| 158 | * charging operation when there are no resources attached to | 
|---|
| 159 | * resource pool. | 
|---|
| 160 | */ | 
|---|
| 161 | static void | 
|---|
| 162 | uncharge_cg_locked(struct rdma_cgroup *cg, | 
|---|
| 163 | struct rdmacg_device *device, | 
|---|
| 164 | enum rdmacg_resource_type index) | 
|---|
| 165 | { | 
|---|
| 166 | struct rdmacg_resource_pool *rpool; | 
|---|
| 167 |  | 
|---|
| 168 | rpool = find_cg_rpool_locked(cg, device); | 
|---|
| 169 |  | 
|---|
| 170 | /* | 
|---|
| 171 | * rpool cannot be null at this stage. Let kernel operate in case | 
|---|
| 172 | * if there a bug in IB stack or rdma controller, instead of crashing | 
|---|
| 173 | * the system. | 
|---|
| 174 | */ | 
|---|
| 175 | if (unlikely(!rpool)) { | 
|---|
| 176 | pr_warn( "Invalid device %p or rdma cgroup %p\n", cg, device); | 
|---|
| 177 | return; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | rpool->resources[index].usage--; | 
|---|
| 181 |  | 
|---|
| 182 | /* | 
|---|
| 183 | * A negative count (or overflow) is invalid, | 
|---|
| 184 | * it indicates a bug in the rdma controller. | 
|---|
| 185 | */ | 
|---|
| 186 | WARN_ON_ONCE(rpool->resources[index].usage < 0); | 
|---|
| 187 | rpool->usage_sum--; | 
|---|
| 188 | if (rpool->usage_sum == 0 && | 
|---|
| 189 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { | 
|---|
| 190 | /* | 
|---|
| 191 | * No user of the rpool and all entries are set to max, so | 
|---|
| 192 | * safe to delete this rpool. | 
|---|
| 193 | */ | 
|---|
| 194 | free_cg_rpool_locked(rpool); | 
|---|
| 195 | } | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | /** | 
|---|
| 199 | * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count | 
|---|
| 200 | * @cg: pointer to cg to uncharge and all parents in hierarchy | 
|---|
| 201 | * @device: pointer to rdmacg device | 
|---|
| 202 | * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup | 
|---|
| 203 | *           stop uncharging | 
|---|
| 204 | * @index: index of the resource to uncharge in cg in given resource pool | 
|---|
| 205 | */ | 
|---|
| 206 | static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, | 
|---|
| 207 | struct rdmacg_device *device, | 
|---|
| 208 | struct rdma_cgroup *stop_cg, | 
|---|
| 209 | enum rdmacg_resource_type index) | 
|---|
| 210 | { | 
|---|
| 211 | struct rdma_cgroup *p; | 
|---|
| 212 |  | 
|---|
| 213 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 214 |  | 
|---|
| 215 | for (p = cg; p != stop_cg; p = parent_rdmacg(cg: p)) | 
|---|
| 216 | uncharge_cg_locked(cg: p, device, index); | 
|---|
| 217 |  | 
|---|
| 218 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 219 |  | 
|---|
| 220 | css_put(css: &cg->css); | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | /** | 
|---|
| 224 | * rdmacg_uncharge - hierarchically uncharge rdma resource count | 
|---|
| 225 | * @cg: pointer to cg to uncharge and all parents in hierarchy | 
|---|
| 226 | * @device: pointer to rdmacg device | 
|---|
| 227 | * @index: index of the resource to uncharge in cgroup in given resource pool | 
|---|
| 228 | */ | 
|---|
| 229 | void rdmacg_uncharge(struct rdma_cgroup *cg, | 
|---|
| 230 | struct rdmacg_device *device, | 
|---|
| 231 | enum rdmacg_resource_type index) | 
|---|
| 232 | { | 
|---|
| 233 | if (index >= RDMACG_RESOURCE_MAX) | 
|---|
| 234 | return; | 
|---|
| 235 |  | 
|---|
| 236 | rdmacg_uncharge_hierarchy(cg, device, NULL, index); | 
|---|
| 237 | } | 
|---|
| 238 | EXPORT_SYMBOL(rdmacg_uncharge); | 
|---|
| 239 |  | 
|---|
| 240 | /** | 
|---|
| 241 | * rdmacg_try_charge - hierarchically try to charge the rdma resource | 
|---|
| 242 | * @rdmacg: pointer to rdma cgroup which will own this resource | 
|---|
| 243 | * @device: pointer to rdmacg device | 
|---|
| 244 | * @index: index of the resource to charge in cgroup (resource pool) | 
|---|
| 245 | * | 
|---|
| 246 | * This function follows charging resource in hierarchical way. | 
|---|
| 247 | * It will fail if the charge would cause the new value to exceed the | 
|---|
| 248 | * hierarchical limit. | 
|---|
| 249 | * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. | 
|---|
| 250 | * Returns pointer to rdmacg for this resource when charging is successful. | 
|---|
| 251 | * | 
|---|
| 252 | * Charger needs to account resources on two criteria. | 
|---|
| 253 | * (a) per cgroup & (b) per device resource usage. | 
|---|
| 254 | * Per cgroup resource usage ensures that tasks of cgroup doesn't cross | 
|---|
| 255 | * the configured limits. Per device provides granular configuration | 
|---|
| 256 | * in multi device usage. It allocates resource pool in the hierarchy | 
|---|
| 257 | * for each parent it come across for first resource. Later on resource | 
|---|
| 258 | * pool will be available. Therefore it will be much faster thereon | 
|---|
| 259 | * to charge/uncharge. | 
|---|
| 260 | */ | 
|---|
| 261 | int rdmacg_try_charge(struct rdma_cgroup **rdmacg, | 
|---|
| 262 | struct rdmacg_device *device, | 
|---|
| 263 | enum rdmacg_resource_type index) | 
|---|
| 264 | { | 
|---|
| 265 | struct rdma_cgroup *cg, *p; | 
|---|
| 266 | struct rdmacg_resource_pool *rpool; | 
|---|
| 267 | s64 new; | 
|---|
| 268 | int ret = 0; | 
|---|
| 269 |  | 
|---|
| 270 | if (index >= RDMACG_RESOURCE_MAX) | 
|---|
| 271 | return -EINVAL; | 
|---|
| 272 |  | 
|---|
| 273 | /* | 
|---|
| 274 | * hold on to css, as cgroup can be removed but resource | 
|---|
| 275 | * accounting happens on css. | 
|---|
| 276 | */ | 
|---|
| 277 | cg = get_current_rdmacg(); | 
|---|
| 278 |  | 
|---|
| 279 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 280 | for (p = cg; p; p = parent_rdmacg(cg: p)) { | 
|---|
| 281 | rpool = get_cg_rpool_locked(cg: p, device); | 
|---|
| 282 | if (IS_ERR(ptr: rpool)) { | 
|---|
| 283 | ret = PTR_ERR(ptr: rpool); | 
|---|
| 284 | goto err; | 
|---|
| 285 | } else { | 
|---|
| 286 | new = rpool->resources[index].usage + 1; | 
|---|
| 287 | if (new > rpool->resources[index].max) { | 
|---|
| 288 | ret = -EAGAIN; | 
|---|
| 289 | goto err; | 
|---|
| 290 | } else { | 
|---|
| 291 | rpool->resources[index].usage = new; | 
|---|
| 292 | rpool->usage_sum++; | 
|---|
| 293 | } | 
|---|
| 294 | } | 
|---|
| 295 | } | 
|---|
| 296 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 297 |  | 
|---|
| 298 | *rdmacg = cg; | 
|---|
| 299 | return 0; | 
|---|
| 300 |  | 
|---|
| 301 | err: | 
|---|
| 302 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 303 | rdmacg_uncharge_hierarchy(cg, device, stop_cg: p, index); | 
|---|
| 304 | return ret; | 
|---|
| 305 | } | 
|---|
| 306 | EXPORT_SYMBOL(rdmacg_try_charge); | 
|---|
| 307 |  | 
|---|
| 308 | /** | 
|---|
| 309 | * rdmacg_register_device - register rdmacg device to rdma controller. | 
|---|
| 310 | * @device: pointer to rdmacg device whose resources need to be accounted. | 
|---|
| 311 | * | 
|---|
| 312 | * If IB stack wish a device to participate in rdma cgroup resource | 
|---|
| 313 | * tracking, it must invoke this API to register with rdma cgroup before | 
|---|
| 314 | * any user space application can start using the RDMA resources. | 
|---|
| 315 | */ | 
|---|
| 316 | void rdmacg_register_device(struct rdmacg_device *device) | 
|---|
| 317 | { | 
|---|
| 318 | INIT_LIST_HEAD(list: &device->dev_node); | 
|---|
| 319 | INIT_LIST_HEAD(list: &device->rpools); | 
|---|
| 320 |  | 
|---|
| 321 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 322 | list_add_tail(new: &device->dev_node, head: &rdmacg_devices); | 
|---|
| 323 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 324 | } | 
|---|
| 325 | EXPORT_SYMBOL(rdmacg_register_device); | 
|---|
| 326 |  | 
|---|
| 327 | /** | 
|---|
| 328 | * rdmacg_unregister_device - unregister rdmacg device from rdma controller. | 
|---|
| 329 | * @device: pointer to rdmacg device which was previously registered with rdma | 
|---|
| 330 | *          controller using rdmacg_register_device(). | 
|---|
| 331 | * | 
|---|
| 332 | * IB stack must invoke this after all the resources of the IB device | 
|---|
| 333 | * are destroyed and after ensuring that no more resources will be created | 
|---|
| 334 | * when this API is invoked. | 
|---|
| 335 | */ | 
|---|
| 336 | void rdmacg_unregister_device(struct rdmacg_device *device) | 
|---|
| 337 | { | 
|---|
| 338 | struct rdmacg_resource_pool *rpool, *tmp; | 
|---|
| 339 |  | 
|---|
| 340 | /* | 
|---|
| 341 | * Synchronize with any active resource settings, | 
|---|
| 342 | * usage query happening via configfs. | 
|---|
| 343 | */ | 
|---|
| 344 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 345 | list_del_init(entry: &device->dev_node); | 
|---|
| 346 |  | 
|---|
| 347 | /* | 
|---|
| 348 | * Now that this device is off the cgroup list, its safe to free | 
|---|
| 349 | * all the rpool resources. | 
|---|
| 350 | */ | 
|---|
| 351 | list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) | 
|---|
| 352 | free_cg_rpool_locked(rpool); | 
|---|
| 353 |  | 
|---|
| 354 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 355 | } | 
|---|
| 356 | EXPORT_SYMBOL(rdmacg_unregister_device); | 
|---|
| 357 |  | 
|---|
| 358 | static int parse_resource(char *c, int *intval) | 
|---|
| 359 | { | 
|---|
| 360 | substring_t argstr; | 
|---|
| 361 | char *name, *value = c; | 
|---|
| 362 | size_t len; | 
|---|
| 363 | int ret, i; | 
|---|
| 364 |  | 
|---|
| 365 | name = strsep(&value, "="); | 
|---|
| 366 | if (!name || !value) | 
|---|
| 367 | return -EINVAL; | 
|---|
| 368 |  | 
|---|
| 369 | i = match_string(array: rdmacg_resource_names, n: RDMACG_RESOURCE_MAX, string: name); | 
|---|
| 370 | if (i < 0) | 
|---|
| 371 | return i; | 
|---|
| 372 |  | 
|---|
| 373 | len = strlen(value); | 
|---|
| 374 |  | 
|---|
| 375 | argstr.from = value; | 
|---|
| 376 | argstr.to = value + len; | 
|---|
| 377 |  | 
|---|
| 378 | ret = match_int(&argstr, result: intval); | 
|---|
| 379 | if (ret >= 0) { | 
|---|
| 380 | if (*intval < 0) | 
|---|
| 381 | return -EINVAL; | 
|---|
| 382 | return i; | 
|---|
| 383 | } | 
|---|
| 384 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { | 
|---|
| 385 | *intval = S32_MAX; | 
|---|
| 386 | return i; | 
|---|
| 387 | } | 
|---|
| 388 | return -EINVAL; | 
|---|
| 389 | } | 
|---|
| 390 |  | 
|---|
| 391 | static int rdmacg_parse_limits(char *options, | 
|---|
| 392 | int *new_limits, unsigned long *enables) | 
|---|
| 393 | { | 
|---|
| 394 | char *c; | 
|---|
| 395 | int err = -EINVAL; | 
|---|
| 396 |  | 
|---|
| 397 | /* parse resource options */ | 
|---|
| 398 | while ((c = strsep(&options, " ")) != NULL) { | 
|---|
| 399 | int index, intval; | 
|---|
| 400 |  | 
|---|
| 401 | index = parse_resource(c, intval: &intval); | 
|---|
| 402 | if (index < 0) | 
|---|
| 403 | goto err; | 
|---|
| 404 |  | 
|---|
| 405 | new_limits[index] = intval; | 
|---|
| 406 | *enables |= BIT(index); | 
|---|
| 407 | } | 
|---|
| 408 | return 0; | 
|---|
| 409 |  | 
|---|
| 410 | err: | 
|---|
| 411 | return err; | 
|---|
| 412 | } | 
|---|
| 413 |  | 
|---|
| 414 | static struct rdmacg_device *rdmacg_get_device_locked(const char *name) | 
|---|
| 415 | { | 
|---|
| 416 | struct rdmacg_device *device; | 
|---|
| 417 |  | 
|---|
| 418 | lockdep_assert_held(&rdmacg_mutex); | 
|---|
| 419 |  | 
|---|
| 420 | list_for_each_entry(device, &rdmacg_devices, dev_node) | 
|---|
| 421 | if (!strcmp(name, device->name)) | 
|---|
| 422 | return device; | 
|---|
| 423 |  | 
|---|
| 424 | return NULL; | 
|---|
| 425 | } | 
|---|
| 426 |  | 
|---|
| 427 | static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, | 
|---|
| 428 | char *buf, size_t nbytes, loff_t off) | 
|---|
| 429 | { | 
|---|
| 430 | struct rdma_cgroup *cg = css_rdmacg(css: of_css(of)); | 
|---|
| 431 | const char *dev_name; | 
|---|
| 432 | struct rdmacg_resource_pool *rpool; | 
|---|
| 433 | struct rdmacg_device *device; | 
|---|
| 434 | char *options = strstrip(str: buf); | 
|---|
| 435 | int *new_limits; | 
|---|
| 436 | unsigned long enables = 0; | 
|---|
| 437 | int i = 0, ret = 0; | 
|---|
| 438 |  | 
|---|
| 439 | /* extract the device name first */ | 
|---|
| 440 | dev_name = strsep(&options, " "); | 
|---|
| 441 | if (!dev_name) { | 
|---|
| 442 | ret = -EINVAL; | 
|---|
| 443 | goto err; | 
|---|
| 444 | } | 
|---|
| 445 |  | 
|---|
| 446 | new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); | 
|---|
| 447 | if (!new_limits) { | 
|---|
| 448 | ret = -ENOMEM; | 
|---|
| 449 | goto err; | 
|---|
| 450 | } | 
|---|
| 451 |  | 
|---|
| 452 | ret = rdmacg_parse_limits(options, new_limits, enables: &enables); | 
|---|
| 453 | if (ret) | 
|---|
| 454 | goto parse_err; | 
|---|
| 455 |  | 
|---|
| 456 | /* acquire lock to synchronize with hot plug devices */ | 
|---|
| 457 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 458 |  | 
|---|
| 459 | device = rdmacg_get_device_locked(name: dev_name); | 
|---|
| 460 | if (!device) { | 
|---|
| 461 | ret = -ENODEV; | 
|---|
| 462 | goto dev_err; | 
|---|
| 463 | } | 
|---|
| 464 |  | 
|---|
| 465 | rpool = get_cg_rpool_locked(cg, device); | 
|---|
| 466 | if (IS_ERR(ptr: rpool)) { | 
|---|
| 467 | ret = PTR_ERR(ptr: rpool); | 
|---|
| 468 | goto dev_err; | 
|---|
| 469 | } | 
|---|
| 470 |  | 
|---|
| 471 | /* now set the new limits of the rpool */ | 
|---|
| 472 | for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) | 
|---|
| 473 | set_resource_limit(rpool, index: i, new_max: new_limits[i]); | 
|---|
| 474 |  | 
|---|
| 475 | if (rpool->usage_sum == 0 && | 
|---|
| 476 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { | 
|---|
| 477 | /* | 
|---|
| 478 | * No user of the rpool and all entries are set to max, so | 
|---|
| 479 | * safe to delete this rpool. | 
|---|
| 480 | */ | 
|---|
| 481 | free_cg_rpool_locked(rpool); | 
|---|
| 482 | } | 
|---|
| 483 |  | 
|---|
| 484 | dev_err: | 
|---|
| 485 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 486 |  | 
|---|
| 487 | parse_err: | 
|---|
| 488 | kfree(objp: new_limits); | 
|---|
| 489 |  | 
|---|
| 490 | err: | 
|---|
| 491 | return ret ?: nbytes; | 
|---|
| 492 | } | 
|---|
| 493 |  | 
|---|
| 494 | static void print_rpool_values(struct seq_file *sf, | 
|---|
| 495 | struct rdmacg_resource_pool *rpool) | 
|---|
| 496 | { | 
|---|
| 497 | enum rdmacg_file_type sf_type; | 
|---|
| 498 | int i; | 
|---|
| 499 | u32 value; | 
|---|
| 500 |  | 
|---|
| 501 | sf_type = seq_cft(seq: sf)->private; | 
|---|
| 502 |  | 
|---|
| 503 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { | 
|---|
| 504 | seq_puts(m: sf, s: rdmacg_resource_names[i]); | 
|---|
| 505 | seq_putc(m: sf, c: '='); | 
|---|
| 506 | if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { | 
|---|
| 507 | if (rpool) | 
|---|
| 508 | value = rpool->resources[i].max; | 
|---|
| 509 | else | 
|---|
| 510 | value = S32_MAX; | 
|---|
| 511 | } else { | 
|---|
| 512 | if (rpool) | 
|---|
| 513 | value = rpool->resources[i].usage; | 
|---|
| 514 | else | 
|---|
| 515 | value = 0; | 
|---|
| 516 | } | 
|---|
| 517 |  | 
|---|
| 518 | if (value == S32_MAX) | 
|---|
| 519 | seq_puts(m: sf, RDMACG_MAX_STR); | 
|---|
| 520 | else | 
|---|
| 521 | seq_printf(m: sf, fmt: "%d", value); | 
|---|
| 522 | seq_putc(m: sf, c: ' '); | 
|---|
| 523 | } | 
|---|
| 524 | } | 
|---|
| 525 |  | 
|---|
| 526 | static int rdmacg_resource_read(struct seq_file *sf, void *v) | 
|---|
| 527 | { | 
|---|
| 528 | struct rdmacg_device *device; | 
|---|
| 529 | struct rdmacg_resource_pool *rpool; | 
|---|
| 530 | struct rdma_cgroup *cg = css_rdmacg(css: seq_css(seq: sf)); | 
|---|
| 531 |  | 
|---|
| 532 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 533 |  | 
|---|
| 534 | list_for_each_entry(device, &rdmacg_devices, dev_node) { | 
|---|
| 535 | seq_printf(m: sf, fmt: "%s ", device->name); | 
|---|
| 536 |  | 
|---|
| 537 | rpool = find_cg_rpool_locked(cg, device); | 
|---|
| 538 | print_rpool_values(sf, rpool); | 
|---|
| 539 |  | 
|---|
| 540 | seq_putc(m: sf, c: '\n'); | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 544 | return 0; | 
|---|
| 545 | } | 
|---|
| 546 |  | 
|---|
| 547 | static struct cftype rdmacg_files[] = { | 
|---|
| 548 | { | 
|---|
| 549 | .name = "max", | 
|---|
| 550 | .write = rdmacg_resource_set_max, | 
|---|
| 551 | .seq_show = rdmacg_resource_read, | 
|---|
| 552 | .private = RDMACG_RESOURCE_TYPE_MAX, | 
|---|
| 553 | .flags = CFTYPE_NOT_ON_ROOT, | 
|---|
| 554 | }, | 
|---|
| 555 | { | 
|---|
| 556 | .name = "current", | 
|---|
| 557 | .seq_show = rdmacg_resource_read, | 
|---|
| 558 | .private = RDMACG_RESOURCE_TYPE_STAT, | 
|---|
| 559 | .flags = CFTYPE_NOT_ON_ROOT, | 
|---|
| 560 | }, | 
|---|
| 561 | { }	/* terminate */ | 
|---|
| 562 | }; | 
|---|
| 563 |  | 
|---|
| 564 | static struct cgroup_subsys_state * | 
|---|
| 565 | rdmacg_css_alloc(struct cgroup_subsys_state *parent) | 
|---|
| 566 | { | 
|---|
| 567 | struct rdma_cgroup *cg; | 
|---|
| 568 |  | 
|---|
| 569 | cg = kzalloc(sizeof(*cg), GFP_KERNEL); | 
|---|
| 570 | if (!cg) | 
|---|
| 571 | return ERR_PTR(error: -ENOMEM); | 
|---|
| 572 |  | 
|---|
| 573 | INIT_LIST_HEAD(list: &cg->rpools); | 
|---|
| 574 | return &cg->css; | 
|---|
| 575 | } | 
|---|
| 576 |  | 
|---|
/* css_free callback: release the rdma cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
|---|
| 583 |  | 
|---|
| 584 | /** | 
|---|
| 585 | * rdmacg_css_offline - cgroup css_offline callback | 
|---|
| 586 | * @css: css of interest | 
|---|
| 587 | * | 
|---|
| 588 | * This function is called when @css is about to go away and responsible | 
|---|
| 589 | * for shooting down all rdmacg associated with @css. As part of that it | 
|---|
| 590 | * marks all the resource pool entries to max value, so that when resources are | 
|---|
| 591 | * uncharged, associated resource pool can be freed as well. | 
|---|
| 592 | */ | 
|---|
| 593 | static void rdmacg_css_offline(struct cgroup_subsys_state *css) | 
|---|
| 594 | { | 
|---|
| 595 | struct rdma_cgroup *cg = css_rdmacg(css); | 
|---|
| 596 | struct rdmacg_resource_pool *rpool; | 
|---|
| 597 |  | 
|---|
| 598 | mutex_lock(lock: &rdmacg_mutex); | 
|---|
| 599 |  | 
|---|
| 600 | list_for_each_entry(rpool, &cg->rpools, cg_node) | 
|---|
| 601 | set_all_resource_max_limit(rpool); | 
|---|
| 602 |  | 
|---|
| 603 | mutex_unlock(lock: &rdmacg_mutex); | 
|---|
| 604 | } | 
|---|
| 605 |  | 
|---|
| 606 | struct cgroup_subsys rdma_cgrp_subsys = { | 
|---|
| 607 | .css_alloc	= rdmacg_css_alloc, | 
|---|
| 608 | .css_free	= rdmacg_css_free, | 
|---|
| 609 | .css_offline	= rdmacg_css_offline, | 
|---|
| 610 | .legacy_cftypes	= rdmacg_files, | 
|---|
| 611 | .dfl_cftypes	= rdmacg_files, | 
|---|
| 612 | }; | 
|---|
| 613 |  | 
|---|