/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2015, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LUSTRE_LU_OBJECT_H
#define __LUSTRE_LU_OBJECT_H
#include <stdarg.h>
#include "../../include/linux/libcfs/libcfs.h"
#include "lustre/lustre_idl.h"
#include "lu_ref.h"
struct seq_file;
struct lustre_cfg;
struct lprocfs_stats;
/** \defgroup lu lu
* lu_* data-types represent server-side entities shared by data and meta-data
* stacks.
*
* Design goals:
*
* -# support for layering.
*
* Server side object is split into layers, one per device in the
* corresponding device stack. Individual layer is represented by struct
* lu_object. Compound layered object --- by struct lu_object_header. Most
* interface functions take lu_object as an argument and operate on the
* whole compound object. This decision was made due to the following
* reasons:
*
* - it's envisaged that lu_object will be used much more often than
* lu_object_header;
*
* - we want lower (non-top) layers to be able to initiate operations
* on the whole object.
*
* Generic code supports layering more complex than simple stacking, e.g.,
* it is possible that at some layer object "spawns" multiple sub-objects
* on the lower layer.
*
* -# fid-based identification.
*
* Compound object is uniquely identified by its fid. Objects are indexed
* by their fids (hash table is used for index).
*
* -# caching and life-cycle management.
*
* Object's life-time is controlled by reference counting. When reference
* count drops to 0, object is returned to cache. Cached objects still
* retain their identity (i.e., fid), and can be recovered from cache.
*
* Objects are kept in the global LRU list, and lu_site_purge() function
* can be used to reclaim given number of unused objects from the tail of
* the LRU.
*
* -# avoiding recursion.
*
* Generic code tries to replace recursion through layers by iterations
* where possible. Additionally to the end of reducing stack consumption,
* data, when practically possible, are allocated through lu_context_key
* interface rather than on stack.
* @{
*/
struct lu_site;
struct lu_object;
struct lu_device;
struct lu_object_header;
struct lu_context;
struct lu_env;
/**
* Operations common for data and meta-data devices.
*/
struct lu_device_operations {
	/**
	 * Allocate object for the given device (without lower-layer
	 * parts). This is called by lu_object_operations::loo_object_init()
	 * from the parent layer, and should setup at least lu_object::lo_dev
	 * and lu_object::lo_ops fields of resulting lu_object.
	 *
	 * Object creation protocol.
	 *
	 * Due to design goal of avoiding recursion, object creation (see
	 * lu_object_alloc()) is somewhat involved:
	 *
	 *  - first, lu_device_operations::ldo_object_alloc() method of the
	 *  top-level device in the stack is called. It should allocate top
	 *  level object (including lu_object_header), but without any
	 *  lower-layer sub-object(s).
	 *
	 *  - then lu_object_alloc() sets fid in the header of newly created
	 *  object.
	 *
	 *  - then lu_object_operations::loo_object_init() is called. It has
	 *  to allocate lower-layer object(s). To do this,
	 *  lu_object_operations::loo_object_init() calls ldo_object_alloc()
	 *  of the lower-layer device(s).
	 *
	 *  - for all new objects allocated by
	 *  lu_object_operations::loo_object_init() (and inserted into object
	 *  stack), lu_object_operations::loo_object_init() is called again
	 *  repeatedly, until no new objects are created.
	 *
	 * \post ergo(!IS_ERR(result), result->lo_dev == d &&
	 *			result->lo_ops != NULL);
	 */
	struct lu_object *(*ldo_object_alloc)(const struct lu_env *env,
					      const struct lu_object_header *h,
					      struct lu_device *d);
	/**
	 * Process configuration specific for this device.
	 */
	int (*ldo_process_config)(const struct lu_env *env,
				  struct lu_device *, struct lustre_cfg *);
	int (*ldo_recovery_complete)(const struct lu_env *,
				     struct lu_device *);
	/**
	 * Initialize local objects for the device. This method is called
	 * after the layer has been initialized (after LCFG_SETUP stage) and
	 * before it starts serving user requests.
	 */
	int (*ldo_prepare)(const struct lu_env *,
			   struct lu_device *parent,
			   struct lu_device *dev);
};
/**
* For lu_object_conf flags
*/
enum loc_flags {
	/* This is a new object to be allocated, or the file
	 * corresponding to the object does not exist.
	 */
	LOC_F_NEW	= 0x00000001,
};
/**
* Object configuration, describing particulars of object being created. On
* server this is not used, as server objects are full identified by fid. On
* client configuration contains struct lustre_md.
*/
struct lu_object_conf {
/**
* Some hints for obj find and alloc.
*/
enum loc_flags loc_flags;
};
/**
* Type of "printer" function used by lu_object_operations::loo_object_print()
* method.
*
* Printer function is needed to provide some flexibility in (semi-)debugging
* output: possible implementations: printk, CDEBUG, sysfs/seq_file
*/
typedef int (*lu_printer_t)(const struct lu_env *env,
void *cookie, const char *format, ...)
__printf(3, 4);
/**
* Operations specific for particular lu_object.
*/
struct lu_object_operations {
	/**
	 * Allocate lower-layer parts of the object by calling
	 * lu_device_operations::ldo_object_alloc() of the corresponding
	 * underlying device.
	 *
	 * This method is called once for each object inserted into object
	 * stack. It is the responsibility of this method to insert the
	 * lower-layer object(s) it creates into appropriate places in the
	 * object stack.
	 */
	int (*loo_object_init)(const struct lu_env *env,
			       struct lu_object *o,
			       const struct lu_object_conf *conf);
	/**
	 * Called (in top-to-bottom order) during object allocation after all
	 * layers were allocated and initialized. Can be used to perform
	 * initialization depending on lower layers.
	 */
	int (*loo_object_start)(const struct lu_env *env,
				struct lu_object *o);
	/**
	 * Called before lu_object_operations::loo_object_free() to signal
	 * that object is being destroyed. Dual to
	 * lu_object_operations::loo_object_init().
	 */
	void (*loo_object_delete)(const struct lu_env *env,
				  struct lu_object *o);
	/**
	 * Dual to lu_device_operations::ldo_object_alloc(). Called when
	 * object is removed from memory.
	 */
	void (*loo_object_free)(const struct lu_env *env,
				struct lu_object *o);
	/**
	 * Called when last active reference to the object is released (and
	 * object returns to the cache). This method is optional.
	 */
	void (*loo_object_release)(const struct lu_env *env,
				   struct lu_object *o);
	/**
	 * Optional debugging helper. Print given object.
	 */
	int (*loo_object_print)(const struct lu_env *env, void *cookie,
				lu_printer_t p, const struct lu_object *o);
	/**
	 * Optional debugging method. Returns true iff the object is
	 * internally consistent.
	 */
	int (*loo_object_invariant)(const struct lu_object *o);
};
/**
* Type of lu_device.
*/
struct lu_device_type;
/**
* Device: a layer in the server side abstraction stacking.
*/
struct lu_device {
/**
* reference count. This is incremented, in particular, on each object
* created at this layer.
*
* \todo XXX which means that atomic_t is probably too small.
*/
atomic_t ld_ref;
/**
* Pointer to device type. Never modified once set.
*/
struct lu_device_type *ld_type;
/**
* Operation vector for this device.
*/
const struct lu_device_operations *ld_ops;
/**
* Stack this device belongs to.
*/
struct lu_site *ld_site;
/** \todo XXX: temporary back pointer into obd. */
struct obd_device *ld_obd;
/**
* A list of references to this object, for debugging.
*/
struct lu_ref ld_reference;
/**
* Link the device to the site.
**/
struct list_head ld_linkage;
};
struct lu_device_type_operations;
/**
* Tag bits for device type. They are used to distinguish certain groups of
* device types.
*/
enum lu_device_tag {
/** this is meta-data device */
LU_DEVICE_MD = (1 << 0),
/** this is data device */
LU_DEVICE_DT = (1 << 1),
/** data device in the client stack */
LU_DEVICE_CL = (1 << 2)
};
/**
* Type of device.
*/
struct lu_device_type {
/**
* Tag bits. Taken from enum lu_device_tag. Never modified once set.
*/
__u32 ldt_tags;
/**
* Name of this class. Unique system-wide. Never modified once set.
*/
char *ldt_name;
/**
* Operations for this type.
*/
const struct lu_device_type_operations *ldt_ops;
/**
* \todo XXX: temporary pointer to associated obd_type.
*/
struct obd_type *ldt_obd_type;
/**
* \todo XXX: temporary: context tags used by obd_*() calls.
*/
__u32 ldt_ctx_tags;
/**
* Number of existing device type instances.
*/
atomic_t ldt_device_nr;
/**
* Linkage into a global list of all device types.
*
* \see lu_device_types.
*/
struct list_head ldt_linkage;
};
/**
* Operations on a device type.
*/
struct lu_device_type_operations {
/**
* Allocate new device.
*/
struct lu_device *(*ldto_device_alloc)(const struct lu_env *env,
struct lu_device_type *t,
struct lustre_cfg *lcfg);
/**
* Free device. Dual to
* lu_device_type_operations::ldto_device_alloc(). Returns pointer to
* the next device in the stack.
*/
struct lu_device *(*ldto_device_free)(const struct lu_env *,
struct lu_device *);
/**
* Initialize the devices after allocation
*/
int (*ldto_device_init)(const struct lu_env *env,
struct lu_device *, const char *,
struct lu_device *);
/**
* Finalize device. Dual to
* lu_device_type_operations::ldto_device_init(). Returns pointer to
* the next device in the stack.
*/
struct lu_device *(*ldto_device_fini)(const struct lu_env *env,
struct lu_device *);
/**
* Initialize device type. This is called on module load.
*/
int (*ldto_init)(struct lu_device_type *t);
/**
* Finalize device type. Dual to
* lu_device_type_operations::ldto_init(). Called on module unload.
*/
void (*ldto_fini)(struct lu_device_type *t);
/**
* Called when the first device is created.
*/
void (*ldto_start)(struct lu_device_type *t);
/**
* Called when number of devices drops to 0.
*/
void (*ldto_stop)(struct lu_device_type *t);
};
/**
 * Return true iff \a d is a meta-data device (or NULL, which is
 * vacuously treated as meta-data capable).
 */
static inline int lu_device_is_md(const struct lu_device *d)
{
	/* Equivalent to ergo(d, tags & LU_DEVICE_MD): NULL implies true. */
	return !d || (d->ld_type->ldt_tags & LU_DEVICE_MD);
}
/**
* Common object attributes.
*/
struct lu_attr {
/** size in bytes */
__u64 la_size;
/** modification time in seconds since Epoch */
s64 la_mtime;
/** access time in seconds since Epoch */
s64 la_atime;
/** change time in seconds since Epoch */
s64 la_ctime;
/** 512-byte blocks allocated to object */
__u64 la_blocks;
/** permission bits and file type */
__u32 la_mode;
/** owner id */
__u32 la_uid;
/** group id */
__u32 la_gid;
/** object flags */
__u32 la_flags;
/** number of persistent references to this object */
__u32 la_nlink;
/** blk bits of the object*/
__u32 la_blkbits;
/** blk size of the object*/
__u32 la_blksize;
/** real device */
__u32 la_rdev;
/**
* valid bits
*
* \see enum la_valid
*/
__u64 la_valid;
};
/** Bit-mask of valid attributes */
enum la_valid {
LA_ATIME = 1 << 0,
LA_MTIME = 1 << 1,
LA_CTIME = 1 << 2,
LA_SIZE = 1 << 3,
LA_MODE = 1 << 4,
LA_UID = 1 << 5,
LA_GID = 1 << 6,
LA_BLOCKS = 1 << 7,
LA_TYPE = 1 << 8,
LA_FLAGS = 1 << 9,
LA_NLINK = 1 << 10,
LA_RDEV = 1 << 11,
LA_BLKSIZE = 1 << 12,
LA_KILL_SUID = 1 << 13,
LA_KILL_SGID = 1 << 14,
};
/**
* Layer in the layered object.
*/
struct lu_object {
/**
* Header for this object.
*/
struct lu_object_header *lo_header;
/**
* Device for this layer.
*/
struct lu_device *lo_dev;
/**
* Operations for this object.
*/
const struct lu_object_operations *lo_ops;
/**
* Linkage into list of all layers.
*/
struct list_head lo_linkage;
/**
* Link to the device, for debugging.
*/
struct lu_ref_link lo_dev_ref;
};
enum lu_object_header_flags {
/**
* Don't keep this object in cache. Object will be destroyed as soon
* as last reference to it is released. This flag cannot be cleared
* once set.
*/
LU_OBJECT_HEARD_BANSHEE = 0,
/**
* Mark this object has already been taken out of cache.
*/
LU_OBJECT_UNHASHED = 1,
};
enum lu_object_header_attr {
LOHA_EXISTS = 1 << 0,
LOHA_REMOTE = 1 << 1,
/**
* UNIX file type is stored in S_IFMT bits.
*/
LOHA_FT_START = 001 << 12, /**< S_IFIFO */
LOHA_FT_END = 017 << 12, /**< S_IFMT */
};
/**
* "Compound" object, consisting of multiple layers.
*
* Compound object with given fid is unique with given lu_site.
*
* Note, that object does *not* necessary correspond to the real object in the
* persistent storage: object is an anchor for locking and method calling, so
* it is created for things like not-yet-existing child created by mkdir or
* create calls. lu_object_operations::loo_exists() can be used to check
* whether object is backed by persistent storage entity.
*/
struct lu_object_header {
/**
* Fid, uniquely identifying this object.
*/
struct lu_fid loh_fid;
/**
* Object flags from enum lu_object_header_flags. Set and checked
* atomically.
*/
unsigned long loh_flags;
/**
* Object reference count. Protected by lu_site::ls_guard.
*/
atomic_t loh_ref;
/**
* Common object attributes, cached for efficiency. From enum
* lu_object_header_attr.
*/
__u32 loh_attr;
/**
* Linkage into per-site hash table. Protected by lu_site::ls_guard.
*/
struct hlist_node loh_hash;
/**
* Linkage into per-site LRU list. Protected by lu_site::ls_guard.
*/
struct list_head loh_lru;
/**
* Linkage into list of layers. Never modified once set (except lately
* during object destruction). No locking is necessary.
*/
struct list_head loh_layers;
/**
* A list of references to this object, for debugging.
*/
struct lu_ref loh_reference;
};
struct fld;
struct lu_site_bkt_data {
/**
* number of object in this bucket on the lsb_lru list.
*/
long lsb_lru_len;
/**
* LRU list, updated on each access to object. Protected by
* bucket lock of lu_site::ls_obj_hash.
*
* "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
* moved to the lu_site::ls_lru.prev (this is due to the non-existence
* of list_for_each_entry_safe_reverse()).
*/
struct list_head lsb_lru;
/**
* Wait-queue signaled when an object in this site is ultimately
* destroyed (lu_object_free()). It is used by lu_object_find() to
* wait before re-trying when object in the process of destruction is
* found in the hash table.
*
* \see htable_lookup().
*/
wait_queue_head_t lsb_marche_funebre;
};
enum {
LU_SS_CREATED = 0,
LU_SS_CACHE_HIT,
LU_SS_CACHE_MISS,
LU_SS_CACHE_RACE,
LU_SS_CACHE_DEATH_RACE,
LU_SS_LRU_PURGED,
LU_SS_LRU_LEN, /* # of objects in lsb_lru lists */
LU_SS_LAST_STAT
};
/**
* lu_site is a "compartment" within which objects are unique, and LRU
* discipline is maintained.
*
* lu_site exists so that multiple layered stacks can co-exist in the same
* address space.
*
* lu_site has the same relation to lu_device as lu_object_header to
* lu_object.
*/
struct lu_site {
/**
* objects hash table
*/
struct cfs_hash *ls_obj_hash;
/**
* index of bucket on hash table while purging
*/
unsigned int ls_purge_start;
/**
* Top-level device for this stack.
*/
struct lu_device *ls_top_dev;
/**
* Bottom-level device for this stack
*/
struct lu_device *ls_bottom_dev;
/**
* Linkage into global list of sites.
*/
struct list_head ls_linkage;
/**
* List for lu device for this site, protected
* by ls_ld_lock.
**/
struct list_head ls_ld_linkage;
spinlock_t ls_ld_lock;
/**
* Lock to serialize site purge.
*/
struct mutex ls_purge_mutex;
/**
* lu_site stats
*/
struct lprocfs_stats *ls_stats;
/**
* XXX: a hack! fld has to find md_site via site, remove when possible
*/
struct seq_server_site *ld_seq_site;
};
/**
 * Return the per-bucket private data (struct lu_site_bkt_data) of the
 * lu_site object-hash bucket that \a fid hashes to.
 */
static inline struct lu_site_bkt_data *
lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
{
	struct cfs_hash_bd bd;
	/* Map the fid to its bucket descriptor, then fetch the extra data
	 * stored alongside that bucket. */
	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
}
/**
 * Accessor for the sequence-server site attached to \a s.
 * \see lu_site::ld_seq_site
 */
static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
{
	return s->ld_seq_site;
}
/** \name ctors
* Constructors/destructors.
* @{
*/
int lu_site_init(struct lu_site *s, struct lu_device *d);
void lu_site_fini(struct lu_site *s);
int lu_site_init_finish(struct lu_site *s);
void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
void lu_device_get(struct lu_device *d);
void lu_device_put(struct lu_device *d);
int lu_device_init(struct lu_device *d, struct lu_device_type *t);
void lu_device_fini(struct lu_device *d);
int lu_object_header_init(struct lu_object_header *h);
void lu_object_header_fini(struct lu_object_header *h);
int lu_object_init(struct lu_object *o,
struct lu_object_header *h, struct lu_device *d);
void lu_object_fini(struct lu_object *o);
void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
void lu_object_add(struct lu_object *before, struct lu_object *o);
/**
* Helpers to initialize and finalize device types.
*/
int lu_device_type_init(struct lu_device_type *ldt);
void lu_device_type_fini(struct lu_device_type *ldt);
/** @} ctors */
/** \name caching
* Caching and reference counting.
* @{
*/
/**
 * Acquire additional reference to the given object. This function is used to
 * attain additional reference. To acquire initial reference use
 * lu_object_find().
 */
static inline void lu_object_get(struct lu_object *o)
{
	/* A reference may only be added to an already-referenced object;
	 * the initial reference comes from lu_object_find(). */
	LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
	atomic_inc(&o->lo_header->loh_ref);
}
/**
 * Return true if object will not be cached after last reference to it is
 * released.
 */
static inline int lu_object_is_dying(const struct lu_object_header *h)
{
	return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
}
void lu_object_put(const struct lu_env *env, struct lu_object *o);
void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr);
void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
lu_printer_t printer);
struct lu_object *lu_object_find_at(const struct lu_env *env,
struct lu_device *dev,
const struct lu_fid *f,
const struct lu_object_conf *conf);
struct lu_object *lu_object_find_slice(const struct lu_env *env,
struct lu_device *dev,
const struct lu_fid *f,
const struct lu_object_conf *conf);
/** @} caching */
/** \name helpers
* Helpers.
* @{
*/
/**
 * First (topmost) sub-object of given compound object
 */
static inline struct lu_object *lu_object_top(struct lu_object_header *h)
{
	/* Every live compound object has at least one layer. */
	LASSERT(!list_empty(&h->loh_layers));
	return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
}
/**
 * Next sub-object in the layering
 */
static inline struct lu_object *lu_object_next(const struct lu_object *o)
{
	return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
}
/**
 * Pointer to the fid of this object.
 */
static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
{
	return &o->lo_header->loh_fid;
}
/**
 * Return device operations vector for this object.
 */
static inline const struct lu_device_operations *
lu_object_ops(const struct lu_object *o)
{
	return o->lo_dev->ld_ops;
}
/**
* Given a compound object, find its slice, corresponding to the device type
* \a dtype.
*/
struct lu_object *lu_object_locate(struct lu_object_header *h,
const struct lu_device_type *dtype);
/**
* Printer function emitting messages through libcfs_debug_msg().
*/
int lu_cdebug_printer(const struct lu_env *env,
void *cookie, const char *format, ...);
/**
* Print object description followed by a user-supplied message.
*/
#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
do { \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
CDEBUG(mask, format "\n", ## __VA_ARGS__); \
} \
} while (0)
/**
* Print short object description followed by a user-supplied message.
*/
#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
do { \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
(object)->lo_header); \
lu_cdebug_printer(env, &msgdata, "\n"); \
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
} while (0)
void lu_object_print (const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct lu_object *o);
void lu_object_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer,
const struct lu_object_header *hdr);
/**
 * Check object consistency.
 */
int lu_object_invariant(const struct lu_object *o);
/**
 * Check whether the object exists, regardless of whether it lives on local
 * or remote storage.
 * Note: LOHA_EXISTS will be set once someone has created the object,
 * and it does not need to be committed to storage.
 */
#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS)
/**
 * Check whether the object is on remote storage.
 */
#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
static inline int lu_object_assert_exists(const struct lu_object *o)
{
	return lu_object_exists(o);
}
static inline int lu_object_assert_not_exists(const struct lu_object *o)
{
	return !lu_object_exists(o);
}
/**
 * Attr of this object.
 */
static inline __u32 lu_object_attr(const struct lu_object *o)
{
	/* Cached attributes are only meaningful for an existing object. */
	LASSERT(lu_object_exists(o) != 0);
	return o->lo_header->loh_attr;
}
/**
 * Add a debugging reference (\see lu_ref) to the object's header.
 */
static inline void lu_object_ref_add(struct lu_object *o,
				     const char *scope,
				     const void *source)
{
	lu_ref_add(&o->lo_header->loh_reference, scope, source);
}
static inline void lu_object_ref_add_at(struct lu_object *o,
					struct lu_ref_link *link,
					const char *scope,
					const void *source)
{
	lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source);
}
/**
 * Drop a debugging reference previously added by lu_object_ref_add().
 */
static inline void lu_object_ref_del(struct lu_object *o,
				     const char *scope, const void *source)
{
	lu_ref_del(&o->lo_header->loh_reference, scope, source);
}
static inline void lu_object_ref_del_at(struct lu_object *o,
					struct lu_ref_link *link,
					const char *scope, const void *source)
{
	lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source);
}
/** input params, should be filled out by mdt */
struct lu_rdpg {
/** hash */
__u64 rp_hash;
/** count in bytes */
unsigned int rp_count;
/** number of pages */
unsigned int rp_npages;
/** requested attr */
__u32 rp_attrs;
/** pointers to pages */
struct page **rp_pages;
};
enum lu_xattr_flags {
LU_XATTR_REPLACE = (1 << 0),
LU_XATTR_CREATE = (1 << 1)
};
/** @} helpers */
/** \name lu_context
* @{
*/
/** For lu_context health-checks */
enum lu_context_state {
LCS_INITIALIZED = 1,
LCS_ENTERED,
LCS_LEFT,
LCS_FINALIZED
};
/**
* lu_context. Execution context for lu_object methods. Currently associated
* with thread.
*
* All lu_object methods, except device and device type methods (called during
* system initialization and shutdown) are executed "within" some
* lu_context. This means, that pointer to some "current" lu_context is passed
* as an argument to all methods.
*
* All service ptlrpc threads create lu_context as part of their
* initialization. It is possible to create "stand-alone" context for other
* execution environments (like system calls).
*
* lu_object methods mainly use lu_context through lu_context_key interface
* that allows each layer to associate arbitrary pieces of data with each
* context (see pthread_key_create(3) for similar interface).
*
* On a client, lu_context is bound to a thread, see cl_env_get().
*
* \see lu_context_key
*/
struct lu_context {
	/**
	 * lu_context is used on the client side too. Yet we don't want to
	 * allocate values of server-side keys for the client contexts and
	 * vice versa.
	 *
	 * To achieve this, a set of tags is introduced. Contexts and keys
	 * are marked with tags. Key values are created only for contexts
	 * whose set of tags has non-empty intersection with one for key.
	 * Tags are taken from enum lu_context_tag.
	 */
	__u32			lc_tags;
	enum lu_context_state	lc_state;
	/**
	 * Pointer to the home service thread. NULL for other execution
	 * contexts.
	 */
	struct ptlrpc_thread   *lc_thread;
	/**
	 * Pointer to an array with key values. Internal implementation
	 * detail.
	 */
	void		      **lc_value;
	/**
	 * Linkage into a list of all remembered contexts. Only
	 * `non-transient' contexts, i.e., ones created for service threads
	 * are placed here.
	 */
	struct list_head	lc_remember;
	/**
	 * Version counter used to skip calls to lu_context_refill() when no
	 * keys were registered.
	 */
	unsigned		lc_version;
	/**
	 * Debugging cookie.
	 */
	unsigned		lc_cookie;
};
/**
* lu_context_key interface. Similar to pthread_key.
*/
enum lu_context_tag {
/**
* Thread on md server
*/
LCT_MD_THREAD = 1 << 0,
/**
* Thread on dt server
*/
LCT_DT_THREAD = 1 << 1,
/**
* Context for transaction handle
*/
LCT_TX_HANDLE = 1 << 2,
/**
* Thread on client
*/
LCT_CL_THREAD = 1 << 3,
/**
* A per-request session on a server, and a per-system-call session on
* a client.
*/
LCT_SESSION = 1 << 4,
/**
* A per-request data on OSP device
*/
LCT_OSP_THREAD = 1 << 5,
/**
* MGS device thread
*/
LCT_MG_THREAD = 1 << 6,
/**
* Context for local operations
*/
LCT_LOCAL = 1 << 7,
/**
* session for server thread
**/
LCT_SERVER_SESSION = BIT(8),
/**
* Set when at least one of keys, having values in this context has
* non-NULL lu_context_key::lct_exit() method. This is used to
* optimize lu_context_exit() call.
*/
LCT_HAS_EXIT = 1 << 28,
/**
* Don't add references for modules creating key values in that context.
* This is only for contexts used internally by lu_object framework.
*/
LCT_NOREF = 1 << 29,
/**
* Key is being prepared for retiring, don't create new values for it.
*/
LCT_QUIESCENT = 1 << 30,
/**
* Context should be remembered.
*/
LCT_REMEMBER = 1 << 31,
/**
* Contexts usable in cache shrinker thread.
*/
LCT_SHRINKER = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD |
LCT_NOREF
};
/**
* Key. Represents per-context value slot.
*
* Keys are usually registered when module owning the key is initialized, and
* de-registered when module is unloaded. Once key is registered, all new
* contexts with matching tags, will get key value. "Old" contexts, already
* initialized at the time of key registration, can be forced to get key value
* by calling lu_context_refill().
*
* Every key value is counted in lu_context_key::lct_used and acquires a
* reference on an owning module. This means, that all key values have to be
* destroyed before module can be unloaded. This is usually achieved by
* stopping threads started by the module, that created contexts in their
* entry functions. Situation is complicated by the threads shared by multiple
* modules, like ptlrpcd daemon on a client. To work around this problem,
* contexts, created in such threads, are `remembered' (see
* LCT_REMEMBER)---i.e., added into a global list. When module is preparing
* for unloading it does the following:
*
* - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT)
* preventing new key values from being allocated in the new contexts,
* and
*
* - scans a list of remembered contexts, destroying values of module
* keys, thus releasing references to the module.
*
* This is done by lu_context_key_quiesce(). If module is re-activated
* before key has been de-registered, lu_context_key_revive() call clears
* `quiescent' marker.
*
* lu_context code doesn't provide any internal synchronization for these
* activities---it's assumed that startup (including threads start-up) and
* shutdown are serialized by some external means.
*
* \see lu_context
*/
struct lu_context_key {
/**
 * Set of tags for which values of this key are to be instantiated.
 */
__u32 lct_tags;
/**
 * Value constructor. This is called when a new value is created for a
 * context. Returns a pointer to the new value, or an ERR_PTR() on
 * failure.
 */
void *(*lct_init)(const struct lu_context *ctx,
struct lu_context_key *key);
/**
 * Value destructor. Called when context with previously allocated
 * value of this slot is destroyed. \a data is a value that was returned
 * by a matching call to lu_context_key::lct_init().
 */
void (*lct_fini)(const struct lu_context *ctx,
struct lu_context_key *key, void *data);
/**
 * Optional method called on lu_context_exit() for all allocated
 * keys. Can be used by debugging code checking that locks are
 * released, etc.
 */
void (*lct_exit)(const struct lu_context *ctx,
struct lu_context_key *key, void *data);
/**
 * Internal implementation detail: index within lu_context::lc_value[]
 * reserved for this key.
 */
int lct_index;
/**
 * Internal implementation detail: number of values created for this
 * key.
 */
atomic_t lct_used;
/**
 * Internal implementation detail: module owning this key; key values
 * pin a reference on it (see the comment above this struct).
 */
struct module *lct_owner;
/**
 * References to this key. For debugging.
 */
struct lu_ref lct_reference;
};
/*
 * LU_KEY_INIT(): generate the standard value constructor mod##_key_init()
 * for a key whose per-context value is a zeroed, heap-allocated 'type'.
 * Returns ERR_PTR(-ENOMEM) on allocation failure. The trailing dummy
 * struct exists only to swallow the semicolon at the macro use site.
 */
#define LU_KEY_INIT(mod, type) \
static void *mod##_key_init(const struct lu_context *ctx, \
struct lu_context_key *key) \
{ \
type *value; \
\
CLASSERT(PAGE_SIZE >= sizeof(*value)); \
\
value = kzalloc(sizeof(*value), GFP_NOFS); \
if (!value) \
value = ERR_PTR(-ENOMEM); \
\
return value; \
} \
struct __##mod##__dummy_init {; } /* semicolon catcher */
/*
 * LU_KEY_FINI(): generate the standard value destructor mod##_key_fini(),
 * which frees the per-context value allocated by the matching
 * LU_KEY_INIT()-generated constructor.
 */
#define LU_KEY_FINI(mod, type) \
static void mod##_key_fini(const struct lu_context *ctx, \
struct lu_context_key *key, void *data) \
{ \
type *info = data; \
\
kfree(info); \
} \
struct __##mod##__dummy_fini {; } /* semicolon catcher */
#define LU_KEY_INIT_FINI(mod, type) \
LU_KEY_INIT(mod, type); \
LU_KEY_FINI(mod, type)
/*
 * Define the key variable mod##_thread_key with the given tag set, wired
 * to the LU_KEY_INIT()/LU_KEY_FINI()-generated methods.
 */
#define LU_CONTEXT_KEY_DEFINE(mod, tags) \
struct lu_context_key mod##_thread_key = { \
.lct_tags = tags, \
.lct_init = mod##_key_init, \
.lct_fini = mod##_key_fini \
}
/*
 * Record the owning module in a key. Kept as a macro so that THIS_MODULE
 * expands in the module actually using the key (cf. LU_KEY_INIT_GENERIC()).
 */
#define LU_CONTEXT_KEY_INIT(key) \
do { \
(key)->lct_owner = THIS_MODULE; \
} while (0)
/* Register a new key / remove a registered key again. */
int lu_context_key_register(struct lu_context_key *key);
void lu_context_key_degister(struct lu_context_key *key);
/* Return the value of \a key in \a ctx (slot lu_context::lc_value[lct_index]). */
void *lu_context_key_get(const struct lu_context *ctx,
const struct lu_context_key *key);
/*
 * Mark \a key quiescent, or clear that mark again; see the LCT_QUIESCENT
 * discussion in the comment above struct lu_context_key.
 */
void lu_context_key_quiesce(struct lu_context_key *key);
void lu_context_key_revive(struct lu_context_key *key);
/*
 * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
 * owning module. It generates mod##_key_init_generic(), which applies
 * LU_CONTEXT_KEY_INIT() to every key in its NULL-terminated argument
 * list.
 */
#define LU_KEY_INIT_GENERIC(mod) \
static void mod##_key_init_generic(struct lu_context_key *k, ...) \
{ \
struct lu_context_key *key = k; \
va_list args; \
\
va_start(args, k); \
do { \
LU_CONTEXT_KEY_INIT(key); \
key = va_arg(args, struct lu_context_key *); \
} while (key); \
va_end(args); \
}
/*
 * LU_TYPE_INIT(): generate mod##_type_init(), which stamps the listed keys
 * with their owning module and registers them. The key list must be
 * NULL-terminated by the caller of the generated function's helpers here.
 */
#define LU_TYPE_INIT(mod, ...) \
LU_KEY_INIT_GENERIC(mod) \
static int mod##_type_init(struct lu_device_type *t) \
{ \
mod##_key_init_generic(__VA_ARGS__, NULL); \
return lu_context_key_register_many(__VA_ARGS__, NULL); \
} \
struct __##mod##_dummy_type_init {; }
/* LU_TYPE_FINI(): generate mod##_type_fini(), deregistering the listed keys. */
#define LU_TYPE_FINI(mod, ...) \
static void mod##_type_fini(struct lu_device_type *t) \
{ \
lu_context_key_degister_many(__VA_ARGS__, NULL); \
} \
struct __##mod##_dummy_type_fini {; }
/* LU_TYPE_START(): generate mod##_type_start(), reviving the listed keys. */
#define LU_TYPE_START(mod, ...) \
static void mod##_type_start(struct lu_device_type *t) \
{ \
lu_context_key_revive_many(__VA_ARGS__, NULL); \
} \
struct __##mod##_dummy_type_start {; }
/* LU_TYPE_STOP(): generate mod##_type_stop(), quiescing the listed keys. */
#define LU_TYPE_STOP(mod, ...) \
static void mod##_type_stop(struct lu_device_type *t) \
{ \
lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
} \
struct __##mod##_dummy_type_stop {; }
/* Generate the full init/fini/start/stop quartet for a device type. */
#define LU_TYPE_INIT_FINI(mod, ...) \
LU_TYPE_INIT(mod, __VA_ARGS__); \
LU_TYPE_FINI(mod, __VA_ARGS__); \
LU_TYPE_START(mod, __VA_ARGS__); \
LU_TYPE_STOP(mod, __VA_ARGS__)
/* Initialize/finalize a context; \a tags selects which keys get values. */
int lu_context_init(struct lu_context *ctx, __u32 tags);
void lu_context_fini(struct lu_context *ctx);
/* Enter/exit a period of context use; lct_exit hooks run on exit. */
void lu_context_enter(struct lu_context *ctx);
void lu_context_exit(struct lu_context *ctx);
/* Force \a ctx to acquire values for keys registered after its init. */
int lu_context_refill(struct lu_context *ctx);
/*
 * Helper functions to operate on multiple keys. These are used by the default
 * device type operations, defined by LU_TYPE_INIT_FINI(). The variadic key
 * list is terminated by a NULL argument.
 */
int lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
void lu_context_key_revive_many(struct lu_context_key *k, ...);
void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
/**
 * Environment: a pair of contexts handed through call chains.
 */
struct lu_env {
/**
 * "Local" context, used to store data instead of stack.
 */
struct lu_context le_ctx;
/**
 * "Session" context for per-request data.
 */
struct lu_context *le_ses;
};
/* Environment lifecycle; \a tags as for lu_context_init(). */
int lu_env_init(struct lu_env *env, __u32 tags);
void lu_env_fini(struct lu_env *env);
int lu_env_refill(struct lu_env *env);
/** @} lu_context */
/**
* Output site statistical counters into a buffer. Suitable for
* ll_rd_*()-style functions.
*/
int lu_site_stats_print(const struct lu_site *s, struct seq_file *m);
/**
 * Common name structure to be passed around for various name related methods.
 */
struct lu_name {
const char *ln_name;	/* the name itself */
int ln_namelen;		/* length of ln_name in bytes */
};
/**
* Validate names (path components)
*
* To be valid \a name must be non-empty, '\0' terminated of length \a
* name_len, and not contain '/'. The maximum length of a name (before
* say -ENAMETOOLONG will be returned) is really controlled by llite
* and the server. We only check for something insane coming from bad
* integer handling here.
*/
static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
{
	/* Reject NULL, empty names, and insane lengths outright. */
	if (!name || name_len == 0 || name_len >= INT_MAX)
		return false;
	/*
	 * The name must be NUL-terminated exactly at name_len; the strlen()
	 * comparison additionally rejects embedded NUL bytes.
	 */
	if (name[name_len] != '\0' || strlen(name) != name_len)
		return false;
	/* A path component may not contain a directory separator. */
	return !memchr(name, '/', name_len);
}
/**
 * Common buffer structure to be passed around for various xattr_{s,g}et()
 * methods.
 */
struct lu_buf {
void *lb_buf;	/* buffer payload */
size_t lb_len;	/* payload size in bytes */
};
/* printf() helpers for struct lu_buf: format string and matching arguments. */
#define DLUBUF "(%p %zu)"
#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len
/**
* One-time initializers, called at obdclass module initialization, not
* exported.
*/
/**
* Initialization of global lu_* data.
*/
int lu_global_init(void);
/**
* Dual to lu_global_init().
*/
void lu_global_fini(void);
/*
 * Descriptor of a kmem cache handled by lu_kmem_init()/lu_kmem_fini():
 * where to store the cache pointer, the cache name, and the object size.
 */
struct lu_kmem_descr {
struct kmem_cache **ckd_cache;	/* location of the cache pointer */
const char *ckd_name;		/* cache name */
const size_t ckd_size;		/* object size */
};
/* Set up / tear down the caches described by \a caches. */
int lu_kmem_init(struct lu_kmem_descr *caches);
void lu_kmem_fini(struct lu_kmem_descr *caches);
/* Allocation helpers for struct lu_buf. */
void lu_buf_free(struct lu_buf *buf);
void lu_buf_alloc(struct lu_buf *buf, size_t size);
void lu_buf_realloc(struct lu_buf *buf, size_t size);
int lu_buf_check_and_grow(struct lu_buf *buf, size_t len);
struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len);
/** @} lu */
#endif /* __LUSTRE_LU_OBJECT_H */