/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2017, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/llite/llite_lib.c
 *
 * Lustre Light Super operations
 */

#define DEBUG_SUBSYSTEM S_LLITE

39
#include <linux/cpu.h>
40
#include <linux/module.h>
41
#include <linux/random.h>
42
#include <linux/statfs.h>
James Simmons's avatar
James Simmons committed
43
#include <linux/time.h>
zab's avatar
b=2776    
zab committed
44
#include <linux/types.h>
45
#include <libcfs/linux/linux-uuid.h>
46
#include <linux/version.h>
anserper's avatar
anserper committed
47
#include <linux/mm.h>
48
#include <linux/user_namespace.h>
49
#include <linux/delay.h>
50
#include <linux/uidgid.h>
51
#include <linux/security.h>
52
#include <linux/fs_struct.h>
nathan's avatar
nathan committed
53

54
55
56
#ifndef HAVE_CPUS_READ_LOCK
#include <libcfs/linux/linux-cpu.h>
#endif
57
#include <uapi/linux/lustre/lustre_ioctl.h>
58
59
60
61
#ifdef HAVE_UAPI_LINUX_MOUNT_H
#include <uapi/linux/mount.h>
#endif

nathan's avatar
nathan committed
62
63
64
65
#include <lustre_ha.h>
#include <lustre_dlm.h>
#include <lprocfs_status.h>
#include <lustre_disk.h>
66
#include <uapi/linux/lustre/lustre_param.h>
tappro's avatar
tappro committed
67
#include <lustre_log.h>
nikita's avatar
nikita committed
68
#include <cl_object.h>
shadow's avatar
shadow committed
69
#include <obd_cksum.h>
70
71
#include "llite_internal.h"

72
/* Global slab cache pointer; non-static, so it has external linkage.
 * NOTE(review): the name suggests it backs per-file data allocations and
 * is created/destroyed elsewhere (not visible in this section) -- confirm.
 */
struct kmem_cache *ll_file_data_slab;
nathan's avatar
nathan committed
73

74
#ifndef log2
75
/* Fallback used only when no log2() is already defined.
 * NOTE(review): assumes ffz() is the kernel "find first zero bit" helper,
 * in which case this yields the index of the lowest set bit of n and
 * equals log2(n) only when n is a power of two -- confirm.
 */
#define log2(n) ffz(~(n))
76
77
#endif

78
79
80
81
82
83
84
85
86
87
88
/**
 * Default limit for concurrently active async readahead requests.
 *
 * The limit is half of the CPU weight reported by cfs_cpt_weight() for
 * CFS_CPT_ANY, so that readahead does not oversubscribe the cores.
 * With a single visible core the result is 0.
 *
 * \retval half of the reported CPU weight, rounded down
 */
static inline unsigned int ll_get_ra_async_max_active(void)
{
	int cpu_weight = cfs_cpt_weight(cfs_cpt_tab, CFS_CPT_ANY);

	return cpu_weight >> 1;
}

nathan's avatar
nathan committed
89
static struct ll_sb_info *ll_init_sbi(void)
90
{
91
92
93
	struct ll_sb_info *sbi = NULL;
	unsigned long pages;
	unsigned long lru_page_max;
94
	struct sysinfo si;
95
	int rc;
96
	int i;
97

98
	ENTRY;
99

100
101
	OBD_ALLOC_PTR(sbi);
	if (sbi == NULL)
102
103
104
105
106
		RETURN(ERR_PTR(-ENOMEM));

	rc = pcc_super_init(&sbi->ll_pcc_super);
	if (rc < 0)
		GOTO(out_sbi, rc);
107

108
109
110
111
	spin_lock_init(&sbi->ll_lock);
	mutex_init(&sbi->ll_lco.lco_lock);
	spin_lock_init(&sbi->ll_pp_extent_lock);
	spin_lock_init(&sbi->ll_process_lock);
tappro's avatar
tappro committed
112
        sbi->ll_rw_stats_on = 0;
113
	sbi->ll_statfs_max_age = OBD_STATFS_CACHE_SECONDS;
anserper's avatar
anserper committed
114
115
116

        si_meminfo(&si);
        pages = si.totalram - si.totalhigh;
117
	lru_page_max = pages / 2;
118

119
	sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
120
	sbi->ll_ra_info.ll_readahead_wq =
121
122
123
124
125
		cfs_cpt_bind_workqueue("ll-readahead-wq", cfs_cpt_tab,
				       0, CFS_CPT_ANY,
				       sbi->ll_ra_info.ra_async_max_active);
	if (IS_ERR(sbi->ll_ra_info.ll_readahead_wq))
		GOTO(out_pcc, rc = PTR_ERR(sbi->ll_ra_info.ll_readahead_wq));
126

127
	/* initialize ll_cache data */
128
	sbi->ll_cache = cl_cache_init(lru_page_max);
129
	if (sbi->ll_cache == NULL)
130
		GOTO(out_destroy_ra, rc = -ENOMEM);
131

132
	sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
133
						    SBI_DEFAULT_READ_AHEAD_MAX);
134
135
	sbi->ll_ra_info.ra_async_pages_per_file_threshold =
				sbi->ll_ra_info.ra_max_pages_per_file;
136
	sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
137
	sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
138
	atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
nathan's avatar
nathan committed
139

140
        sbi->ll_flags |= LL_SBI_VERBOSE;
pravin's avatar
pravin committed
141
#ifdef ENABLE_CHECKSUM
liuy's avatar
liuy committed
142
143
        sbi->ll_flags |= LL_SBI_CHECKSUM;
#endif
144
145
146
#ifdef ENABLE_FLOCK
	sbi->ll_flags |= LL_SBI_FLOCK;
#endif
liuy's avatar
liuy committed
147

yury's avatar
b=13696    
yury committed
148
149
150
#ifdef HAVE_LRU_RESIZE_SUPPORT
        sbi->ll_flags |= LL_SBI_LRU_RESIZE;
#endif
151
	sbi->ll_flags |= LL_SBI_LAZYSTATFS;
yury's avatar
b=13696    
yury committed
152

liuy's avatar
liuy committed
153
        for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
154
155
156
157
		spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
			       pp_r_hist.oh_lock);
		spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
			       pp_w_hist.oh_lock);
nathan's avatar
nathan committed
158
159
        }

160
	/* metadata statahead is enabled by default */
161
	sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
162
163
164
	sbi->ll_sa_max = LL_SA_RPC_DEF;
	atomic_set(&sbi->ll_sa_total, 0);
	atomic_set(&sbi->ll_sa_wrong, 0);
165
	atomic_set(&sbi->ll_sa_running, 0);
166
167
	atomic_set(&sbi->ll_agl_total, 0);
	sbi->ll_flags |= LL_SBI_AGL_ENABLED;
168
	sbi->ll_flags |= LL_SBI_FAST_READ;
169
	sbi->ll_flags |= LL_SBI_TINY_WRITE;
170
	ll_sbi_set_encrypt(sbi, true);
171

172
173
174
175
	/* root squash */
	sbi->ll_squash.rsi_uid = 0;
	sbi->ll_squash.rsi_gid = 0;
	INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
176
	spin_lock_init(&sbi->ll_squash.rsi_lock);
177

Li Xi's avatar
Li Xi committed
178
179
180
	/* Per-filesystem file heat */
	sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
	sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
181
	RETURN(sbi);
182
183
out_destroy_ra:
	destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
184
185
186
187
188
out_pcc:
	pcc_super_fini(&sbi->ll_pcc_super);
out_sbi:
	OBD_FREE_PTR(sbi);
	RETURN(ERR_PTR(rc));
189
190
}

191
static void ll_free_sbi(struct super_block *sb)
192
{
193
194
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	ENTRY;
195

196
	if (sbi != NULL) {
197
198
		if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
			cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
199
200
		if (sbi->ll_ra_info.ll_readahead_wq)
			destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
201
202
203
204
		if (sbi->ll_cache != NULL) {
			cl_cache_decref(sbi->ll_cache);
			sbi->ll_cache = NULL;
		}
205
		pcc_super_fini(&sbi->ll_pcc_super);
206
207
208
		OBD_FREE(sbi, sizeof(*sbi));
	}
	EXIT;
209
210
}

211
static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
212
{
213
	struct inode *root = NULL;
214
215
216
217
218
219
220
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	struct obd_statfs *osfs = NULL;
	struct ptlrpc_request *request = NULL;
	struct obd_connect_data *data = NULL;
	struct obd_uuid *uuid;
	struct md_op_data *op_data;
	struct lustre_md lmd;
221
	u64 valid;
222
	int size, err, checksum;
nathan's avatar
nathan committed
223

224
225
226
	ENTRY;
	sbi->ll_md_obd = class_name2obd(md);
	if (!sbi->ll_md_obd) {
tappro's avatar
tappro committed
227
                CERROR("MD %s: not setup or attached\n", md);
228
229
230
                RETURN(-EINVAL);
        }

tappro's avatar
tappro committed
231
        OBD_ALLOC_PTR(data);
nathan's avatar
nathan committed
232
233
        if (data == NULL)
                RETURN(-ENOMEM);
ericm's avatar
ericm committed
234

235
236
237
238
239
240
        OBD_ALLOC_PTR(osfs);
        if (osfs == NULL) {
                OBD_FREE_PTR(data);
                RETURN(-ENOMEM);
        }

241
242
243
244
	/* pass client page size via ocd_grant_blkbits, the server should report
	 * back its backend blocksize for grant calculation purpose */
	data->ocd_grant_blkbits = PAGE_SHIFT;

245
	/* indicate MDT features supported by this client */
246
247
248
	data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
				  OBD_CONNECT_ATTRFID  | OBD_CONNECT_GRANT |
				  OBD_CONNECT_VERSION  | OBD_CONNECT_BRW_SIZE |
249
				  OBD_CONNECT_SRVLOCK  | OBD_CONNECT_TRUNCLOCK|
250
251
252
				  OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA |
				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID     |
				  OBD_CONNECT_AT       | OBD_CONNECT_LOV_V3   |
253
254
				  OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
				  OBD_CONNECT_64BITHASH |
Niu Yawei's avatar
Niu Yawei committed
255
				  OBD_CONNECT_EINPROGRESS |
256
				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
257
				  OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS|
258
				  OBD_CONNECT_MAX_EASIZE |
259
				  OBD_CONNECT_FLOCK_DEAD |
260
				  OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
261
				  OBD_CONNECT_OPEN_BY_FID |
262
				  OBD_CONNECT_DIR_STRIPE |
263
				  OBD_CONNECT_BULK_MBITS | OBD_CONNECT_CKSUM |
264
				  OBD_CONNECT_SUBTREE |
265
				  OBD_CONNECT_MULTIMODRPCS |
266
267
				  OBD_CONNECT_GRANT_PARAM |
				  OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
268

269
	data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
270
				   OBD_CONNECT2_SUM_STATFS |
271
				   OBD_CONNECT2_OVERSTRIPING |
272
273
				   OBD_CONNECT2_FLR |
				   OBD_CONNECT2_LOCK_CONVERT |
274
				   OBD_CONNECT2_ARCHIVE_ID_ARRAY |
275
				   OBD_CONNECT2_INC_XID |
276
				   OBD_CONNECT2_LSOM |
277
				   OBD_CONNECT2_ASYNC_DISCARD |
278
				   OBD_CONNECT2_PCC |
279
280
				   OBD_CONNECT2_CRUSH |
				   OBD_CONNECT2_GETATTR_PFID;
281

yury's avatar
b=2262    
yury committed
282
#ifdef HAVE_LRU_RESIZE_SUPPORT
yury's avatar
b=13696    
yury committed
283
284
        if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
                data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
yury's avatar
b=2262    
yury committed
285
#endif
286
	data->ocd_connect_flags |= OBD_CONNECT_ACL_FLAGS;
287

288
	data->ocd_cksum_types = obd_cksum_types_supported_client();
289

290
291
292
	if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
		/* flag mdc connection as lightweight, only used for test
		 * purpose, use with care */
293
		data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
294

295
296
	data->ocd_ibits_known = MDS_INODELOCK_FULL;
	data->ocd_version = LUSTRE_VERSION_CODE;
nathan's avatar
nathan committed
297

298
299
300
301
	if (sb->s_flags & SB_RDONLY)
		data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
	if (sbi->ll_flags & LL_SBI_USER_XATTR)
		data->ocd_connect_flags |= OBD_CONNECT_XATTR;
302

303
#ifdef SB_NOSEC
304
305
306
	/* Setting this indicates we correctly support S_NOSEC (See kernel
	 * commit 9e1f1de02c2275d7172e18dc4e7c2065777611bf)
	 */
307
	sb->s_flags |= SB_NOSEC;
308
309
#endif

310
311
312
313
314
315
	if (sbi->ll_flags & LL_SBI_FLOCK)
		sbi->ll_fop = &ll_file_operations_flock;
	else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
		sbi->ll_fop = &ll_file_operations;
	else
		sbi->ll_fop = &ll_file_operations_noflock;
tappro's avatar
tappro committed
316

317
318
319
320
	/* always ping even if server suppress_pings */
	if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
		data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;

321
	obd_connect_set_secctx(data);
322
323
	if (ll_sbi_has_encrypt(sbi))
		obd_connect_set_enc(data);
324

325
326
327
328
#if defined(CONFIG_SECURITY)
	data->ocd_connect_flags2 |= OBD_CONNECT2_SELINUX_POLICY;
#endif

329
	data->ocd_brw_size = MD_MAX_BRW_SIZE;
330

331
	err = obd_connect(NULL, &sbi->ll_md_exp, sbi->ll_md_obd,
332
			  &sbi->ll_sb_uuid, data, sbi->ll_cache);
333
334
335
336
337
338
339
340
341
342
	if (err == -EBUSY) {
		LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing "
				   "recovery, of which this client is not a "
				   "part. Please wait for recovery to complete,"
				   " abort, or time out.\n", md);
		GOTO(out, err);
	} else if (err) {
		CERROR("cannot connect to %s: rc = %d\n", md, err);
		GOTO(out, err);
	}
tappro's avatar
tappro committed
343

344
345
	sbi->ll_md_exp->exp_connect_data = *data;

346
347
348
349
350
351
352
353
	err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
			   LUSTRE_SEQ_METADATA);
	if (err) {
		CERROR("%s: Can't init metadata layer FID infrastructure, "
		       "rc = %d\n", sbi->ll_md_exp->exp_obd->obd_name, err);
		GOTO(out_md, err);
	}

wangdi's avatar
wangdi committed
354
355
356
	/* For mount, we only need fs info from MDT0, and also in DNE, it
	 * can make sure the client can be mounted as long as MDT0 is
	 * avaible */
357
	err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
358
			ktime_get_seconds() - sbi->ll_statfs_max_age,
wangdi's avatar
wangdi committed
359
			OBD_STATFS_FOR_MDT0);
360
	if (err)
361
		GOTO(out_md_fid, err);
362
363
364
365
366
367
368
369

	/* This needs to be after statfs to ensure connect has finished.
	 * Note that "data" does NOT contain the valid connect reply.
	 * If connecting to a 1.8 server there will be no LMV device, so
	 * we can access the MDC export directly and exp_connect_flags will
	 * be non-zero, but if accessing an upgraded 2.1 server it will
	 * have the correct flags filled in.
	 * XXX: fill in the LMV exp_connect_flags from MDC(s). */
370
371
	valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
	if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
372
373
374
	    valid != CLIENT_CONNECT_MDT_REQD) {
		char *buf;

375
376
		OBD_ALLOC_WAIT(buf, PAGE_SIZE);
		obd_connect_flags2str(buf, PAGE_SIZE,
377
				      valid ^ CLIENT_CONNECT_MDT_REQD, 0, ",");
378
379
380
381
382
		LCONSOLE_ERROR_MSG(0x170, "Server %s does not support "
				   "feature(s) needed for correct operation "
				   "of this client (%s). Please upgrade "
				   "server or downgrade client.\n",
				   sbi->ll_md_exp->exp_obd->obd_name, buf);
383
		OBD_FREE(buf, PAGE_SIZE);
384
		GOTO(out_md_fid, err = -EPROTO);
385
	}
386

387
388
	size = sizeof(*data);
	err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
389
			   KEY_CONN_DATA,  &size, data);
390
391
392
	if (err) {
		CERROR("%s: Get connect data failed: rc = %d\n",
		       sbi->ll_md_exp->exp_obd->obd_name, err);
393
		GOTO(out_md_fid, err);
394
	}
395

wangdi's avatar
wangdi committed
396
	LASSERT(osfs->os_bsize);
397
398
399
400
401
402
	sb->s_blocksize = osfs->os_bsize;
	sb->s_blocksize_bits = log2(osfs->os_bsize);
	sb->s_magic = LL_SUPER_MAGIC;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sbi->ll_namelen = osfs->os_namelen;
	sbi->ll_mnt.mnt = current->fs->root.mnt;
403

404
405
406
407
408
409
	if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
	    !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
		LCONSOLE_INFO("Disabling user_xattr feature because "
			      "it is not supported on the server\n");
		sbi->ll_flags &= ~LL_SBI_USER_XATTR;
	}
ericm's avatar
ericm committed
410

411
412
413
	if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
#ifdef SB_POSIXACL
		sb->s_flags |= SB_POSIXACL;
nathan's avatar
nathan committed
414
#endif
415
416
417
418
419
		sbi->ll_flags |= LL_SBI_ACL;
	} else {
		LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
#ifdef SB_POSIXACL
		sb->s_flags &= ~SB_POSIXACL;
tappro's avatar
tappro committed
420
#endif
421
422
		sbi->ll_flags &= ~LL_SBI_ACL;
	}
nathan's avatar
nathan committed
423

424
425
	if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
		sbi->ll_flags |= LL_SBI_64BIT_HASH;
426

427
	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
428
429
		sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;

430
431
432
	if (obd_connect_has_secctx(data))
		sbi->ll_flags |= LL_SBI_FILE_SECCTX;

433
434
435
436
437
438
439
440
	if (ll_sbi_has_encrypt(sbi) && !obd_connect_has_enc(data)) {
		if (ll_sbi_has_test_dummy_encryption(sbi))
			LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
				      sbi->ll_fsname,
				      sbi->ll_md_exp->exp_obd->obd_name);
		ll_sbi_set_encrypt(sbi, false);
	}

441
442
443
444
	if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
		if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
			LCONSOLE_INFO("%s: disabling xattr cache due to "
				      "unknown maximum xattr size.\n", dt);
445
446
447
		} else if (!sbi->ll_xattr_cache_set) {
			/* If xattr_cache is already set (no matter 0 or 1)
			 * during processing llog, it won't be enabled here. */
448
449
450
451
452
			sbi->ll_flags |= LL_SBI_XATTR_CACHE;
			sbi->ll_xattr_cache_enabled = 1;
		}
	}

453
454
	sbi->ll_dt_obd = class_name2obd(dt);
	if (!sbi->ll_dt_obd) {
455
		CERROR("DT %s: not setup or attached\n", dt);
456
		GOTO(out_md_fid, err = -ENODEV);
457
	}
458

459
460
461
462
	/* pass client page size via ocd_grant_blkbits, the server should report
	 * back its backend blocksize for grant calculation purpose */
	data->ocd_grant_blkbits = PAGE_SHIFT;

463
	/* indicate OST features supported by this client */
464
	data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
465
				  OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
466
467
468
469
470
				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
				  OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
				  OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
				  OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
				  OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
Niu Yawei's avatar
Niu Yawei committed
471
				  OBD_CONNECT_EINPROGRESS |
472
				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
473
				  OBD_CONNECT_LAYOUTLOCK |
474
				  OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
475
				  OBD_CONNECT_BULK_MBITS | OBD_CONNECT_SHORTIO |
476
				  OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
477
478
	data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
				   OBD_CONNECT2_INC_XID;
479

480
481
482
	if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
		data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;

483
484
	/* OBD_CONNECT_CKSUM should always be set, even if checksums are
	 * disabled by default, because it can still be enabled on the
485
486
487
	 * fly via /sys. As a consequence, we still need to come to an
	 * agreement on the supported algorithms at connect time
	 */
488
489
490
491
492
	data->ocd_connect_flags |= OBD_CONNECT_CKSUM;

	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
		data->ocd_cksum_types = OBD_CKSUM_ADLER;
	else
493
		data->ocd_cksum_types = obd_cksum_types_supported_client();
johann's avatar
johann committed
494

yury's avatar
b=2262    
yury committed
495
#ifdef HAVE_LRU_RESIZE_SUPPORT
496
	data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
yury's avatar
b=2262    
yury committed
497
#endif
498
499
500
501
	/* always ping even if server suppress_pings */
	if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
		data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;

502
503
504
	if (ll_sbi_has_encrypt(sbi))
		obd_connect_set_enc(data);

505
	CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d "
506
507
	       "ocd_grant: %d\n", data->ocd_connect_flags,
	       data->ocd_version, data->ocd_grant);
ericm's avatar
ericm committed
508

509
510
	sbi->ll_dt_obd->obd_upcall.onu_owner = &sbi->ll_lco;
	sbi->ll_dt_obd->obd_upcall.onu_upcall = cl_ocd_update;
511

512
	data->ocd_brw_size = DT_MAX_BRW_SIZE;
513

514
	err = obd_connect(NULL, &sbi->ll_dt_exp, sbi->ll_dt_obd,
515
			  &sbi->ll_sb_uuid, data, sbi->ll_cache);
516
517
518
519
520
521
522
523
524
525
526
	if (err == -EBUSY) {
		LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing "
				   "recovery, of which this client is not a "
				   "part.  Please wait for recovery to "
				   "complete, abort, or time out.\n", dt);
		GOTO(out_md, err);
	} else if (err) {
		CERROR("%s: Cannot connect to %s: rc = %d\n",
		       sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
		GOTO(out_md, err);
	}
bobijam's avatar
bobijam committed
527

528
529
530
531
532
533
534
535
536
537
	if (ll_sbi_has_encrypt(sbi) &&
	    !obd_connect_has_enc(&sbi->ll_dt_obd->u.lov.lov_ocd)) {
		if (ll_sbi_has_test_dummy_encryption(sbi))
			LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
				      sbi->ll_fsname, dt);
		ll_sbi_set_encrypt(sbi, false);
	} else if (ll_sbi_has_test_dummy_encryption(sbi)) {
		LCONSOLE_WARN("Test dummy encryption mode enabled\n");
	}

538
539
	sbi->ll_dt_exp->exp_connect_data = *data;

540
	/* Don't change value if it was specified in the config log */
541
	if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages == -1) {
542
		sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
543
			max_t(unsigned long, SBI_DEFAULT_READ_AHEAD_WHOLE_MAX,
544
			      (data->ocd_brw_size >> PAGE_SHIFT));
545
546
547
548
549
		if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages >
		    sbi->ll_ra_info.ra_max_pages_per_file)
			sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
				sbi->ll_ra_info.ra_max_pages_per_file;
	}
550

551
552
553
554
555
556
557
558
	err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
			   LUSTRE_SEQ_METADATA);
	if (err) {
		CERROR("%s: Can't init data layer FID infrastructure, "
		       "rc = %d\n", sbi->ll_dt_exp->exp_obd->obd_name, err);
		GOTO(out_dt, err);
	}

559
	mutex_lock(&sbi->ll_lco.lco_lock);
560
561
562
	sbi->ll_lco.lco_flags = data->ocd_connect_flags;
	sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
	sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
563
	mutex_unlock(&sbi->ll_lco.lco_lock);
yury's avatar
yury committed
564

565
	fid_zero(&sbi->ll_root_fid);
566
567
	err = md_get_root(sbi->ll_md_exp, get_mount_fileset(sb),
			   &sbi->ll_root_fid);
568
569
	if (err) {
		CERROR("cannot mds_connect: rc = %d\n", err);
570
		GOTO(out_lock_cn_cb, err);
571
572
573
574
575
	}
	if (!fid_is_sane(&sbi->ll_root_fid)) {
		CERROR("%s: Invalid root fid "DFID" during mount\n",
		       sbi->ll_md_exp->exp_obd->obd_name,
		       PFID(&sbi->ll_root_fid));
576
		GOTO(out_lock_cn_cb, err = -EINVAL);
577
578
	}
	CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
579

580
	sb->s_op = &lustre_super_operations;
581
	sb->s_xattr = ll_xattr_handlers;
582
#if THREAD_SIZE >= 8192 /*b=17630*/
583
	sb->s_export_op = &lustre_export_operations;
584
#endif
585
586
587
#ifdef HAVE_LUSTRE_CRYPTO
	llcrypt_set_ops(sb, &lustre_cryptops);
#endif
588

589
590
	/* make root inode
	 * XXX: move this to after cbd setup? */
591
	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
592
	if (sbi->ll_flags & LL_SBI_ACL)
593
594
595
596
		valid |= OBD_MD_FLACL;

	OBD_ALLOC_PTR(op_data);
	if (op_data == NULL)
597
		GOTO(out_lock_cn_cb, err = -ENOMEM);
598
599
600
601
602
603

	op_data->op_fid1 = sbi->ll_root_fid;
	op_data->op_mode = 0;
	op_data->op_valid = valid;

	err = md_getattr(sbi->ll_md_exp, op_data, &request);
604

605
606
607
608
	OBD_FREE_PTR(op_data);
	if (err) {
		CERROR("%s: md_getattr failed for root: rc = %d\n",
		       sbi->ll_md_exp->exp_obd->obd_name, err);
609
		GOTO(out_lock_cn_cb, err);
610
611
612
613
614
615
616
	}

	err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
			       sbi->ll_md_exp, &lmd);
	if (err) {
		CERROR("failed to understand root inode md: rc = %d\n", err);
		ptlrpc_req_finished(request);
617
		GOTO(out_lock_cn_cb, err);
618
	}
619

620
	LASSERT(fid_is_sane(&sbi->ll_root_fid));
621
	root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
622
					    sbi->ll_flags & LL_SBI_32BIT_API),
623
		       &lmd);
624
625
	md_free_lustre_md(sbi->ll_md_exp, &lmd);
	ptlrpc_req_finished(request);
626

627
	if (IS_ERR(root)) {
628
		lmd_clear_acl(&lmd);
629
630
		err = IS_ERR(root) ? PTR_ERR(root) : -EBADF;
		root = NULL;
631
632
		CERROR("%s: bad ll_iget() for root: rc = %d\n",
		       sbi->ll_fsname, err);
633
634
		GOTO(out_root, err);
	}
635

636
	checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
637
638
639
640
641
642
643
644
645
	if (sbi->ll_checksum_set) {
		err = obd_set_info_async(NULL, sbi->ll_dt_exp,
					 sizeof(KEY_CHECKSUM), KEY_CHECKSUM,
					 sizeof(checksum), &checksum, NULL);
		if (err) {
			CERROR("%s: Set checksum failed: rc = %d\n",
			       sbi->ll_dt_exp->exp_obd->obd_name, err);
			GOTO(out_root, err);
		}
646
	}
647
	cl_sb_init(sb);
liuy's avatar
liuy committed
648

649
650
	sb->s_root = d_make_root(root);
	if (sb->s_root == NULL) {
651
652
653
654
		err = -ENOMEM;
		CERROR("%s: can't make root dentry: rc = %d\n",
		       sbi->ll_fsname, err);
		GOTO(out_root, err);
655
	}
shadow's avatar
shadow committed
656

657
	sbi->ll_sdev_orig = sb->s_dev;
yury's avatar
b=16772    
yury committed
658

659
660
661
662
663
664
	/* We set sb->s_dev equal on all lustre clients in order to support
	 * NFS export clustering.  NFSD requires that the FSID be the same
	 * on all clients. */
	/* s_dev is also used in lt_compare() to compare two fs, but that is
	 * only a node-local comparison. */
	uuid = obd_get_uuid(sbi->ll_md_exp);
665
	if (uuid != NULL)
666
		sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
shadow's avatar
shadow committed
667

668
669
670
671
	if (data != NULL)
		OBD_FREE_PTR(data);
	if (osfs != NULL)
		OBD_FREE_PTR(osfs);
672

673
674
675
676
	if (sbi->ll_dt_obd) {
		err = sysfs_create_link(&sbi->ll_kset.kobj,
					&sbi->ll_dt_obd->obd_kset.kobj,
					sbi->ll_dt_obd->obd_type->typ_name);
677
		if (err < 0) {
678
			CERROR("%s: could not register %s in llite: rc = %d\n",
679
			       dt, sbi->ll_fsname, err);
680
681
			err = 0;
		}
682
683
684
685
686
687
	}

	if (sbi->ll_md_obd) {
		err = sysfs_create_link(&sbi->ll_kset.kobj,
					&sbi->ll_md_obd->obd_kset.kobj,
					sbi->ll_md_obd->obd_type->typ_name);
688
689
		if (err < 0) {
			CERROR("%s: could not register %s in llite: rc = %d\n",
690
			       md, sbi->ll_fsname, err);
691
692
693
			err = 0;
		}
	}
694

695
	RETURN(err);
696
out_root:
697
698
	if (root)
		iput(root);
699
700
out_lock_cn_cb:
	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
tappro's avatar
tappro committed
701
out_dt:
702
703
	obd_disconnect(sbi->ll_dt_exp);
	sbi->ll_dt_exp = NULL;
704
	sbi->ll_dt_obd = NULL;
705
706
out_md_fid:
	obd_fid_fini(sbi->ll_md_exp->exp_obd);
tappro's avatar
tappro committed
707
out_md:
708
709
	obd_disconnect(sbi->ll_md_exp);
	sbi->ll_md_exp = NULL;
710
	sbi->ll_md_obd = NULL;
711
out:
712
713
714
715
716
	if (data != NULL)
		OBD_FREE_PTR(data);
	if (osfs != NULL)
		OBD_FREE_PTR(osfs);
	return err;
717
718
}

nathan's avatar
nathan committed
719
int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
720
{
721
	int size, rc;
722

723
724
725
726
727
728
729
730
731
	size = sizeof(*lmmsize);
	rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
			  KEY_MAX_EASIZE, &size, lmmsize);
	if (rc != 0) {
		CERROR("%s: cannot get max LOV EA size: rc = %d\n",
		       sbi->ll_dt_exp->exp_obd->obd_name, rc);
		RETURN(rc);
	}

732
733
	CDEBUG(D_INFO, "max LOV ea size: %d\n", *lmmsize);

734
735
	size = sizeof(int);
	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
736
			  KEY_MAX_EASIZE, &size, lmmsize);
737
738
	if (rc)
		CERROR("Get max mdsize error rc %d\n", rc);
739

740
741
	CDEBUG(D_INFO, "max LMV ea size: %d\n", *lmmsize);

742
743
744
	RETURN(rc);
}

745
746
747
748
749
750
751
752
753
754
755
/**
 * Get the value of the default_easize parameter.
 *
 * \see client_obd::cl_default_mds_easize
 *
 * \param[in] sbi	superblock info for this filesystem
 * \param[out] lmmsize	pointer to storage location for value
 *
 * \retval 0		on success
 * \retval negative	negated errno on failure
 */
756
757
758
759
760
761
int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
{
	int size, rc;

	size = sizeof(int);
	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
762
			 KEY_DEFAULT_EASIZE, &size, lmmsize);
763
764
765
766
767
768
	if (rc)
		CERROR("Get default mdsize error rc %d\n", rc);

	RETURN(rc);
}

769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
/**
 * Set the default_easize parameter to the given value.
 *
 * \see client_obd::cl_default_mds_easize
 *
 * \param[in] sbi	superblock info for this filesystem
 * \param[in] lmmsize	the size to set
 *
 * \retval 0		on success
 * \retval negative	negated errno on failure
 */
int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
{
	int rc;

784
785
	if (lmmsize < sizeof(struct lov_mds_md) ||
	    lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
786
787
788
789
790
791
792
793
794
		return -EINVAL;

	rc = obd_set_info_async(NULL, sbi->ll_md_exp,
				sizeof(KEY_DEFAULT_EASIZE), KEY_DEFAULT_EASIZE,
				sizeof(int), &lmmsize, NULL);

	RETURN(rc);
}

795
static void client_common_put_super(struct super_block *sb)
nathan's avatar
nathan committed
796
{
797
798
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	ENTRY;
nathan's avatar
nathan committed
799

800
	cl_sb_fini(sb);
vitaly's avatar
vitaly committed
801

802
	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
803
804
	obd_disconnect(sbi->ll_dt_exp);
	sbi->ll_dt_exp = NULL;
nathan's avatar
nathan committed
805

806
	ll_debugfs_unregister_super(sb);
tappro's avatar
tappro committed
807

808
	obd_fid_fini(sbi->ll_md_exp->exp_obd);
809
810
	obd_disconnect(sbi->ll_md_exp);
	sbi->ll_md_exp = NULL;
nathan's avatar
nathan committed
811

812
	EXIT;
shadow's avatar
shadow committed
813
814
815
816
}

void ll_kill_super(struct super_block *sb)
{
817
818
	struct ll_sb_info *sbi;
	ENTRY;
shadow's avatar
shadow committed
819

820
821
	/* not init sb ?*/
	if (!(sb->s_flags & SB_ACTIVE))
822
		return;
shadow's avatar
shadow committed
823

824
825
826
827
	sbi = ll_s2sbi(sb);
	/* we need restore s_dev from changed for clustred NFS before put_super
	 * because new kernels have cached s_dev and change sb->s_dev in
	 * put_super not affected real removing devices */
828
829
	if (sbi) {
		sb->s_dev = sbi->ll_sdev_orig;
830
831

		/* wait running statahead threads to quit */
832
833
834
		while (atomic_read(&sbi->ll_sa_running) > 0)
			schedule_timeout_uninterruptible(
				cfs_time_seconds(1) >> 3);
835
	}
836

837
	EXIT;
838
839
}

nathan's avatar
nathan committed
840
/**
 * Test whether the option string \a data starts with the name \a opt.
 *
 * Only the first strlen(opt) characters are compared, so anything that
 * follows the option name in \a data is ignored.
 *
 * \param[in] opt	option name to look for
 * \param[in] data	option text to test
 * \param[in] fl	value to return on a match
 *
 * \retval fl		if \a data begins with \a opt
 * \retval 0		otherwise
 */
static inline int ll_set_opt(const char *opt, char *data, int fl)
{
	size_t optlen = strlen(opt);

	return strncmp(opt, data, optlen) == 0 ? fl : 0;
}

nathan's avatar
nathan committed
848
/* non-client-specific mount options are parsed in lmd_parse */
849
static int ll_options(char *options, struct ll_sb_info *sbi)
850
{
851
852
853
854
	int tmp;
	char *s1 = options, *s2;
	int *flags = &sbi->ll_flags;
	ENTRY;
855

856
857
	if (!options)
		RETURN(0);
858

859
	CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
nathan's avatar
nathan committed
860

861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
	while (*s1) {
		CDEBUG(D_SUPER, "next opt=%s\n", s1);
		tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
		if (tmp) {
			*flags |= tmp;
			goto next;
		}
		tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
		if (tmp) {
			*flags = (*flags & ~LL_SBI_LOCALFLOCK) | tmp;
			goto next;
		}
		tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
		if (tmp) {
			*flags = (*flags & ~LL_SBI_FLOCK) | tmp;
			goto next;
		}
		tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
		if (tmp) {
			*flags &= ~tmp;
			goto next;
		}
		tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
		if (tmp) {
			*flags |= tmp;
			goto next;
		}
		tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
		if (tmp) {
			*flags &= ~tmp;
			goto next;
		}
893
894
895
896
897
898
899
900
901
902
903
904
		tmp = ll_set_opt("context", s1, 1);
		if (tmp)
			goto next;
		tmp = ll_set_opt("fscontext", s1, 1);
		if (tmp)
			goto next;
		tmp = ll_set_opt("defcontext", s1, 1);
		if (tmp)
			goto next;
		tmp = ll_set_opt("rootcontext", s1, 1);
		if (tmp)
			goto next;
905
906
907
908
909
910
911
912
913
914
		tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
		if (tmp) {
			*flags |= tmp;
			goto next;
		}
		tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
		if (tmp) {
			*flags &= ~tmp;
			goto next;
		}
nathan's avatar
nathan committed
915

916
917
918
919
920
921
922
923
924
925
926
927
		tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
		if (tmp) {
			*flags |= tmp;
			sbi->ll_checksum_set = 1;
			goto next;
		}
		tmp = ll_set_opt("nochecksum",