1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <sys/capsicum.h>
40 #include <sys/extattr.h>
41
42 /*
43 * Functions that perform the vfs operations required by the routines in
44 * nfsd_serv.c. It is hoped that this change will make the server more
45 * portable.
46 */
47
48 #include <fs/nfs/nfsport.h>
49 #include <security/mac/mac_framework.h>
50 #include <sys/callout.h>
51 #include <sys/filio.h>
52 #include <sys/hash.h>
53 #include <sys/sysctl.h>
54 #include <nlm/nlm_prot.h>
55 #include <nlm/nlm.h>
56
/* Register the "nfsd" feature with the description "NFSv4 server". */
FEATURE(nfsd, "NFSv4 server");

/* State that is defined elsewhere and only referenced from this file. */
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern int nfsrv_useacl;
extern int newnfs_numnfsd;
extern struct mount nfsv4root_mnt;
extern struct nfsrv_stablefirst nfsrv_stablefirst;
extern SVCPOOL *nfsrvd_pool;
extern struct nfsv4lock nfsd_suspend_lock;
extern struct nfsclienthashhead *nfsclienthash;
extern struct nfslockhashhead *nfslockhash;
extern struct nfssessionhash *nfssessionhash;
extern int nfsrv_sessionhashsize;
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfslayouthash *nfslayouthash;
extern int nfsrv_layouthashsize;
extern struct mtx nfsrv_dslock_mtx;
extern int nfs_pnfsiothreads;
extern struct nfsdontlisthead nfsrv_dontlisthead;
extern volatile int nfsrv_dontlistlen;
extern volatile int nfsrv_devidcnt;
extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
extern int nfs_bufpackets;
extern u_long sb_max_adj;

/*
 * Storage defined by this file.  NFSDLOCKMUTEX and NFSSTATESPINLOCK are
 * macros whose expansion is not visible here.
 */
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
NFSSTATESPINLOCK;
struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
struct mtx nfsrc_udpmtx;
struct mtx nfs_v4root_mutex;
struct mtx nfsrv_dontlistlock_mtx;
struct mtx nfsrv_recalllock_mtx;
struct nfsrvfh nfs_rootfh, nfs_pubfh;
int nfs_pubfhset = 0, nfs_rootfhset = 0;
struct proc *nfsd_master_proc = NULL;
int nfsd_debuglevel = 0;
static pid_t nfsd_master_pid = (pid_t)-1;
static char nfsd_master_comm[MAXCOMLEN + 1];
static struct timeval nfsd_master_start;
static uint32_t nfsv4_sysid = 0;
static fhandle_t zerofh;
struct callout nfsd_callout;

/* Forward declaration of a file-local function. */
static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
    struct ucred *);

/*
 * The next six variables are exported as vfs.nfsd sysctl knobs by the
 * SYSCTL_INT() declarations further down in this file.
 */
int nfsrv_enable_crossmntpt = 1;
static int nfs_commit_blks;
static int nfs_commit_miss;
extern int nfsrv_issuedelegs;
extern int nfsrv_dolocallocks;
extern int nfsd_enable_stringtouid;
/* Defined elsewhere. */
extern struct nfsdevicehead nfsrv_devidhead;
112
/*
 * Forward declarations of the file-local (static) helpers, so that they can
 * be used before their definitions.
 */
static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
    struct iovec **);
static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
    struct mbuf **, struct iovec **);
static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
    int *);
static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
    NFSPROC_T *);
static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
    int *, char *, fhandle_t *);
static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
    NFSPROC_T *);
static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
    struct thread *, int, struct mbuf **, char *, struct mbuf **,
    struct nfsvattr *, struct acl *, off_t *, int, bool *);
static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
    char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount *, struct nfsvattr *);
static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
    NFSPROC_T *, struct nfsmount *);
static int nfsrv_putfhname(fhandle_t *, char *);
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
    struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
    struct vnode *, NFSPROC_T *);
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
    NFSPROC_T *);
static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);

/* Not static: declared here with external linkage. */
int nfs_pnfsio(task_fn_t *, void *);
155
/* Root of the vfs.nfsd sysctl tree; the knobs below hang off it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "NFS server");
/* Note that the knob is named "mirrormnt" but sets nfsrv_enable_crossmntpt. */
SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
    &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
/* NOTE(review): the next two knobs have empty description strings. */
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
    0, "Debug level for NFS server");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
    &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
/* Defaults to 1 (enabled). */
static int nfsrv_pnfsgetdsattr = 1;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
    &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
175
176 /*
177 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
178 * not running.
179 * The dsN subdirectories for the increased values must have been created
180 * on all DS servers before this increase is done.
181 */
182 u_int nfsrv_dsdirsize = 20;
183 static int
sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)184 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
185 {
186 int error, newdsdirsize;
187
188 newdsdirsize = nfsrv_dsdirsize;
189 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
190 if (error != 0 || req->newptr == NULL)
191 return (error);
192 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
193 newnfs_numnfsd != 0)
194 return (EINVAL);
195 nfsrv_dsdirsize = newdsdirsize;
196 return (0);
197 }
198 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
199 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
200 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
201
202 /*
203 * nfs_srvmaxio can only be increased and only when the nfsd threads are
204 * not running. The setting must be a power of 2, with the current limit of
205 * 1Mbyte.
206 */
207 static int
sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)208 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
209 {
210 int error;
211 u_int newsrvmaxio;
212 uint64_t tval;
213
214 newsrvmaxio = nfs_srvmaxio;
215 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
216 if (error != 0 || req->newptr == NULL)
217 return (error);
218 if (newsrvmaxio == nfs_srvmaxio)
219 return (0);
220 if (newsrvmaxio < nfs_srvmaxio) {
221 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
222 return (EINVAL);
223 }
224 if (newsrvmaxio > 1048576) {
225 printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
226 return (EINVAL);
227 }
228 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
229 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
230 return (EINVAL);
231 }
232
233 /*
234 * Check that kern.ipc.maxsockbuf is large enough for
235 * newsrviomax, given the setting of vfs.nfs.bufpackets.
236 */
237 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
238 sb_max_adj) {
239 /*
240 * Suggest vfs.nfs.bufpackets * maximum RPC message for
241 * sb_max_adj.
242 */
243 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
244
245 /*
246 * Convert suggested sb_max_adj value to a suggested
247 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
248 * Perform the inverse calculation of (from uipc_sockbuf.c):
249 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
250 * (MSIZE + MCLBYTES);
251 * XXX If the calculation of sb_max_adj from sb_max changes,
252 * this calculation must be changed as well.
253 */
254 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */
255 tval += MCLBYTES - 1; /* Round up divide. */
256 tval /= MCLBYTES;
257 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
258 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
259 newsrvmaxio);
260 return (EINVAL);
261 }
262
263 NFSD_LOCK();
264 if (newnfs_numnfsd != 0) {
265 NFSD_UNLOCK();
266 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
267 "threads are running\n");
268 return (EINVAL);
269 }
270
271
272 nfs_srvmaxio = newsrvmaxio;
273 NFSD_UNLOCK();
274 return (0);
275 }
276 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
277 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
278 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
279
280 #define MAX_REORDERED_RPC 16
281 #define NUM_HEURISTIC 1031
282 #define NHUSE_INIT 64
283 #define NHUSE_INC 16
284 #define NHUSE_MAX 2048
285
286 static struct nfsheur {
287 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
288 off_t nh_nextoff; /* next offset for sequential detection */
289 int nh_use; /* use count for selection */
290 int nh_seqcount; /* heuristic */
291 } nfsheur[NUM_HEURISTIC];
292
293 /*
294 * Heuristic to detect sequential operation.
295 */
296 static struct nfsheur *
nfsrv_sequential_heuristic(struct uio * uio,struct vnode * vp)297 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
298 {
299 struct nfsheur *nh;
300 int hi, try;
301
302 /* Locate best candidate. */
303 try = 32;
304 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
305 nh = &nfsheur[hi];
306 while (try--) {
307 if (nfsheur[hi].nh_vp == vp) {
308 nh = &nfsheur[hi];
309 break;
310 }
311 if (nfsheur[hi].nh_use > 0)
312 --nfsheur[hi].nh_use;
313 hi = (hi + 1) % NUM_HEURISTIC;
314 if (nfsheur[hi].nh_use < nh->nh_use)
315 nh = &nfsheur[hi];
316 }
317
318 /* Initialize hint if this is a new file. */
319 if (nh->nh_vp != vp) {
320 nh->nh_vp = vp;
321 nh->nh_nextoff = uio->uio_offset;
322 nh->nh_use = NHUSE_INIT;
323 if (uio->uio_offset == 0)
324 nh->nh_seqcount = 4;
325 else
326 nh->nh_seqcount = 1;
327 }
328
329 /* Calculate heuristic. */
330 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
331 uio->uio_offset == nh->nh_nextoff) {
332 /* See comments in vfs_vnops.c:sequential_heuristic(). */
333 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
334 if (nh->nh_seqcount > IO_SEQMAX)
335 nh->nh_seqcount = IO_SEQMAX;
336 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
337 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
338 /* Probably a reordered RPC, leave seqcount alone. */
339 } else if (nh->nh_seqcount > 1) {
340 nh->nh_seqcount /= 2;
341 } else {
342 nh->nh_seqcount = 0;
343 }
344 nh->nh_use += NHUSE_INC;
345 if (nh->nh_use > NHUSE_MAX)
346 nh->nh_use = NHUSE_MAX;
347 return (nh);
348 }
349
350 /*
351 * Get attributes into nfsvattr structure.
352 */
353 int
nfsvno_getattr(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p,int vpislocked,nfsattrbit_t * attrbitp)354 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
355 struct nfsrv_descript *nd, struct thread *p, int vpislocked,
356 nfsattrbit_t *attrbitp)
357 {
358 int error, gotattr, lockedit = 0;
359 struct nfsvattr na;
360
361 if (vpislocked == 0) {
362 /*
363 * When vpislocked == 0, the vnode is either exclusively
364 * locked by this thread or not locked by this thread.
365 * As such, shared lock it, if not exclusively locked.
366 */
367 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
368 lockedit = 1;
369 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
370 }
371 }
372
373 /*
374 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
375 * attributes, as required.
376 * This needs to be done for regular files if:
377 * - non-NFSv4 RPCs or
378 * - when attrbitp == NULL or
379 * - an NFSv4 RPC with any of the above attributes in attrbitp.
380 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
381 * these attributes. nfsrv_proxyds() will return an error if the
382 * server is not a pNFS one.
383 */
384 gotattr = 0;
385 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
386 (nd->nd_flag & ND_NFSV4) == 0 ||
387 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
388 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
389 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
390 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
391 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
392 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
393 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
394 NULL);
395 if (error == 0)
396 gotattr = 1;
397 }
398
399 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
400 if (lockedit != 0)
401 NFSVOPUNLOCK(vp);
402
403 /*
404 * If we got the Change, Size and Modify Time from the DS,
405 * replace them.
406 */
407 if (gotattr != 0) {
408 nvap->na_atime = na.na_atime;
409 nvap->na_mtime = na.na_mtime;
410 nvap->na_filerev = na.na_filerev;
411 nvap->na_size = na.na_size;
412 nvap->na_bytes = na.na_bytes;
413 }
414 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
415 error, (uintmax_t)na.na_filerev);
416
417 NFSEXITCODE(error);
418 return (error);
419 }
420
421 /*
422 * Get a file handle for a vnode.
423 */
424 int
nfsvno_getfh(struct vnode * vp,fhandle_t * fhp,struct thread * p)425 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
426 {
427 int error;
428
429 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
430 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
431 error = VOP_VPTOFH(vp, &fhp->fh_fid);
432
433 NFSEXITCODE(error);
434 return (error);
435 }
436
437 /*
438 * Perform access checking for vnodes obtained from file handles that would
439 * refer to files already opened by a Unix client. You cannot just use
440 * vn_writechk() and VOP_ACCESSX() for two reasons.
441 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
442 * case.
443 * 2 - The owner is to be given access irrespective of mode bits for some
444 * operations, so that processes that chmod after opening a file don't
445 * break.
446 */
447 int
nfsvno_accchk(struct vnode * vp,accmode_t accmode,struct ucred * cred,struct nfsexstuff * exp,struct thread * p,int override,int vpislocked,u_int32_t * supportedtypep)448 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
449 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
450 u_int32_t *supportedtypep)
451 {
452 struct vattr vattr;
453 int error = 0, getret = 0;
454
455 if (vpislocked == 0) {
456 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
457 error = EPERM;
458 goto out;
459 }
460 }
461 if (accmode & VWRITE) {
462 /* Just vn_writechk() changed to check rdonly */
463 /*
464 * Disallow write attempts on read-only file systems;
465 * unless the file is a socket or a block or character
466 * device resident on the file system.
467 */
468 if (NFSVNO_EXRDONLY(exp) ||
469 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
470 switch (vp->v_type) {
471 case VREG:
472 case VDIR:
473 case VLNK:
474 error = EROFS;
475 default:
476 break;
477 }
478 }
479 /*
480 * If there's shared text associated with
481 * the inode, try to free it up once. If
482 * we fail, we can't allow writing.
483 */
484 if (VOP_IS_TEXT(vp) && error == 0)
485 error = ETXTBSY;
486 }
487 if (error != 0) {
488 if (vpislocked == 0)
489 NFSVOPUNLOCK(vp);
490 goto out;
491 }
492
493 /*
494 * Should the override still be applied when ACLs are enabled?
495 */
496 error = VOP_ACCESSX(vp, accmode, cred, p);
497 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
498 /*
499 * Try again with VEXPLICIT_DENY, to see if the test for
500 * deletion is supported.
501 */
502 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
503 if (error == 0) {
504 if (vp->v_type == VDIR) {
505 accmode &= ~(VDELETE | VDELETE_CHILD);
506 accmode |= VWRITE;
507 error = VOP_ACCESSX(vp, accmode, cred, p);
508 } else if (supportedtypep != NULL) {
509 *supportedtypep &= ~NFSACCESS_DELETE;
510 }
511 }
512 }
513
514 /*
515 * Allow certain operations for the owner (reads and writes
516 * on files that are already open).
517 */
518 if (override != NFSACCCHK_NOOVERRIDE &&
519 (error == EPERM || error == EACCES)) {
520 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
521 error = 0;
522 else if (override & NFSACCCHK_ALLOWOWNER) {
523 getret = VOP_GETATTR(vp, &vattr, cred);
524 if (getret == 0 && cred->cr_uid == vattr.va_uid)
525 error = 0;
526 }
527 }
528 if (vpislocked == 0)
529 NFSVOPUNLOCK(vp);
530
531 out:
532 NFSEXITCODE(error);
533 return (error);
534 }
535
536 /*
537 * Set attribute(s) vnop.
538 */
539 int
nfsvno_setattr(struct vnode * vp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)540 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
541 struct thread *p, struct nfsexstuff *exp)
542 {
543 u_quad_t savsize = 0;
544 int error, savedit;
545 time_t savbtime;
546
547 /*
548 * If this is an exported file system and a pNFS service is running,
549 * don't VOP_SETATTR() of size for the MDS file system.
550 */
551 savedit = 0;
552 error = 0;
553 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
554 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
555 nvap->na_vattr.va_size > 0) {
556 savsize = nvap->na_vattr.va_size;
557 nvap->na_vattr.va_size = VNOVAL;
558 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
559 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
560 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
561 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
562 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
563 savedit = 1;
564 else
565 savedit = 2;
566 }
567 if (savedit != 2)
568 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
569 if (savedit != 0)
570 nvap->na_vattr.va_size = savsize;
571 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
572 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
573 nvap->na_vattr.va_size != VNOVAL ||
574 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
575 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
576 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
577 /* Never modify birthtime on a DS file. */
578 savbtime = nvap->na_vattr.va_birthtime.tv_sec;
579 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
580 /* For a pNFS server, set the attributes on the DS file. */
581 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
582 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
583 nvap->na_vattr.va_birthtime.tv_sec = savbtime;
584 if (error == ENOENT)
585 error = 0;
586 }
587 NFSEXITCODE(error);
588 return (error);
589 }
590
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * nd		request descriptor; nd_flag selects public-filehandle
 *		(ND_PUBLOOKUP) and NFSv4 (ND_NFSV4) behaviour
 * ndp		nameidata whose ni_cnd already carries the path buffer
 *		(cn_pnbuf), flags and ni_pathlen
 * dp		starting directory vnode
 * islocked	non-zero when dp is locked on entry
 * exp		export information, used for the read-only and exported tests
 * p		thread, stored in cn_thread
 * retdirp	set to NULL on entry and to dp once dp has been accepted as
 *		the starting directory
 *
 * Returns 0 or an error.  On an error taken through "out", the path buffer
 * is released and ni_vp, ni_dvp and ni_startdir are set to NULL.  On
 * success ni_dvp is set to NULL unless WANTPARENT or LOCKPARENT was
 * requested.
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	/* Remember whether the caller asked for a locked leaf. */
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		/* Not a directory: drop dp and the path buffer and fail. */
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp);
	/* Take an additional reference on dp and hand dp back via retdirp. */
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			/* Absolute path: the lookup restarts at rootvnode. */
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			/* A path made up only of '/' characters is refused. */
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * LOCKLEAF is always set for the lookup itself; when the caller did
	 * not ask for it, the leaf is unlocked after a successful lookup and
	 * the flag is cleared again after the loop.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work. If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link. Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp);
		/* Symlinks are only followed for public filehandle lookups. */
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * With more path left after the link (ni_pathlen > 1) the
		 * link is read into a fresh buffer; otherwise the existing
		 * path buffer is reused.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/*
			 * Shared failure exits: badlink1 also frees the
			 * separately allocated link buffer, badlink2 only
			 * releases ni_dvp and ni_vp.
			 */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			/* Append the unparsed remainder after the link text. */
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory. Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop. Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}
788
789 /*
790 * Set up a pathname buffer and return a pointer to it and, optionally
791 * set a hash pointer.
792 */
793 void
nfsvno_setpathbuf(struct nameidata * ndp,char ** bufpp,u_long ** hashpp)794 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
795 {
796 struct componentname *cnp = &ndp->ni_cnd;
797
798 cnp->cn_flags |= (NOMACCHECK | HASBUF);
799 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
800 if (hashpp != NULL)
801 *hashpp = NULL;
802 *bufpp = cnp->cn_pnbuf;
803 }
804
805 /*
806 * Release the above path buffer, if not released by nfsvno_namei().
807 */
808 void
nfsvno_relpathbuf(struct nameidata * ndp)809 nfsvno_relpathbuf(struct nameidata *ndp)
810 {
811
812 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
813 panic("nfsrelpath");
814 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
815 ndp->ni_cnd.cn_flags &= ~HASBUF;
816 }
817
818 /*
819 * Readlink vnode op into an mbuf list.
820 */
821 int
nfsvno_readlink(struct vnode * vp,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)822 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
823 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
824 {
825 struct iovec *iv;
826 struct uio io, *uiop = &io;
827 struct mbuf *mp, *mp3;
828 int len, tlen, error = 0;
829
830 len = NFS_MAXPATHLEN;
831 if (maxextsiz > 0)
832 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
833 &mp3, &mp, &iv);
834 else
835 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
836 uiop->uio_iov = iv;
837 uiop->uio_offset = 0;
838 uiop->uio_resid = len;
839 uiop->uio_rw = UIO_READ;
840 uiop->uio_segflg = UIO_SYSSPACE;
841 uiop->uio_td = NULL;
842 error = VOP_READLINK(vp, uiop, cred);
843 free(iv, M_TEMP);
844 if (error) {
845 m_freem(mp3);
846 *lenp = 0;
847 goto out;
848 }
849 if (uiop->uio_resid > 0) {
850 len -= uiop->uio_resid;
851 tlen = NFSM_RNDUP(len);
852 if (tlen == 0) {
853 m_freem(mp3);
854 mp3 = mp = NULL;
855 } else if (tlen != NFS_MAXPATHLEN || tlen != len)
856 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
857 tlen - len);
858 }
859 *lenp = len;
860 *mpp = mp3;
861 *mpendp = mp;
862
863 out:
864 NFSEXITCODE(error);
865 return (error);
866 }
867
868 /*
869 * Create an mbuf chain and an associated iovec that can be used to Read
870 * or Getextattr of data.
871 * Upon success, return pointers to the first and last mbufs in the chain
872 * plus the malloc'd iovec and its iovlen.
873 */
874 static int
nfsrv_createiovec(int len,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)875 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
876 struct iovec **ivp)
877 {
878 struct mbuf *m, *m2 = NULL, *m3;
879 struct iovec *iv;
880 int i, left, siz;
881
882 left = len;
883 m3 = NULL;
884 /*
885 * Generate the mbuf list with the uio_iov ref. to it.
886 */
887 i = 0;
888 while (left > 0) {
889 NFSMGET(m);
890 MCLGET(m, M_WAITOK);
891 m->m_len = 0;
892 siz = min(M_TRAILINGSPACE(m), left);
893 left -= siz;
894 i++;
895 if (m3)
896 m2->m_next = m;
897 else
898 m3 = m;
899 m2 = m;
900 }
901 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
902 m = m3;
903 left = len;
904 i = 0;
905 while (left > 0) {
906 if (m == NULL)
907 panic("nfsrv_createiovec iov");
908 siz = min(M_TRAILINGSPACE(m), left);
909 if (siz > 0) {
910 iv->iov_base = mtod(m, caddr_t) + m->m_len;
911 iv->iov_len = siz;
912 m->m_len += siz;
913 left -= siz;
914 iv++;
915 i++;
916 }
917 m = m->m_next;
918 }
919 *mpp = m3;
920 *mpendp = m2;
921 return (i);
922 }
923
924 /*
925 * Create an mbuf chain and an associated iovec that can be used to Read
926 * or Getextattr of data.
927 * Upon success, return pointers to the first and last mbufs in the chain
928 * plus the malloc'd iovec and its iovlen.
929 * Same as above, but creates ext_pgs mbuf(s).
930 */
931 static int
nfsrv_createiovec_extpgs(int len,int maxextsiz,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)932 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
933 struct mbuf **mpendp, struct iovec **ivp)
934 {
935 struct mbuf *m, *m2 = NULL, *m3;
936 struct iovec *iv;
937 int i, left, pgno, siz;
938
939 left = len;
940 m3 = NULL;
941 /*
942 * Generate the mbuf list with the uio_iov ref. to it.
943 */
944 i = 0;
945 while (left > 0) {
946 siz = min(left, maxextsiz);
947 m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
948 left -= siz;
949 i += m->m_epg_npgs;
950 if (m3 != NULL)
951 m2->m_next = m;
952 else
953 m3 = m;
954 m2 = m;
955 }
956 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
957 m = m3;
958 left = len;
959 i = 0;
960 pgno = 0;
961 while (left > 0) {
962 if (m == NULL)
963 panic("nfsvno_createiovec_extpgs iov");
964 siz = min(PAGE_SIZE, left);
965 if (siz > 0) {
966 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
967 iv->iov_len = siz;
968 m->m_len += siz;
969 if (pgno == m->m_epg_npgs - 1)
970 m->m_epg_last_len = siz;
971 left -= siz;
972 iv++;
973 i++;
974 pgno++;
975 }
976 if (pgno == m->m_epg_npgs && left > 0) {
977 m = m->m_next;
978 if (m == NULL)
979 panic("nfsvno_createiovec_extpgs iov");
980 pgno = 0;
981 }
982 }
983 *mpp = m3;
984 *mpendp = m2;
985 return (i);
986 }
987
988 /*
989 * Read vnode op call into mbuf list.
990 */
991 int
nfsvno_read(struct vnode * vp,off_t off,int cnt,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp)992 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
993 int maxextsiz, struct thread *p, struct mbuf **mpp,
994 struct mbuf **mpendp)
995 {
996 struct mbuf *m;
997 struct iovec *iv;
998 int error = 0, len, tlen, ioflag = 0;
999 struct mbuf *m3;
1000 struct uio io, *uiop = &io;
1001 struct nfsheur *nh;
1002
1003 /*
1004 * Attempt to read from a DS file. A return of ENOENT implies
1005 * there is no DS file to read.
1006 */
1007 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
1008 NULL, mpendp, NULL, NULL, NULL, 0, NULL);
1009 if (error != ENOENT)
1010 return (error);
1011
1012 len = NFSM_RNDUP(cnt);
1013 if (maxextsiz > 0)
1014 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
1015 &m3, &m, &iv);
1016 else
1017 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
1018 uiop->uio_iov = iv;
1019 uiop->uio_offset = off;
1020 uiop->uio_resid = len;
1021 uiop->uio_rw = UIO_READ;
1022 uiop->uio_segflg = UIO_SYSSPACE;
1023 uiop->uio_td = NULL;
1024 nh = nfsrv_sequential_heuristic(uiop, vp);
1025 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
1026 /* XXX KDM make this more systematic? */
1027 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
1028 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
1029 free(iv, M_TEMP);
1030 if (error) {
1031 m_freem(m3);
1032 *mpp = NULL;
1033 goto out;
1034 }
1035 nh->nh_nextoff = uiop->uio_offset;
1036 tlen = len - uiop->uio_resid;
1037 cnt = cnt < tlen ? cnt : tlen;
1038 tlen = NFSM_RNDUP(cnt);
1039 if (tlen == 0) {
1040 m_freem(m3);
1041 m3 = m = NULL;
1042 } else if (len != tlen || tlen != cnt)
1043 m = nfsrv_adj(m3, len - tlen, tlen - cnt);
1044 *mpp = m3;
1045 *mpendp = m;
1046
1047 out:
1048 NFSEXITCODE(error);
1049 return (error);
1050 }
1051
1052 /*
1053 * Create the iovec for the mbuf chain passed in as an argument.
1054 * The "cp" argument is where the data starts within the first mbuf in
1055 * the chain. It returns the iovec and the iovcnt.
1056 */
1057 static int
nfsrv_createiovecw(int retlen,struct mbuf * m,char * cp,struct iovec ** ivpp,int * iovcntp)1058 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
1059 int *iovcntp)
1060 {
1061 struct mbuf *mp;
1062 struct iovec *ivp;
1063 int cnt, i, len;
1064
1065 /*
1066 * Loop through the mbuf chain, counting how many mbufs are a
1067 * part of this write operation, so the iovec size is known.
1068 */
1069 cnt = 0;
1070 len = retlen;
1071 mp = m;
1072 i = mtod(mp, caddr_t) + mp->m_len - cp;
1073 while (len > 0) {
1074 if (i > 0) {
1075 len -= i;
1076 cnt++;
1077 }
1078 mp = mp->m_next;
1079 if (!mp) {
1080 if (len > 0)
1081 return (EBADRPC);
1082 } else
1083 i = mp->m_len;
1084 }
1085
1086 /* Now, create the iovec. */
1087 mp = m;
1088 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
1089 M_WAITOK);
1090 *iovcntp = cnt;
1091 i = mtod(mp, caddr_t) + mp->m_len - cp;
1092 len = retlen;
1093 while (len > 0) {
1094 if (mp == NULL)
1095 panic("nfsrv_createiovecw");
1096 if (i > 0) {
1097 i = min(i, len);
1098 ivp->iov_base = cp;
1099 ivp->iov_len = i;
1100 ivp++;
1101 len -= i;
1102 }
1103 mp = mp->m_next;
1104 if (mp) {
1105 i = mp->m_len;
1106 cp = mtod(mp, caddr_t);
1107 }
1108 }
1109 return (0);
1110 }
1111
1112 /*
1113 * Write vnode op from an mbuf list.
1114 */
1115 int
nfsvno_write(struct vnode * vp,off_t off,int retlen,int * stable,struct mbuf * mp,char * cp,struct ucred * cred,struct thread * p)1116 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
1117 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
1118 {
1119 struct iovec *iv;
1120 int cnt, ioflags, error;
1121 struct uio io, *uiop = &io;
1122 struct nfsheur *nh;
1123
1124 /*
1125 * Attempt to write to a DS file. A return of ENOENT implies
1126 * there is no DS file to write.
1127 */
1128 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
1129 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
1130 if (error != ENOENT) {
1131 *stable = NFSWRITE_FILESYNC;
1132 return (error);
1133 }
1134
1135 if (*stable == NFSWRITE_UNSTABLE)
1136 ioflags = IO_NODELOCKED;
1137 else
1138 ioflags = (IO_SYNC | IO_NODELOCKED);
1139 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
1140 if (error != 0)
1141 return (error);
1142 uiop->uio_iov = iv;
1143 uiop->uio_iovcnt = cnt;
1144 uiop->uio_resid = retlen;
1145 uiop->uio_rw = UIO_WRITE;
1146 uiop->uio_segflg = UIO_SYSSPACE;
1147 NFSUIOPROC(uiop, p);
1148 uiop->uio_offset = off;
1149 nh = nfsrv_sequential_heuristic(uiop, vp);
1150 ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1151 /* XXX KDM make this more systematic? */
1152 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
1153 error = VOP_WRITE(vp, uiop, ioflags, cred);
1154 if (error == 0)
1155 nh->nh_nextoff = uiop->uio_offset;
1156 free(iv, M_TEMP);
1157
1158 NFSEXITCODE(error);
1159 return (error);
1160 }
1161
1162 /*
1163 * Common code for creating a regular file (plus special files for V2).
1164 */
1165 int
nfsvno_createsub(struct nfsrv_descript * nd,struct nameidata * ndp,struct vnode ** vpp,struct nfsvattr * nvap,int * exclusive_flagp,int32_t * cverf,NFSDEV_T rdev,struct nfsexstuff * exp)1166 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
1167 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
1168 int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
1169 {
1170 u_quad_t tempsize;
1171 int error;
1172 struct thread *p = curthread;
1173
1174 error = nd->nd_repstat;
1175 if (!error && ndp->ni_vp == NULL) {
1176 if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
1177 vrele(ndp->ni_startdir);
1178 error = VOP_CREATE(ndp->ni_dvp,
1179 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1180 /* For a pNFS server, create the data file on a DS. */
1181 if (error == 0 && nvap->na_type == VREG) {
1182 /*
1183 * Create a data file on a DS for a pNFS server.
1184 * This function just returns if not
1185 * running a pNFS DS or the creation fails.
1186 */
1187 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
1188 nd->nd_cred, p);
1189 }
1190 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
1191 NULL, false);
1192 nfsvno_relpathbuf(ndp);
1193 if (!error) {
1194 if (*exclusive_flagp) {
1195 *exclusive_flagp = 0;
1196 NFSVNO_ATTRINIT(nvap);
1197 nvap->na_atime.tv_sec = cverf[0];
1198 nvap->na_atime.tv_nsec = cverf[1];
1199 error = VOP_SETATTR(ndp->ni_vp,
1200 &nvap->na_vattr, nd->nd_cred);
1201 if (error != 0) {
1202 vput(ndp->ni_vp);
1203 ndp->ni_vp = NULL;
1204 error = NFSERR_NOTSUPP;
1205 }
1206 }
1207 }
1208 /*
1209 * NFS V2 Only. nfsrvd_mknod() does this for V3.
1210 * (This implies, just get out on an error.)
1211 */
1212 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
1213 nvap->na_type == VFIFO) {
1214 if (nvap->na_type == VCHR && rdev == 0xffffffff)
1215 nvap->na_type = VFIFO;
1216 if (nvap->na_type != VFIFO &&
1217 (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
1218 vrele(ndp->ni_startdir);
1219 nfsvno_relpathbuf(ndp);
1220 vput(ndp->ni_dvp);
1221 goto out;
1222 }
1223 nvap->na_rdev = rdev;
1224 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
1225 &ndp->ni_cnd, &nvap->na_vattr);
1226 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
1227 NULL, false);
1228 nfsvno_relpathbuf(ndp);
1229 vrele(ndp->ni_startdir);
1230 if (error)
1231 goto out;
1232 } else {
1233 vrele(ndp->ni_startdir);
1234 nfsvno_relpathbuf(ndp);
1235 vput(ndp->ni_dvp);
1236 error = ENXIO;
1237 goto out;
1238 }
1239 *vpp = ndp->ni_vp;
1240 } else {
1241 /*
1242 * Handle cases where error is already set and/or
1243 * the file exists.
1244 * 1 - clean up the lookup
1245 * 2 - iff !error and na_size set, truncate it
1246 */
1247 vrele(ndp->ni_startdir);
1248 nfsvno_relpathbuf(ndp);
1249 *vpp = ndp->ni_vp;
1250 if (ndp->ni_dvp == *vpp)
1251 vrele(ndp->ni_dvp);
1252 else
1253 vput(ndp->ni_dvp);
1254 if (!error && nvap->na_size != VNOVAL) {
1255 error = nfsvno_accchk(*vpp, VWRITE,
1256 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1257 NFSACCCHK_VPISLOCKED, NULL);
1258 if (!error) {
1259 tempsize = nvap->na_size;
1260 NFSVNO_ATTRINIT(nvap);
1261 nvap->na_size = tempsize;
1262 error = VOP_SETATTR(*vpp,
1263 &nvap->na_vattr, nd->nd_cred);
1264 }
1265 }
1266 if (error)
1267 vput(*vpp);
1268 }
1269
1270 out:
1271 NFSEXITCODE(error);
1272 return (error);
1273 }
1274
1275 /*
1276 * Do a mknod vnode op.
1277 */
1278 int
nfsvno_mknod(struct nameidata * ndp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p)1279 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
1280 struct thread *p)
1281 {
1282 int error = 0;
1283 enum vtype vtyp;
1284
1285 vtyp = nvap->na_type;
1286 /*
1287 * Iff doesn't exist, create it.
1288 */
1289 if (ndp->ni_vp) {
1290 vrele(ndp->ni_startdir);
1291 nfsvno_relpathbuf(ndp);
1292 vput(ndp->ni_dvp);
1293 vrele(ndp->ni_vp);
1294 error = EEXIST;
1295 goto out;
1296 }
1297 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1298 vrele(ndp->ni_startdir);
1299 nfsvno_relpathbuf(ndp);
1300 vput(ndp->ni_dvp);
1301 error = NFSERR_BADTYPE;
1302 goto out;
1303 }
1304 if (vtyp == VSOCK) {
1305 vrele(ndp->ni_startdir);
1306 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
1307 &ndp->ni_cnd, &nvap->na_vattr);
1308 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1309 false);
1310 nfsvno_relpathbuf(ndp);
1311 } else {
1312 if (nvap->na_type != VFIFO &&
1313 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
1314 vrele(ndp->ni_startdir);
1315 nfsvno_relpathbuf(ndp);
1316 vput(ndp->ni_dvp);
1317 goto out;
1318 }
1319 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
1320 &ndp->ni_cnd, &nvap->na_vattr);
1321 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1322 false);
1323 nfsvno_relpathbuf(ndp);
1324 vrele(ndp->ni_startdir);
1325 /*
1326 * Since VOP_MKNOD returns the ni_vp, I can't
1327 * see any reason to do the lookup.
1328 */
1329 }
1330
1331 out:
1332 NFSEXITCODE(error);
1333 return (error);
1334 }
1335
1336 /*
1337 * Mkdir vnode op.
1338 */
1339 int
nfsvno_mkdir(struct nameidata * ndp,struct nfsvattr * nvap,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1340 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
1341 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
1342 {
1343 int error = 0;
1344
1345 if (ndp->ni_vp != NULL) {
1346 if (ndp->ni_dvp == ndp->ni_vp)
1347 vrele(ndp->ni_dvp);
1348 else
1349 vput(ndp->ni_dvp);
1350 vrele(ndp->ni_vp);
1351 nfsvno_relpathbuf(ndp);
1352 error = EEXIST;
1353 goto out;
1354 }
1355 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1356 &nvap->na_vattr);
1357 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false);
1358 nfsvno_relpathbuf(ndp);
1359
1360 out:
1361 NFSEXITCODE(error);
1362 return (error);
1363 }
1364
1365 /*
1366 * symlink vnode op.
1367 */
1368 int
nfsvno_symlink(struct nameidata * ndp,struct nfsvattr * nvap,char * pathcp,int pathlen,int not_v2,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1369 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
1370 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
1371 struct nfsexstuff *exp)
1372 {
1373 int error = 0;
1374
1375 if (ndp->ni_vp) {
1376 vrele(ndp->ni_startdir);
1377 nfsvno_relpathbuf(ndp);
1378 if (ndp->ni_dvp == ndp->ni_vp)
1379 vrele(ndp->ni_dvp);
1380 else
1381 vput(ndp->ni_dvp);
1382 vrele(ndp->ni_vp);
1383 error = EEXIST;
1384 goto out;
1385 }
1386
1387 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1388 &nvap->na_vattr, pathcp);
1389 /*
1390 * Although FreeBSD still had the lookup code in
1391 * it for 7/current, there doesn't seem to be any
1392 * point, since VOP_SYMLINK() returns the ni_vp.
1393 * Just vput it for v2.
1394 */
1395 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0);
1396 vrele(ndp->ni_startdir);
1397 nfsvno_relpathbuf(ndp);
1398
1399 out:
1400 NFSEXITCODE(error);
1401 return (error);
1402 }
1403
1404 /*
1405 * Parse symbolic link arguments.
1406 * This function has an ugly side effect. It will malloc() an area for
1407 * the symlink and set iov_base to point to it, only if it succeeds.
1408 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
1409 * be FREE'd later.
1410 */
1411 int
nfsvno_getsymlink(struct nfsrv_descript * nd,struct nfsvattr * nvap,struct thread * p,char ** pathcpp,int * lenp)1412 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
1413 struct thread *p, char **pathcpp, int *lenp)
1414 {
1415 u_int32_t *tl;
1416 char *pathcp = NULL;
1417 int error = 0, len;
1418 struct nfsv2_sattr *sp;
1419
1420 *pathcpp = NULL;
1421 *lenp = 0;
1422 if ((nd->nd_flag & ND_NFSV3) &&
1423 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
1424 goto nfsmout;
1425 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1426 len = fxdr_unsigned(int, *tl);
1427 if (len > NFS_MAXPATHLEN || len <= 0) {
1428 error = EBADRPC;
1429 goto nfsmout;
1430 }
1431 pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
1432 error = nfsrv_mtostr(nd, pathcp, len);
1433 if (error)
1434 goto nfsmout;
1435 if (nd->nd_flag & ND_NFSV2) {
1436 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1437 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1438 }
1439 *pathcpp = pathcp;
1440 *lenp = len;
1441 NFSEXITCODE2(0, nd);
1442 return (0);
1443 nfsmout:
1444 if (pathcp)
1445 free(pathcp, M_TEMP);
1446 NFSEXITCODE2(error, nd);
1447 return (error);
1448 }
1449
1450 /*
1451 * Remove a non-directory object.
1452 */
1453 int
nfsvno_removesub(struct nameidata * ndp,int is_v4,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1454 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1455 struct thread *p, struct nfsexstuff *exp)
1456 {
1457 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
1458 int error = 0, mirrorcnt;
1459 char fname[PNFS_FILENAME_LEN + 1];
1460 fhandle_t fh;
1461
1462 vp = ndp->ni_vp;
1463 dsdvp[0] = NULL;
1464 if (vp->v_type == VDIR)
1465 error = NFSERR_ISDIR;
1466 else if (is_v4)
1467 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
1468 p);
1469 if (error == 0)
1470 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
1471 if (!error)
1472 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1473 if (error == 0 && dsdvp[0] != NULL)
1474 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
1475 if (ndp->ni_dvp == vp)
1476 vrele(ndp->ni_dvp);
1477 else
1478 vput(ndp->ni_dvp);
1479 vput(vp);
1480 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1481 nfsvno_relpathbuf(ndp);
1482 NFSEXITCODE(error);
1483 return (error);
1484 }
1485
1486 /*
1487 * Remove a directory.
1488 */
1489 int
nfsvno_rmdirsub(struct nameidata * ndp,int is_v4,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1490 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1491 struct thread *p, struct nfsexstuff *exp)
1492 {
1493 struct vnode *vp;
1494 int error = 0;
1495
1496 vp = ndp->ni_vp;
1497 if (vp->v_type != VDIR) {
1498 error = ENOTDIR;
1499 goto out;
1500 }
1501 /*
1502 * No rmdir "." please.
1503 */
1504 if (ndp->ni_dvp == vp) {
1505 error = EINVAL;
1506 goto out;
1507 }
1508 /*
1509 * The root of a mounted filesystem cannot be deleted.
1510 */
1511 if (vp->v_vflag & VV_ROOT)
1512 error = EBUSY;
1513 out:
1514 if (!error)
1515 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1516 if (ndp->ni_dvp == vp)
1517 vrele(ndp->ni_dvp);
1518 else
1519 vput(ndp->ni_dvp);
1520 vput(vp);
1521 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1522 nfsvno_relpathbuf(ndp);
1523 NFSEXITCODE(error);
1524 return (error);
1525 }
1526
1527 /*
1528 * Rename vnode op.
1529 */
1530 int
nfsvno_rename(struct nameidata * fromndp,struct nameidata * tondp,u_int32_t ndstat,u_int32_t ndflag,struct ucred * cred,struct thread * p)1531 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1532 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1533 {
1534 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
1535 int error = 0, mirrorcnt;
1536 char fname[PNFS_FILENAME_LEN + 1];
1537 fhandle_t fh;
1538
1539 dsdvp[0] = NULL;
1540 fvp = fromndp->ni_vp;
1541 if (ndstat) {
1542 vrele(fromndp->ni_dvp);
1543 vrele(fvp);
1544 error = ndstat;
1545 goto out1;
1546 }
1547 tdvp = tondp->ni_dvp;
1548 tvp = tondp->ni_vp;
1549 if (tvp != NULL) {
1550 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1551 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1552 goto out;
1553 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1554 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1555 goto out;
1556 }
1557 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1558 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1559 goto out;
1560 }
1561
1562 /*
1563 * A rename to '.' or '..' results in a prematurely
1564 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1565 * here.
1566 */
1567 if ((tondp->ni_cnd.cn_namelen == 1 &&
1568 tondp->ni_cnd.cn_nameptr[0] == '.') ||
1569 (tondp->ni_cnd.cn_namelen == 2 &&
1570 tondp->ni_cnd.cn_nameptr[0] == '.' &&
1571 tondp->ni_cnd.cn_nameptr[1] == '.')) {
1572 error = EINVAL;
1573 goto out;
1574 }
1575 }
1576 if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1577 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1578 goto out;
1579 }
1580 if (fvp->v_mount != tdvp->v_mount) {
1581 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1582 goto out;
1583 }
1584 if (fvp == tdvp) {
1585 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1586 goto out;
1587 }
1588 if (fvp == tvp) {
1589 /*
1590 * If source and destination are the same, there is nothing to
1591 * do. Set error to -1 to indicate this.
1592 */
1593 error = -1;
1594 goto out;
1595 }
1596 if (ndflag & ND_NFSV4) {
1597 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
1598 error = nfsrv_checkremove(fvp, 0, NULL,
1599 (nfsquad_t)((u_quad_t)0), p);
1600 NFSVOPUNLOCK(fvp);
1601 } else
1602 error = EPERM;
1603 if (tvp && !error)
1604 error = nfsrv_checkremove(tvp, 1, NULL,
1605 (nfsquad_t)((u_quad_t)0), p);
1606 } else {
1607 /*
1608 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1609 * that the NFSv4 client won't be confused by the rename.
1610 * Since nfsd_recalldelegation() can only be called on an
1611 * unlocked vnode at this point and fvp is the file that will
1612 * still exist after the rename, just do fvp.
1613 */
1614 nfsd_recalldelegation(fvp, p);
1615 }
1616 if (error == 0 && tvp != NULL) {
1617 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
1618 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
1619 " dsdvp=%p\n", dsdvp[0]);
1620 }
1621 out:
1622 if (!error) {
1623 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1624 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1625 &tondp->ni_cnd);
1626 } else {
1627 if (tdvp == tvp)
1628 vrele(tdvp);
1629 else
1630 vput(tdvp);
1631 if (tvp)
1632 vput(tvp);
1633 vrele(fromndp->ni_dvp);
1634 vrele(fvp);
1635 if (error == -1)
1636 error = 0;
1637 }
1638
1639 /*
1640 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
1641 * if the rename succeeded, the DS file for the tvp needs to be
1642 * removed.
1643 */
1644 if (error == 0 && dsdvp[0] != NULL) {
1645 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
1646 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
1647 }
1648
1649 vrele(tondp->ni_startdir);
1650 nfsvno_relpathbuf(tondp);
1651 out1:
1652 vrele(fromndp->ni_startdir);
1653 nfsvno_relpathbuf(fromndp);
1654 NFSEXITCODE(error);
1655 return (error);
1656 }
1657
1658 /*
1659 * Link vnode op.
1660 */
1661 int
nfsvno_link(struct nameidata * ndp,struct vnode * vp,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)1662 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1663 struct thread *p, struct nfsexstuff *exp)
1664 {
1665 struct vnode *xp;
1666 int error = 0;
1667
1668 xp = ndp->ni_vp;
1669 if (xp != NULL) {
1670 error = EEXIST;
1671 } else {
1672 xp = ndp->ni_dvp;
1673 if (vp->v_mount != xp->v_mount)
1674 error = EXDEV;
1675 }
1676 if (!error) {
1677 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1678 if (!VN_IS_DOOMED(vp))
1679 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1680 else
1681 error = EPERM;
1682 if (ndp->ni_dvp == vp) {
1683 vrele(ndp->ni_dvp);
1684 NFSVOPUNLOCK(vp);
1685 } else {
1686 vref(vp);
1687 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true);
1688 }
1689 } else {
1690 if (ndp->ni_dvp == ndp->ni_vp)
1691 vrele(ndp->ni_dvp);
1692 else
1693 vput(ndp->ni_dvp);
1694 if (ndp->ni_vp)
1695 vrele(ndp->ni_vp);
1696 }
1697 nfsvno_relpathbuf(ndp);
1698 NFSEXITCODE(error);
1699 return (error);
1700 }
1701
1702 /*
1703 * Do the fsync() appropriate for the commit.
1704 */
1705 int
nfsvno_fsync(struct vnode * vp,u_int64_t off,int cnt,struct ucred * cred,struct thread * td)1706 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1707 struct thread *td)
1708 {
1709 int error = 0;
1710
1711 /*
1712 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
1713 * file is done. At this time VOP_FSYNC does not accept offset and
1714 * byte count parameters so call VOP_FSYNC the whole file for now.
1715 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
1716 * File systems that do not use the buffer cache (as indicated
1717 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
1718 */
1719 if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
1720 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
1721 /*
1722 * Give up and do the whole thing
1723 */
1724 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
1725 VM_OBJECT_WLOCK(vp->v_object);
1726 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1727 VM_OBJECT_WUNLOCK(vp->v_object);
1728 }
1729 error = VOP_FSYNC(vp, MNT_WAIT, td);
1730 } else {
1731 /*
1732 * Locate and synchronously write any buffers that fall
1733 * into the requested range. Note: we are assuming that
1734 * f_iosize is a power of 2.
1735 */
1736 int iosize = vp->v_mount->mnt_stat.f_iosize;
1737 int iomask = iosize - 1;
1738 struct bufobj *bo;
1739 daddr_t lblkno;
1740
1741 /*
1742 * Align to iosize boundary, super-align to page boundary.
1743 */
1744 if (off & iomask) {
1745 cnt += off & iomask;
1746 off &= ~(u_quad_t)iomask;
1747 }
1748 if (off & PAGE_MASK) {
1749 cnt += off & PAGE_MASK;
1750 off &= ~(u_quad_t)PAGE_MASK;
1751 }
1752 lblkno = off / iosize;
1753
1754 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
1755 VM_OBJECT_WLOCK(vp->v_object);
1756 vm_object_page_clean(vp->v_object, off, off + cnt,
1757 OBJPC_SYNC);
1758 VM_OBJECT_WUNLOCK(vp->v_object);
1759 }
1760
1761 bo = &vp->v_bufobj;
1762 BO_LOCK(bo);
1763 while (cnt > 0) {
1764 struct buf *bp;
1765
1766 /*
1767 * If we have a buffer and it is marked B_DELWRI we
1768 * have to lock and write it. Otherwise the prior
1769 * write is assumed to have already been committed.
1770 *
1771 * gbincore() can return invalid buffers now so we
1772 * have to check that bit as well (though B_DELWRI
1773 * should not be set if B_INVAL is set there could be
1774 * a race here since we haven't locked the buffer).
1775 */
1776 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1777 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1778 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
1779 BO_LOCK(bo);
1780 continue; /* retry */
1781 }
1782 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1783 B_DELWRI) {
1784 bremfree(bp);
1785 bp->b_flags &= ~B_ASYNC;
1786 bwrite(bp);
1787 ++nfs_commit_miss;
1788 } else
1789 BUF_UNLOCK(bp);
1790 BO_LOCK(bo);
1791 }
1792 ++nfs_commit_blks;
1793 if (cnt < iosize)
1794 break;
1795 cnt -= iosize;
1796 ++lblkno;
1797 }
1798 BO_UNLOCK(bo);
1799 }
1800 NFSEXITCODE(error);
1801 return (error);
1802 }
1803
1804 /*
1805 * Statfs vnode op.
1806 */
1807 int
nfsvno_statfs(struct vnode * vp,struct statfs * sf)1808 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1809 {
1810 struct statfs *tsf;
1811 int error;
1812
1813 tsf = NULL;
1814 if (nfsrv_devidcnt > 0) {
1815 /* For a pNFS service, get the DS numbers. */
1816 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
1817 error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
1818 if (error != 0) {
1819 free(tsf, M_TEMP);
1820 tsf = NULL;
1821 }
1822 }
1823 error = VFS_STATFS(vp->v_mount, sf);
1824 if (error == 0) {
1825 if (tsf != NULL) {
1826 sf->f_blocks = tsf->f_blocks;
1827 sf->f_bavail = tsf->f_bavail;
1828 sf->f_bfree = tsf->f_bfree;
1829 sf->f_bsize = tsf->f_bsize;
1830 }
1831 /*
1832 * Since NFS handles these values as unsigned on the
1833 * wire, there is no way to represent negative values,
1834 * so set them to 0. Without this, they will appear
1835 * to be very large positive values for clients like
1836 * Solaris10.
1837 */
1838 if (sf->f_bavail < 0)
1839 sf->f_bavail = 0;
1840 if (sf->f_ffree < 0)
1841 sf->f_ffree = 0;
1842 }
1843 free(tsf, M_TEMP);
1844 NFSEXITCODE(error);
1845 return (error);
1846 }
1847
1848 /*
1849 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1850 * must handle nfsrv_opencheck() calls after any other access checks.
1851 */
1852 void
nfsvno_open(struct nfsrv_descript * nd,struct nameidata * ndp,nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsstate * stp,int * exclusive_flagp,struct nfsvattr * nvap,int32_t * cverf,int create,NFSACL_T * aclp,nfsattrbit_t * attrbitp,struct ucred * cred,struct nfsexstuff * exp,struct vnode ** vpp)1853 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1854 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1855 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1856 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
1857 struct nfsexstuff *exp, struct vnode **vpp)
1858 {
1859 struct vnode *vp = NULL;
1860 u_quad_t tempsize;
1861 struct nfsexstuff nes;
1862 struct thread *p = curthread;
1863
1864 if (ndp->ni_vp == NULL)
1865 nd->nd_repstat = nfsrv_opencheck(clientid,
1866 stateidp, stp, NULL, nd, p, nd->nd_repstat);
1867 if (!nd->nd_repstat) {
1868 if (ndp->ni_vp == NULL) {
1869 vrele(ndp->ni_startdir);
1870 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1871 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1872 /* For a pNFS server, create the data file on a DS. */
1873 if (nd->nd_repstat == 0) {
1874 /*
1875 * Create a data file on a DS for a pNFS server.
1876 * This function just returns if not
1877 * running a pNFS DS or the creation fails.
1878 */
1879 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
1880 cred, p);
1881 }
1882 VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ?
1883 &ndp->ni_vp : NULL, false);
1884 nfsvno_relpathbuf(ndp);
1885 if (!nd->nd_repstat) {
1886 if (*exclusive_flagp) {
1887 *exclusive_flagp = 0;
1888 NFSVNO_ATTRINIT(nvap);
1889 nvap->na_atime.tv_sec = cverf[0];
1890 nvap->na_atime.tv_nsec = cverf[1];
1891 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1892 &nvap->na_vattr, cred);
1893 if (nd->nd_repstat != 0) {
1894 vput(ndp->ni_vp);
1895 ndp->ni_vp = NULL;
1896 nd->nd_repstat = NFSERR_NOTSUPP;
1897 } else
1898 NFSSETBIT_ATTRBIT(attrbitp,
1899 NFSATTRBIT_TIMEACCESS);
1900 } else {
1901 nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1902 aclp, p, attrbitp, exp);
1903 }
1904 }
1905 vp = ndp->ni_vp;
1906 } else {
1907 if (ndp->ni_startdir)
1908 vrele(ndp->ni_startdir);
1909 nfsvno_relpathbuf(ndp);
1910 vp = ndp->ni_vp;
1911 if (create == NFSV4OPEN_CREATE) {
1912 if (ndp->ni_dvp == vp)
1913 vrele(ndp->ni_dvp);
1914 else
1915 vput(ndp->ni_dvp);
1916 }
1917 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1918 if (ndp->ni_cnd.cn_flags & RDONLY)
1919 NFSVNO_SETEXRDONLY(&nes);
1920 else
1921 NFSVNO_EXINIT(&nes);
1922 nd->nd_repstat = nfsvno_accchk(vp,
1923 VWRITE, cred, &nes, p,
1924 NFSACCCHK_NOOVERRIDE,
1925 NFSACCCHK_VPISLOCKED, NULL);
1926 nd->nd_repstat = nfsrv_opencheck(clientid,
1927 stateidp, stp, vp, nd, p, nd->nd_repstat);
1928 if (!nd->nd_repstat) {
1929 tempsize = nvap->na_size;
1930 NFSVNO_ATTRINIT(nvap);
1931 nvap->na_size = tempsize;
1932 nd->nd_repstat = VOP_SETATTR(vp,
1933 &nvap->na_vattr, cred);
1934 }
1935 } else if (vp->v_type == VREG) {
1936 nd->nd_repstat = nfsrv_opencheck(clientid,
1937 stateidp, stp, vp, nd, p, nd->nd_repstat);
1938 }
1939 }
1940 } else {
1941 if (ndp->ni_cnd.cn_flags & HASBUF)
1942 nfsvno_relpathbuf(ndp);
1943 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1944 vrele(ndp->ni_startdir);
1945 if (ndp->ni_dvp == ndp->ni_vp)
1946 vrele(ndp->ni_dvp);
1947 else
1948 vput(ndp->ni_dvp);
1949 if (ndp->ni_vp)
1950 vput(ndp->ni_vp);
1951 }
1952 }
1953 *vpp = vp;
1954
1955 NFSEXITCODE2(0, nd);
1956 }
1957
1958 /*
1959 * Updates the file rev and sets the mtime and ctime
1960 * to the current clock time, returning the va_filerev and va_Xtime
1961 * values.
1962 * Return ESTALE to indicate the vnode is VIRF_DOOMED.
1963 */
1964 int
nfsvno_updfilerev(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p)1965 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1966 struct nfsrv_descript *nd, struct thread *p)
1967 {
1968 struct vattr va;
1969
1970 VATTR_NULL(&va);
1971 vfs_timestamp(&va.va_mtime);
1972 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
1973 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
1974 if (VN_IS_DOOMED(vp))
1975 return (ESTALE);
1976 }
1977 (void) VOP_SETATTR(vp, &va, nd->nd_cred);
1978 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
1979 return (0);
1980 }
1981
1982 /*
1983 * Glue routine to nfsv4_fillattr().
1984 */
1985 int
nfsvno_fillattr(struct nfsrv_descript * nd,struct mount * mp,struct vnode * vp,struct nfsvattr * nvap,fhandle_t * fhp,int rderror,nfsattrbit_t * attrbitp,struct ucred * cred,struct thread * p,int isdgram,int reterr,int supports_nfsv4acls,int at_root,uint64_t mounted_on_fileno)1986 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
1987 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1988 struct ucred *cred, struct thread *p, int isdgram, int reterr,
1989 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
1990 {
1991 struct statfs *sf;
1992 int error;
1993
1994 sf = NULL;
1995 if (nfsrv_devidcnt > 0 &&
1996 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
1997 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
1998 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
1999 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
2000 error = nfsrv_pnfsstatfs(sf, mp);
2001 if (error != 0) {
2002 free(sf, M_TEMP);
2003 sf = NULL;
2004 }
2005 }
2006 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
2007 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
2008 mounted_on_fileno, sf);
2009 free(sf, M_TEMP);
2010 NFSEXITCODE2(0, nd);
2011 return (error);
2012 }
2013
2014 /* Since the Readdir vnode ops vary, put the entire functions in here. */
2015 /*
2016 * nfs readdir service
2017 * - mallocs what it thinks is enough to read
2018 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
2019 * - calls VOP_READDIR()
2020 * - loops around building the reply
2021 * if the output generated exceeds count break out of loop
2022 * The NFSM_CLGET macro is used here so that the reply will be packed
2023 * tightly in mbuf clusters.
2024 * - it trims out records with d_fileno == 0
2025 * this doesn't matter for Unix clients, but they might confuse clients
2026 * for other os'.
2027 * - it trims out records with d_type == DT_WHT
2028 * these cannot be seen through NFS (unless we extend the protocol)
2029 * The alternate call nfsrvd_readdirplus() does lookups as well.
2030 * PS: The NFS protocol spec. does not clarify what the "count" byte
2031 * argument is a count of.. just name strings and file id's or the
2032 * entire reply rpc or ...
2033 * I tried just file name and id sizes and it confused the Sun client,
2034 * so I am using the full rpc size now. The "paranoia.." comment refers
2035 * to including the status longwords that are not a part of the dir.
2036 * "entry" structures, but are in the rpc.
2037 */
2038 int
nfsrvd_readdir(struct nfsrv_descript * nd,int isdgram,struct vnode * vp,struct nfsexstuff * exp)2039 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
2040 struct vnode *vp, struct nfsexstuff *exp)
2041 {
2042 struct dirent *dp;
2043 u_int32_t *tl;
2044 int dirlen;
2045 char *cpos, *cend, *rbuf;
2046 struct nfsvattr at;
2047 int nlen, error = 0, getret = 1;
2048 int siz, cnt, fullsiz, eofflag, ncookies;
2049 u_int64_t off, toff, verf __unused;
2050 u_long *cookies = NULL, *cookiep;
2051 struct uio io;
2052 struct iovec iv;
2053 int is_ufs;
2054 struct thread *p = curthread;
2055
2056 if (nd->nd_repstat) {
2057 nfsrv_postopattr(nd, getret, &at);
2058 goto out;
2059 }
2060 if (nd->nd_flag & ND_NFSV2) {
2061 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2062 off = fxdr_unsigned(u_quad_t, *tl++);
2063 } else {
2064 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2065 off = fxdr_hyper(tl);
2066 tl += 2;
2067 verf = fxdr_hyper(tl);
2068 tl += 2;
2069 }
2070 toff = off;
2071 cnt = fxdr_unsigned(int, *tl);
2072 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
2073 cnt = NFS_SRVMAXDATA(nd);
2074 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2075 fullsiz = siz;
2076 if (nd->nd_flag & ND_NFSV3) {
2077 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
2078 NULL);
2079 #if 0
2080 /*
2081 * va_filerev is not sufficient as a cookie verifier,
2082 * since it is not supposed to change when entries are
2083 * removed/added unless that offset cookies returned to
2084 * the client are no longer valid.
2085 */
2086 if (!nd->nd_repstat && toff && verf != at.na_filerev)
2087 nd->nd_repstat = NFSERR_BAD_COOKIE;
2088 #endif
2089 }
2090 if (!nd->nd_repstat && vp->v_type != VDIR)
2091 nd->nd_repstat = NFSERR_NOTDIR;
2092 if (nd->nd_repstat == 0 && cnt == 0) {
2093 if (nd->nd_flag & ND_NFSV2)
2094 /* NFSv2 does not have NFSERR_TOOSMALL */
2095 nd->nd_repstat = EPERM;
2096 else
2097 nd->nd_repstat = NFSERR_TOOSMALL;
2098 }
2099 if (!nd->nd_repstat)
2100 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
2101 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
2102 NFSACCCHK_VPISLOCKED, NULL);
2103 if (nd->nd_repstat) {
2104 vput(vp);
2105 if (nd->nd_flag & ND_NFSV3)
2106 nfsrv_postopattr(nd, getret, &at);
2107 goto out;
2108 }
2109 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
2110 rbuf = malloc(siz, M_TEMP, M_WAITOK);
2111 again:
2112 eofflag = 0;
2113 if (cookies) {
2114 free(cookies, M_TEMP);
2115 cookies = NULL;
2116 }
2117
2118 iv.iov_base = rbuf;
2119 iv.iov_len = siz;
2120 io.uio_iov = &iv;
2121 io.uio_iovcnt = 1;
2122 io.uio_offset = (off_t)off;
2123 io.uio_resid = siz;
2124 io.uio_segflg = UIO_SYSSPACE;
2125 io.uio_rw = UIO_READ;
2126 io.uio_td = NULL;
2127 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
2128 &cookies);
2129 off = (u_int64_t)io.uio_offset;
2130 if (io.uio_resid)
2131 siz -= io.uio_resid;
2132
2133 if (!cookies && !nd->nd_repstat)
2134 nd->nd_repstat = NFSERR_PERM;
2135 if (nd->nd_flag & ND_NFSV3) {
2136 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
2137 if (!nd->nd_repstat)
2138 nd->nd_repstat = getret;
2139 }
2140
2141 /*
2142 * Handles the failed cases. nd->nd_repstat == 0 past here.
2143 */
2144 if (nd->nd_repstat) {
2145 vput(vp);
2146 free(rbuf, M_TEMP);
2147 if (cookies)
2148 free(cookies, M_TEMP);
2149 if (nd->nd_flag & ND_NFSV3)
2150 nfsrv_postopattr(nd, getret, &at);
2151 goto out;
2152 }
2153 /*
2154 * If nothing read, return eof
2155 * rpc reply
2156 */
2157 if (siz == 0) {
2158 vput(vp);
2159 if (nd->nd_flag & ND_NFSV2) {
2160 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2161 } else {
2162 nfsrv_postopattr(nd, getret, &at);
2163 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2164 txdr_hyper(at.na_filerev, tl);
2165 tl += 2;
2166 }
2167 *tl++ = newnfs_false;
2168 *tl = newnfs_true;
2169 free(rbuf, M_TEMP);
2170 free(cookies, M_TEMP);
2171 goto out;
2172 }
2173
2174 /*
2175 * Check for degenerate cases of nothing useful read.
2176 * If so go try again
2177 */
2178 cpos = rbuf;
2179 cend = rbuf + siz;
2180 dp = (struct dirent *)cpos;
2181 cookiep = cookies;
2182
2183 /*
2184 * For some reason FreeBSD's ufs_readdir() chooses to back the
2185 * directory offset up to a block boundary, so it is necessary to
2186 * skip over the records that precede the requested offset. This
2187 * requires the assumption that file offset cookies monotonically
2188 * increase.
2189 */
2190 while (cpos < cend && ncookies > 0 &&
2191 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
2192 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
2193 cpos += dp->d_reclen;
2194 dp = (struct dirent *)cpos;
2195 cookiep++;
2196 ncookies--;
2197 }
2198 if (cpos >= cend || ncookies == 0) {
2199 siz = fullsiz;
2200 toff = off;
2201 goto again;
2202 }
2203 vput(vp);
2204
2205 /*
2206 * If cnt > MCLBYTES and the reply will not be saved, use
2207 * ext_pgs mbufs for TLS.
2208 * For NFSv4.0, we do not know for sure if the reply will
2209 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
2210 */
2211 if (cnt > MCLBYTES && siz > MCLBYTES &&
2212 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
2213 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
2214 nd->nd_flag |= ND_EXTPG;
2215
2216 /*
2217 * dirlen is the size of the reply, including all XDR and must
2218 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
2219 * if the XDR should be included in "count", but to be safe, we do.
2220 * (Include the two booleans at the end of the reply in dirlen now.)
2221 */
2222 if (nd->nd_flag & ND_NFSV3) {
2223 nfsrv_postopattr(nd, getret, &at);
2224 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2225 txdr_hyper(at.na_filerev, tl);
2226 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
2227 } else {
2228 dirlen = 2 * NFSX_UNSIGNED;
2229 }
2230
2231 /* Loop through the records and build reply */
2232 while (cpos < cend && ncookies > 0) {
2233 nlen = dp->d_namlen;
2234 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
2235 nlen <= NFS_MAXNAMLEN) {
2236 if (nd->nd_flag & ND_NFSV3)
2237 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
2238 else
2239 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
2240 if (dirlen > cnt) {
2241 eofflag = 0;
2242 break;
2243 }
2244
2245 /*
2246 * Build the directory record xdr from
2247 * the dirent entry.
2248 */
2249 if (nd->nd_flag & ND_NFSV3) {
2250 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2251 *tl++ = newnfs_true;
2252 *tl++ = 0;
2253 } else {
2254 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2255 *tl++ = newnfs_true;
2256 }
2257 *tl = txdr_unsigned(dp->d_fileno);
2258 (void) nfsm_strtom(nd, dp->d_name, nlen);
2259 if (nd->nd_flag & ND_NFSV3) {
2260 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2261 txdr_hyper(*cookiep, tl);
2262 } else {
2263 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2264 *tl = txdr_unsigned(*cookiep);
2265 }
2266 }
2267 cpos += dp->d_reclen;
2268 dp = (struct dirent *)cpos;
2269 cookiep++;
2270 ncookies--;
2271 }
2272 if (cpos < cend)
2273 eofflag = 0;
2274 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2275 *tl++ = newnfs_false;
2276 if (eofflag)
2277 *tl = newnfs_true;
2278 else
2279 *tl = newnfs_false;
2280 free(rbuf, M_TEMP);
2281 free(cookies, M_TEMP);
2282
2283 out:
2284 NFSEXITCODE2(0, nd);
2285 return (0);
2286 nfsmout:
2287 vput(vp);
2288 NFSEXITCODE2(error, nd);
2289 return (error);
2290 }
2291
2292 /*
2293 * Readdirplus for V3 and Readdir for V4.
2294 */
2295 int
nfsrvd_readdirplus(struct nfsrv_descript * nd,int isdgram,struct vnode * vp,struct nfsexstuff * exp)2296 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
2297 struct vnode *vp, struct nfsexstuff *exp)
2298 {
2299 struct dirent *dp;
2300 u_int32_t *tl;
2301 int dirlen;
2302 char *cpos, *cend, *rbuf;
2303 struct vnode *nvp;
2304 fhandle_t nfh;
2305 struct nfsvattr nva, at, *nvap = &nva;
2306 struct mbuf *mb0, *mb1;
2307 struct nfsreferral *refp;
2308 int nlen, r, error = 0, getret = 1, usevget = 1;
2309 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
2310 caddr_t bpos0, bpos1;
2311 u_int64_t off, toff, verf;
2312 u_long *cookies = NULL, *cookiep;
2313 nfsattrbit_t attrbits, rderrbits, savbits;
2314 struct uio io;
2315 struct iovec iv;
2316 struct componentname cn;
2317 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
2318 struct mount *mp, *new_mp;
2319 uint64_t mounted_on_fileno;
2320 struct thread *p = curthread;
2321 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
2322
2323 if (nd->nd_repstat) {
2324 nfsrv_postopattr(nd, getret, &at);
2325 goto out;
2326 }
2327 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
2328 off = fxdr_hyper(tl);
2329 toff = off;
2330 tl += 2;
2331 verf = fxdr_hyper(tl);
2332 tl += 2;
2333 siz = fxdr_unsigned(int, *tl++);
2334 cnt = fxdr_unsigned(int, *tl);
2335
2336 /*
2337 * Use the server's maximum data transfer size as the upper bound
2338 * on reply datalen.
2339 */
2340 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
2341 cnt = NFS_SRVMAXDATA(nd);
2342
2343 /*
2344 * siz is a "hint" of how much directory information (name, fileid,
2345 * cookie) should be in the reply. At least one client "hints" 0,
2346 * so I set it to cnt for that case. I also round it up to the
2347 * next multiple of DIRBLKSIZ.
2348 * Since the size of a Readdirplus directory entry reply will always
2349 * be greater than a directory entry returned by VOP_READDIR(), it
2350 * does not make sense to read more than NFS_SRVMAXDATA() via
2351 * VOP_READDIR().
2352 */
2353 if (siz <= 0)
2354 siz = cnt;
2355 else if (siz > NFS_SRVMAXDATA(nd))
2356 siz = NFS_SRVMAXDATA(nd);
2357 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2358
2359 if (nd->nd_flag & ND_NFSV4) {
2360 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2361 if (error)
2362 goto nfsmout;
2363 NFSSET_ATTRBIT(&savbits, &attrbits);
2364 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
2365 NFSZERO_ATTRBIT(&rderrbits);
2366 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
2367 } else {
2368 NFSZERO_ATTRBIT(&attrbits);
2369 }
2370 fullsiz = siz;
2371 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
2372 #if 0
2373 if (!nd->nd_repstat) {
2374 if (off && verf != at.na_filerev) {
2375 /*
2376 * va_filerev is not sufficient as a cookie verifier,
2377 * since it is not supposed to change when entries are
2378 * removed/added unless that offset cookies returned to
2379 * the client are no longer valid.
2380 */
2381 if (nd->nd_flag & ND_NFSV4) {
2382 nd->nd_repstat = NFSERR_NOTSAME;
2383 } else {
2384 nd->nd_repstat = NFSERR_BAD_COOKIE;
2385 }
2386 }
2387 }
2388 #endif
2389 if (!nd->nd_repstat && vp->v_type != VDIR)
2390 nd->nd_repstat = NFSERR_NOTDIR;
2391 if (!nd->nd_repstat && cnt == 0)
2392 nd->nd_repstat = NFSERR_TOOSMALL;
2393 if (!nd->nd_repstat)
2394 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
2395 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
2396 NFSACCCHK_VPISLOCKED, NULL);
2397 if (nd->nd_repstat) {
2398 vput(vp);
2399 if (nd->nd_flag & ND_NFSV3)
2400 nfsrv_postopattr(nd, getret, &at);
2401 goto out;
2402 }
2403 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
2404 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
2405
2406 rbuf = malloc(siz, M_TEMP, M_WAITOK);
2407 again:
2408 eofflag = 0;
2409 if (cookies) {
2410 free(cookies, M_TEMP);
2411 cookies = NULL;
2412 }
2413
2414 iv.iov_base = rbuf;
2415 iv.iov_len = siz;
2416 io.uio_iov = &iv;
2417 io.uio_iovcnt = 1;
2418 io.uio_offset = (off_t)off;
2419 io.uio_resid = siz;
2420 io.uio_segflg = UIO_SYSSPACE;
2421 io.uio_rw = UIO_READ;
2422 io.uio_td = NULL;
2423 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
2424 &cookies);
2425 off = (u_int64_t)io.uio_offset;
2426 if (io.uio_resid)
2427 siz -= io.uio_resid;
2428
2429 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
2430
2431 if (!cookies && !nd->nd_repstat)
2432 nd->nd_repstat = NFSERR_PERM;
2433 if (!nd->nd_repstat)
2434 nd->nd_repstat = getret;
2435 if (nd->nd_repstat) {
2436 vput(vp);
2437 if (cookies)
2438 free(cookies, M_TEMP);
2439 free(rbuf, M_TEMP);
2440 if (nd->nd_flag & ND_NFSV3)
2441 nfsrv_postopattr(nd, getret, &at);
2442 goto out;
2443 }
2444 /*
2445 * If nothing read, return eof
2446 * rpc reply
2447 */
2448 if (siz == 0) {
2449 vput(vp);
2450 if (nd->nd_flag & ND_NFSV3)
2451 nfsrv_postopattr(nd, getret, &at);
2452 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2453 txdr_hyper(at.na_filerev, tl);
2454 tl += 2;
2455 *tl++ = newnfs_false;
2456 *tl = newnfs_true;
2457 free(cookies, M_TEMP);
2458 free(rbuf, M_TEMP);
2459 goto out;
2460 }
2461
2462 /*
2463 * Check for degenerate cases of nothing useful read.
2464 * If so go try again
2465 */
2466 cpos = rbuf;
2467 cend = rbuf + siz;
2468 dp = (struct dirent *)cpos;
2469 cookiep = cookies;
2470
2471 /*
2472 * For some reason FreeBSD's ufs_readdir() chooses to back the
2473 * directory offset up to a block boundary, so it is necessary to
2474 * skip over the records that precede the requested offset. This
2475 * requires the assumption that file offset cookies monotonically
2476 * increase.
2477 */
2478 while (cpos < cend && ncookies > 0 &&
2479 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
2480 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
2481 ((nd->nd_flag & ND_NFSV4) &&
2482 ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
2483 (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
2484 cpos += dp->d_reclen;
2485 dp = (struct dirent *)cpos;
2486 cookiep++;
2487 ncookies--;
2488 }
2489 if (cpos >= cend || ncookies == 0) {
2490 siz = fullsiz;
2491 toff = off;
2492 goto again;
2493 }
2494
2495 /*
2496 * Busy the file system so that the mount point won't go away
2497 * and, as such, VFS_VGET() can be used safely.
2498 */
2499 mp = vp->v_mount;
2500 vfs_ref(mp);
2501 NFSVOPUNLOCK(vp);
2502 nd->nd_repstat = vfs_busy(mp, 0);
2503 vfs_rel(mp);
2504 if (nd->nd_repstat != 0) {
2505 vrele(vp);
2506 free(cookies, M_TEMP);
2507 free(rbuf, M_TEMP);
2508 if (nd->nd_flag & ND_NFSV3)
2509 nfsrv_postopattr(nd, getret, &at);
2510 goto out;
2511 }
2512
2513 /*
2514 * Check to see if entries in this directory can be safely acquired
2515 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
2516 * ZFS snapshot directories need VOP_LOOKUP(), so that any
2517 * automount of the snapshot directory that is required will
2518 * be done.
2519 * This needs to be done here for NFSv4, since NFSv4 never does
2520 * a VFS_VGET() for "." or "..".
2521 */
2522 if (is_zfs == 1) {
2523 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
2524 if (r == EOPNOTSUPP) {
2525 usevget = 0;
2526 cn.cn_nameiop = LOOKUP;
2527 cn.cn_lkflags = LK_SHARED | LK_RETRY;
2528 cn.cn_cred = nd->nd_cred;
2529 cn.cn_thread = p;
2530 } else if (r == 0)
2531 vput(nvp);
2532 }
2533
2534 /*
2535 * If the reply is likely to exceed MCLBYTES and the reply will
2536 * not be saved, use ext_pgs mbufs for TLS.
2537 * It is difficult to predict how large each entry will be and
2538 * how many entries have been read, so just assume the directory
2539 * entries grow by a factor of 4 when attributes are included.
2540 * For NFSv4.0, we do not know for sure if the reply will
2541 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
2542 */
2543 if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
2544 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
2545 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
2546 nd->nd_flag |= ND_EXTPG;
2547
2548 /*
2549 * Save this position, in case there is an error before one entry
2550 * is created.
2551 */
2552 mb0 = nd->nd_mb;
2553 bpos0 = nd->nd_bpos;
2554 bextpg0 = nd->nd_bextpg;
2555 bextpgsiz0 = nd->nd_bextpgsiz;
2556
2557 /*
2558 * Fill in the first part of the reply.
2559 * dirlen is the reply length in bytes and cannot exceed cnt.
2560 * (Include the two booleans at the end of the reply in dirlen now,
2561 * so we recognize when we have exceeded cnt.)
2562 */
2563 if (nd->nd_flag & ND_NFSV3) {
2564 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
2565 nfsrv_postopattr(nd, getret, &at);
2566 } else {
2567 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
2568 }
2569 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2570 txdr_hyper(at.na_filerev, tl);
2571
2572 /*
2573 * Save this position, in case there is an empty reply needed.
2574 */
2575 mb1 = nd->nd_mb;
2576 bpos1 = nd->nd_bpos;
2577 bextpg1 = nd->nd_bextpg;
2578 bextpgsiz1 = nd->nd_bextpgsiz;
2579
2580 /* Loop through the records and build reply */
2581 entrycnt = 0;
2582 while (cpos < cend && ncookies > 0 && dirlen < cnt) {
2583 nlen = dp->d_namlen;
2584 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
2585 nlen <= NFS_MAXNAMLEN &&
2586 ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
2587 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
2588 || (nlen == 1 && dp->d_name[0] != '.'))) {
2589 /*
2590 * Save the current position in the reply, in case
2591 * this entry exceeds cnt.
2592 */
2593 mb1 = nd->nd_mb;
2594 bpos1 = nd->nd_bpos;
2595 bextpg1 = nd->nd_bextpg;
2596 bextpgsiz1 = nd->nd_bextpgsiz;
2597
2598 /*
2599 * For readdir_and_lookup get the vnode using
2600 * the file number.
2601 */
2602 nvp = NULL;
2603 refp = NULL;
2604 r = 0;
2605 at_root = 0;
2606 needs_unbusy = 0;
2607 new_mp = mp;
2608 mounted_on_fileno = (uint64_t)dp->d_fileno;
2609 if ((nd->nd_flag & ND_NFSV3) ||
2610 NFSNONZERO_ATTRBIT(&savbits)) {
2611 if (nd->nd_flag & ND_NFSV4)
2612 refp = nfsv4root_getreferral(NULL,
2613 vp, dp->d_fileno);
2614 if (refp == NULL) {
2615 if (usevget)
2616 r = VFS_VGET(mp, dp->d_fileno,
2617 LK_SHARED, &nvp);
2618 else
2619 r = EOPNOTSUPP;
2620 if (r == EOPNOTSUPP) {
2621 if (usevget) {
2622 usevget = 0;
2623 cn.cn_nameiop = LOOKUP;
2624 cn.cn_lkflags =
2625 LK_SHARED |
2626 LK_RETRY;
2627 cn.cn_cred =
2628 nd->nd_cred;
2629 cn.cn_thread = p;
2630 }
2631 cn.cn_nameptr = dp->d_name;
2632 cn.cn_namelen = nlen;
2633 cn.cn_flags = ISLASTCN |
2634 NOFOLLOW | LOCKLEAF;
2635 if (nlen == 2 &&
2636 dp->d_name[0] == '.' &&
2637 dp->d_name[1] == '.')
2638 cn.cn_flags |=
2639 ISDOTDOT;
2640 if (NFSVOPLOCK(vp, LK_SHARED)
2641 != 0) {
2642 nd->nd_repstat = EPERM;
2643 break;
2644 }
2645 if ((vp->v_vflag & VV_ROOT) != 0
2646 && (cn.cn_flags & ISDOTDOT)
2647 != 0) {
2648 vref(vp);
2649 nvp = vp;
2650 r = 0;
2651 } else {
2652 r = VOP_LOOKUP(vp, &nvp,
2653 &cn);
2654 if (vp != nvp)
2655 NFSVOPUNLOCK(vp);
2656 }
2657 }
2658
2659 /*
2660 * For NFSv4, check to see if nvp is
2661 * a mount point and get the mount
2662 * point vnode, as required.
2663 */
2664 if (r == 0 &&
2665 nfsrv_enable_crossmntpt != 0 &&
2666 (nd->nd_flag & ND_NFSV4) != 0 &&
2667 nvp->v_type == VDIR &&
2668 nvp->v_mountedhere != NULL) {
2669 new_mp = nvp->v_mountedhere;
2670 r = vfs_busy(new_mp, 0);
2671 vput(nvp);
2672 nvp = NULL;
2673 if (r == 0) {
2674 r = VFS_ROOT(new_mp,
2675 LK_SHARED, &nvp);
2676 needs_unbusy = 1;
2677 if (r == 0)
2678 at_root = 1;
2679 }
2680 }
2681 }
2682
2683 /*
2684 * If we failed to look up the entry, then it
2685 * has become invalid, most likely removed.
2686 */
2687 if (r != 0) {
2688 if (needs_unbusy)
2689 vfs_unbusy(new_mp);
2690 goto invalid;
2691 }
2692 KASSERT(refp != NULL || nvp != NULL,
2693 ("%s: undetected lookup error", __func__));
2694
2695 if (refp == NULL &&
2696 ((nd->nd_flag & ND_NFSV3) ||
2697 NFSNONZERO_ATTRBIT(&attrbits))) {
2698 r = nfsvno_getfh(nvp, &nfh, p);
2699 if (!r)
2700 r = nfsvno_getattr(nvp, nvap, nd, p,
2701 1, &attrbits);
2702 if (r == 0 && is_zfs == 1 &&
2703 nfsrv_enable_crossmntpt != 0 &&
2704 (nd->nd_flag & ND_NFSV4) != 0 &&
2705 nvp->v_type == VDIR &&
2706 vp->v_mount != nvp->v_mount) {
2707 /*
2708 * For a ZFS snapshot, there is a
2709 * pseudo mount that does not set
2710 * v_mountedhere, so it needs to
2711 * be detected via a different
2712 * mount structure.
2713 */
2714 at_root = 1;
2715 if (new_mp == mp)
2716 new_mp = nvp->v_mount;
2717 }
2718 }
2719
2720 /*
2721 * If we failed to get attributes of the entry,
2722 * then just skip it for NFSv3 (the traditional
2723 * behavior in the old NFS server).
2724 * For NFSv4 the behavior is controlled by
2725 * RDATTRERROR: we either ignore the error or
2726 * fail the request.
2727 * Note that RDATTRERROR is never set for NFSv3.
2728 */
2729 if (r != 0) {
2730 if (!NFSISSET_ATTRBIT(&attrbits,
2731 NFSATTRBIT_RDATTRERROR)) {
2732 vput(nvp);
2733 if (needs_unbusy != 0)
2734 vfs_unbusy(new_mp);
2735 if ((nd->nd_flag & ND_NFSV3))
2736 goto invalid;
2737 nd->nd_repstat = r;
2738 break;
2739 }
2740 }
2741 }
2742
2743 /*
2744 * Build the directory record xdr
2745 */
2746 if (nd->nd_flag & ND_NFSV3) {
2747 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2748 *tl++ = newnfs_true;
2749 *tl++ = 0;
2750 *tl = txdr_unsigned(dp->d_fileno);
2751 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2752 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2753 txdr_hyper(*cookiep, tl);
2754 nfsrv_postopattr(nd, 0, nvap);
2755 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
2756 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
2757 if (nvp != NULL)
2758 vput(nvp);
2759 } else {
2760 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2761 *tl++ = newnfs_true;
2762 txdr_hyper(*cookiep, tl);
2763 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2764 if (nvp != NULL) {
2765 supports_nfsv4acls =
2766 nfs_supportsnfsv4acls(nvp);
2767 NFSVOPUNLOCK(nvp);
2768 } else
2769 supports_nfsv4acls = 0;
2770 if (refp != NULL) {
2771 dirlen += nfsrv_putreferralattr(nd,
2772 &savbits, refp, 0,
2773 &nd->nd_repstat);
2774 if (nd->nd_repstat) {
2775 if (nvp != NULL)
2776 vrele(nvp);
2777 if (needs_unbusy != 0)
2778 vfs_unbusy(new_mp);
2779 break;
2780 }
2781 } else if (r) {
2782 dirlen += nfsvno_fillattr(nd, new_mp,
2783 nvp, nvap, &nfh, r, &rderrbits,
2784 nd->nd_cred, p, isdgram, 0,
2785 supports_nfsv4acls, at_root,
2786 mounted_on_fileno);
2787 } else {
2788 dirlen += nfsvno_fillattr(nd, new_mp,
2789 nvp, nvap, &nfh, r, &attrbits,
2790 nd->nd_cred, p, isdgram, 0,
2791 supports_nfsv4acls, at_root,
2792 mounted_on_fileno);
2793 }
2794 if (nvp != NULL)
2795 vrele(nvp);
2796 dirlen += (3 * NFSX_UNSIGNED);
2797 }
2798 if (needs_unbusy != 0)
2799 vfs_unbusy(new_mp);
2800 if (dirlen <= cnt)
2801 entrycnt++;
2802 }
2803 invalid:
2804 cpos += dp->d_reclen;
2805 dp = (struct dirent *)cpos;
2806 cookiep++;
2807 ncookies--;
2808 }
2809 vrele(vp);
2810 vfs_unbusy(mp);
2811
2812 /*
2813 * If dirlen > cnt, we must strip off the last entry. If that
2814 * results in an empty reply, report NFSERR_TOOSMALL.
2815 */
2816 if (dirlen > cnt || nd->nd_repstat) {
2817 if (!nd->nd_repstat && entrycnt == 0)
2818 nd->nd_repstat = NFSERR_TOOSMALL;
2819 if (nd->nd_repstat) {
2820 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
2821 if (nd->nd_flag & ND_NFSV3)
2822 nfsrv_postopattr(nd, getret, &at);
2823 } else
2824 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
2825 eofflag = 0;
2826 } else if (cpos < cend)
2827 eofflag = 0;
2828 if (!nd->nd_repstat) {
2829 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2830 *tl++ = newnfs_false;
2831 if (eofflag)
2832 *tl = newnfs_true;
2833 else
2834 *tl = newnfs_false;
2835 }
2836 free(cookies, M_TEMP);
2837 free(rbuf, M_TEMP);
2838
2839 out:
2840 NFSEXITCODE2(0, nd);
2841 return (0);
2842 nfsmout:
2843 vput(vp);
2844 NFSEXITCODE2(error, nd);
2845 return (error);
2846 }
2847
2848 /*
2849 * Get the settable attributes out of the mbuf list.
2850 * (Return 0 or EBADRPC)
2851 */
2852 int
nfsrv_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,struct thread * p)2853 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
2854 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2855 {
2856 u_int32_t *tl;
2857 struct nfsv2_sattr *sp;
2858 int error = 0, toclient = 0;
2859
2860 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2861 case ND_NFSV2:
2862 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2863 /*
2864 * Some old clients didn't fill in the high order 16bits.
2865 * --> check the low order 2 bytes for 0xffff
2866 */
2867 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2868 nvap->na_mode = nfstov_mode(sp->sa_mode);
2869 if (sp->sa_uid != newnfs_xdrneg1)
2870 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2871 if (sp->sa_gid != newnfs_xdrneg1)
2872 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2873 if (sp->sa_size != newnfs_xdrneg1)
2874 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2875 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2876 #ifdef notyet
2877 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2878 #else
2879 nvap->na_atime.tv_sec =
2880 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2881 nvap->na_atime.tv_nsec = 0;
2882 #endif
2883 }
2884 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2885 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2886 break;
2887 case ND_NFSV3:
2888 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2889 if (*tl == newnfs_true) {
2890 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2891 nvap->na_mode = nfstov_mode(*tl);
2892 }
2893 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2894 if (*tl == newnfs_true) {
2895 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2896 nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2897 }
2898 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2899 if (*tl == newnfs_true) {
2900 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2901 nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2902 }
2903 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2904 if (*tl == newnfs_true) {
2905 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2906 nvap->na_size = fxdr_hyper(tl);
2907 }
2908 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2909 switch (fxdr_unsigned(int, *tl)) {
2910 case NFSV3SATTRTIME_TOCLIENT:
2911 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2912 fxdr_nfsv3time(tl, &nvap->na_atime);
2913 toclient = 1;
2914 break;
2915 case NFSV3SATTRTIME_TOSERVER:
2916 vfs_timestamp(&nvap->na_atime);
2917 nvap->na_vaflags |= VA_UTIMES_NULL;
2918 break;
2919 }
2920 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2921 switch (fxdr_unsigned(int, *tl)) {
2922 case NFSV3SATTRTIME_TOCLIENT:
2923 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2924 fxdr_nfsv3time(tl, &nvap->na_mtime);
2925 nvap->na_vaflags &= ~VA_UTIMES_NULL;
2926 break;
2927 case NFSV3SATTRTIME_TOSERVER:
2928 vfs_timestamp(&nvap->na_mtime);
2929 if (!toclient)
2930 nvap->na_vaflags |= VA_UTIMES_NULL;
2931 break;
2932 }
2933 break;
2934 case ND_NFSV4:
2935 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
2936 }
2937 nfsmout:
2938 NFSEXITCODE2(error, nd);
2939 return (error);
2940 }
2941
2942 /*
2943 * Handle the setable attributes for V4.
2944 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2945 */
2946 int
nfsv4_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,struct thread * p)2947 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
2948 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2949 {
2950 u_int32_t *tl;
2951 int attrsum = 0;
2952 int i, j;
2953 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2954 int moderet, toclient = 0;
2955 u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2956 uid_t uid;
2957 gid_t gid;
2958 u_short mode, mask; /* Same type as va_mode. */
2959 struct vattr va;
2960
2961 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2962 if (error)
2963 goto nfsmout;
2964 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2965 attrsize = fxdr_unsigned(int, *tl);
2966
2967 /*
2968 * Loop around getting the setable attributes. If an unsupported
2969 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2970 */
2971 if (retnotsup) {
2972 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2973 bitpos = NFSATTRBIT_MAX;
2974 } else {
2975 bitpos = 0;
2976 }
2977 moderet = 0;
2978 for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2979 if (attrsum > attrsize) {
2980 error = NFSERR_BADXDR;
2981 goto nfsmout;
2982 }
2983 if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2984 switch (bitpos) {
2985 case NFSATTRBIT_SIZE:
2986 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2987 if (vp != NULL && vp->v_type != VREG) {
2988 error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
2989 NFSERR_INVAL;
2990 goto nfsmout;
2991 }
2992 nvap->na_size = fxdr_hyper(tl);
2993 attrsum += NFSX_HYPER;
2994 break;
2995 case NFSATTRBIT_ACL:
2996 error = nfsrv_dissectacl(nd, aclp, true, &aceerr,
2997 &aclsize, p);
2998 if (error)
2999 goto nfsmout;
3000 if (aceerr && !nd->nd_repstat)
3001 nd->nd_repstat = aceerr;
3002 attrsum += aclsize;
3003 break;
3004 case NFSATTRBIT_ARCHIVE:
3005 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3006 if (!nd->nd_repstat)
3007 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3008 attrsum += NFSX_UNSIGNED;
3009 break;
3010 case NFSATTRBIT_HIDDEN:
3011 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3012 if (!nd->nd_repstat)
3013 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3014 attrsum += NFSX_UNSIGNED;
3015 break;
3016 case NFSATTRBIT_MIMETYPE:
3017 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3018 i = fxdr_unsigned(int, *tl);
3019 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3020 if (error)
3021 goto nfsmout;
3022 if (!nd->nd_repstat)
3023 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3024 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
3025 break;
3026 case NFSATTRBIT_MODE:
3027 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */
3028 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3029 nvap->na_mode = nfstov_mode(*tl);
3030 attrsum += NFSX_UNSIGNED;
3031 break;
3032 case NFSATTRBIT_OWNER:
3033 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3034 j = fxdr_unsigned(int, *tl);
3035 if (j < 0) {
3036 error = NFSERR_BADXDR;
3037 goto nfsmout;
3038 }
3039 if (j > NFSV4_SMALLSTR)
3040 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3041 else
3042 cp = namestr;
3043 error = nfsrv_mtostr(nd, cp, j);
3044 if (error) {
3045 if (j > NFSV4_SMALLSTR)
3046 free(cp, M_NFSSTRING);
3047 goto nfsmout;
3048 }
3049 if (!nd->nd_repstat) {
3050 nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
3051 &uid);
3052 if (!nd->nd_repstat)
3053 nvap->na_uid = uid;
3054 }
3055 if (j > NFSV4_SMALLSTR)
3056 free(cp, M_NFSSTRING);
3057 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3058 break;
3059 case NFSATTRBIT_OWNERGROUP:
3060 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3061 j = fxdr_unsigned(int, *tl);
3062 if (j < 0) {
3063 error = NFSERR_BADXDR;
3064 goto nfsmout;
3065 }
3066 if (j > NFSV4_SMALLSTR)
3067 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3068 else
3069 cp = namestr;
3070 error = nfsrv_mtostr(nd, cp, j);
3071 if (error) {
3072 if (j > NFSV4_SMALLSTR)
3073 free(cp, M_NFSSTRING);
3074 goto nfsmout;
3075 }
3076 if (!nd->nd_repstat) {
3077 nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
3078 &gid);
3079 if (!nd->nd_repstat)
3080 nvap->na_gid = gid;
3081 }
3082 if (j > NFSV4_SMALLSTR)
3083 free(cp, M_NFSSTRING);
3084 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3085 break;
3086 case NFSATTRBIT_SYSTEM:
3087 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3088 if (!nd->nd_repstat)
3089 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3090 attrsum += NFSX_UNSIGNED;
3091 break;
3092 case NFSATTRBIT_TIMEACCESSSET:
3093 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3094 attrsum += NFSX_UNSIGNED;
3095 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
3096 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3097 fxdr_nfsv4time(tl, &nvap->na_atime);
3098 toclient = 1;
3099 attrsum += NFSX_V4TIME;
3100 } else {
3101 vfs_timestamp(&nvap->na_atime);
3102 nvap->na_vaflags |= VA_UTIMES_NULL;
3103 }
3104 break;
3105 case NFSATTRBIT_TIMEBACKUP:
3106 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3107 if (!nd->nd_repstat)
3108 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3109 attrsum += NFSX_V4TIME;
3110 break;
3111 case NFSATTRBIT_TIMECREATE:
3112 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3113 fxdr_nfsv4time(tl, &nvap->na_btime);
3114 attrsum += NFSX_V4TIME;
3115 break;
3116 case NFSATTRBIT_TIMEMODIFYSET:
3117 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3118 attrsum += NFSX_UNSIGNED;
3119 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
3120 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3121 fxdr_nfsv4time(tl, &nvap->na_mtime);
3122 nvap->na_vaflags &= ~VA_UTIMES_NULL;
3123 attrsum += NFSX_V4TIME;
3124 } else {
3125 vfs_timestamp(&nvap->na_mtime);
3126 if (!toclient)
3127 nvap->na_vaflags |= VA_UTIMES_NULL;
3128 }
3129 break;
3130 case NFSATTRBIT_MODESETMASKED:
3131 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
3132 mode = fxdr_unsigned(u_short, *tl++);
3133 mask = fxdr_unsigned(u_short, *tl);
3134 /*
3135 * vp == NULL implies an Open/Create operation.
3136 * This attribute can only be used for Setattr and
3137 * only for NFSv4.1 or higher.
3138 * If moderet != 0, a mode attribute has also been
3139 * specified and this attribute cannot be done in the
3140 * same Setattr operation.
3141 */
3142 if ((nd->nd_flag & ND_NFSV41) == 0)
3143 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3144 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
3145 vp == NULL)
3146 nd->nd_repstat = NFSERR_INVAL;
3147 else if (moderet == 0)
3148 moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
3149 if (moderet == 0)
3150 nvap->na_mode = (mode & mask) |
3151 (va.va_mode & ~mask);
3152 else
3153 nd->nd_repstat = moderet;
3154 attrsum += 2 * NFSX_UNSIGNED;
3155 break;
3156 default:
3157 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3158 /*
3159 * set bitpos so we drop out of the loop.
3160 */
3161 bitpos = NFSATTRBIT_MAX;
3162 break;
3163 }
3164 }
3165
3166 /*
3167 * some clients pad the attrlist, so we need to skip over the
3168 * padding.
3169 */
3170 if (attrsum > attrsize) {
3171 error = NFSERR_BADXDR;
3172 } else {
3173 attrsize = NFSM_RNDUP(attrsize);
3174 if (attrsum < attrsize)
3175 error = nfsm_advance(nd, attrsize - attrsum, -1);
3176 }
3177 nfsmout:
3178 NFSEXITCODE2(error, nd);
3179 return (error);
3180 }
3181
3182 /*
3183 * Check/setup export credentials.
3184 */
3185 int
nfsd_excred(struct nfsrv_descript * nd,struct nfsexstuff * exp,struct ucred * credanon,bool testsec)3186 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
3187 struct ucred *credanon, bool testsec)
3188 {
3189 int error;
3190
3191 /*
3192 * Check/setup credentials.
3193 */
3194 if (nd->nd_flag & ND_GSS)
3195 exp->nes_exflag &= ~MNT_EXPORTANON;
3196
3197 /*
3198 * Check to see if the operation is allowed for this security flavor.
3199 */
3200 error = 0;
3201 if (testsec) {
3202 error = nfsvno_testexp(nd, exp);
3203 if (error != 0)
3204 goto out;
3205 }
3206
3207 /*
3208 * Check to see if the file system is exported V4 only.
3209 */
3210 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
3211 error = NFSERR_PROGNOTV4;
3212 goto out;
3213 }
3214
3215 /*
3216 * Now, map the user credentials.
3217 * (Note that ND_AUTHNONE will only be set for an NFSv3
3218 * Fsinfo RPC. If set for anything else, this code might need
3219 * to change.)
3220 */
3221 if (NFSVNO_EXPORTED(exp)) {
3222 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
3223 NFSVNO_EXPORTANON(exp) ||
3224 (nd->nd_flag & ND_AUTHNONE) != 0) {
3225 nd->nd_cred->cr_uid = credanon->cr_uid;
3226 nd->nd_cred->cr_gid = credanon->cr_gid;
3227 crsetgroups(nd->nd_cred, credanon->cr_ngroups,
3228 credanon->cr_groups);
3229 } else if ((nd->nd_flag & ND_GSS) == 0) {
3230 /*
3231 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
3232 * if there is a replacement credential with a group
3233 * list set up by "nfsuserd -manage-gids".
3234 * If there is no replacement, nfsrv_getgrpscred()
3235 * simply returns its argument.
3236 */
3237 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
3238 }
3239 }
3240
3241 out:
3242 NFSEXITCODE2(error, nd);
3243 return (error);
3244 }
3245
3246 /*
3247 * Check exports.
3248 */
3249 int
nfsvno_checkexp(struct mount * mp,struct sockaddr * nam,struct nfsexstuff * exp,struct ucred ** credp)3250 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
3251 struct ucred **credp)
3252 {
3253 int error;
3254
3255 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
3256 &exp->nes_numsecflavor, exp->nes_secflavors);
3257 if (error) {
3258 if (nfs_rootfhset) {
3259 exp->nes_exflag = 0;
3260 exp->nes_numsecflavor = 0;
3261 error = 0;
3262 }
3263 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
3264 MAXSECFLAVORS) {
3265 printf("nfsvno_checkexp: numsecflavors out of range\n");
3266 exp->nes_numsecflavor = 0;
3267 error = EACCES;
3268 }
3269 NFSEXITCODE(error);
3270 return (error);
3271 }
3272
3273 /*
3274 * Get a vnode for a file handle and export stuff.
3275 */
3276 int
nfsvno_fhtovp(struct mount * mp,fhandle_t * fhp,struct sockaddr * nam,int lktype,struct vnode ** vpp,struct nfsexstuff * exp,struct ucred ** credp)3277 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
3278 int lktype, struct vnode **vpp, struct nfsexstuff *exp,
3279 struct ucred **credp)
3280 {
3281 int error;
3282
3283 *credp = NULL;
3284 exp->nes_numsecflavor = 0;
3285 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
3286 if (error != 0)
3287 /* Make sure the server replies ESTALE to the client. */
3288 error = ESTALE;
3289 if (nam && !error) {
3290 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
3291 &exp->nes_numsecflavor, exp->nes_secflavors);
3292 if (error) {
3293 if (nfs_rootfhset) {
3294 exp->nes_exflag = 0;
3295 exp->nes_numsecflavor = 0;
3296 error = 0;
3297 } else {
3298 vput(*vpp);
3299 }
3300 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
3301 MAXSECFLAVORS) {
3302 printf("nfsvno_fhtovp: numsecflavors out of range\n");
3303 exp->nes_numsecflavor = 0;
3304 error = EACCES;
3305 vput(*vpp);
3306 }
3307 }
3308 NFSEXITCODE(error);
3309 return (error);
3310 }
3311
/*
 * nfsd_fhtovp() - convert a fh to a vnode ptr
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp and export rights by calling nfsvno_fhtovp()
 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 *	  for AUTH_SYS
 *	- if mpp != NULL, return the mount point so that it can
 *	  be used for vn_finished_write() by the caller
 *
 * The result is reported through nd->nd_repstat rather than a return
 * value.  Whenever nd_repstat ends up non-zero, *vpp is NULL on return
 * and, if a write was started, vn_finished_write() has already been called.
 */
void
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
    struct vnode **vpp, struct nfsexstuff *exp,
    struct mount **mpp, int startwrite, int nextop)
{
	struct mount *mp, *mpw;
	struct ucred *credanon;
	fhandle_t *fhp;
	int error;

	if (mpp != NULL)
		*mpp = NULL;
	*vpp = NULL;
	fhp = (fhandle_t *)nfp->nfsrvfh_data;
	mp = vfs_busyfs(&fhp->fh_fsid);
	if (mp == NULL) {
		nd->nd_repstat = ESTALE;
		goto out;
	}

	if (startwrite) {
		mpw = mp;
		error = vn_start_write(NULL, &mpw, V_WAIT);
		if (error != 0) {
			mpw = NULL;
			vfs_unbusy(mp);
			nd->nd_repstat = ESTALE;
			goto out;
		}
		/*
		 * A shared lock request is upgraded to exclusive when the
		 * mount does not report MNT_SHARED_WRITES().
		 */
		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
			lktype = LK_EXCLUSIVE;
	} else
		mpw = NULL;

	/* nfsvno_fhtovp() always sets credanon (to NULL on entry). */
	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
	    &credanon);
	vfs_unbusy(mp);

	/*
	 * For NFSv4 without a pseudo root fs, unexported file handles
	 * can be returned, so that Lookup works everywhere.
	 * For anything other than NFSv4, an empty export flag set is
	 * rejected with EACCES.
	 */
	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
	    !(nd->nd_flag & ND_NFSV4)) {
		vput(*vpp);
		*vpp = NULL;
		nd->nd_repstat = EACCES;
	}

	/*
	 * Personally, I've never seen any point in requiring a
	 * reserved port#, since only in the rare case where the
	 * clients are all boxes with secure system privileges,
	 * does it provide any enhanced security, but... some people
	 * believe it to be useful and keep putting this code back in.
	 * (There is also some "security checker" out there that
	 * complains if the nfs server doesn't enforce this.)
	 * However, note the following:
	 * RFC3530 (NFSv4) specifies that a reserved port# not be
	 *	required.
	 * RFC2623 recommends that, if a reserved port# is checked for,
	 *	that there be a way to turn that off--> ifdef'd.
	 */
#ifdef NFS_REQRSVPORT
	if (!nd->nd_repstat) {
		struct sockaddr_in *saddr;
		struct sockaddr_in6 *saddr6;

		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
		if (!(nd->nd_flag & ND_NFSV4) &&
		    ((saddr->sin_family == AF_INET &&
		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
		     (saddr6->sin6_family == AF_INET6 &&
		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
			vput(*vpp);
			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
		}
	}
#endif	/* NFS_REQRSVPORT */

	/*
	 * Check/setup credentials.
	 * The caller's uid is saved in nd_saveduid before nfsd_excred()
	 * gets a chance to remap it.
	 */
	if (!nd->nd_repstat) {
		nd->nd_saveduid = nd->nd_cred->cr_uid;
		nd->nd_repstat = nfsd_excred(nd, exp, credanon,
		    nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
		if (nd->nd_repstat)
			vput(*vpp);
	}
	if (credanon != NULL)
		crfree(credanon);
	if (nd->nd_repstat) {
		/* Failure: end the write (mpw may be NULL) and hide the vnode. */
		vn_finished_write(mpw);
		*vpp = NULL;
	} else if (mpp != NULL) {
		*mpp = mpw;
	}

out:
	NFSEXITCODE2(0, nd);
}
3424
3425 /*
3426 * glue for fp.
3427 */
3428 static int
fp_getfvp(struct thread * p,int fd,struct file ** fpp,struct vnode ** vpp)3429 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
3430 {
3431 struct filedesc *fdp;
3432 struct file *fp;
3433 int error = 0;
3434
3435 fdp = p->td_proc->p_fd;
3436 if (fd < 0 || fd >= fdp->fd_nfiles ||
3437 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
3438 error = EBADF;
3439 goto out;
3440 }
3441 *fpp = fp;
3442
3443 out:
3444 NFSEXITCODE(error);
3445 return (error);
3446 }
3447
3448 /*
3449 * Called from nfssvc() to update the exports list. Just call
3450 * vfs_export(). This has to be done, since the v4 root fake fs isn't
3451 * in the mount list.
3452 */
3453 int
nfsrv_v4rootexport(void * argp,struct ucred * cred,struct thread * p)3454 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
3455 {
3456 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
3457 int error = 0;
3458 struct nameidata nd;
3459 fhandle_t fh;
3460
3461 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
3462 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
3463 nfs_rootfhset = 0;
3464 else if (error == 0) {
3465 if (nfsexargp->fspec == NULL) {
3466 error = EPERM;
3467 goto out;
3468 }
3469 /*
3470 * If fspec != NULL, this is the v4root path.
3471 */
3472 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
3473 nfsexargp->fspec, p);
3474 if ((error = namei(&nd)) != 0)
3475 goto out;
3476 error = nfsvno_getfh(nd.ni_vp, &fh, p);
3477 vrele(nd.ni_vp);
3478 if (!error) {
3479 nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
3480 NFSBCOPY((caddr_t)&fh,
3481 nfs_rootfh.nfsrvfh_data,
3482 sizeof (fhandle_t));
3483 nfs_rootfhset = 1;
3484 }
3485 }
3486
3487 out:
3488 NFSEXITCODE(error);
3489 return (error);
3490 }
3491
/*
 * This function is meant to report whether the system is near its limit
 * for memory allocation via malloc() or mget(), returning True iff either
 * resource is near its limit.
 * XXX (For now, this is just a stub.)
 *
 * The stub only ever returns 1 while nfsrv_testmalloclimit is non-zero,
 * and then only on every 1000th call made with the knob set; every 100th
 * such hit (starting with the first) also logs a message.  The counters
 * do not advance while the knob is zero.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int ncalls = 1;
	static int nhits = 0;
	int hit;

	if (nfsrv_testmalloclimit == 0)
		return (0);

	hit = (ncalls % 1000) == 0;
	ncalls++;
	if (!hit)
		return (0);

	if ((nhits % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	nhits++;
	return (1);
}
3512
3513 /*
3514 * BSD specific initialization of a mount point.
3515 */
3516 void
nfsd_mntinit(void)3517 nfsd_mntinit(void)
3518 {
3519 static int inited = 0;
3520
3521 if (inited)
3522 return;
3523 inited = 1;
3524 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
3525 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
3526 TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
3527 nfsv4root_mnt.mnt_export = NULL;
3528 TAILQ_INIT(&nfsv4root_opt);
3529 TAILQ_INIT(&nfsv4root_newopt);
3530 nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
3531 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
3532 nfsv4root_mnt.mnt_nvnodelistsize = 0;
3533 nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
3534 }
3535
/*
 * Timer callback: run nfsrv_servertimer() and then re-arm nfsd_callout
 * so that this function is invoked again (SBT_1S is passed as both the
 * time and the precision arguments).  arg is unused.
 */
static void
nfsd_timer(void *arg)
{

	nfsrv_servertimer();
	callout_reset_sbt(&nfsd_callout, SBT_1S, SBT_1S, nfsd_timer, NULL, 0);
}
3543
3544 /*
3545 * Get a vnode for a file handle, without checking exports, etc.
3546 */
3547 struct vnode *
nfsvno_getvp(fhandle_t * fhp)3548 nfsvno_getvp(fhandle_t *fhp)
3549 {
3550 struct mount *mp;
3551 struct vnode *vp;
3552 int error;
3553
3554 mp = vfs_busyfs(&fhp->fh_fsid);
3555 if (mp == NULL)
3556 return (NULL);
3557 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
3558 vfs_unbusy(mp);
3559 if (error)
3560 return (NULL);
3561 return (vp);
3562 }
3563
3564 /*
3565 * Do a local VOP_ADVLOCK().
3566 */
3567 int
nfsvno_advlock(struct vnode * vp,int ftype,u_int64_t first,u_int64_t end,struct thread * td)3568 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
3569 u_int64_t end, struct thread *td)
3570 {
3571 int error = 0;
3572 struct flock fl;
3573 u_int64_t tlen;
3574
3575 if (nfsrv_dolocallocks == 0)
3576 goto out;
3577 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
3578
3579 fl.l_whence = SEEK_SET;
3580 fl.l_type = ftype;
3581 fl.l_start = (off_t)first;
3582 if (end == NFS64BITSSET) {
3583 fl.l_len = 0;
3584 } else {
3585 tlen = end - first;
3586 fl.l_len = (off_t)tlen;
3587 }
3588 /*
3589 * For FreeBSD8, the l_pid and l_sysid must be set to the same
3590 * values for all calls, so that all locks will be held by the
3591 * nfsd server. (The nfsd server handles conflicts between the
3592 * various clients.)
3593 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
3594 * bytes, so it can't be put in l_sysid.
3595 */
3596 if (nfsv4_sysid == 0)
3597 nfsv4_sysid = nlm_acquire_next_sysid();
3598 fl.l_pid = (pid_t)0;
3599 fl.l_sysid = (int)nfsv4_sysid;
3600
3601 if (ftype == F_UNLCK)
3602 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
3603 (F_POSIX | F_REMOTE));
3604 else
3605 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
3606 (F_POSIX | F_REMOTE));
3607
3608 out:
3609 NFSEXITCODE(error);
3610 return (error);
3611 }
3612
3613 /*
3614 * Check the nfsv4 root exports.
3615 */
3616 int
nfsvno_v4rootexport(struct nfsrv_descript * nd)3617 nfsvno_v4rootexport(struct nfsrv_descript *nd)
3618 {
3619 struct ucred *credanon;
3620 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
3621 uint64_t exflags;
3622
3623 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
3624 &credanon, &numsecflavor, secflavors);
3625 if (error) {
3626 error = NFSERR_PROGUNAVAIL;
3627 goto out;
3628 }
3629 if (credanon != NULL)
3630 crfree(credanon);
3631 for (i = 0; i < numsecflavor; i++) {
3632 if (secflavors[i] == AUTH_SYS)
3633 nd->nd_flag |= ND_EXAUTHSYS;
3634 else if (secflavors[i] == RPCSEC_GSS_KRB5)
3635 nd->nd_flag |= ND_EXGSS;
3636 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
3637 nd->nd_flag |= ND_EXGSSINTEGRITY;
3638 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
3639 nd->nd_flag |= ND_EXGSSPRIVACY;
3640 }
3641
3642 /* And set ND_EXxx flags for TLS. */
3643 if ((exflags & MNT_EXTLS) != 0) {
3644 nd->nd_flag |= ND_EXTLS;
3645 if ((exflags & MNT_EXTLSCERT) != 0)
3646 nd->nd_flag |= ND_EXTLSCERT;
3647 if ((exflags & MNT_EXTLSCERTUSER) != 0)
3648 nd->nd_flag |= ND_EXTLSCERTUSER;
3649 }
3650
3651 out:
3652 NFSEXITCODE(error);
3653 return (error);
3654 }
3655
/*
 * Nfs server pseudo system call for the nfsd's
 *
 * Dispatches on uap->flag:
 *	NFSSVC_NFSDADDSOCK - hand a socket descriptor to nfsrvd_addsock()
 *	NFSSVC_NFSDNFSD	   - copy in the nfsd arguments (old or new layout)
 *			     and call nfsrvd_nfsd()
 *	NFSSVC_PNFSDS	   - pNFS DS operations selected by pnfsdarg.op
 *	anything else	   - passed on to nfssvc_srvcall()
 * Returns 0 or an errno.
 */
/*
 * MPSAFE
 */
static int
nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
{
	struct file *fp;
	struct nfsd_addsock_args sockarg;
	struct nfsd_nfsd_args nfsdarg;
	struct nfsd_nfsd_oargs onfsdarg;
	struct nfsd_pnfsd_args pnfsdarg;
	struct vnode *vp, *nvp, *curdvp;
	struct pnfsdsfile *pf;
	struct nfsdevice *ds, *fds;
	cap_rights_t rights;
	int buflen, error, ret;
	char *buf, *cp, *cp2, *cp3;
	char fname[PNFS_FILENAME_LEN + 1];

	if (uap->flag & NFSSVC_NFSDADDSOCK) {
		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
		if (error)
			goto out;
		/*
		 * Look up the descriptor, requesting the CAP_SOCK_SERVER
		 * capability rights on it.
		 */
		error = fget(td, sockarg.sock,
		    cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp);
		if (error != 0)
			goto out;
		if (fp->f_type != DTYPE_SOCKET) {
			fdrop(fp, td);
			error = EPERM;
			goto out;
		}
		error = nfsrvd_addsock(fp);
		fdrop(fp, td);
	} else if (uap->flag & NFSSVC_NFSDNFSD) {
		if (uap->argp == NULL) {
			error = EINVAL;
			goto out;
		}
		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
			/*
			 * Old argument layout: copy the three fields it
			 * carries and fill in the rest with "no pNFS"
			 * defaults.
			 */
			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
			if (error == 0) {
				nfsdarg.principal = onfsdarg.principal;
				nfsdarg.minthreads = onfsdarg.minthreads;
				nfsdarg.maxthreads = onfsdarg.maxthreads;
				nfsdarg.version = 1;
				nfsdarg.addr = NULL;
				nfsdarg.addrlen = 0;
				nfsdarg.dnshost = NULL;
				nfsdarg.dnshostlen = 0;
				nfsdarg.dspath = NULL;
				nfsdarg.dspathlen = 0;
				nfsdarg.mdspath = NULL;
				nfsdarg.mdspathlen = 0;
				nfsdarg.mirrorcnt = 1;
			}
		} else
			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
		if (error)
			goto out;
		/*
		 * The four user strings are only copied in when all of them
		 * are present with lengths in (0, 10000) and mirrorcnt is in
		 * range; otherwise all of them are reset to NULL/0 below.
		 * Each user pointer in nfsdarg is replaced by a nul
		 * terminated kernel copy, which is freed after nfsrvd_nfsd()
		 * returns or on a later copyin() failure.
		 */
		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
		    nfsdarg.mirrorcnt >= 1 &&
		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
			if (error != 0) {
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
			nfsdarg.addr = cp;
			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dnshost = cp;
			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dspath = cp;
			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(nfsdarg.dspath, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.mdspath = cp;
		} else {
			nfsdarg.addr = NULL;
			nfsdarg.addrlen = 0;
			nfsdarg.dnshost = NULL;
			nfsdarg.dnshostlen = 0;
			nfsdarg.dspath = NULL;
			nfsdarg.dspathlen = 0;
			nfsdarg.mdspath = NULL;
			nfsdarg.mdspathlen = 0;
			nfsdarg.mirrorcnt = 1;
		}
		/* Kick the server timer before entering the nfsd loop. */
		nfsd_timer(NULL);
		error = nfsrvd_nfsd(td, &nfsdarg);
		/* These are either the kernel copies made above or NULL. */
		free(nfsdarg.addr, M_TEMP);
		free(nfsdarg.dnshost, M_TEMP);
		free(nfsdarg.dspath, M_TEMP);
		free(nfsdarg.mdspath, M_TEMP);
	} else if (uap->flag & NFSSVC_PNFSDS) {
		/*
		 * NOTE(review): an op other than the three tested below
		 * falls through with error == 0 -- confirm that is intended.
		 */
		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
			    NULL);
			if (error == 0)
				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
			free(cp, M_TEMP);
		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
			/*
			 * cp = mdspath, cp2 = dspath (optional),
			 * cp3 = curdspath (optional).  cp2/cp3 stay NULL
			 * when not supplied or when an earlier copy failed.
			 */
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
			buf = malloc(buflen, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
			    NULL);
			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
			if (error == 0 && pnfsdarg.dspath != NULL) {
				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.dspath, cp2,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
				    error);
			} else
				cp2 = NULL;
			if (error == 0 && pnfsdarg.curdspath != NULL) {
				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.curdspath, cp3,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
				    error);
			} else
				cp3 = NULL;
			curdvp = NULL;
			fds = NULL;
			if (error == 0)
				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
				    &fds);
			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
			if (error == 0) {
				/* Clamp a bad directory index instead of failing. */
				if (pf->dsf_dir >= nfsrv_dsdirsize) {
					printf("copymr: dsdir out of range\n");
					pf->dsf_dir = 0;
				}
				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
				error = nfsrv_copymr(vp, nvp,
				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
				    (struct pnfsdsfile *)buf,
				    buflen / sizeof(*pf), td->td_ucred, td);
				vput(vp);
				vput(nvp);
				/*
				 * When nfsrv_mdscopymr() returned an fds and
				 * the copy worked, remove fname from that
				 * device's directory.
				 */
				if (fds != NULL && error == 0) {
					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
					ret = vn_lock(curdvp, LK_EXCLUSIVE);
					if (ret == 0) {
						nfsrv_dsremove(curdvp, fname,
						    td->td_ucred, td);
						NFSVOPUNLOCK(curdvp);
					}
				}
				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
			}
			free(cp, M_TEMP);
			free(cp2, M_TEMP);
			free(cp3, M_TEMP);
			free(buf, M_TEMP);
		}
	} else {
		error = nfssvc_srvcall(td, uap, td->td_ucred);
	}

out:
	NFSEXITCODE(error);
	return (error);
}
3867
/*
 * Handle the nfssvc() server flags not dealt with by nfssvc_nfsd():
 * public file handle set/clear, NFSv4 root export (old and new argument
 * layouts), stable restart file setup, admin revoke, client/lock dumps,
 * master nfsd registration and nfsd suspend/resume.
 * Returns 0 or an errno; EINVAL when no branch assigns error.
 */
static int
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	struct nfsex_args export;
	struct nfsex_oldargs oexp;
	struct file *fp = NULL;
	int stablefd, i, len;
	struct nfsd_clid adminrevoke;
	struct nfsd_dumplist dumplist;
	struct nfsd_dumpclients *dumpclients;
	struct nfsd_dumplocklist dumplocklist;
	struct nfsd_dumplocks *dumplocks;
	struct nameidata nd;
	vnode_t vp;
	int error = EINVAL, igotlock;
	struct proc *procp;
	gid_t *grps;
	static int suspend_nfsd = 0;

	if (uap->flag & NFSSVC_PUBLICFH) {
		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
		    sizeof (fhandle_t));
		error = copyin(uap->argp,
		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
		if (!error)
			nfs_pubfhset = 1;
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
		/* New argument layout: the group list is a user pointer. */
		error = copyin(uap->argp,(caddr_t)&export,
		    sizeof (struct nfsex_args));
		if (!error) {
			grps = NULL;
			if (export.export.ex_ngroups > NGROUPS_MAX ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				grps = malloc(export.export.ex_ngroups *
				    sizeof(gid_t), M_TEMP, M_WAITOK);
				error = copyin(export.export.ex_groups, grps,
				    export.export.ex_ngroups * sizeof(gid_t));
				export.export.ex_groups = grps;
			} else
				export.export.ex_groups = NULL;
			if (!error)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(grps, M_TEMP);
		}
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    NFSSVC_V4ROOTEXPORT) {
		/*
		 * Old argument layout: convert field by field into a
		 * struct nfsex_args before calling nfsrv_v4rootexport().
		 */
		error = copyin(uap->argp,(caddr_t)&oexp,
		    sizeof (struct nfsex_oldargs));
		if (!error) {
			memset(&export.export, 0, sizeof(export.export));
			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
			export.export.ex_root = oexp.export.ex_root;
			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
			export.export.ex_ngroups =
			    oexp.export.ex_anon.cr_ngroups;
			export.export.ex_groups = NULL;
			if (export.export.ex_ngroups > XU_NGROUPS ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				export.export.ex_groups = malloc(
				    export.export.ex_ngroups * sizeof(gid_t),
				    M_TEMP, M_WAITOK);
				for (i = 0; i < export.export.ex_ngroups; i++)
					export.export.ex_groups[i] =
					    oexp.export.ex_anon.cr_groups[i];
			}
			export.export.ex_addr = oexp.export.ex_addr;
			export.export.ex_addrlen = oexp.export.ex_addrlen;
			export.export.ex_mask = oexp.export.ex_mask;
			export.export.ex_masklen = oexp.export.ex_masklen;
			export.export.ex_indexfile = oexp.export.ex_indexfile;
			export.export.ex_numsecflavors =
			    oexp.export.ex_numsecflavors;
			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
			    export.export.ex_numsecflavors < 0)
				error = EINVAL;
			else {
				for (i = 0; i < export.export.ex_numsecflavors;
				    i++)
					export.export.ex_secflavors[i] =
					    oexp.export.ex_secflavors[i];
			}
			export.fspec = oexp.fspec;
			if (error == 0)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(export.export.ex_groups, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
		nfs_pubfhset = 0;
		error = 0;
	} else if (uap->flag & NFSSVC_STABLERESTART) {
		/*
		 * The descriptor must be open for both reading and writing,
		 * and this is refused with EPERM while newnfs_numnfsd is
		 * non-zero.
		 */
		error = copyin(uap->argp, (caddr_t)&stablefd,
		    sizeof (int));
		if (!error)
			error = fp_getfvp(p, stablefd, &fp, &vp);
		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
			error = EBADF;
		if (!error && newnfs_numnfsd != 0)
			error = EPERM;
		if (!error) {
			nfsrv_stablefirst.nsf_fp = fp;
			nfsrv_setupstable(p);
		}
	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
		error = copyin(uap->argp, (caddr_t)&adminrevoke,
		    sizeof (struct nfsd_clid));
		if (!error)
			error = nfsrv_adminrevoke(&adminrevoke, p);
	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
		/* The entry count must be in 1..NFSRV_MAXDUMPLIST. */
		error = copyin(uap->argp, (caddr_t)&dumplist,
		    sizeof (struct nfsd_dumplist));
		if (!error && (dumplist.ndl_size < 1 ||
		    dumplist.ndl_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error) {
			len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
			/* M_ZERO: the whole buffer is copied out below. */
			dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
			nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
			error = copyout(dumpclients, dumplist.ndl_list, len);
			free(dumpclients, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
		/* The entry count must be in 1..NFSRV_MAXDUMPLIST. */
		error = copyin(uap->argp, (caddr_t)&dumplocklist,
		    sizeof (struct nfsd_dumplocklist));
		if (!error && (dumplocklist.ndllck_size < 1 ||
		    dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error)
			error = nfsrv_lookupfilename(&nd,
			    dumplocklist.ndllck_fname, p);
		if (!error) {
			len = sizeof (struct nfsd_dumplocks) *
			    dumplocklist.ndllck_size;
			/* M_ZERO: the whole buffer is copied out below. */
			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
			nfsrv_dumplocks(nd.ni_vp, dumplocks,
			    dumplocklist.ndllck_size, p);
			vput(nd.ni_vp);
			error = copyout(dumplocks, dumplocklist.ndllck_list,
			    len);
			free(dumplocks, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
		/*
		 * Record the calling process as the master nfsd (pid,
		 * command name, start time and proc pointer).
		 * NOTE(review): error is not assigned in this branch, so
		 * the initial EINVAL is what gets returned -- confirm the
		 * caller expects/ignores that.
		 */
		procp = p->td_proc;
		PROC_LOCK(procp);
		nfsd_master_pid = procp->p_pid;
		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
		nfsd_master_start = procp->p_stats->p_start;
		nfsd_master_proc = procp;
		PROC_UNLOCK(procp);
	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd == 0) {
			/* Lock out all nfsd threads */
			do {
				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
			} while (igotlock == 0 && suspend_nfsd == 0);
			suspend_nfsd = 1;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd != 0) {
			nfsv4_unlock(&nfsd_suspend_lock, 0);
			suspend_nfsd = 0;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	}

	NFSEXITCODE(error);
	return (error);
}
4046
/*
 * Check that the request's security (TLS requirements and security
 * flavor) is acceptable for the export described by exp.
 * Returns 0 if ok.  Otherwise returns NFSERR_WRONGSEC for an NFSv4
 * request (ND_NFSV4 set) or (NFSERR_AUTHERR | AUTH_TOOWEAK) for any
 * other request.
 */
int
nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
{
	int i;

	/*
	 * Allow NFSv3 Fsinfo per RFC2623.
	 * For every other request, fail if the export requires TLS (or a
	 * TLS certificate / certificate user) and the matching ND_TLSxx
	 * flag is not set on the request.
	 */
	if (((nd->nd_flag & ND_NFSV4) != 0 ||
	     nd->nd_procnum != NFSPROC_FSINFO) &&
	    ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) ||
	     (NFSVNO_EXTLSCERT(exp) &&
	      (nd->nd_flag & ND_TLSCERT) == 0) ||
	     (NFSVNO_EXTLSCERTUSER(exp) &&
	      (nd->nd_flag & ND_TLSCERTUSER) == 0))) {
		if ((nd->nd_flag & ND_NFSV4) != 0)
			return (NFSERR_WRONGSEC);
#ifdef notnow
		/* There is currently no auth_stat for this. */
		else if ((nd->nd_flag & ND_TLS) == 0)
			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS);
		else
			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST);
#endif
		else
			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
	}

	/*
	 * This seems odd, but allow the case where the security flavor
	 * list is empty. This happens when NFSv4 is traversing non-exported
	 * file systems. Exported file systems should always have a non-empty
	 * security flavor list.
	 */
	if (exp->nes_numsecflavor == 0)
		return (0);

	/* Accept as soon as one exported flavor matches the request. */
	for (i = 0; i < exp->nes_numsecflavor; i++) {
		/*
		 * The tests for privacy and integrity must be first,
		 * since ND_GSS is set for everything but AUTH_SYS.
		 */
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
		    (nd->nd_flag & ND_GSSPRIVACY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
		    (nd->nd_flag & ND_GSSINTEGRITY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
		    (nd->nd_flag & ND_GSS))
			return (0);
		if (exp->nes_secflavors[i] == AUTH_SYS &&
		    (nd->nd_flag & ND_GSS) == 0)
			return (0);
	}
	/* No exported flavor matched. */
	if ((nd->nd_flag & ND_NFSV4) != 0)
		return (NFSERR_WRONGSEC);
	return (NFSERR_AUTHERR | AUTH_TOOWEAK);
}
4110
4111 /*
4112 * Calculate a hash value for the fid in a file handle.
4113 */
4114 uint32_t
nfsrv_hashfh(fhandle_t * fhp)4115 nfsrv_hashfh(fhandle_t *fhp)
4116 {
4117 uint32_t hashval;
4118
4119 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
4120 return (hashval);
4121 }
4122
4123 /*
4124 * Calculate a hash value for the sessionid.
4125 */
4126 uint32_t
nfsrv_hashsessionid(uint8_t * sessionid)4127 nfsrv_hashsessionid(uint8_t *sessionid)
4128 {
4129 uint32_t hashval;
4130
4131 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
4132 return (hashval);
4133 }
4134
4135 /*
4136 * Signal the userland master nfsd to backup the stable restart file.
4137 */
4138 void
nfsrv_backupstable(void)4139 nfsrv_backupstable(void)
4140 {
4141 struct proc *procp;
4142
4143 if (nfsd_master_proc != NULL) {
4144 procp = pfind(nfsd_master_pid);
4145 /* Try to make sure it is the correct process. */
4146 if (procp == nfsd_master_proc &&
4147 procp->p_stats->p_start.tv_sec ==
4148 nfsd_master_start.tv_sec &&
4149 procp->p_stats->p_start.tv_usec ==
4150 nfsd_master_start.tv_usec &&
4151 strcmp(procp->p_comm, nfsd_master_comm) == 0)
4152 kern_psignal(procp, SIGUSR2);
4153 else
4154 nfsd_master_proc = NULL;
4155
4156 if (procp != NULL)
4157 PROC_UNLOCK(procp);
4158 }
4159 }
4160
4161 /*
4162 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
4163 * The arguments are in a structure, so that they can be passed through
4164 * taskqueue for a kernel process to execute this function.
4165 */
/*
 * Argument block for creating one DS data file, so that the work can be
 * passed through taskqueue.
 * NOTE(review): the per-field notes below are inferred from names and
 * types only; the code that fills in and consumes this structure is not
 * visible here -- confirm against it.
 */
struct nfsrvdscreate {
	int			done;		/* presumably: set when finished */
	int			inprog;		/* presumably: set while running */
	struct task		tsk;		/* taskqueue task */
	struct ucred		*tcred;		/* credentials */
	struct vnode		*dvp;		/* presumably: DS directory vnode */
	NFSPROC_T		*p;		/* thread/proc argument */
	struct pnfsdsfile	*pf;		/* pNFS DS file entry */
	int			err;		/* presumably: result of the create */
	fhandle_t		fh;		/* file handle */
	struct vattr		va;		/* attributes */
	struct vattr		createva;	/* presumably: create attributes */
};
4179
/*
 * Create one DS data file in the DS directory "dvp".
 * vap    - attributes handed to VOP_CREATE().
 * nvap   - attributes applied to the new file with VOP_SETATTR().
 * fhp    - MDS file handle; used to generate the file name when fnamep is
 *          NULL.
 * pf     - on success, filled in with the DS file handle, the DS socket
 *          address and the file name.
 * dsa    - if non-NULL, filled in from a VOP_GETATTR() of the new file.
 * fnamep - if non-NULL, the file name to use.
 * nvpp   - if non-NULL, set to the new vnode on success; otherwise the new
 *          vnode is vput()'d before returning.
 * Returns 0 or an errno.
 */
int
nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
    fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
    char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
{
	struct vnode *nvp;
	struct nameidata named;
	struct vattr va;
	char *bufp;
	u_long *hashp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error;

	/* Set up the componentname for a CREATE of the DS file name. */
	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
	named.ni_cnd.cn_thread = p;
	named.ni_cnd.cn_nameptr = bufp;
	if (fnamep != NULL) {
		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
		named.ni_cnd.cn_namelen = strlen(bufp);
	} else
		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);

	/* Create the data file in the DS mount. */
	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
		/*
		 * Take an extra reference on dvp before VOP_VPUT_PAIR();
		 * presumably VOP_VPUT_PAIR() releases dvp and the caller's
		 * reference must survive - confirm.
		 */
		vref(dvp);
		VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
		if (error == 0) {
			/* Set the ownership of the file. */
			error = VOP_SETATTR(nvp, nvap, tcred);
			NFSD_DEBUG(4, "nfsrv_dscreate:"
			    " setattr-uid=%d\n", error);
			if (error != 0)
				vput(nvp);
		}
		if (error != 0)
			printf("pNFS: pnfscreate failed=%d\n", error);
	} else
		printf("pNFS: pnfscreate vnlock=%d\n", error);
	if (error == 0) {
		/*
		 * Sanity check that the new file really is on an "nfs" mount
		 * with a socket address and file handle that fit in *pf.
		 */
		np = VTONFS(nvp);
		nmp = VFSTONFS(nvp->v_mount);
		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
		    != 0 || nmp->nm_nam->sa_len > sizeof(
		    struct sockaddr_in6) ||
		    np->n_fhp->nfh_len != NFSX_MYFH) {
			printf("Bad DS file: fstype=%s salen=%d"
			    " fhlen=%d\n",
			    nvp->v_mount->mnt_vfc->vfc_name,
			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
			error = ENOENT;
		}

		/* Set extattrs for the DS on the MDS file. */
		if (error == 0) {
			if (dsa != NULL) {
				error = VOP_GETATTR(nvp, &va, tcred);
				if (error == 0) {
					dsa->dsa_filerev = va.va_filerev;
					dsa->dsa_size = va.va_size;
					dsa->dsa_atime = va.va_atime;
					dsa->dsa_mtime = va.va_mtime;
					dsa->dsa_bytes = va.va_bytes;
				}
			}
			if (error == 0) {
				/* Record the DS fh, address and file name. */
				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
				    NFSX_MYFH);
				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
				    nmp->nm_nam->sa_len);
				NFSBCOPY(named.ni_cnd.cn_nameptr,
				    pf->dsf_filename,
				    sizeof(pf->dsf_filename));
			}
		} else
			printf("pNFS: pnfscreate can't get DS"
			    " attr=%d\n", error);
		/* Hand the vnode to the caller only upon success. */
		if (nvpp != NULL && error == 0)
			*nvpp = nvp;
		else
			vput(nvp);
	}
	nfsvno_relpathbuf(&named);
	return (error);
}
4271
4272 /*
4273 * Start up the thread that will execute nfsrv_dscreate().
4274 */
4275 static void
start_dscreate(void * arg,int pending)4276 start_dscreate(void *arg, int pending)
4277 {
4278 struct nfsrvdscreate *dsc;
4279
4280 dsc = (struct nfsrvdscreate *)arg;
4281 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
4282 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
4283 dsc->done = 1;
4284 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
4285 }
4286
4287 /*
4288 * Create a pNFS data file on the Data Server(s).
4289 */
4290 static void
nfsrv_pnfscreate(struct vnode * vp,struct vattr * vap,struct ucred * cred,NFSPROC_T * p)4291 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
4292 NFSPROC_T *p)
4293 {
4294 struct nfsrvdscreate *dsc, *tdsc = NULL;
4295 struct nfsdevice *ds, *tds, *fds;
4296 struct mount *mp;
4297 struct pnfsdsfile *pf, *tpf;
4298 struct pnfsdsattr dsattr;
4299 struct vattr va;
4300 struct vnode *dvp[NFSDEV_MAXMIRRORS];
4301 struct nfsmount *nmp;
4302 fhandle_t fh;
4303 uid_t vauid;
4304 gid_t vagid;
4305 u_short vamode;
4306 struct ucred *tcred;
4307 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
4308 int failpos, timo;
4309
4310 /* Get a DS server directory in a round-robin order. */
4311 mirrorcnt = 1;
4312 mp = vp->v_mount;
4313 ds = fds = NULL;
4314 NFSDDSLOCK();
4315 /*
4316 * Search for the first entry that handles this MDS fs, but use the
4317 * first entry for all MDS fs's otherwise.
4318 */
4319 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
4320 if (tds->nfsdev_nmp != NULL) {
4321 if (tds->nfsdev_mdsisset == 0 && ds == NULL)
4322 ds = tds;
4323 else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
4324 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
4325 ds = fds = tds;
4326 break;
4327 }
4328 }
4329 }
4330 if (ds == NULL) {
4331 NFSDDSUNLOCK();
4332 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
4333 return;
4334 }
4335 i = dsdir[0] = ds->nfsdev_nextdir;
4336 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
4337 dvp[0] = ds->nfsdev_dsdir[i];
4338 tds = TAILQ_NEXT(ds, nfsdev_list);
4339 if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
4340 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
4341 if (tds->nfsdev_nmp != NULL &&
4342 ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
4343 (tds->nfsdev_mdsisset != 0 && fds != NULL &&
4344 fsidcmp(&mp->mnt_stat.f_fsid,
4345 &tds->nfsdev_mdsfsid) == 0))) {
4346 dsdir[mirrorcnt] = i;
4347 dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
4348 mirrorcnt++;
4349 if (mirrorcnt >= nfsrv_maxpnfsmirror)
4350 break;
4351 }
4352 }
4353 }
4354 /* Put at end of list to implement round-robin usage. */
4355 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
4356 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
4357 NFSDDSUNLOCK();
4358 dsc = NULL;
4359 if (mirrorcnt > 1)
4360 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
4361 M_WAITOK | M_ZERO);
4362 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
4363 M_ZERO);
4364
4365 error = nfsvno_getfh(vp, &fh, p);
4366 if (error == 0)
4367 error = VOP_GETATTR(vp, &va, cred);
4368 if (error == 0) {
4369 /* Set the attributes for "vp" to Setattr the DS vp. */
4370 vauid = va.va_uid;
4371 vagid = va.va_gid;
4372 vamode = va.va_mode;
4373 VATTR_NULL(&va);
4374 va.va_uid = vauid;
4375 va.va_gid = vagid;
4376 va.va_mode = vamode;
4377 va.va_size = 0;
4378 } else
4379 printf("pNFS: pnfscreate getfh+attr=%d\n", error);
4380
4381 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
4382 cred->cr_gid);
4383 /* Make data file name based on FH. */
4384 tcred = newnfs_getcred();
4385
4386 /*
4387 * Create the file on each DS mirror, using kernel process(es) for the
4388 * additional mirrors.
4389 */
4390 failpos = -1;
4391 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
4392 tpf->dsf_dir = dsdir[i];
4393 tdsc->tcred = tcred;
4394 tdsc->p = p;
4395 tdsc->pf = tpf;
4396 tdsc->createva = *vap;
4397 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
4398 tdsc->va = va;
4399 tdsc->dvp = dvp[i];
4400 tdsc->done = 0;
4401 tdsc->inprog = 0;
4402 tdsc->err = 0;
4403 ret = EIO;
4404 if (nfs_pnfsiothreads != 0) {
4405 ret = nfs_pnfsio(start_dscreate, tdsc);
4406 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
4407 }
4408 if (ret != 0) {
4409 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
4410 NULL, tcred, p, NULL);
4411 if (ret != 0) {
4412 KASSERT(error == 0, ("nfsrv_dscreate err=%d",
4413 error));
4414 if (failpos == -1 && nfsds_failerr(ret))
4415 failpos = i;
4416 else
4417 error = ret;
4418 }
4419 }
4420 }
4421 if (error == 0) {
4422 tpf->dsf_dir = dsdir[mirrorcnt - 1];
4423 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
4424 &dsattr, NULL, tcred, p, NULL);
4425 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
4426 failpos = mirrorcnt - 1;
4427 error = 0;
4428 }
4429 }
4430 timo = hz / 50; /* Wait for 20msec. */
4431 if (timo < 1)
4432 timo = 1;
4433 /* Wait for kernel task(s) to complete. */
4434 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
4435 while (tdsc->inprog != 0 && tdsc->done == 0)
4436 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
4437 if (tdsc->err != 0) {
4438 if (failpos == -1 && nfsds_failerr(tdsc->err))
4439 failpos = i;
4440 else if (error == 0)
4441 error = tdsc->err;
4442 }
4443 }
4444
4445 /*
4446 * If failpos has been set, that mirror has failed, so it needs
4447 * to be disabled.
4448 */
4449 if (failpos >= 0) {
4450 nmp = VFSTONFS(dvp[failpos]->v_mount);
4451 NFSLOCKMNT(nmp);
4452 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
4453 NFSMNTP_CANCELRPCS)) == 0) {
4454 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
4455 NFSUNLOCKMNT(nmp);
4456 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
4457 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
4458 ds);
4459 if (ds != NULL)
4460 nfsrv_killrpcs(nmp);
4461 NFSLOCKMNT(nmp);
4462 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
4463 wakeup(nmp);
4464 }
4465 NFSUNLOCKMNT(nmp);
4466 }
4467
4468 NFSFREECRED(tcred);
4469 if (error == 0) {
4470 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
4471
4472 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
4473 mirrorcnt, nfsrv_maxpnfsmirror);
4474 /*
4475 * For all mirrors that couldn't be created, fill in the
4476 * *pf structure, but with an IP address == 0.0.0.0.
4477 */
4478 tpf = pf + mirrorcnt;
4479 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
4480 *tpf = *pf;
4481 tpf->dsf_sin.sin_family = AF_INET;
4482 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
4483 tpf->dsf_sin.sin_addr.s_addr = 0;
4484 tpf->dsf_sin.sin_port = 0;
4485 }
4486
4487 error = vn_extattr_set(vp, IO_NODELOCKED,
4488 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
4489 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
4490 if (error == 0)
4491 error = vn_extattr_set(vp, IO_NODELOCKED,
4492 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
4493 sizeof(dsattr), (char *)&dsattr, p);
4494 if (error != 0)
4495 printf("pNFS: pnfscreate setextattr=%d\n",
4496 error);
4497 } else
4498 printf("pNFS: pnfscreate=%d\n", error);
4499 free(pf, M_TEMP);
4500 free(dsc, M_TEMP);
4501 }
4502
4503 /*
4504 * Get the information needed to remove the pNFS Data Server file from the
4505 * Metadata file. Upon success, ddvp is set non-NULL to the locked
4506 * DS directory vnode. The caller must unlock *ddvp when done with it.
4507 */
4508 static void
nfsrv_pnfsremovesetup(struct vnode * vp,NFSPROC_T * p,struct vnode ** dvpp,int * mirrorcntp,char * fname,fhandle_t * fhp)4509 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
4510 int *mirrorcntp, char *fname, fhandle_t *fhp)
4511 {
4512 struct vattr va;
4513 struct ucred *tcred;
4514 char *buf;
4515 int buflen, error;
4516
4517 dvpp[0] = NULL;
4518 /* If not an exported regular file or not a pNFS server, just return. */
4519 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
4520 nfsrv_devidcnt == 0)
4521 return;
4522
4523 /* Check to see if this is the last hard link. */
4524 tcred = newnfs_getcred();
4525 error = VOP_GETATTR(vp, &va, tcred);
4526 NFSFREECRED(tcred);
4527 if (error != 0) {
4528 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
4529 return;
4530 }
4531 if (va.va_nlink > 1)
4532 return;
4533
4534 error = nfsvno_getfh(vp, fhp, p);
4535 if (error != 0) {
4536 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
4537 return;
4538 }
4539
4540 buflen = 1024;
4541 buf = malloc(buflen, M_TEMP, M_WAITOK);
4542 /* Get the directory vnode for the DS mount and the file handle. */
4543 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
4544 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
4545 free(buf, M_TEMP);
4546 if (error != 0)
4547 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
4548 }
4549
/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
 *
 * One of these is filled in by nfsrv_pnfsremove() for every mirror except
 * the last one and is handed to start_dsremove() via nfs_pnfsio().
 */
struct nfsrvdsremove {
	int			done;		/* Set to 1 by start_dsremove() when finished. */
	int			inprog;		/* Polled by nfsrv_pnfsremove(); presumably set by nfs_pnfsio() - confirm. */
	struct task		tsk;		/* Its address is the tsleep() channel of the waiter. */
	struct ucred		*tcred;		/* Credential passed to nfsrv_dsremove(). */
	struct vnode		*dvp;		/* DS directory vnode holding the file. */
	NFSPROC_T		*p;		/* Thread argument passed to nfsrv_dsremove(). */
	int			err;		/* Return value of nfsrv_dsremove(). */
	char			fname[PNFS_FILENAME_LEN + 1];	/* Name of the DS file to remove. */
};
4565
4566 static int
nfsrv_dsremove(struct vnode * dvp,char * fname,struct ucred * tcred,NFSPROC_T * p)4567 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
4568 NFSPROC_T *p)
4569 {
4570 struct nameidata named;
4571 struct vnode *nvp;
4572 char *bufp;
4573 u_long *hashp;
4574 int error;
4575
4576 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
4577 if (error != 0)
4578 return (error);
4579 named.ni_cnd.cn_nameiop = DELETE;
4580 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
4581 named.ni_cnd.cn_cred = tcred;
4582 named.ni_cnd.cn_thread = p;
4583 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
4584 nfsvno_setpathbuf(&named, &bufp, &hashp);
4585 named.ni_cnd.cn_nameptr = bufp;
4586 named.ni_cnd.cn_namelen = strlen(fname);
4587 strlcpy(bufp, fname, NAME_MAX);
4588 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
4589 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
4590 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
4591 if (error == 0) {
4592 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
4593 vput(nvp);
4594 }
4595 NFSVOPUNLOCK(dvp);
4596 nfsvno_relpathbuf(&named);
4597 if (error != 0)
4598 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
4599 return (error);
4600 }
4601
4602 /*
4603 * Start up the thread that will execute nfsrv_dsremove().
4604 */
4605 static void
start_dsremove(void * arg,int pending)4606 start_dsremove(void *arg, int pending)
4607 {
4608 struct nfsrvdsremove *dsrm;
4609
4610 dsrm = (struct nfsrvdsremove *)arg;
4611 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
4612 dsrm->p);
4613 dsrm->done = 1;
4614 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
4615 }
4616
4617 /*
4618 * Remove a pNFS data file from a Data Server.
4619 * nfsrv_pnfsremovesetup() must have been called before the MDS file was
4620 * removed to set up the dvp and fill in the FH.
4621 */
4622 static void
nfsrv_pnfsremove(struct vnode ** dvp,int mirrorcnt,char * fname,fhandle_t * fhp,NFSPROC_T * p)4623 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
4624 NFSPROC_T *p)
4625 {
4626 struct ucred *tcred;
4627 struct nfsrvdsremove *dsrm, *tdsrm;
4628 struct nfsdevice *ds;
4629 struct nfsmount *nmp;
4630 int failpos, i, ret, timo;
4631
4632 tcred = newnfs_getcred();
4633 dsrm = NULL;
4634 if (mirrorcnt > 1)
4635 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK);
4636 /*
4637 * Remove the file on each DS mirror, using kernel process(es) for the
4638 * additional mirrors.
4639 */
4640 failpos = -1;
4641 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
4642 tdsrm->tcred = tcred;
4643 tdsrm->p = p;
4644 tdsrm->dvp = dvp[i];
4645 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
4646 tdsrm->inprog = 0;
4647 tdsrm->done = 0;
4648 tdsrm->err = 0;
4649 ret = EIO;
4650 if (nfs_pnfsiothreads != 0) {
4651 ret = nfs_pnfsio(start_dsremove, tdsrm);
4652 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
4653 }
4654 if (ret != 0) {
4655 ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
4656 if (failpos == -1 && nfsds_failerr(ret))
4657 failpos = i;
4658 }
4659 }
4660 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
4661 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
4662 failpos = mirrorcnt - 1;
4663 timo = hz / 50; /* Wait for 20msec. */
4664 if (timo < 1)
4665 timo = 1;
4666 /* Wait for kernel task(s) to complete. */
4667 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
4668 while (tdsrm->inprog != 0 && tdsrm->done == 0)
4669 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
4670 if (failpos == -1 && nfsds_failerr(tdsrm->err))
4671 failpos = i;
4672 }
4673
4674 /*
4675 * If failpos has been set, that mirror has failed, so it needs
4676 * to be disabled.
4677 */
4678 if (failpos >= 0) {
4679 nmp = VFSTONFS(dvp[failpos]->v_mount);
4680 NFSLOCKMNT(nmp);
4681 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
4682 NFSMNTP_CANCELRPCS)) == 0) {
4683 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
4684 NFSUNLOCKMNT(nmp);
4685 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
4686 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
4687 ds);
4688 if (ds != NULL)
4689 nfsrv_killrpcs(nmp);
4690 NFSLOCKMNT(nmp);
4691 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
4692 wakeup(nmp);
4693 }
4694 NFSUNLOCKMNT(nmp);
4695 }
4696
4697 /* Get rid all layouts for the file. */
4698 nfsrv_freefilelayouts(fhp);
4699
4700 NFSFREECRED(tcred);
4701 free(dsrm, M_TEMP);
4702 }
4703
4704 /*
4705 * Generate a file name based on the file handle and put it in *bufp.
4706 * Return the number of bytes generated.
4707 */
4708 static int
nfsrv_putfhname(fhandle_t * fhp,char * bufp)4709 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
4710 {
4711 int i;
4712 uint8_t *cp;
4713 const uint8_t *hexdigits = "0123456789abcdef";
4714
4715 cp = (uint8_t *)fhp;
4716 for (i = 0; i < sizeof(*fhp); i++) {
4717 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
4718 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
4719 }
4720 bufp[2 * i] = '\0';
4721 return (2 * i);
4722 }
4723
4724 /*
4725 * Update the Metadata file's attributes from the DS file when a Read/Write
4726 * layout is returned.
4727 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
4728 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
4729 */
4730 int
nfsrv_updatemdsattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)4731 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
4732 {
4733 struct ucred *tcred;
4734 int error;
4735
4736 /* Do this as root so that it won't fail with EACCES. */
4737 tcred = newnfs_getcred();
4738 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
4739 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL);
4740 NFSFREECRED(tcred);
4741 return (error);
4742 }
4743
4744 /*
4745 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
4746 */
4747 static int
nfsrv_dssetacl(struct vnode * vp,struct acl * aclp,struct ucred * cred,NFSPROC_T * p)4748 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
4749 NFSPROC_T *p)
4750 {
4751 int error;
4752
4753 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL,
4754 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL);
4755 return (error);
4756 }
4757
/*
 * Perform the operation "ioproc" for the MDS file "vp" against its DS
 * data file(s).
 * ioproc selects the operation: NFSPROC_READDS, NFSPROC_WRITEDS,
 * NFSPROC_SETATTR, NFSPROC_SETACL, NFSPROC_SEEKDS and NFSPROC_ALLOCATE are
 * handled explicitly; any other value (such as NFSPROC_GETATTR or
 * NFSPROC_LAYOUTRETURN) does a nfsrv_getattrdsrpc().
 * off, cnt      - offset and length for Read/Write; off is also the offset
 *                 for Allocate.
 * mpp, mpp2     - mbuf list arguments for the Read and Write RPCs.
 * cp            - passed to nfsrv_writedsrpc().
 * nap           - attributes for Getattr (filled in) and Setattr.
 * aclp          - the ACL for Setacl.
 * offp          - in/out offset for Seek; *offp is the length for Allocate.
 * content, eofp - arguments for Seek.
 * Returns ENOENT if vp is not an exported regular file on a pNFS server or
 * if getting the DS information fails; otherwise the error from the RPC.
 */
static int
nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
    struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
    struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
    off_t *offp, int content, bool *eofp)
{
	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
	fhandle_t fh[NFSDEV_MAXMIRRORS];
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsdevice *ds;
	struct pnfsdsattr dsattr;
	struct opnfsdsattr odsattr;
	char *buf;
	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;

	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
	/*
	 * If not a regular file, not exported or not a pNFS server,
	 * just return ENOENT.
	 */
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    nfsrv_devidcnt == 0)
		return (ENOENT);

	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	error = 0;

	/*
	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
	 * from the MetaData file's extended attribute.
	 */
	if (ioproc == NFSPROC_GETATTR) {
		error = vn_extattr_get(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
		    p);
		if (error == 0) {
			/* The length tells the old and new formats apart. */
			if (buflen == sizeof(odsattr)) {
				NFSBCOPY(buf, &odsattr, buflen);
				nap->na_filerev = odsattr.dsa_filerev;
				nap->na_size = odsattr.dsa_size;
				nap->na_atime = odsattr.dsa_atime;
				nap->na_mtime = odsattr.dsa_mtime;
				/*
				 * Fake na_bytes by rounding up na_size.
				 * Since we don't know the block size, just
				 * use BLKDEV_IOSIZE.
				 */
				nap->na_bytes = (odsattr.dsa_size +
				    BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
			} else if (buflen == sizeof(dsattr)) {
				NFSBCOPY(buf, &dsattr, buflen);
				nap->na_filerev = dsattr.dsa_filerev;
				nap->na_size = dsattr.dsa_size;
				nap->na_atime = dsattr.dsa_atime;
				nap->na_mtime = dsattr.dsa_mtime;
				nap->na_bytes = dsattr.dsa_bytes;
			} else
				error = ENXIO;
		}
		if (error == 0) {
			/*
			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
			 * returns 0, just return now.  nfsrv_checkdsattr()
			 * returns 0 if there is no Read/Write layout
			 * plus either an Open/Write_access or Write
			 * delegation issued to a client for the file.
			 */
			if (nfsrv_pnfsgetdsattr == 0 ||
			    nfsrv_checkdsattr(vp, p) == 0) {
				free(buf, M_TEMP);
				return (error);
			}
		}

		/*
		 * Clear ENOATTR so the code below will attempt to do a
		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
		 * the extended attribute.
		 */
		if (error == ENOATTR)
			error = 0;
	}

	/*
	 * origmircnt is the mirror count seen on the first pass and bounds
	 * the number of retries; trycnt counts the retries requested so far.
	 */
	origmircnt = -1;
	trycnt = 0;
tryagain:
	if (error == 0) {
		buflen = 1024;
		if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
		    LK_EXCLUSIVE)
			printf("nfsrv_proxyds: Readds vp exclusively locked\n");
		/* Get the DS directory vnodes (share locked) and the DS fhs. */
		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
		    NULL, NULL);
		if (error == 0) {
			for (i = 0; i < mirrorcnt; i++)
				nmp[i] = VFSTONFS(dvp[i]->v_mount);
		} else
			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
	} else
		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
	if (error == 0) {
		failpos = -1;
		if (origmircnt == -1)
			origmircnt = mirrorcnt;
		/*
		 * If failpos is set to a mirror#, then that mirror has
		 * failed and will be disabled. For Read, Getattr and Seek, the
		 * function only tries one mirror, so if that mirror has
		 * failed, it will need to be retried. As such, increment
		 * trycnt for these cases.
		 * For Write, Setattr and Setacl, the function tries all
		 * mirrors and will not return an error for the case where
		 * one mirror has failed. For these cases, the functioning
		 * mirror(s) will have been modified, so a retry isn't
		 * necessary. These functions will set failpos for the
		 * failed mirror#.
		 */
		if (ioproc == NFSPROC_READDS) {
			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
			    mpp, mpp2);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * read is required.
				 */
				failpos = 0;
				error = 0;
				trycnt++;
			}
		} else if (ioproc == NFSPROC_WRITEDS)
			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
		else if (ioproc == NFSPROC_SETATTR)
			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, nap, &failpos);
		else if (ioproc == NFSPROC_SETACL)
			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, aclp, &failpos);
		else if (ioproc == NFSPROC_SEEKDS) {
			error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
			    p, nmp[0]);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * seek is required.
				 */
				failpos = 0;
				error = 0;
				trycnt++;
			}
		} else if (ioproc == NFSPROC_ALLOCATE)
			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
			    &nmp[0], mirrorcnt, &failpos);
		else {
			/* Getattr is done against the last mirror. */
			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
			    vp, nmp[mirrorcnt - 1], nap);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * getattr is required.
				 */
				failpos = mirrorcnt - 1;
				error = 0;
				trycnt++;
			}
		}
		ds = NULL;
		if (failpos >= 0) {
			/*
			 * Disable the failed mirror, unless a forced dismount
			 * or another cancel of RPCs is already in progress
			 * for it.
			 */
			failnmp = nmp[failpos];
			NFSLOCKMNT(failnmp);
			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
			     NFSMNTP_CANCELRPCS)) == 0) {
				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
				NFSUNLOCKMNT(failnmp);
				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
				    failnmp, p);
				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
				    failpos, ds);
				if (ds != NULL)
					nfsrv_killrpcs(failnmp);
				NFSLOCKMNT(failnmp);
				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
				wakeup(failnmp);
			}
			NFSUNLOCKMNT(failnmp);
		}
		/* Unlock the DS directories locked by nfsrv_dsgetsockmnt(). */
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dvp[i]);
		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
		    trycnt);
		/* Try the Read/Getattr again if a mirror was deleted. */
		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
			goto tryagain;
	} else {
		/* Return ENOENT for any Extended Attribute error. */
		error = ENOENT;
	}
	free(buf, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
	return (error);
}
4964
/*
 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 * attribute.
 * vp         - the MDS file's vnode, which must be locked.
 * lktype     - lock type for the DS directory vnodes; if 0, a directory
 *		that had to be locked for a Lookup is unlocked again.
 * buf        - buffer the extended attribute is read into.
 * buflenp    - size of buf on entry; passed to vn_extattr_get().
 * mirrorcntp - set to the number of mirrors found upon success.
 * dvpp       - if non-NULL, array filled in with the DS directory vnodes.
 * fhp        - if non-NULL, array filled in with the DS file handles.
 * devid      - if non-NULL, filled in with NFSX_V4DEVICEID bytes for each
 *		mirror found.
 * fnamep     - if non-NULL, filled in with the file name from the first
 *		mirror found.
 * nvpp       - if non-NULL, set to the vnode of the first DS file that is
 *		looked up successfully.
 * newnmpp    - If it points to a non-NULL nmp, that is the destination and
 *		needs to be checked. If it points to a NULL nmp, then it
 *		returns a suitable destination.
 * curnmp     - If non-NULL, it is the source mount for the copy.
 * ippos      - if non-NULL, set to the position of the entry that matches
 *		curnmp or, when curnmp is NULL, of an entry with an IP
 *		address of 0.0.0.0; -1 if there is none.
 * dsdirp     - set to the DS directory index when *nvpp is set.
 *		NOTE(review): it is dereferenced without a NULL check, so it
 *		looks like it must be non-NULL whenever nvpp is - confirm.
 * Returns 0 or an errno.
 */
int
nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
    int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
    char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
    struct nfsmount *curnmp, int *ippos, int *dsdirp)
{
	struct vnode *dvp, *nvp = NULL, **tdvpp;
	struct mount *mp;
	struct nfsmount *nmp, *newnmp;
	struct sockaddr *sad;
	struct sockaddr_in *sin;
	struct nfsdevice *ds, *tds, *fndds;
	struct pnfsdsfile *pf;
	uint32_t dsdir;
	int error, fhiszero, fnd, gotone, i, mirrorcnt;

	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
	/* Set the output arguments to their "nothing found" values. */
	*mirrorcntp = 1;
	tdvpp = dvpp;
	if (nvpp != NULL)
		*nvpp = NULL;
	if (dvpp != NULL)
		*dvpp = NULL;
	if (ippos != NULL)
		*ippos = -1;
	if (newnmpp != NULL)
		newnmp = *newnmpp;
	else
		newnmp = NULL;
	mp = vp->v_mount;
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsfile", buflenp, buf, p);
	/* The attribute must be a whole number of pnfsdsfile structures. */
	mirrorcnt = *buflenp / sizeof(*pf);
	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
	    *buflenp != sizeof(*pf) * mirrorcnt))
		error = ENOATTR;

	pf = (struct pnfsdsfile *)buf;
	/* If curnmp != NULL, check for a match in the mirror list. */
	if (curnmp != NULL && error == 0) {
		fnd = 0;
		for (i = 0; i < mirrorcnt; i++, pf++) {
			sad = (struct sockaddr *)&pf->dsf_sin;
			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
				if (ippos != NULL)
					*ippos = i;
				fnd = 1;
				break;
			}
		}
		if (fnd == 0)
			error = ENXIO;
	}

	/* Now, process each of the entries in the attribute. */
	gotone = 0;
	pf = (struct pnfsdsfile *)buf;
	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
	    error);
	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
		fhiszero = 0;
		sad = (struct sockaddr *)&pf->dsf_sin;
		sin = &pf->dsf_sin;
		dsdir = pf->dsf_dir;
		if (dsdir >= nfsrv_dsdirsize) {
			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
			error = ENOATTR;
		} else if (nvpp != NULL && newnmp != NULL &&
		    nfsaddr2_match(sad, newnmp->nm_nam))
			/* The destination already holds a mirror. */
			error = EEXIST;
		if (error == 0) {
			if (ippos != NULL && curnmp == NULL &&
			    sad->sa_family == AF_INET &&
			    sin->sin_addr.s_addr == 0)
				*ippos = i;
			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
				fhiszero = 1;
			/* Use the socket address to find the mount point. */
			fndds = NULL;
			NFSDDSLOCK();
			/* Find a match for the IP address. */
			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
				if (ds->nfsdev_nmp != NULL) {
					dvp = ds->nfsdev_dvp;
					nmp = VFSTONFS(dvp->v_mount);
					if (nmp != ds->nfsdev_nmp)
						printf("different2 nmp %p %p\n",
						    nmp, ds->nfsdev_nmp);
					if (nfsaddr2_match(sad, nmp->nm_nam)) {
						fndds = ds;
						break;
					}
				}
			}
			if (fndds != NULL && newnmpp != NULL &&
			    newnmp == NULL) {
				/* Search for a place to make a mirror copy. */
				TAILQ_FOREACH(tds, &nfsrv_devidhead,
				    nfsdev_list) {
					if (tds->nfsdev_nmp != NULL &&
					    fndds != tds &&
					    ((tds->nfsdev_mdsisset == 0 &&
					      fndds->nfsdev_mdsisset == 0) ||
					     (tds->nfsdev_mdsisset != 0 &&
					      fndds->nfsdev_mdsisset != 0 &&
					      fsidcmp(&tds->nfsdev_mdsfsid,
					      &mp->mnt_stat.f_fsid) == 0))) {
						*newnmpp = tds->nfsdev_nmp;
						break;
					}
				}
				if (tds != NULL) {
					/*
					 * Move this entry to the end of the
					 * list, so it won't be selected as
					 * easily the next time.
					 */
					TAILQ_REMOVE(&nfsrv_devidhead, tds,
					    nfsdev_list);
					TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
					    nfsdev_list);
				}
			}
			NFSDDSUNLOCK();
			/* Entries without a matching DS are skipped. */
			if (fndds != NULL) {
				dvp = fndds->nfsdev_dsdir[dsdir];
				if (lktype != 0 || fhiszero != 0 ||
				    (nvpp != NULL && *nvpp == NULL)) {
					/*
					 * An all 0's file handle needs an
					 * exclusive lock; otherwise use the
					 * caller's lock type, with a shared
					 * lock as the default.
					 */
					if (fhiszero != 0)
						error = vn_lock(dvp,
						    LK_EXCLUSIVE);
					else if (lktype != 0)
						error = vn_lock(dvp, lktype);
					else
						error = vn_lock(dvp, LK_SHARED);
					/*
					 * If the file handle is all 0's, try to
					 * do a Lookup against the DS to acquire
					 * it.
					 * If the Lookup fails or lktype == 0,
					 * unlock dvp after the call.
					 */
					if (error == 0 && (fhiszero != 0 ||
					    (nvpp != NULL && *nvpp == NULL))) {
						error = nfsrv_pnfslookupds(vp,
						    dvp, pf, &nvp, p);
						if (error == 0) {
							if (fhiszero != 0)
								nfsrv_pnfssetfh(
								    vp, pf,
								    devid,
								    fnamep,
								    nvp, p);
							if (nvpp != NULL &&
							    *nvpp == NULL) {
								*nvpp = nvp;
								*dsdirp = dsdir;
							} else
								vput(nvp);
						}
						if (error != 0 || lktype == 0)
							NFSVOPUNLOCK(dvp);
					}
				}
				if (error == 0) {
					/* Return the info for this mirror. */
					gotone++;
					NFSD_DEBUG(4, "gotone=%d\n", gotone);
					if (devid != NULL) {
						NFSBCOPY(fndds->nfsdev_deviceid,
						    devid, NFSX_V4DEVICEID);
						devid += NFSX_V4DEVICEID;
					}
					if (dvpp != NULL)
						*tdvpp++ = dvp;
					if (fhp != NULL)
						NFSBCOPY(&pf->dsf_fh, fhp++,
						    NFSX_MYFH);
					if (fnamep != NULL && gotone == 1)
						strlcpy(fnamep,
						    pf->dsf_filename,
						    sizeof(pf->dsf_filename));
				} else
					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
					    "err=%d\n", error);
			}
		}
	}
	/* Not finding any mirror at all is an error. */
	if (error == 0 && gotone == 0)
		error = ENOENT;

	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
	    error);
	if (error == 0)
		*mirrorcntp = gotone;
	else {
		if (gotone > 0 && dvpp != NULL) {
			/*
			 * If the error didn't occur on the first one and
			 * dvpp != NULL, the one(s) prior to the failure will
			 * have locked dvp's that need to be unlocked.
			 * NOTE(review): when lktype == 0 these look like they
			 * were never left locked - confirm.
			 */
			for (i = 0; i < gotone; i++) {
				NFSVOPUNLOCK(*dvpp);
				*dvpp++ = NULL;
			}
		}
		/*
		 * If it found the vnode to be copied from before a failure,
		 * it needs to be vput()'d.
		 */
		if (nvpp != NULL && *nvpp != NULL) {
			vput(*nvpp);
			*nvpp = NULL;
		}
	}
	return (error);
}
5189
5190 /*
5191 * Set the extended attribute for the Change attribute.
5192 */
5193 static int
nfsrv_setextattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)5194 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
5195 {
5196 struct pnfsdsattr dsattr;
5197 int error;
5198
5199 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
5200 dsattr.dsa_filerev = nap->na_filerev;
5201 dsattr.dsa_size = nap->na_size;
5202 dsattr.dsa_atime = nap->na_atime;
5203 dsattr.dsa_mtime = nap->na_mtime;
5204 dsattr.dsa_bytes = nap->na_bytes;
5205 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
5206 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
5207 if (error != 0)
5208 printf("pNFS: setextattr=%d\n", error);
5209 return (error);
5210 }
5211
/*
 * Do a read RPC (NFSPROC_READDS) against a DS data file on behalf of the
 * MDS.
 *   fhp    - file handle of the DS data file
 *   off    - byte offset to read from
 *   len    - number of bytes requested
 *   nmp    - mount point of the DS the request is sent to
 *   mpp    - set to the head of an mbuf chain holding only the read data
 *            (plus XDR padding), or left NULL if no data was returned
 *   mpendp - set to the last mbuf of that chain; only written when *mpp is
 *            set to a non-NULL chain
 * Returns 0 on success (including a zero length read), the error from
 * newnfs_request(), the reply status from the DS, or ENOENT if the reply
 * mbuf chain is not shaped as expected.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	/* Default to "no data"; only replaced once a chain is handed back. */
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* Read arguments: 64bit offset followed by the 32bit byte count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		/* No reply to release in this case, just the descriptor. */
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		/*
		 * NOTE(review): the first word is parsed and ignored
		 * (presumably the eof flag) and NFSM_STRSIZ() presumably
		 * parses the data length into retlen, bounded by len.
		 * Both macros appear to set "error" and jump to nfsmout
		 * on failure - confirm against their definitions.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					/*
					 * Detach and free every mbuf in
					 * front of the one holding the
					 * current parse position.
					 */
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				/* nd_md was not found on the reply chain. */
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					/*
					 * The data ends in this mbuf; cut it
					 * here and free whatever follows.
					 */
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			if (tlen > 0) {
				/* The chain was shorter than the data length. */
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/*
			 * Hand the trimmed chain to the caller and clear
			 * nd_mrep so that it is not freed below.
			 */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}
5313
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 * One of these is filled in by nfsrv_writedsrpc() for each mirror other
 * than the last one and is consumed by start_writedsdorpc().
 */
struct nfsrvwritedsdorpc {
	int			done;	/* Set to 1 by start_writedsdorpc()
					   once the RPC has completed. */
	int			inprog;	/* Cleared before the hand off; the
					   requester only waits while this is
					   non-zero.  NOTE(review): presumably
					   set by nfs_pnfsio() - confirm. */
	struct task		tsk;	/* Its address is the tsleep() wait
					   channel used by the requester. */
	fhandle_t		fh;	/* Copy of the DS file handle. */
	off_t			off;	/* Byte offset of the write. */
	int			len;	/* Byte count of the write. */
	struct nfsmount		*nmp;	/* Mount point of the DS. */
	struct ucred		*cred;	/* Credentials for the RPC. */
	NFSPROC_T		*p;	/* Requesting thread. */
	struct mbuf		*m;	/* Private copy of the write data. */
	int			err;	/* Result of nfsrv_writedsdorpc(). */
};
5331
/*
 * Do a Write RPC followed by a Getattr against a single DS data file.
 *   nmp  - mount point of the DS
 *   fhp  - file handle of the DS data file
 *   off  - byte offset of the write
 *   len  - number of bytes of data
 *   nap  - where the attributes in the reply are loaded; callers in this
 *          file pass NULL when the attributes are not wanted
 *          (NOTE(review): relies on nfsv4_loadattr() accepting a NULL
 *          nap - confirm)
 *   m    - mbuf chain holding the data; it is linked onto the request, so
 *          the caller must not use it after this call
 * Returns 0 on success, the error from newnfs_request(), a reply parsing
 * error, the reply status from the DS, or NFSERR_IO if the DS did not
 * commit the data FileSync.
 */
static int
nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
    struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int commit, error, retlen;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* Write arguments: 64bit offset, stable-how and the byte count. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	txdr_hyper(off, tl);
	tl += 2;
	/*
	 * Do all writes FileSync, since the server doesn't hold onto dirty
	 * buffers.  Since clients should be accessing the DS servers directly
	 * using the pNFS layouts, this just needs to work correctly as a
	 * fallback.
	 */
	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
	*tl = txdr_unsigned(len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);

	/* Put data in mbuf chain. */
	nd->nd_mb->m_next = m;

	/* Set nd_mb and nd_bpos to end of data. */
	while (m->m_next != NULL)
		m = m->m_next;
	nd->nd_mb = m;
	nfsm_set(nd, m->m_len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);

	/* Do a Getattr for the attributes that change upon writing. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		/* No reply to release in this case, just the descriptor. */
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		/* These attributes are parsed into nap as well. */
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 * A non-zero status means nothing more can be parsed.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		/* Write results: count, committed and the write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		retlen = fxdr_unsigned(int, *tl++);
		commit = fxdr_unsigned(int, *tl);
		/* FileSync was requested, so anything less is a failure. */
		if (commit != NFSWRITE_FILESYNC)
			error = NFSERR_IO;
		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
		    retlen, commit, error);
	} else
		error = nd->nd_repstat;
	/* We have no use for the Write Verifier since we use FileSync. */

	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		/* Skip the two words in front of the Getattr results. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	/*
	 * NOTE(review): NFSM_DISSECT() appears to set "error" and jump here
	 * when the reply is too short - confirm against its definition.
	 */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
	return (error);
}
5441
5442 /*
5443 * Start up the thread that will execute nfsrv_writedsdorpc().
5444 */
5445 static void
start_writedsdorpc(void * arg,int pending)5446 start_writedsdorpc(void *arg, int pending)
5447 {
5448 struct nfsrvwritedsdorpc *drpc;
5449
5450 drpc = (struct nfsrvwritedsdorpc *)arg;
5451 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
5452 drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
5453 drpc->done = 1;
5454 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
5455 }
5456
5457 static int
nfsrv_writedsrpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct mbuf ** mpp,char * cp,int * failposp)5458 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
5459 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5460 struct mbuf **mpp, char *cp, int *failposp)
5461 {
5462 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL;
5463 struct nfsvattr na;
5464 struct mbuf *m;
5465 int error, i, offs, ret, timo;
5466
5467 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
5468 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
5469 drpc = NULL;
5470 if (mirrorcnt > 1)
5471 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5472 M_WAITOK);
5473
5474 /* Calculate offset in mbuf chain that data starts. */
5475 offs = cp - mtod(*mpp, char *);
5476 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len);
5477
5478 /*
5479 * Do the write RPC for every DS, using a separate kernel process
5480 * for every DS except the last one.
5481 */
5482 error = 0;
5483 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5484 tdrpc->done = 0;
5485 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5486 tdrpc->off = off;
5487 tdrpc->len = len;
5488 tdrpc->nmp = *nmpp;
5489 tdrpc->cred = cred;
5490 tdrpc->p = p;
5491 tdrpc->inprog = 0;
5492 tdrpc->err = 0;
5493 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
5494 ret = EIO;
5495 if (nfs_pnfsiothreads != 0) {
5496 ret = nfs_pnfsio(start_writedsdorpc, tdrpc);
5497 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n",
5498 ret);
5499 }
5500 if (ret != 0) {
5501 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL,
5502 tdrpc->m, cred, p);
5503 if (nfsds_failerr(ret) && *failposp == -1)
5504 *failposp = i;
5505 else if (error == 0 && ret != 0)
5506 error = ret;
5507 }
5508 nmpp++;
5509 fhp++;
5510 }
5511 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
5512 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p);
5513 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
5514 *failposp = mirrorcnt - 1;
5515 else if (error == 0 && ret != 0)
5516 error = ret;
5517 if (error == 0)
5518 error = nfsrv_setextattr(vp, &na, p);
5519 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
5520 tdrpc = drpc;
5521 timo = hz / 50; /* Wait for 20msec. */
5522 if (timo < 1)
5523 timo = 1;
5524 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5525 /* Wait for RPCs on separate threads to complete. */
5526 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5527 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
5528 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
5529 *failposp = i;
5530 else if (error == 0 && tdrpc->err != 0)
5531 error = tdrpc->err;
5532 }
5533 free(drpc, M_TEMP);
5534 return (error);
5535 }
5536
/*
 * Do an allocate RPC on a DS data file, using this structure for the
 * arguments, so that this function can be executed by a separate kernel
 * process.
 * One of these is filled in by nfsrv_allocatedsrpc() for each mirror other
 * than the last one and is consumed by start_allocatedsdorpc().
 */
struct nfsrvallocatedsdorpc {
	int			done;	/* Set to 1 by start_allocatedsdorpc()
					   once the RPC has completed. */
	int			inprog;	/* Cleared before the hand off; the
					   requester only waits while this is
					   non-zero.  NOTE(review): presumably
					   set by nfs_pnfsio() - confirm. */
	struct task		tsk;	/* Its address is the tsleep() wait
					   channel used by the requester. */
	fhandle_t		fh;	/* Copy of the DS file handle. */
	off_t			off;	/* Byte offset of the allocation. */
	off_t			len;	/* Byte length of the allocation. */
	struct nfsmount		*nmp;	/* Mount point of the DS. */
	struct ucred		*cred;	/* Credentials for the RPC. */
	NFSPROC_T		*p;	/* Requesting thread. */
	int			err;	/* Result of nfsrv_allocatedsdorpc(). */
};
5553
5554 static int
nfsrv_allocatedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,off_t len,struct nfsvattr * nap,struct ucred * cred,NFSPROC_T * p)5555 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
5556 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
5557 {
5558 uint32_t *tl;
5559 struct nfsrv_descript *nd;
5560 nfsattrbit_t attrbits;
5561 nfsv4stateid_t st;
5562 int error;
5563
5564 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
5565 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp,
5566 sizeof(fhandle_t), NULL, NULL, 0, 0);
5567
5568 /*
5569 * Use a stateid where other is an alternating 01010 pattern and
5570 * seqid is 0xffffffff. This value is not defined as special by
5571 * the RFC and is used by the FreeBSD NFS server to indicate an
5572 * MDS->DS proxy operation.
5573 */
5574 st.other[0] = 0x55555555;
5575 st.other[1] = 0x55555555;
5576 st.other[2] = 0x55555555;
5577 st.seqid = 0xffffffff;
5578 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
5579 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
5580 txdr_hyper(off, tl); tl += 2;
5581 txdr_hyper(len, tl); tl += 2;
5582 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len);
5583
5584 *tl = txdr_unsigned(NFSV4OP_GETATTR);
5585 NFSGETATTR_ATTRBIT(&attrbits);
5586 nfsrv_putattrbit(nd, &attrbits);
5587 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
5588 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5589 if (error != 0) {
5590 free(nd, M_TEMP);
5591 return (error);
5592 }
5593 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n",
5594 nd->nd_repstat);
5595 if (nd->nd_repstat == 0) {
5596 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5597 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
5598 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
5599 } else
5600 error = nd->nd_repstat;
5601 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
5602 nfsmout:
5603 m_freem(nd->nd_mrep);
5604 free(nd, M_TEMP);
5605 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error);
5606 return (error);
5607 }
5608
5609 /*
5610 * Start up the thread that will execute nfsrv_allocatedsdorpc().
5611 */
5612 static void
start_allocatedsdorpc(void * arg,int pending)5613 start_allocatedsdorpc(void *arg, int pending)
5614 {
5615 struct nfsrvallocatedsdorpc *drpc;
5616
5617 drpc = (struct nfsrvallocatedsdorpc *)arg;
5618 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
5619 drpc->len, NULL, drpc->cred, drpc->p);
5620 drpc->done = 1;
5621 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err);
5622 }
5623
5624 static int
nfsrv_allocatedsrpc(fhandle_t * fhp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,int * failposp)5625 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
5626 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5627 int *failposp)
5628 {
5629 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL;
5630 struct nfsvattr na;
5631 int error, i, ret, timo;
5632
5633 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n");
5634 drpc = NULL;
5635 if (mirrorcnt > 1)
5636 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5637 M_WAITOK);
5638
5639 /*
5640 * Do the allocate RPC for every DS, using a separate kernel process
5641 * for every DS except the last one.
5642 */
5643 error = 0;
5644 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5645 tdrpc->done = 0;
5646 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5647 tdrpc->off = off;
5648 tdrpc->len = len;
5649 tdrpc->nmp = *nmpp;
5650 tdrpc->cred = cred;
5651 tdrpc->p = p;
5652 tdrpc->inprog = 0;
5653 tdrpc->err = 0;
5654 ret = EIO;
5655 if (nfs_pnfsiothreads != 0) {
5656 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc);
5657 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n",
5658 ret);
5659 }
5660 if (ret != 0) {
5661 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL,
5662 cred, p);
5663 if (nfsds_failerr(ret) && *failposp == -1)
5664 *failposp = i;
5665 else if (error == 0 && ret != 0)
5666 error = ret;
5667 }
5668 nmpp++;
5669 fhp++;
5670 }
5671 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
5672 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
5673 *failposp = mirrorcnt - 1;
5674 else if (error == 0 && ret != 0)
5675 error = ret;
5676 if (error == 0)
5677 error = nfsrv_setextattr(vp, &na, p);
5678 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error);
5679 tdrpc = drpc;
5680 timo = hz / 50; /* Wait for 20msec. */
5681 if (timo < 1)
5682 timo = 1;
5683 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5684 /* Wait for RPCs on separate threads to complete. */
5685 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5686 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
5687 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
5688 *failposp = i;
5689 else if (error == 0 && tdrpc->err != 0)
5690 error = tdrpc->err;
5691 }
5692 free(drpc, M_TEMP);
5693 return (error);
5694 }
5695
/*
 * Do a Setattr RPC followed by a Getattr against a single DS data file.
 *   fhp   - file handle of the DS data file
 *   vp    - vnode passed through to nfscl_fillsattr()
 *   nmp   - mount point of the DS
 *   nap   - the attributes to be set (nap->na_vattr is what gets encoded)
 *   dsnap - where the attributes in the reply are loaded
 * Returns 0 on success, the error from newnfs_request(), a reply parsing
 * error or the reply status from the DS.
 */
static int
nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
    struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
    struct nfsvattr *dsnap)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	nfsattrbit_t attrbits;
	int error;

	NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);

	/* Do a Getattr for the attributes that change due to writing. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		/* No reply to release in this case, just the descriptor. */
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
	    nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		/* These attributes are parsed into dsnap as well. */
		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 * A non-zero status means nothing more can be parsed.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	/*
	 * Parse the attribute bitmap in the Setattr reply.  The result
	 * lands in attrbits, which is not looked at afterwards.
	 */
	error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
	if (error != 0)
		goto nfsmout;
	if (nd->nd_repstat != 0)
		error = nd->nd_repstat;
	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		/* Skip the two words in front of the Getattr results. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
nfsmout:
	/*
	 * NOTE(review): NFSM_DISSECT() appears to set "error" and jump here
	 * when the reply is too short - confirm against its definition.
	 */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
	return (error);
}
5779
/*
 * Arguments for a Setattr RPC on a DS data file, so that it can be executed
 * by a separate kernel process.
 * One of these is filled in by nfsrv_setattrdsrpc() for each mirror other
 * than the last one and is consumed by start_setattrdsdorpc().
 */
struct nfsrvsetattrdsdorpc {
	int			done;	/* Set to 1 by start_setattrdsdorpc()
					   once the RPC has completed. */
	int			inprog;	/* Cleared before the hand off; the
					   requester only waits while this is
					   non-zero.  NOTE(review): presumably
					   set by nfs_pnfsio() - confirm. */
	struct task		tsk;	/* Its address is the tsleep() wait
					   channel used by the requester. */
	fhandle_t		fh;	/* Copy of the DS file handle. */
	struct nfsmount		*nmp;	/* Mount point of the DS. */
	struct vnode		*vp;	/* Vnode given by the requester. */
	struct ucred		*cred;	/* Credentials for the RPC. */
	NFSPROC_T		*p;	/* Requesting thread. */
	struct nfsvattr		na;	/* Copy of the attributes to set. */
	struct nfsvattr		dsna;	/* Attributes returned by the DS; not
					   read back by nfsrv_setattrdsrpc(). */
	int			err;	/* Result of nfsrv_setattrdsdorpc(). */
};
5793
5794 /*
5795 * Start up the thread that will execute nfsrv_setattrdsdorpc().
5796 */
5797 static void
start_setattrdsdorpc(void * arg,int pending)5798 start_setattrdsdorpc(void *arg, int pending)
5799 {
5800 struct nfsrvsetattrdsdorpc *drpc;
5801
5802 drpc = (struct nfsrvsetattrdsdorpc *)arg;
5803 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p,
5804 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna);
5805 drpc->done = 1;
5806 }
5807
5808 static int
nfsrv_setattrdsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct nfsvattr * nap,int * failposp)5809 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
5810 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5811 struct nfsvattr *nap, int *failposp)
5812 {
5813 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL;
5814 struct nfsvattr na;
5815 int error, i, ret, timo;
5816
5817 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
5818 drpc = NULL;
5819 if (mirrorcnt > 1)
5820 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5821 M_WAITOK);
5822
5823 /*
5824 * Do the setattr RPC for every DS, using a separate kernel process
5825 * for every DS except the last one.
5826 */
5827 error = 0;
5828 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5829 tdrpc->done = 0;
5830 tdrpc->inprog = 0;
5831 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5832 tdrpc->nmp = *nmpp;
5833 tdrpc->vp = vp;
5834 tdrpc->cred = cred;
5835 tdrpc->p = p;
5836 tdrpc->na = *nap;
5837 tdrpc->err = 0;
5838 ret = EIO;
5839 if (nfs_pnfsiothreads != 0) {
5840 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
5841 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n",
5842 ret);
5843 }
5844 if (ret != 0) {
5845 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap,
5846 &na);
5847 if (nfsds_failerr(ret) && *failposp == -1)
5848 *failposp = i;
5849 else if (error == 0 && ret != 0)
5850 error = ret;
5851 }
5852 nmpp++;
5853 fhp++;
5854 }
5855 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na);
5856 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
5857 *failposp = mirrorcnt - 1;
5858 else if (error == 0 && ret != 0)
5859 error = ret;
5860 if (error == 0)
5861 error = nfsrv_setextattr(vp, &na, p);
5862 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error);
5863 tdrpc = drpc;
5864 timo = hz / 50; /* Wait for 20msec. */
5865 if (timo < 1)
5866 timo = 1;
5867 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5868 /* Wait for RPCs on separate threads to complete. */
5869 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5870 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo);
5871 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
5872 *failposp = i;
5873 else if (error == 0 && tdrpc->err != 0)
5874 error = tdrpc->err;
5875 }
5876 free(drpc, M_TEMP);
5877 return (error);
5878 }
5879
5880 /*
5881 * Do a Setattr of an NFSv4 ACL on the DS file.
5882 */
5883 static int
nfsrv_setacldsdorpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct acl * aclp)5884 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
5885 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
5886 {
5887 struct nfsrv_descript *nd;
5888 nfsv4stateid_t st;
5889 nfsattrbit_t attrbits;
5890 int error;
5891
5892 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
5893 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
5894 /*
5895 * Use a stateid where other is an alternating 01010 pattern and
5896 * seqid is 0xffffffff. This value is not defined as special by
5897 * the RFC and is used by the FreeBSD NFS server to indicate an
5898 * MDS->DS proxy operation.
5899 */
5900 st.other[0] = 0x55555555;
5901 st.other[1] = 0x55555555;
5902 st.other[2] = 0x55555555;
5903 st.seqid = 0xffffffff;
5904 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
5905 NULL, NULL, 0, 0);
5906 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
5907 NFSZERO_ATTRBIT(&attrbits);
5908 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5909 /*
5910 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
5911 * so passing in the metadata "vp" will be ok, since it is of
5912 * the same type (VREG).
5913 */
5914 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
5915 NULL, 0, 0, 0, 0, 0, NULL);
5916 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5917 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5918 if (error != 0) {
5919 free(nd, M_TEMP);
5920 return (error);
5921 }
5922 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
5923 nd->nd_repstat);
5924 error = nd->nd_repstat;
5925 m_freem(nd->nd_mrep);
5926 free(nd, M_TEMP);
5927 return (error);
5928 }
5929
/*
 * Arguments for a Setattr of an ACL on a DS data file, so that it can be
 * executed by a separate kernel process.
 * One of these is filled in by nfsrv_setacldsrpc() for each mirror other
 * than the last one and is consumed by start_setacldsdorpc().
 */
struct nfsrvsetacldsdorpc {
	int			done;	/* Set to 1 by start_setacldsdorpc()
					   once the RPC has completed. */
	int			inprog;	/* Cleared before the hand off; the
					   requester only waits while this is
					   non-zero.  NOTE(review): presumably
					   set by nfs_pnfsio() - confirm. */
	struct task		tsk;	/* Its address is the tsleep() wait
					   channel used by the requester. */
	fhandle_t		fh;	/* Copy of the DS file handle. */
	struct nfsmount		*nmp;	/* Mount point of the DS. */
	struct vnode		*vp;	/* Vnode given by the requester. */
	struct ucred		*cred;	/* Credentials for the RPC. */
	NFSPROC_T		*p;	/* Requesting thread. */
	struct acl		*aclp;	/* The ACL to set (not copied). */
	int			err;	/* Result of nfsrv_setacldsdorpc(). */
};
5942
5943 /*
5944 * Start up the thread that will execute nfsrv_setacldsdorpc().
5945 */
5946 static void
start_setacldsdorpc(void * arg,int pending)5947 start_setacldsdorpc(void *arg, int pending)
5948 {
5949 struct nfsrvsetacldsdorpc *drpc;
5950
5951 drpc = (struct nfsrvsetacldsdorpc *)arg;
5952 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
5953 drpc->vp, drpc->nmp, drpc->aclp);
5954 drpc->done = 1;
5955 }
5956
5957 static int
nfsrv_setacldsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct acl * aclp,int * failposp)5958 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
5959 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
5960 int *failposp)
5961 {
5962 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL;
5963 int error, i, ret, timo;
5964
5965 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
5966 drpc = NULL;
5967 if (mirrorcnt > 1)
5968 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5969 M_WAITOK);
5970
5971 /*
5972 * Do the setattr RPC for every DS, using a separate kernel process
5973 * for every DS except the last one.
5974 */
5975 error = 0;
5976 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5977 tdrpc->done = 0;
5978 tdrpc->inprog = 0;
5979 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5980 tdrpc->nmp = *nmpp;
5981 tdrpc->vp = vp;
5982 tdrpc->cred = cred;
5983 tdrpc->p = p;
5984 tdrpc->aclp = aclp;
5985 tdrpc->err = 0;
5986 ret = EIO;
5987 if (nfs_pnfsiothreads != 0) {
5988 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
5989 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
5990 ret);
5991 }
5992 if (ret != 0) {
5993 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
5994 aclp);
5995 if (nfsds_failerr(ret) && *failposp == -1)
5996 *failposp = i;
5997 else if (error == 0 && ret != 0)
5998 error = ret;
5999 }
6000 nmpp++;
6001 fhp++;
6002 }
6003 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
6004 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
6005 *failposp = mirrorcnt - 1;
6006 else if (error == 0 && ret != 0)
6007 error = ret;
6008 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
6009 tdrpc = drpc;
6010 timo = hz / 50; /* Wait for 20msec. */
6011 if (timo < 1)
6012 timo = 1;
6013 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
6014 /* Wait for RPCs on separate threads to complete. */
6015 while (tdrpc->inprog != 0 && tdrpc->done == 0)
6016 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
6017 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
6018 *failposp = i;
6019 else if (error == 0 && tdrpc->err != 0)
6020 error = tdrpc->err;
6021 }
6022 free(drpc, M_TEMP);
6023 return (error);
6024 }
6025
6026 /*
6027 * Getattr call to the DS for the attributes that change due to writing.
6028 */
6029 static int
nfsrv_getattrdsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct nfsvattr * nap)6030 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
6031 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap)
6032 {
6033 struct nfsrv_descript *nd;
6034 int error;
6035 nfsattrbit_t attrbits;
6036
6037 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
6038 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6039 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
6040 sizeof(fhandle_t), NULL, NULL, 0, 0);
6041 NFSZERO_ATTRBIT(&attrbits);
6042 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
6043 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
6044 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
6045 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
6046 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
6047 (void) nfsrv_putattrbit(nd, &attrbits);
6048 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6049 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6050 if (error != 0) {
6051 free(nd, M_TEMP);
6052 return (error);
6053 }
6054 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n",
6055 nd->nd_repstat);
6056 if (nd->nd_repstat == 0) {
6057 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
6058 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
6059 NULL, NULL);
6060 /*
6061 * We can only save the updated values in the extended
6062 * attribute if the vp is exclusively locked.
6063 * This should happen when any of the following operations
6064 * occur on the vnode:
6065 * Close, Delegreturn, LayoutCommit, LayoutReturn
6066 * As such, the updated extended attribute should get saved
6067 * before nfsrv_checkdsattr() returns 0 and allows the cached
6068 * attributes to be returned without calling this function.
6069 */
6070 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
6071 error = nfsrv_setextattr(vp, nap, p);
6072 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
6073 error);
6074 }
6075 } else
6076 error = nd->nd_repstat;
6077 m_freem(nd->nd_mrep);
6078 free(nd, M_TEMP);
6079 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error);
6080 return (error);
6081 }
6082
6083 /*
6084 * Seek call to a DS.
6085 */
6086 static int
nfsrv_seekdsrpc(fhandle_t * fhp,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp)6087 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp,
6088 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp)
6089 {
6090 uint32_t *tl;
6091 struct nfsrv_descript *nd;
6092 nfsv4stateid_t st;
6093 int error;
6094
6095 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n");
6096 /*
6097 * Use a stateid where other is an alternating 01010 pattern and
6098 * seqid is 0xffffffff. This value is not defined as special by
6099 * the RFC and is used by the FreeBSD NFS server to indicate an
6100 * MDS->DS proxy operation.
6101 */
6102 st.other[0] = 0x55555555;
6103 st.other[1] = 0x55555555;
6104 st.other[2] = 0x55555555;
6105 st.seqid = 0xffffffff;
6106 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6107 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp,
6108 sizeof(fhandle_t), NULL, NULL, 0, 0);
6109 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
6110 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6111 txdr_hyper(*offp, tl); tl += 2;
6112 *tl = txdr_unsigned(content);
6113 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6114 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6115 if (error != 0) {
6116 free(nd, M_TEMP);
6117 return (error);
6118 }
6119 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat);
6120 if (nd->nd_repstat == 0) {
6121 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
6122 if (*tl++ == newnfs_true)
6123 *eofp = true;
6124 else
6125 *eofp = false;
6126 *offp = fxdr_hyper(tl);
6127 } else
6128 error = nd->nd_repstat;
6129 nfsmout:
6130 m_freem(nd->nd_mrep);
6131 free(nd, M_TEMP);
6132 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error);
6133 return (error);
6134 }
6135
6136 /*
6137 * Get the device id and file handle for a DS file.
6138 */
6139 int
nfsrv_dsgetdevandfh(struct vnode * vp,NFSPROC_T * p,int * mirrorcntp,fhandle_t * fhp,char * devid)6140 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
6141 fhandle_t *fhp, char *devid)
6142 {
6143 int buflen, error;
6144 char *buf;
6145
6146 buflen = 1024;
6147 buf = malloc(buflen, M_TEMP, M_WAITOK);
6148 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL,
6149 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL);
6150 free(buf, M_TEMP);
6151 return (error);
6152 }
6153
6154 /*
6155 * Do a Lookup against the DS for the filename.
6156 */
6157 static int
nfsrv_pnfslookupds(struct vnode * vp,struct vnode * dvp,struct pnfsdsfile * pf,struct vnode ** nvpp,NFSPROC_T * p)6158 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
6159 struct vnode **nvpp, NFSPROC_T *p)
6160 {
6161 struct nameidata named;
6162 struct ucred *tcred;
6163 char *bufp;
6164 u_long *hashp;
6165 struct vnode *nvp;
6166 int error;
6167
6168 tcred = newnfs_getcred();
6169 named.ni_cnd.cn_nameiop = LOOKUP;
6170 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
6171 named.ni_cnd.cn_cred = tcred;
6172 named.ni_cnd.cn_thread = p;
6173 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
6174 nfsvno_setpathbuf(&named, &bufp, &hashp);
6175 named.ni_cnd.cn_nameptr = bufp;
6176 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
6177 strlcpy(bufp, pf->dsf_filename, NAME_MAX);
6178 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
6179 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
6180 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
6181 NFSFREECRED(tcred);
6182 nfsvno_relpathbuf(&named);
6183 if (error == 0)
6184 *nvpp = nvp;
6185 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
6186 return (error);
6187 }
6188
6189 /*
6190 * Set the file handle to the correct one.
6191 */
6192 static void
nfsrv_pnfssetfh(struct vnode * vp,struct pnfsdsfile * pf,char * devid,char * fnamep,struct vnode * nvp,NFSPROC_T * p)6193 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
6194 char *fnamep, struct vnode *nvp, NFSPROC_T *p)
6195 {
6196 struct nfsnode *np;
6197 int ret = 0;
6198
6199 np = VTONFS(nvp);
6200 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
6201 /*
6202 * We can only do a vn_set_extattr() if the vnode is exclusively
6203 * locked and vn_start_write() has been done. If devid != NULL or
6204 * fnamep != NULL or the vnode is shared locked, vn_start_write()
6205 * may not have been done.
6206 * If not done now, it will be done on a future call.
6207 */
6208 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
6209 LK_EXCLUSIVE)
6210 ret = vn_extattr_set(vp, IO_NODELOCKED,
6211 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
6212 (char *)pf, p);
6213 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
6214 }
6215
/*
 * Cause RPCs waiting on "nmp" to fail.  This is called for a DS mount point
 * when the DS has failed.
 * The NFSMNTP_CANCELRPCS flag should be set when this function is called.
 */
void
nfsrv_killrpcs(struct nfsmount *nmp)
{

	/*
	 * newnfs_nmcancelreqs() makes any RPCs in progress on the mount
	 * point fail.  As a result, a process that is waiting for an RPC
	 * to complete while holding a vnode lock on the mounted-on vnode
	 * (such as "df" or a non-forced "umount") will fail as well.
	 * That unlocks the mounted-on vnode, so that a forced dismount
	 * can succeed.
	 */
	newnfs_nmcancelreqs(nmp);
}
6239
6240 /*
6241 * Sum up the statfs info for each of the DSs, so that the client will
6242 * receive the total for all DSs.
6243 */
6244 static int
nfsrv_pnfsstatfs(struct statfs * sf,struct mount * mp)6245 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
6246 {
6247 struct statfs *tsf;
6248 struct nfsdevice *ds;
6249 struct vnode **dvpp, **tdvpp, *dvp;
6250 uint64_t tot;
6251 int cnt, error = 0, i;
6252
6253 if (nfsrv_devidcnt <= 0)
6254 return (ENXIO);
6255 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
6256 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
6257
6258 /* Get an array of the dvps for the DSs. */
6259 tdvpp = dvpp;
6260 i = 0;
6261 NFSDDSLOCK();
6262 /* First, search for matches for same file system. */
6263 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6264 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
6265 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
6266 if (++i > nfsrv_devidcnt)
6267 break;
6268 *tdvpp++ = ds->nfsdev_dvp;
6269 }
6270 }
6271 /*
6272 * If no matches for same file system, total all servers not assigned
6273 * to a file system.
6274 */
6275 if (i == 0) {
6276 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6277 if (ds->nfsdev_nmp != NULL &&
6278 ds->nfsdev_mdsisset == 0) {
6279 if (++i > nfsrv_devidcnt)
6280 break;
6281 *tdvpp++ = ds->nfsdev_dvp;
6282 }
6283 }
6284 }
6285 NFSDDSUNLOCK();
6286 cnt = i;
6287
6288 /* Do a VFS_STATFS() for each of the DSs and sum them up. */
6289 tdvpp = dvpp;
6290 for (i = 0; i < cnt && error == 0; i++) {
6291 dvp = *tdvpp++;
6292 error = VFS_STATFS(dvp->v_mount, tsf);
6293 if (error == 0) {
6294 if (sf->f_bsize == 0) {
6295 if (tsf->f_bsize > 0)
6296 sf->f_bsize = tsf->f_bsize;
6297 else
6298 sf->f_bsize = 8192;
6299 }
6300 if (tsf->f_blocks > 0) {
6301 if (sf->f_bsize != tsf->f_bsize) {
6302 tot = tsf->f_blocks * tsf->f_bsize;
6303 sf->f_blocks += (tot / sf->f_bsize);
6304 } else
6305 sf->f_blocks += tsf->f_blocks;
6306 }
6307 if (tsf->f_bfree > 0) {
6308 if (sf->f_bsize != tsf->f_bsize) {
6309 tot = tsf->f_bfree * tsf->f_bsize;
6310 sf->f_bfree += (tot / sf->f_bsize);
6311 } else
6312 sf->f_bfree += tsf->f_bfree;
6313 }
6314 if (tsf->f_bavail > 0) {
6315 if (sf->f_bsize != tsf->f_bsize) {
6316 tot = tsf->f_bavail * tsf->f_bsize;
6317 sf->f_bavail += (tot / sf->f_bsize);
6318 } else
6319 sf->f_bavail += tsf->f_bavail;
6320 }
6321 }
6322 }
6323 free(tsf, M_TEMP);
6324 free(dvpp, M_TEMP);
6325 return (error);
6326 }
6327
6328 /*
6329 * Set an NFSv4 acl.
6330 */
6331 int
nfsrv_setacl(struct vnode * vp,NFSACL_T * aclp,struct ucred * cred,NFSPROC_T * p)6332 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p)
6333 {
6334 int error;
6335
6336 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) {
6337 error = NFSERR_ATTRNOTSUPP;
6338 goto out;
6339 }
6340 /*
6341 * With NFSv4 ACLs, chmod(2) may need to add additional entries.
6342 * Make sure it has enough room for that - splitting every entry
6343 * into two and appending "canonical six" entries at the end.
6344 * Cribbed out of kern/vfs_acl.c - Rick M.
6345 */
6346 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
6347 error = NFSERR_ATTRNOTSUPP;
6348 goto out;
6349 }
6350 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
6351 if (error == 0) {
6352 error = nfsrv_dssetacl(vp, aclp, cred, p);
6353 if (error == ENOENT)
6354 error = 0;
6355 }
6356
6357 out:
6358 NFSEXITCODE(error);
6359 return (error);
6360 }
6361
6362 /*
6363 * Seek vnode op call (actually it is a VOP_IOCTL()).
6364 * This function is called with the vnode locked, but unlocks and vrele()s
6365 * the vp before returning.
6366 */
6367 int
nfsvno_seek(struct nfsrv_descript * nd,struct vnode * vp,u_long cmd,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p)6368 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd,
6369 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p)
6370 {
6371 struct nfsvattr at;
6372 int error, ret;
6373
6374 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp");
6375 /*
6376 * Attempt to seek on a DS file. A return of ENOENT implies
6377 * there is no DS file to seek on.
6378 */
6379 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL,
6380 NULL, NULL, NULL, NULL, offp, content, eofp);
6381 if (error != ENOENT) {
6382 vput(vp);
6383 return (error);
6384 }
6385
6386 /*
6387 * Do the VOP_IOCTL() call. For the case where *offp == file_size,
6388 * VOP_IOCTL() will return ENXIO. However, the correct reply for
6389 * NFSv4.2 is *eofp == true and error == 0 for this case.
6390 */
6391 NFSVOPUNLOCK(vp);
6392 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p);
6393 *eofp = false;
6394 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) {
6395 /* Handle the cases where we might be at EOF. */
6396 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
6397 if (ret == 0 && *offp == at.na_size) {
6398 *eofp = true;
6399 error = 0;
6400 }
6401 if (ret != 0 && error == 0)
6402 error = ret;
6403 }
6404 vrele(vp);
6405 NFSEXITCODE(error);
6406 return (error);
6407 }
6408
6409 /*
6410 * Allocate vnode op call.
6411 */
6412 int
nfsvno_allocate(struct vnode * vp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p)6413 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
6414 NFSPROC_T *p)
6415 {
6416 int error;
6417 off_t olen;
6418
6419 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp");
6420 /*
6421 * Attempt to allocate on a DS file. A return of ENOENT implies
6422 * there is no DS file to allocate on.
6423 */
6424 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL,
6425 NULL, NULL, NULL, NULL, &len, 0, NULL);
6426 if (error != ENOENT)
6427 return (error);
6428
6429 /*
6430 * Do the actual VOP_ALLOCATE(), looping so long as
6431 * progress is being made, to achieve completion.
6432 */
6433 do {
6434 olen = len;
6435 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred);
6436 if (error == 0 && len > 0 && olen > len)
6437 maybe_yield();
6438 } while (error == 0 && len > 0 && olen > len);
6439 if (error == 0 && len > 0)
6440 error = NFSERR_IO;
6441 NFSEXITCODE(error);
6442 return (error);
6443 }
6444
6445 /*
6446 * Get Extended Atribute vnode op into an mbuf list.
6447 */
6448 int
nfsvno_getxattr(struct vnode * vp,char * name,uint32_t maxresp,struct ucred * cred,uint64_t flag,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)6449 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
6450 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p,
6451 struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
6452 {
6453 struct iovec *iv;
6454 struct uio io, *uiop = &io;
6455 struct mbuf *m, *m2;
6456 int alen, error, len, tlen;
6457 size_t siz;
6458
6459 /* First, find out the size of the extended attribute. */
6460 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
6461 &siz, cred, p);
6462 if (error != 0)
6463 return (NFSERR_NOXATTR);
6464 if (siz > maxresp - NFS_MAXXDR)
6465 return (NFSERR_XATTR2BIG);
6466 len = siz;
6467 tlen = NFSM_RNDUP(len);
6468 if (tlen > 0) {
6469 /*
6470 * If cnt > MCLBYTES and the reply will not be saved, use
6471 * ext_pgs mbufs for TLS.
6472 * For NFSv4.0, we do not know for sure if the reply will
6473 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
6474 * Always use ext_pgs mbufs if ND_EXTPG is set.
6475 */
6476 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES &&
6477 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
6478 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
6479 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen,
6480 maxextsiz, &m, &m2, &iv);
6481 else
6482 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2,
6483 &iv);
6484 uiop->uio_iov = iv;
6485 } else {
6486 uiop->uio_iovcnt = 0;
6487 uiop->uio_iov = iv = NULL;
6488 m = m2 = NULL;
6489 }
6490 uiop->uio_offset = 0;
6491 uiop->uio_resid = tlen;
6492 uiop->uio_rw = UIO_READ;
6493 uiop->uio_segflg = UIO_SYSSPACE;
6494 uiop->uio_td = p;
6495 #ifdef MAC
6496 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6497 name);
6498 if (error != 0)
6499 goto out;
6500 #endif
6501
6502 if (tlen > 0)
6503 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
6504 NULL, cred, p);
6505 if (error != 0)
6506 goto out;
6507 if (uiop->uio_resid > 0) {
6508 alen = tlen;
6509 len = tlen - uiop->uio_resid;
6510 tlen = NFSM_RNDUP(len);
6511 if (alen != tlen)
6512 printf("nfsvno_getxattr: weird size read\n");
6513 if (tlen == 0) {
6514 m_freem(m);
6515 m = m2 = NULL;
6516 } else if (alen != tlen || tlen != len)
6517 m2 = nfsrv_adj(m, alen - tlen, tlen - len);
6518 }
6519 *lenp = len;
6520 *mpp = m;
6521 *mpendp = m2;
6522
6523 out:
6524 if (error != 0) {
6525 if (m != NULL)
6526 m_freem(m);
6527 *lenp = 0;
6528 }
6529 free(iv, M_TEMP);
6530 NFSEXITCODE(error);
6531 return (error);
6532 }
6533
6534 /*
6535 * Set Extended attribute vnode op from an mbuf list.
6536 */
6537 int
nfsvno_setxattr(struct vnode * vp,char * name,int len,struct mbuf * m,char * cp,struct ucred * cred,struct thread * p)6538 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
6539 char *cp, struct ucred *cred, struct thread *p)
6540 {
6541 struct iovec *iv;
6542 struct uio uio, *uiop = &uio;
6543 int cnt, error;
6544
6545 error = 0;
6546 #ifdef MAC
6547 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6548 name);
6549 #endif
6550 if (error != 0)
6551 goto out;
6552
6553 uiop->uio_rw = UIO_WRITE;
6554 uiop->uio_segflg = UIO_SYSSPACE;
6555 uiop->uio_td = p;
6556 uiop->uio_offset = 0;
6557 uiop->uio_resid = len;
6558 if (len > 0) {
6559 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
6560 uiop->uio_iov = iv;
6561 uiop->uio_iovcnt = cnt;
6562 } else {
6563 uiop->uio_iov = iv = NULL;
6564 uiop->uio_iovcnt = 0;
6565 }
6566 if (error == 0) {
6567 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
6568 cred, p);
6569 free(iv, M_TEMP);
6570 }
6571
6572 out:
6573 NFSEXITCODE(error);
6574 return (error);
6575 }
6576
6577 /*
6578 * Remove Extended attribute vnode op.
6579 */
6580 int
nfsvno_rmxattr(struct nfsrv_descript * nd,struct vnode * vp,char * name,struct ucred * cred,struct thread * p)6581 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name,
6582 struct ucred *cred, struct thread *p)
6583 {
6584 int error;
6585
6586 /*
6587 * Get rid of any delegations. I am not sure why this is required,
6588 * but RFC-8276 says so.
6589 */
6590 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p);
6591 if (error != 0)
6592 goto out;
6593 #ifdef MAC
6594 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6595 name);
6596 if (error != 0)
6597 goto out;
6598 #endif
6599
6600 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p);
6601 if (error == EOPNOTSUPP)
6602 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
6603 cred, p);
6604 out:
6605 NFSEXITCODE(error);
6606 return (error);
6607 }
6608
6609 /*
6610 * List Extended Atribute vnode op into an mbuf list.
6611 */
6612 int
nfsvno_listxattr(struct vnode * vp,uint64_t cookie,struct ucred * cred,struct thread * p,u_char ** bufp,uint32_t * lenp,bool * eofp)6613 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred,
6614 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp)
6615 {
6616 struct iovec iv;
6617 struct uio io;
6618 int error;
6619 size_t siz;
6620
6621 *bufp = NULL;
6622 /* First, find out the size of the extended attribute. */
6623 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred,
6624 p);
6625 if (error != 0)
6626 return (NFSERR_NOXATTR);
6627 if (siz <= cookie) {
6628 *lenp = 0;
6629 *eofp = true;
6630 goto out;
6631 }
6632 if (siz > cookie + *lenp) {
6633 siz = cookie + *lenp;
6634 *eofp = false;
6635 } else
6636 *eofp = true;
6637 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */
6638 if (siz > 10 * 1024 * 1024) {
6639 error = NFSERR_XATTR2BIG;
6640 goto out;
6641 }
6642 *bufp = malloc(siz, M_TEMP, M_WAITOK);
6643 iv.iov_base = *bufp;
6644 iv.iov_len = siz;
6645 io.uio_iovcnt = 1;
6646 io.uio_iov = &iv;
6647 io.uio_offset = 0;
6648 io.uio_resid = siz;
6649 io.uio_rw = UIO_READ;
6650 io.uio_segflg = UIO_SYSSPACE;
6651 io.uio_td = p;
6652 #ifdef MAC
6653 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER);
6654 if (error != 0)
6655 goto out;
6656 #endif
6657
6658 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred,
6659 p);
6660 if (error != 0)
6661 goto out;
6662 if (io.uio_resid > 0)
6663 siz -= io.uio_resid;
6664 *lenp = siz;
6665
6666 out:
6667 if (error != 0) {
6668 free(*bufp, M_TEMP);
6669 *bufp = NULL;
6670 }
6671 NFSEXITCODE(error);
6672 return (error);
6673 }
6674
6675 /*
6676 * Trim trailing data off the mbuf list being built.
6677 */
6678 void
nfsm_trimtrailing(struct nfsrv_descript * nd,struct mbuf * mb,char * bpos,int bextpg,int bextpgsiz)6679 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos,
6680 int bextpg, int bextpgsiz)
6681 {
6682 vm_page_t pg;
6683 int fullpgsiz, i;
6684
6685 if (mb->m_next != NULL) {
6686 m_freem(mb->m_next);
6687 mb->m_next = NULL;
6688 }
6689 if ((mb->m_flags & M_EXTPG) != 0) {
6690 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs,
6691 ("nfsm_trimtrailing: bextpg out of range"));
6692 KASSERT(bpos == (char *)(void *)
6693 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz,
6694 ("nfsm_trimtrailing: bextpgsiz bad!"));
6695
6696 /* First, get rid of any pages after this position. */
6697 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) {
6698 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]);
6699 vm_page_unwire_noq(pg);
6700 vm_page_free(pg);
6701 }
6702 mb->m_epg_npgs = bextpg + 1;
6703 if (bextpg == 0)
6704 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off;
6705 else
6706 fullpgsiz = PAGE_SIZE;
6707 mb->m_epg_last_len = fullpgsiz - bextpgsiz;
6708 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off);
6709 for (i = 1; i < mb->m_epg_npgs; i++)
6710 mb->m_len += m_epg_pagelen(mb, i, 0);
6711 nd->nd_bextpgsiz = bextpgsiz;
6712 nd->nd_bextpg = bextpg;
6713 } else
6714 mb->m_len = bpos - mtod(mb, char *);
6715 nd->nd_mb = mb;
6716 nd->nd_bpos = bpos;
6717 }
6718
6719
6720 /*
6721 * Check to see if a put file handle operation should test for
6722 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR.
6723 * When Open is the next operation, NFSERR_WRONGSEC cannot be
6724 * replied for the Open cases that use a component. Thia can
6725 * be identified by the fact that the file handle's type is VDIR.
6726 */
6727 bool
nfsrv_checkwrongsec(struct nfsrv_descript * nd,int nextop,enum vtype vtyp)6728 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, enum vtype vtyp)
6729 {
6730
6731 if ((nd->nd_flag & ND_NFSV4) == 0) {
6732 if (nd->nd_procnum == NFSPROC_FSINFO)
6733 return (false);
6734 return (true);
6735 }
6736
6737 if ((nd->nd_flag & ND_LASTOP) != 0)
6738 return (false);
6739
6740 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH ||
6741 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH ||
6742 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP ||
6743 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME)
6744 return (false);
6745 if (nextop == NFSV4OP_OPEN && vtyp == VDIR)
6746 return (false);
6747 return (true);
6748 }
6749
6750 /*
6751 * Check DSs marked no space.
6752 */
6753 void
nfsrv_checknospc(void)6754 nfsrv_checknospc(void)
6755 {
6756 struct statfs *tsf;
6757 struct nfsdevice *ds;
6758 struct vnode **dvpp, **tdvpp, *dvp;
6759 char *devid, *tdevid;
6760 int cnt, error = 0, i;
6761
6762 if (nfsrv_devidcnt <= 0)
6763 return;
6764 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
6765 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK);
6766 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
6767
6768 /* Get an array of the dvps for the DSs. */
6769 tdvpp = dvpp;
6770 tdevid = devid;
6771 i = 0;
6772 NFSDDSLOCK();
6773 /* First, search for matches for same file system. */
6774 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6775 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) {
6776 if (++i > nfsrv_devidcnt)
6777 break;
6778 *tdvpp++ = ds->nfsdev_dvp;
6779 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID);
6780 tdevid += NFSX_V4DEVICEID;
6781 }
6782 }
6783 NFSDDSUNLOCK();
6784
6785 /* Do a VFS_STATFS() for each of the DSs and clear no space. */
6786 cnt = i;
6787 tdvpp = dvpp;
6788 tdevid = devid;
6789 for (i = 0; i < cnt && error == 0; i++) {
6790 dvp = *tdvpp++;
6791 error = VFS_STATFS(dvp->v_mount, tsf);
6792 if (error == 0 && tsf->f_bavail > 0) {
6793 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n");
6794 nfsrv_marknospc(tdevid, false);
6795 }
6796 tdevid += NFSX_V4DEVICEID;
6797 }
6798 free(tsf, M_TEMP);
6799 free(dvpp, M_TEMP);
6800 free(devid, M_TEMP);
6801 }
6802
6803 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
6804
6805 /*
6806 * Called once to initialize data structures...
6807 */
6808 static int
nfsd_modevent(module_t mod,int type,void * data)6809 nfsd_modevent(module_t mod, int type, void *data)
6810 {
6811 int error = 0, i;
6812 static int loaded = 0;
6813
6814 switch (type) {
6815 case MOD_LOAD:
6816 if (loaded)
6817 goto out;
6818 newnfs_portinit();
6819 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
6820 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
6821 MTX_DEF);
6822 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
6823 MTX_DEF);
6824 }
6825 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
6826 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
6827 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
6828 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
6829 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
6830 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
6831 callout_init(&nfsd_callout, 1);
6832 nfsrvd_initcache();
6833 nfsd_init();
6834 NFSD_LOCK();
6835 nfsrvd_init(0);
6836 NFSD_UNLOCK();
6837 nfsd_mntinit();
6838 #ifdef VV_DISABLEDELEG
6839 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
6840 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
6841 #endif
6842 nfsd_call_nfsd = nfssvc_nfsd;
6843 loaded = 1;
6844 break;
6845
6846 case MOD_UNLOAD:
6847 if (newnfs_numnfsd != 0) {
6848 error = EBUSY;
6849 break;
6850 }
6851
6852 #ifdef VV_DISABLEDELEG
6853 vn_deleg_ops.vndeleg_recall = NULL;
6854 vn_deleg_ops.vndeleg_disable = NULL;
6855 #endif
6856 nfsd_call_nfsd = NULL;
6857 callout_drain(&nfsd_callout);
6858
6859 /* Clean out all NFSv4 state. */
6860 nfsrv_throwawayallstate(curthread);
6861
6862 /* Clean the NFS server reply cache */
6863 nfsrvd_cleancache();
6864
6865 /* Free up the krpc server pool. */
6866 if (nfsrvd_pool != NULL)
6867 svcpool_destroy(nfsrvd_pool);
6868
6869 /* and get rid of the locks */
6870 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
6871 mtx_destroy(&nfsrchash_table[i].mtx);
6872 mtx_destroy(&nfsrcahash_table[i].mtx);
6873 }
6874 mtx_destroy(&nfsrc_udpmtx);
6875 mtx_destroy(&nfs_v4root_mutex);
6876 mtx_destroy(&nfsv4root_mnt.mnt_mtx);
6877 mtx_destroy(&nfsrv_dontlistlock_mtx);
6878 mtx_destroy(&nfsrv_recalllock_mtx);
6879 for (i = 0; i < nfsrv_sessionhashsize; i++)
6880 mtx_destroy(&nfssessionhash[i].mtx);
6881 if (nfslayouthash != NULL) {
6882 for (i = 0; i < nfsrv_layouthashsize; i++)
6883 mtx_destroy(&nfslayouthash[i].mtx);
6884 free(nfslayouthash, M_NFSDSESSION);
6885 }
6886 lockdestroy(&nfsv4root_mnt.mnt_explock);
6887 free(nfsclienthash, M_NFSDCLIENT);
6888 free(nfslockhash, M_NFSDLOCKFILE);
6889 free(nfssessionhash, M_NFSDSESSION);
6890 loaded = 0;
6891 break;
6892 default:
6893 error = EOPNOTSUPP;
6894 break;
6895 }
6896
6897 out:
6898 NFSEXITCODE(error);
6899 return (error);
6900 }
6901 static moduledata_t nfsd_mod = {
6902 "nfsd",
6903 nfsd_modevent,
6904 NULL,
6905 };
6906 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
6907
6908 /* So that loader and kldload(2) can find us, wherever we are.. */
6909 MODULE_VERSION(nfsd, 1);
6910 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
6911 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
6912 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
6913 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
6914