afs: Add comments on abort handling
Add some comments on AFS abort code handling in the rotation algorithm and adjust the errors produced to match.

Reported-by: Jeffrey E Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
This commit is contained in:
parent
bad1a11c0f
commit
fe245c8fcd
|
@ -13,6 +13,7 @@
|
|||
#include <linux/sched/signal.h>
|
||||
#include "internal.h"
|
||||
#include "afs_fs.h"
|
||||
#include "protocol_uae.h"
|
||||
|
||||
/*
|
||||
* Begin iteration through a server list, starting with the vnode's last used
|
||||
|
@ -143,6 +144,11 @@ bool afs_select_fileserver(struct afs_operation *op)
|
|||
case -ECONNABORTED:
|
||||
/* The far side rejected the operation on some grounds. This
|
||||
* might involve the server being busy or the volume having been moved.
|
||||
*
|
||||
* Note that various V* errors should not be sent to a cache manager
|
||||
* by a fileserver as they should be translated to more modern UAE*
|
||||
* errors instead. IBM AFS and OpenAFS fileservers, however, do leak
|
||||
* these abort codes.
|
||||
*/
|
||||
switch (op->ac.abort_code) {
|
||||
case VNOVOL:
|
||||
|
@ -150,6 +156,11 @@ bool afs_select_fileserver(struct afs_operation *op)
|
|||
* - May indicate that the VL is wrong - retry once and compare
|
||||
* the results.
|
||||
* - May indicate that the fileserver couldn't attach to the vol.
|
||||
* - The volume might have been temporarily removed so that it can
|
||||
* be replaced by a volume restore. "vos" might have ended one
|
||||
* transaction and has yet to create the next.
|
||||
* - The volume might not be blessed or might not be in-service
|
||||
* (administrative action).
|
||||
*/
|
||||
if (op->flags & AFS_OPERATION_VNOVOL) {
|
||||
op->error = -EREMOTEIO;
|
||||
|
@ -183,16 +194,56 @@ bool afs_select_fileserver(struct afs_operation *op)
|
|||
_leave(" = t [vnovol]");
|
||||
return true;
|
||||
|
||||
case VSALVAGE: /* TODO: Should this return an error or iterate? */
|
||||
case VVOLEXISTS:
|
||||
case VNOSERVICE:
|
||||
case VONLINE:
|
||||
case VDISKFULL:
|
||||
case VOVERQUOTA:
|
||||
op->error = afs_abort_to_error(op->ac.abort_code);
|
||||
/* These should not be returned from the fileserver. */
|
||||
pr_warn("Fileserver returned unexpected abort %d\n",
|
||||
op->ac.abort_code);
|
||||
op->error = -EREMOTEIO;
|
||||
goto next_server;
|
||||
|
||||
case VNOSERVICE:
|
||||
/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
|
||||
* if the volume was neither in-service nor administratively
|
||||
* blessed. All usage was replaced by VNOVOL because AFS 3.1 and
|
||||
* earlier cache managers did not handle VNOSERVICE and assumed
|
||||
* it was the client OS's errno 105.
|
||||
*
|
||||
* Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
|
||||
* fileserver idle dead time error which was sent in place of
|
||||
* RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
|
||||
* fileserver took too long to send a reply to the client.
|
||||
* RX_CALL_TIMEOUT would have caused the cache manager to mark the
|
||||
* server down whereas VNOSERVICE since AFS 3.2 would cause the cache
|
||||
* manager to temporarily (up to 15 minutes) mark the volume
|
||||
* instance as unusable.
|
||||
*
|
||||
* The idle dead logic resulted in cache inconsistency since a
|
||||
* state changing call that the cache manager assumed was dead
|
||||
* could still be processed to completion by the fileserver. This
|
||||
* logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
|
||||
* returned. However, many 1.4.8 through 1.6.24 fileservers are
|
||||
* still in existence.
|
||||
*
|
||||
* AuriStorFS fileservers have never returned VNOSERVICE.
|
||||
*
|
||||
* VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
|
||||
*/
|
||||
case RX_CALL_TIMEOUT:
|
||||
op->error = -ETIMEDOUT;
|
||||
goto next_server;
|
||||
|
||||
case VSALVAGING: /* This error should not be leaked to cache managers
|
||||
* but is leaked by OpenAFS demand attach fileservers.
|
||||
* It should be treated as an alias for VOFFLINE.
|
||||
*/
|
||||
case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
|
||||
case VOFFLINE:
|
||||
/* The volume is in use by the volserver or another volume utility
|
||||
* for an operation that might alter the contents. The volume is
|
||||
* expected to come back but it might take a long time (could be
|
||||
* days).
|
||||
*/
|
||||
if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
|
||||
afs_busy(op->volume, op->ac.abort_code);
|
||||
clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
|
||||
|
@ -207,11 +258,20 @@ bool afs_select_fileserver(struct afs_operation *op)
|
|||
}
|
||||
goto busy;
|
||||
|
||||
case VSALVAGING:
|
||||
case VRESTARTING:
|
||||
case VRESTARTING: /* The fileserver is either shutting down or starting up. */
|
||||
case VBUSY:
|
||||
/* Retry after going round all the servers unless we
|
||||
* have a file lock we need to maintain.
|
||||
/* The volume is in use by the volserver or another volume
|
||||
* utility for an operation that is not expected to alter the
|
||||
* contents of the volume. VBUSY does not need to be returned
|
||||
* for a ROVOL or BACKVOL bound to an ITBusy volserver
|
||||
* transaction. The fileserver is permitted to continue serving
|
||||
* content from ROVOLs and BACKVOLs during an ITBusy transaction
|
||||
* because the content will not change. However, many fileserver
|
||||
* releases do return VBUSY for ROVOL and BACKVOL instances under
|
||||
* many circumstances.
|
||||
*
|
||||
* Retry after going round all the servers unless we have a file
|
||||
* lock we need to maintain.
|
||||
*/
|
||||
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
|
||||
op->error = -EBUSY;
|
||||
|
@ -270,10 +330,29 @@ bool afs_select_fileserver(struct afs_operation *op)
|
|||
|
||||
goto restart_from_beginning;
|
||||
|
||||
case VDISKFULL:
|
||||
case UAENOSPC:
|
||||
/* The partition is full. Only applies to RWVOLs.
|
||||
* Translate locally and return ENOSPC.
|
||||
* No replicas to failover to.
|
||||
*/
|
||||
op->error = -ENOSPC;
|
||||
goto failed_but_online;
|
||||
|
||||
case VOVERQUOTA:
|
||||
case UAEDQUOT:
|
||||
/* Volume is full. Only applies to RWVOLs.
|
||||
* Translate locally and return EDQUOT.
|
||||
* No replicas to failover to.
|
||||
*/
|
||||
op->error = -EDQUOT;
|
||||
goto failed_but_online;
|
||||
|
||||
default:
|
||||
op->error = afs_abort_to_error(op->ac.abort_code);
|
||||
failed_but_online:
|
||||
clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
|
||||
clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
|
||||
op->error = afs_abort_to_error(op->ac.abort_code);
|
||||
goto failed;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue