ceph: reset osd connections after fault

A single osd connection fault (e.g. a tcp disconnect) did not
reopen the connection, which caused all current and future
requests for that osd to hang.  Fix this by resetting the osd
connection in kick_requests() when a specific osd is being kicked.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Sage Weil 2010-02-15 12:11:51 -08:00
parent 6c5d1a49e5
commit 153a008bf7

View File

@ -369,7 +369,6 @@ static void osd_reset(struct ceph_connection *con)
return; return;
dout("osd_reset osd%d\n", osd->o_osd); dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc; osdc = osd->o_osdc;
osd->o_incarnation++;
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
kick_requests(osdc, osd); kick_requests(osdc, osd);
up_read(&osdc->map_sem); up_read(&osdc->map_sem);
@ -921,7 +920,9 @@ static void kick_requests(struct ceph_osd_client *osdc,
dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
mutex_lock(&osdc->request_mutex); mutex_lock(&osdc->request_mutex);
if (!kickosd) { if (kickosd) {
__reset_osd(osdc, kickosd);
} else {
for (p = rb_first(&osdc->osds); p; p = n) { for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd = struct ceph_osd *osd =
rb_entry(p, struct ceph_osd, o_node); rb_entry(p, struct ceph_osd, o_node);