Don't cache the socket-IO while connection setup

The file_no of the socket IO can change while connecting.
This can happen when alternative hosts are tried,
while GSS authentication
and when falling back to unencrypted in sslmode:prefer .
Therefore expire the socket IO at each connect_poll and reset_poll call.

Caching the IO previosly led to occasional errors kind of:
  Errno::EBADF: Bad file descriptor

With the recreation of an IO object per connect_poll the fileno can change in the TcpGateScheduler when running on Windows.
I didn't dig deeper why this happens, but it fails in spec
  "with a Fiber scheduler connects several times concurrently"
and sometimes in other specs.
This commit is contained in:
Lars Kanis 2022-03-09 15:18:02 +01:00
parent f270b714c6
commit c76b123bf2
5 changed files with 35 additions and 19 deletions

View File

@ -475,9 +475,7 @@ pgconn_connect_poll(VALUE self)
PostgresPollingStatusType status;
status = gvl_PQconnectPoll(pg_get_pgconn(self));
if ( status == PGRES_POLLING_FAILED ) {
pgconn_close_socket_io(self);
}
pgconn_close_socket_io(self);
return INT2FIX((int)status);
}
@ -556,9 +554,7 @@ pgconn_reset_poll(VALUE self)
PostgresPollingStatusType status;
status = gvl_PQresetPoll(pg_get_pgconn(self));
if ( status == PGRES_POLLING_FAILED ) {
pgconn_close_socket_io(self);
}
pgconn_close_socket_io(self);
return INT2FIX((int)status);
}

View File

@ -612,9 +612,6 @@ class PG::Connection
alias async_cancel cancel
private def async_connect_or_reset(poll_meth)
# Now grab a reference to the underlying socket so we know when the connection is established
socket = socket_io
# Track the progress of the connection, waiting for the socket to become readable/writable before polling it
poll_status = PG::PGRES_POLLING_WRITING
until poll_status == PG::PGRES_POLLING_OK ||
@ -623,11 +620,11 @@ class PG::Connection
# If the socket needs to read, wait 'til it becomes readable to poll again
case poll_status
when PG::PGRES_POLLING_READING
socket.wait_readable
socket_io.wait_readable
# ...and the same for when the socket needs to write
when PG::PGRES_POLLING_WRITING
socket.wait_writable
socket_io.wait_writable
end
# Check to see if it's finished or failed yet

View File

@ -27,10 +27,6 @@ conn = PG::Connection.connect_start( :dbname => 'test' ) or
abort "Connection failed: %s" % [ conn.error_message ] if
conn.status == PG::CONNECTION_BAD
# Now grab a reference to the underlying socket so we know when the
# connection is established
socket = conn.socket_io
# Track the progress of the connection, waiting for the socket to become readable/writable
# before polling it
poll_status = PG::PGRES_POLLING_WRITING
@ -41,13 +37,13 @@ until poll_status == PG::PGRES_POLLING_OK ||
case poll_status
when PG::PGRES_POLLING_READING
output_progress " waiting for socket to become readable"
select( [socket], nil, nil, TIMEOUT ) or
select( [conn.socket_io], nil, nil, TIMEOUT ) or
raise "Asynchronous connection timed out!"
# ...and the same for when the socket needs to write
when PG::PGRES_POLLING_WRITING
output_progress " waiting for socket to become writable"
select( nil, [socket], nil, TIMEOUT ) or
select( nil, [conn.socket_io], nil, TIMEOUT ) or
raise "Asynchronous connection timed out!"
end
@ -85,7 +81,7 @@ loop do
# Buffer any incoming data on the socket until a full result is ready.
conn.consume_input
while conn.is_busy
select( [socket], nil, nil, TIMEOUT ) or
select( [conn.socket_io], nil, nil, TIMEOUT ) or
raise "Timeout waiting for query response."
conn.consume_input
end

View File

@ -277,7 +277,7 @@ class TcpGateScheduler < Scheduler
# compare and store the fileno for debugging
if conn.observed_fd && conn.observed_fd != io.fileno
raise "observed fd changed: old:#{conn.observed_fd} new:#{io.fileno}"
puts "observed fd changed: old:#{conn.observed_fd} new:#{io.fileno}"
end
conn.observed_fd = io.fileno

View File

@ -378,6 +378,33 @@ describe PG::Connection do
res = @conn2.query("SELECT 4")
end
it "can work with changing IO while connection setup" do
# The file_no of the socket IO can change while connecting.
# This can happen when alternative hosts are tried,
# while GSS authentication
# and when falling back to unencrypted in sslmode:prefer
# Consume some file descriptors and free them while the connection is established.
pipes = 100.times.map{ IO.pipe }
Thread.new do
pipes.reverse_each do |ios|
ios.each(&:close)
sleep 0.01
end
end
# Connect with SSL, but use a wrong client cert, so that connection is aborted.
# A second connection is then started with a new IO.
# And since the pipes above were freed in the concurrent thread above, there is a high chance that it's a lower file descriptor than before.
conn = PG.connect( @conninfo + " sslcert=tmp_test_specs/data/ruby-pg-ca-cert" )
# The new connection should work even when the file descriptor has changed.
res = conn.exec("SELECT 1")
expect( res.values ).to eq([["1"]])
conn.close
end
it "can use conn.reset_start to restart the connection" do
ios = IO.pipe
conn = described_class.connect_start( @conninfo )