posix server: changed the method of checking and setting the server file lock (#21243)

* Changed the method of checking and setting the server file lock on Posix to avoid conditions where the server can indicate that it is running but still hasn't finished it's initialization
This commit is contained in:
Eric Katzfey 2023-03-06 06:55:57 -08:00 committed by GitHub
parent 5cade89499
commit 21c7f8ad74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 83 additions and 32 deletions

View File

@ -105,7 +105,8 @@ static int create_dirs();
static int run_startup_script(const std::string &commands_file, const std::string &absolute_binary_path, int instance); static int run_startup_script(const std::string &commands_file, const std::string &absolute_binary_path, int instance);
static std::string get_absolute_binary_path(const std::string &argv0); static std::string get_absolute_binary_path(const std::string &argv0);
static void wait_to_exit(); static void wait_to_exit();
static bool is_server_running(int instance, bool server); static int get_server_running(int instance, bool *is_running);
static int set_server_running(int instance);
static void print_usage(); static void print_usage();
static bool dir_exists(const std::string &path); static bool dir_exists(const std::string &path);
static bool file_exists(const std::string &name); static bool file_exists(const std::string &name);
@ -124,6 +125,7 @@ int main(int argc, char **argv)
{ {
bool is_client = false; bool is_client = false;
bool pxh_off = false; bool pxh_off = false;
bool server_is_running = false;
/* Symlinks point to all commands that can be used as a client with a prefix. */ /* Symlinks point to all commands that can be used as a client with a prefix. */
const char prefix[] = PX4_SHELL_COMMAND_PREFIX; const char prefix[] = PX4_SHELL_COMMAND_PREFIX;
@ -131,6 +133,9 @@ int main(int argc, char **argv)
std::string absolute_binary_path; // full path to the px4 binary being executed std::string absolute_binary_path; // full path to the px4 binary being executed
int ret = PX4_OK;
int instance = 0;
if (argc > 0) { if (argc > 0) {
/* The executed binary name could start with a path, so strip it away */ /* The executed binary name could start with a path, so strip it away */
const std::string full_binary_name = argv[0]; const std::string full_binary_name = argv[0];
@ -146,8 +151,6 @@ int main(int argc, char **argv)
} }
if (is_client) { if (is_client) {
int instance = 0;
if (argc >= 3 && strcmp(argv[1], "--instance") == 0) { if (argc >= 3 && strcmp(argv[1], "--instance") == 0) {
instance = strtoul(argv[2], nullptr, 10); instance = strtoul(argv[2], nullptr, 10);
/* update argv so that "--instance <instance>" is not visible anymore */ /* update argv so that "--instance <instance>" is not visible anymore */
@ -160,15 +163,16 @@ int main(int argc, char **argv)
PX4_DEBUG("instance: %i", instance); PX4_DEBUG("instance: %i", instance);
if (!is_server_running(instance, false)) { ret = get_server_running(instance, &server_is_running);
if (errno) {
PX4_ERR("Failed to communicate with daemon: %s", strerror(errno));
} else { if (ret != PX4_OK) {
PX4_ERR("PX4 daemon not running yet"); PX4_ERR("PX4 client failed to get server status");
} return ret;
}
return -1; if (!server_is_running) {
PX4_ERR("PX4 server not running");
return PX4_ERROR;
} }
/* Remove the path and prefix. */ /* Remove the path and prefix. */
@ -202,7 +206,6 @@ int main(int argc, char **argv)
bool working_directory_default = false; bool working_directory_default = false;
int instance = 0;
bool instance_provided = false; bool instance_provided = false;
int myoptind = 1; int myoptind = 1;
@ -292,20 +295,27 @@ int main(int argc, char **argv)
PX4_INFO("working directory %s", working_directory.c_str()); PX4_INFO("working directory %s", working_directory.c_str());
} }
int ret = change_directory(working_directory); ret = change_directory(working_directory);
if (ret != PX4_OK) { if (ret != PX4_OK) {
return ret; return ret;
} }
} }
if (is_server_running(instance, true)) { ret = get_server_running(instance, &server_is_running);
// allow running multiple instances, but the server is only started for the first
PX4_INFO("PX4 daemon already running for instance %i (%s)", instance, strerror(errno)); if (ret != PX4_OK) {
return -1; PX4_ERR("Failed to get server status");
return ret;
} }
int ret = create_symlinks_if_needed(data_path); if (server_is_running) {
// allow running multiple instances, but the server is only started for the first
PX4_INFO("PX4 server already running for instance %i", instance);
return PX4_ERROR;
}
ret = create_symlinks_if_needed(data_path);
if (ret != PX4_OK) { if (ret != PX4_OK) {
return ret; return ret;
@ -343,6 +353,13 @@ int main(int argc, char **argv)
px4::init_once(); px4::init_once();
px4::init(argc, argv, "px4"); px4::init(argc, argv, "px4");
// Don't set this up until PX4 is up and running
ret = set_server_running(instance);
if (ret != PX4_OK) {
return ret;
}
ret = run_startup_script(commands_file, absolute_binary_path, instance); ret = run_startup_script(commands_file, absolute_binary_path, instance);
if (ret == 0) { if (ret == 0) {
@ -618,39 +635,73 @@ void print_usage()
printf(" e.g.: px4-commander status\n"); printf(" e.g.: px4-commander status\n");
} }
bool is_server_running(int instance, bool server) int get_server_running(int instance, bool *is_server_running)
{ {
const std::string file_lock_path = std::string(LOCK_FILE_PATH) + '-' + std::to_string(instance); const std::string file_lock_path = std::string(LOCK_FILE_PATH) + '-' + std::to_string(instance);
int fd = open(file_lock_path.c_str(), O_RDWR | O_CREAT, 0666); int fd = open(file_lock_path.c_str(), O_RDWR | O_CREAT, 0666);
if (fd < 0) { if (fd < 0) {
PX4_ERR("is_server_running: failed to create lock file: %s, reason=%s", file_lock_path.c_str(), strerror(errno)); PX4_ERR("%s: failed to create lock file: %s, reason=%s", __func__, file_lock_path.c_str(), strerror(errno));
return false; return PX4_ERROR;
} }
bool result = false; int status = PX4_OK;
struct flock lock;
memset(&lock, 0, sizeof(struct flock));
// Server is running if the file is already locked. // Exclusive write lock, cover the entire file (regardless of size)
if (flock(fd, LOCK_EX | LOCK_NB) < 0) { lock.l_type = F_WRLCK;
if (errno == EWOULDBLOCK) { lock.l_whence = SEEK_SET;
// a server is running!
result = true; if (fcntl(fd, F_GETLK, &lock) < 0) {
PX4_ERR("%s: failed to get check for lock on file: %s, reason=%s", __func__, file_lock_path.c_str(), strerror(errno));
status = PX4_ERROR;
} else {
// F_GETLK will set l_type to F_UNLCK if no one had a lock on the file. Otherwise,
// it means that the server is running and has a lock on the file
if (lock.l_type != F_UNLCK) {
*is_server_running = true;
} else { } else {
PX4_ERR("is_server_running: failed to get lock on file: %s, reason=%s", file_lock_path.c_str(), strerror(errno)); *is_server_running = false;
result = false;
} }
} }
if (result || !server) { close(fd);
return status;
}
int set_server_running(int instance)
{
const std::string file_lock_path = std::string(LOCK_FILE_PATH) + '-' + std::to_string(instance);
int fd = open(file_lock_path.c_str(), O_RDWR | O_CREAT, 0666);
if (fd < 0) {
PX4_ERR("%s: failed to create lock file: %s, reason=%s", __func__, file_lock_path.c_str(), strerror(errno));
return PX4_ERROR;
}
int status = PX4_OK;
struct flock lock;
memset(&lock, 0, sizeof(struct flock));
// Exclusive lock, cover the entire file (regardless of size).
lock.l_type = F_WRLCK;
lock.l_whence = SEEK_SET;
if (fcntl(fd, F_SETLK, &lock) < 0) {
PX4_ERR("%s: failed to set lock on file: %s, reason=%s", __func__, file_lock_path.c_str(), strerror(errno));
status = PX4_ERROR;
close(fd); close(fd);
} }
// note: server leaks the file handle once, on purpose, in order to keep the lock on the file until the process terminates. // note: server leaks the file handle, on purpose, in order to keep the lock on the file until the process terminates.
// In this case we return false so the server code path continues now that we have the lock. // In this case we return false so the server code path continues now that we have the lock.
errno = 0; return status;
return result;
} }
bool file_exists(const std::string &name) bool file_exists(const std::string &name)