forked from Archive/PX4-Autopilot
uorb: fix several race conditions during topic initialization
Possible race conditions (they all happen between the check of existence of a topic and trying to create the node): - single instance, with multiple advertisers during the first advertise: both advertisers see the topic as non-existent and try to advertise it. One of them will fail, leading to an error message. This is the cause for telemetry_status advert failure seen in SITL in rare cases. - multi-instance: subscription to non-existing instance -> px4_open fails, and the subscriber tries to create the node. If during that time a publisher publishes that instance, the subscriber will get (instance+1) (or fails if the max number of instances is exceeded). This is a race that goes pretty much unnoticed. - multi-instance: 2 publishers can get the same instance (if is_published() is false in case both have not published data yet). This can also go unnoticed. Therefore the patch changes where _advertised is set: it is now set directly during the advertisement instead of during publication.
This commit is contained in:
parent
63b2befeed
commit
6c8048d057
|
@ -55,7 +55,7 @@ uORB::DeviceMaster::~DeviceMaster()
|
|||
}
|
||||
|
||||
int
|
||||
uORB::DeviceMaster::advertise(const struct orb_metadata *meta, int *instance, int priority)
|
||||
uORB::DeviceMaster::advertise(const struct orb_metadata *meta, bool is_advertiser, int *instance, int priority)
|
||||
{
|
||||
int ret = PX4_ERROR;
|
||||
|
||||
|
@ -119,22 +119,45 @@ uORB::DeviceMaster::advertise(const struct orb_metadata *meta, int *instance, in
|
|||
delete node;
|
||||
|
||||
if (ret == -EEXIST) {
|
||||
/* if the node exists already, get the existing one and check if
|
||||
* something has been published yet. */
|
||||
/* if the node exists already, get the existing one and check if it's advertised. */
|
||||
uORB::DeviceNode *existing_node = getDeviceNodeLocked(meta, group_tries);
|
||||
|
||||
if (existing_node != nullptr && !existing_node->is_advertised()) {
|
||||
/* nothing has been published yet, lets claim it */
|
||||
existing_node->set_priority(priority);
|
||||
/*
|
||||
* We can claim an existing node in these cases:
|
||||
* - The node is not advertised (yet). It means there is already one or more subscribers or it was
|
||||
* unadvertised.
|
||||
* - We are a single-instance advertiser requesting the first instance.
|
||||
* (Usually we don't end up here, but we might in case of a race condition between 2
|
||||
* advertisers).
|
||||
* - We are a subscriber requesting a certain instance.
|
||||
* (Also we usually don't end up in that case, but we might in case of a race condtion
|
||||
* between an advertiser and subscriber).
|
||||
*/
|
||||
bool is_single_instance_advertiser = is_advertiser && !instance;
|
||||
|
||||
if (existing_node != nullptr &&
|
||||
(!existing_node->is_advertised() || is_single_instance_advertiser || !is_advertiser)) {
|
||||
if (is_advertiser) {
|
||||
existing_node->set_priority(priority);
|
||||
/* Set as advertised to avoid race conditions (otherwise 2 multi-instance advertisers
|
||||
* could get the same instance).
|
||||
*/
|
||||
existing_node->mark_as_advertised();
|
||||
}
|
||||
|
||||
ret = PX4_OK;
|
||||
|
||||
} else {
|
||||
/* otherwise: data has already been published, keep looking */
|
||||
/* otherwise: already advertised, keep looking */
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// add to the node map;.
|
||||
if (is_advertiser) {
|
||||
node->mark_as_advertised();
|
||||
}
|
||||
|
||||
// add to the node map.
|
||||
_node_list.add(node);
|
||||
}
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ class uORB::DeviceMaster
|
|||
{
|
||||
public:
|
||||
|
||||
int advertise(const struct orb_metadata *meta, int *instance, int priority);
|
||||
int advertise(const struct orb_metadata *meta, bool is_advertiser, int *instance, int priority);
|
||||
|
||||
/**
|
||||
* Public interface for getDeviceNodeLocked(). Takes care of synchronization.
|
||||
|
|
|
@ -90,6 +90,7 @@ uORB::DeviceNode::open(cdev::file_t *filp)
|
|||
ret = -EBUSY;
|
||||
}
|
||||
|
||||
mark_as_advertised();
|
||||
unlock();
|
||||
|
||||
/* now complete the open */
|
||||
|
@ -306,7 +307,6 @@ uORB::DeviceNode::write(cdev::file_t *filp, const char *buffer, size_t buflen)
|
|||
/* update the timestamp and generation count */
|
||||
_last_update = hrt_absolute_time();
|
||||
|
||||
_advertised = true;
|
||||
|
||||
// callbacks
|
||||
for (auto item : _callbacks) {
|
||||
|
|
|
@ -164,6 +164,8 @@ public:
|
|||
* and publish to this node or if another node should be tried. */
|
||||
bool is_advertised() const { return _advertised; }
|
||||
|
||||
void mark_as_advertised() { _advertised = true; }
|
||||
|
||||
/**
|
||||
* Try to change the size of the queue. This can only be done as long as nobody published yet.
|
||||
* This is the case, for example when orb_subscribe was called before an orb_advertise.
|
||||
|
|
|
@ -327,14 +327,12 @@ int uORB::Manager::orb_get_interval(int handle, unsigned *interval)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int uORB::Manager::node_advertise(const struct orb_metadata *meta, int *instance, int priority)
|
||||
int uORB::Manager::node_advertise(const struct orb_metadata *meta, bool is_advertiser, int *instance, int priority)
|
||||
{
|
||||
int ret = PX4_ERROR;
|
||||
|
||||
/* fill advertiser data */
|
||||
|
||||
if (get_device_master()) {
|
||||
ret = _device_master->advertise(meta, instance, priority);
|
||||
ret = _device_master->advertise(meta, is_advertiser, instance, priority);
|
||||
}
|
||||
|
||||
/* it's PX4_OK if it already exists */
|
||||
|
@ -384,7 +382,7 @@ int uORB::Manager::node_open(const struct orb_metadata *meta, bool advertiser, i
|
|||
if (fd < 0) {
|
||||
|
||||
/* try to create the node */
|
||||
ret = node_advertise(meta, instance, priority);
|
||||
ret = node_advertise(meta, advertiser, instance, priority);
|
||||
|
||||
if (ret == PX4_OK) {
|
||||
/* update the path, as it might have been updated during the node_advertise call */
|
||||
|
@ -396,7 +394,7 @@ int uORB::Manager::node_open(const struct orb_metadata *meta, bool advertiser, i
|
|||
}
|
||||
}
|
||||
|
||||
/* on success, try the open again */
|
||||
/* on success, try to open again */
|
||||
if (ret == PX4_OK) {
|
||||
fd = px4_open(path, (advertiser) ? PX4_F_WRONLY : PX4_F_RDONLY);
|
||||
}
|
||||
|
|
|
@ -392,11 +392,9 @@ private: // class methods
|
|||
/**
|
||||
* Advertise a node; don't consider it an error if the node has
|
||||
* already been advertised.
|
||||
*
|
||||
* @todo verify that the existing node is the same as the one
|
||||
* we tried to advertise.
|
||||
*/
|
||||
int node_advertise(const struct orb_metadata *meta, int *instance = nullptr, int priority = ORB_PRIO_DEFAULT);
|
||||
int node_advertise(const struct orb_metadata *meta, bool is_advertiser, int *instance = nullptr,
|
||||
int priority = ORB_PRIO_DEFAULT);
|
||||
|
||||
/**
|
||||
* Common implementation for orb_advertise and orb_subscribe.
|
||||
|
|
Loading…
Reference in New Issue