From 9522a218f7dff95c490ff359cc60e8c2af35f5c8 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Mon, 27 Nov 2017 14:40:10 -0500 Subject: [PATCH] bpo-32107 - Better merge of #4494 (#4576) Improve UUID1 MAC address calculation and related tests. There are two bits in the MAC address that are relevant to UUID1. The first is the locally administered vs. universally administered bit (second least significant of the first octet). Physical network interfaces such as ethernet ports and wireless adapters will always be universally administered, but some interfaces --such as the interface that MacBook Pros communicate with their Touch Bars-- are locally administered. The former are guaranteed to be globally unique, while the latter are demonstrably *not* globally unique and are in fact the same on every MBP with a Touch Bar. With this bit is set, the MAC is locally administered; with it unset it is universally administered. The other bit is the multicast bit (least significant bit of the first octet). When no other MAC address can be found, RFC 4122 mandates that a random 48-bit number be generated. This randomly generated number *must* have the multicast bit set. The improvements in uuid.py include: * Preferentially return a universally administered MAC address, falling back to a locally administered address if none of the former can be found. * Improve several coding style issues, such as adding explicit returns of None, using a more readable bitmask pattern, and assuming that the ultimate fallback, random MAC generation will not fail (and propagating any exception there instead of swallowing them). Improvements in test_uuid.py include: * Always testing the calculated MAC for universal administration, unless explicitly disabled (i.e. for the random case), or implicitly disabled due to running in the Travis environment. Travis test machines have *no* universally administered MAC address at the time of this writing. --- Lib/test/test_uuid.py | 47 ++++++++----- Lib/uuid.py | 70 ++++++++++++++++--- .../2017-11-26-18-48-17.bpo-32107.h2ph2K.rst | 4 ++ 3 files changed, 93 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 083c2aa8aab..54d73f7391e 100644 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -512,60 +512,69 @@ eth0 Link encap:Ethernet HWaddr 12:34:56:78:90:ab self.assertEqual(mac, 0x1234567890ab) - def check_node(self, node, requires=None, network=False): + def check_node(self, node, requires=None, *, check_bit=True): if requires and node is None: self.skipTest('requires ' + requires) hex = '%012x' % node if support.verbose >= 2: print(hex, end=' ') - if network: - # 47 bit will never be set in IEEE 802 addresses obtained - # from network cards. - self.assertFalse(node & 0x010000000000, hex) + # The MAC address will be universally administered (i.e. the second + # least significant bit of the first octet must be unset) for any + # physical interface, such as an ethernet port or wireless adapter. + # There are some cases where this won't be the case. Randomly + # generated MACs may not be universally administered, but they must + # have their multicast bit set, though this is tested in the + # `test_random_getnode()` method specifically. Another case is the + # Travis-CI case, which apparently only has locally administered MAC + # addresses. + if check_bit and not os.getenv('TRAVIS'): + self.assertFalse(node & (1 << 41), '%012x' % node) self.assertTrue(0 < node < (1 << 48), "%s is not an RFC 4122 node ID" % hex) @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_ifconfig_getnode(self): node = self.uuid._ifconfig_getnode() - self.check_node(node, 'ifconfig', True) + self.check_node(node, 'ifconfig') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_ip_getnode(self): node = self.uuid._ip_getnode() - self.check_node(node, 'ip', True) + self.check_node(node, 'ip') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_arp_getnode(self): node = self.uuid._arp_getnode() - self.check_node(node, 'arp', True) + self.check_node(node, 'arp') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_lanscan_getnode(self): node = self.uuid._lanscan_getnode() - self.check_node(node, 'lanscan', True) + self.check_node(node, 'lanscan') @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_netstat_getnode(self): node = self.uuid._netstat_getnode() - self.check_node(node, 'netstat', True) + self.check_node(node, 'netstat') @unittest.skipUnless(os.name == 'nt', 'requires Windows') def test_ipconfig_getnode(self): node = self.uuid._ipconfig_getnode() - self.check_node(node, 'ipconfig', True) + self.check_node(node, 'ipconfig') @unittest.skipUnless(importable('win32wnet'), 'requires win32wnet') @unittest.skipUnless(importable('netbios'), 'requires netbios') def test_netbios_getnode(self): node = self.uuid._netbios_getnode() - self.check_node(node, network=True) + self.check_node(node) def test_random_getnode(self): node = self.uuid._random_getnode() - # Least significant bit of first octet must be set. - self.assertTrue(node & 0x010000000000, '%012x' % node) - self.check_node(node) + # The multicast bit, i.e. the least significant bit of first octet, + # must be set for randomly generated MAC addresses. See RFC 4122, + # $4.1.6. + self.assertTrue(node & (1 << 40), '%012x' % node) + self.check_node(node, check_bit=False) @unittest.skipUnless(os.name == 'posix', 'requires Posix') def test_unix_getnode(self): @@ -575,13 +584,17 @@ eth0 Link encap:Ethernet HWaddr 12:34:56:78:90:ab node = self.uuid._unix_getnode() except TypeError: self.skipTest('requires uuid_generate_time') - self.check_node(node, 'unix') + # Since we don't know the provenance of the MAC address, don't check + # whether it is locally or universally administered. + self.check_node(node, 'unix', check_bit=False) @unittest.skipUnless(os.name == 'nt', 'requires Windows') @unittest.skipUnless(importable('ctypes'), 'requires ctypes') def test_windll_getnode(self): node = self.uuid._windll_getnode() - self.check_node(node) + # Since we don't know the provenance of the MAC address, don't check + # whether it is locally or universally administered. + self.check_node(node, check_bit=False) class TestInternalsWithoutExtModule(BaseTestInternals, unittest.TestCase): diff --git a/Lib/uuid.py b/Lib/uuid.py index 020c6e73c86..9e7c672f7a0 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -342,11 +342,29 @@ def _popen(command, *args): env=env) return proc +# For MAC (a.k.a. IEEE 802, or EUI-48) addresses, the second least significant +# bit of the first octet signifies whether the MAC address is universally (0) +# or locally (1) administered. Network cards from hardware manufacturers will +# always be universally administered to guarantee global uniqueness of the MAC +# address, but any particular machine may have other interfaces which are +# locally administered. An example of the latter is the bridge interface to +# the Touch Bar on MacBook Pros. +# +# This bit works out to be the 42nd bit counting from 1 being the least +# significant, or 1<<41. We'll skip over any locally administered MAC +# addresses, as it makes no sense to use those in UUID calculation. +# +# See https://en.wikipedia.org/wiki/MAC_address#Universal_vs._local + +def _is_universal(mac): + return not (mac & (1 << 41)) + def _find_mac(command, args, hw_identifiers, get_index): + first_local_mac = None try: proc = _popen(command, *args.split()) if not proc: - return + return None with proc: for line in proc.stdout: words = line.lower().rstrip().split() @@ -355,8 +373,9 @@ def _find_mac(command, args, hw_identifiers, get_index): try: word = words[get_index(i)] mac = int(word.replace(b':', b''), 16) - if mac: + if _is_universal(mac): return mac + first_local_mac = first_local_mac or mac except (ValueError, IndexError): # Virtual interfaces, such as those provided by # VPNs, do not have a colon-delimited MAC address @@ -366,6 +385,7 @@ def _find_mac(command, args, hw_identifiers, get_index): pass except OSError: pass + return first_local_mac or None def _ifconfig_getnode(): """Get the hardware address on Unix by running ifconfig.""" @@ -375,6 +395,7 @@ def _ifconfig_getnode(): mac = _find_mac('ifconfig', args, keywords, lambda i: i+1) if mac: return mac + return None def _ip_getnode(): """Get the hardware address on Unix by running ip.""" @@ -382,6 +403,7 @@ def _ip_getnode(): mac = _find_mac('ip', 'link list', [b'link/ether'], lambda i: i+1) if mac: return mac + return None def _arp_getnode(): """Get the hardware address on Unix by running arp.""" @@ -404,8 +426,10 @@ def _arp_getnode(): # This works on Linux, FreeBSD and NetBSD mac = _find_mac('arp', '-an', [os.fsencode('(%s)' % ip_addr)], lambda i: i+2) + # Return None instead of 0. if mac: return mac + return None def _lanscan_getnode(): """Get the hardware address on Unix by running lanscan.""" @@ -415,32 +439,36 @@ def _lanscan_getnode(): def _netstat_getnode(): """Get the hardware address on Unix by running netstat.""" # This might work on AIX, Tru64 UNIX. + first_local_mac = None try: proc = _popen('netstat', '-ia') if not proc: - return + return None with proc: words = proc.stdout.readline().rstrip().split() try: i = words.index(b'Address') except ValueError: - return + return None for line in proc.stdout: try: words = line.rstrip().split() word = words[i] if len(word) == 17 and word.count(b':') == 5: mac = int(word.replace(b':', b''), 16) - if mac: + if _is_universal(mac): return mac + first_local_mac = first_local_mac or mac except (ValueError, IndexError): pass except OSError: pass + return first_local_mac or None def _ipconfig_getnode(): """Get the hardware address on Windows by running ipconfig.exe.""" import os, re + first_local_mac = None dirs = ['', r'c:\windows\system32', r'c:\winnt\system32'] try: import ctypes @@ -458,18 +486,23 @@ def _ipconfig_getnode(): for line in pipe: value = line.split(':')[-1].strip().lower() if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value): - return int(value.replace('-', ''), 16) + mac = int(value.replace('-', ''), 16) + if _is_universal(mac): + return mac + first_local_mac = first_local_mac or mac + return first_local_mac or None def _netbios_getnode(): """Get the hardware address on Windows using NetBIOS calls. See http://support.microsoft.com/kb/118623 for details.""" import win32wnet, netbios + first_local_mac = None ncb = netbios.NCB() ncb.Command = netbios.NCBENUM ncb.Buffer = adapters = netbios.LANA_ENUM() adapters._pack() if win32wnet.Netbios(ncb) != 0: - return + return None adapters._unpack() for i in range(adapters.length): ncb.Reset() @@ -488,7 +521,11 @@ def _netbios_getnode(): bytes = status.adapter_address[:6] if len(bytes) != 6: continue - return int.from_bytes(bytes, 'big') + mac = int.from_bytes(bytes, 'big') + if _is_universal(mac): + return mac + first_local_mac = first_local_mac or mac + return first_local_mac or None _generate_time_safe = _UuidCreate = None @@ -601,9 +638,19 @@ def _windll_getnode(): return UUID(bytes=bytes_(_buffer.raw)).node def _random_getnode(): - """Get a random node ID, with eighth bit set as suggested by RFC 4122.""" + """Get a random node ID.""" + # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or + # pseudo-randomly generated value may be used; see Section 4.5. The + # multicast bit must be set in such addresses, in order that they will + # never conflict with addresses obtained from network cards." + # + # The "multicast bit" of a MAC address is defined to be "the least + # significant bit of the first octet". This works out to be the 41st bit + # counting from 1 being the least significant bit, or 1<<40. + # + # See https://en.wikipedia.org/wiki/MAC_address#Unicast_vs._multicast import random - return random.getrandbits(48) | 0x010000000000 + return random.getrandbits(48) | (1 << 40) _node = None @@ -626,13 +673,14 @@ def getnode(): getters = [_unix_getnode, _ifconfig_getnode, _ip_getnode, _arp_getnode, _lanscan_getnode, _netstat_getnode] - for getter in getters + [_random_getnode]: + for getter in getters: try: _node = getter() except: continue if _node is not None: return _node + return _random_getnode() _last_timestamp = None diff --git a/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst new file mode 100644 index 00000000000..fc4f5fe9f19 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-26-18-48-17.bpo-32107.h2ph2K.rst @@ -0,0 +1,4 @@ +Improve the private ``*_getnode()`` methods for UUID1 such that universally +administered MAC addresses are preferred over locally administered MAC +addresses. If only the latter is available, the first such one is returned. +Improve the related tests and fix some bugs there as well.