elf: Check that --list-diagnostics output has the expected syntax

Parts of elf/tst-rtld-list-diagnostics.py have been copied from scripts/tst-ld-trace.py. The abnf module is entirely optional and used to verify the ABNF grammar as included in the manual. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
2025-03-06 20:58:33 +01:00 · 2023-08-25 14:15:29 +02:00 · 2023-08-25 14:15:29 +02:00 · 3d9265467e
commit 3d9265467e
parent f21962ddfc
4 changed files with 327 additions and 0 deletions
--- a/7
+++ b/7
@ -585,6 +585,13 @@ build the GNU C Library:
     in your system.  As of release time PExpect 4.8.0 is the newest
     verified to work to test the pretty printers.
   • The Python ‘abnf’ module.
     This module is optional and used to verify some ABNF grammars in
     the manual.  Version 2.2.0 has been confirmed to work as expected.
     A missing ‘abnf’ module does not reduce the test coverage of the
     library itself.
   • GDB 7.8 or later with support for Python 2.7/3.4 or later
     GDB itself needs to be configured with Python support in order to
--- a/elf/Makefile
+++ b/elf/Makefile
@ -1123,6 +1123,7 @@ tests-special += \
  $(objpfx)argv0test.out \
  $(objpfx)tst-pathopt.out \
  $(objpfx)tst-rtld-help.out \
  $(objpfx)tst-rtld-list-diagnostics.out \
  $(objpfx)tst-rtld-load-self.out \
  $(objpfx)tst-rtld-preload.out \
  $(objpfx)tst-sprof-basic.out \
@ -2799,6 +2800,14 @@ $(objpfx)tst-ro-dynamic-mod.so: $(objpfx)tst-ro-dynamic-mod.os \
 		-Wl,--script=tst-ro-dynamic-mod.map \
 		$(objpfx)tst-ro-dynamic-mod.os
 $(objpfx)tst-rtld-list-diagnostics.out: tst-rtld-list-diagnostics.py \
  $(..)manual/dynlink.texi $(objpfx)$(rtld-installed-name)
 	$(PYTHON) tst-rtld-list-diagnostics.py \
 	  --manual=$(..)manual/dynlink.texi \
 	  "$(test-wrapper-env) $(objpfx)$(rtld-installed-name) --list-diagnostics" \
 	  > $@; \
 	$(evaluate-test)
 $(objpfx)tst-rtld-run-static.out: $(objpfx)/ldconfig
 $(objpfx)tst-dl_find_object.out: \
--- a/elf/tst-rtld-list-diagnostics.py
+++ b/elf/tst-rtld-list-diagnostics.py
@ -0,0 +1,303 @@
 #!/usr/bin/python3
 # Test that the ld.so --list-diagnostics output has the expected syntax.
 # Copyright (C) 2022-2023 Free Software Foundation, Inc.
 # Copyright The GNU Toolchain Authors.
 # This file is part of the GNU C Library.
 #
 # The GNU C Library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
 #
 # The GNU C Library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with the GNU C Library; if not, see
 # <https://www.gnu.org/licenses/>.
 import argparse
 import collections
 import subprocess
 import sys
 try:
    subprocess.run
 except:
    class _CompletedProcess:
        def __init__(self, args, returncode, stdout=None, stderr=None):
            self.args = args
            self.returncode = returncode
            self.stdout = stdout
            self.stderr = stderr
    def _run(*popenargs, input=None, timeout=None, check=False, **kwargs):
        assert(timeout is None)
        with subprocess.Popen(*popenargs, **kwargs) as process:
            try:
                stdout, stderr = process.communicate(input)
            except:
                process.kill()
                process.wait()
                raise
            returncode = process.poll()
            if check and returncode:
                raise subprocess.CalledProcessError(returncode, popenargs)
        return _CompletedProcess(popenargs, returncode, stdout, stderr)
    subprocess.run = _run
 # Number of errors encountered.  Zero means no errors (test passes).
 errors = 0
 def parse_line(line):
    """Parse a line of --list-diagnostics output.
    This function returns a pair (SUBSCRIPTS, VALUE).  VALUE is either
    a byte string or an integer.  SUBSCRIPT is a tuple of (LABEL,
    INDEX) pairs, where LABEL is a field identifier (a string), and
    INDEX is an integer or None, to indicate that this field is not
    indexed.
    """
    # Extract the list of subscripts before the value.
    idx = 0
    subscripts = []
    while line[idx] != '=':
        start_idx = idx
        # Extract the label.
        while line[idx] not in '[.=':
            idx += 1
        label = line[start_idx:idx]
        if line[idx] == '[':
            # Subscript with a 0x index.
            assert label
            close_bracket = line.index(']', idx)
            index = line[idx + 1:close_bracket]
            assert index.startswith('0x')
            index = int(index, 0)
            subscripts.append((label, index))
            idx = close_bracket + 1
        else: # '.' or '='.
            if label:
                subscripts.append((label, None))
            if line[idx] == '.':
                idx += 1
    # The value is either a string or a 0x number.
    value = line[idx + 1:]
    if value[0] == '"':
        # Decode the escaped string into a byte string.
        assert value[-1] == '"'
        idx = 1
        result = []
        while True:
            ch = value[idx]
            if ch == '\\':
                if value[idx + 1] in '"\\':
                    result.append(ord(value[idx + 1]))
                    idx += 2
                else:
                    result.append(int(value[idx + 1:idx + 4], 8))
                    idx += 4
            elif ch == '"':
                assert idx == len(value) - 1
                break
            else:
                result.append(ord(value[idx]))
                idx += 1
        value = bytes(result)
    else:
        # Convert the value into an integer.
        assert value.startswith('0x')
        value = int(value, 0)
    return (tuple(subscripts), value)
 assert parse_line('a.b[0x1]=0x2') == ((('a', None), ('b', 1)), 2)
 assert parse_line(r'b[0x3]="four\040\"\\"') == ((('b', 3),), b'four \"\\')
 # ABNF for a line of --list-diagnostics output.
 diagnostics_abnf = r"""
 HEXDIG = %x30-39 / %x61-6f ; lowercase a-f only
 ALPHA = %x41-5a / %x61-7a / %x7f ; letters and underscore
 ALPHA-NUMERIC = ALPHA / %x30-39 / "_"
 DQUOTE = %x22 ; "
 ; Numbers are always hexadecimal and use a 0x prefix.
 hex-value-prefix = %x30 %x78
 hex-value = hex-value-prefix 1*HEXDIG
 ; Strings use octal escape sequences and \\, \".
 string-char = %x20-21 / %x23-5c / %x5d-7e ; printable but not "\
 string-quoted-octal = %x30-33 2*2%x30-37
 string-quoted = "\" ("\" / DQUOTE / string-quoted-octal)
 string-value = DQUOTE *(string-char / string-quoted) DQUOTE
 value = hex-value / string-value
 label = ALPHA *ALPHA-NUMERIC
 index = "[" hex-value "]"
 subscript = label [index]
 line = subscript *("." subscript) "=" value
 """
 def check_consistency_with_manual(manual_path):
    """Verify that the code fragments in the manual match this script.
    The code fragments are duplicated to clarify the dual license.
    """
    global errors
    def extract_lines(path, start_line, end_line, skip_lines=()):
        result = []
        with open(path) as inp:
            capturing = False
            for line in inp:
                if line.strip() == start_line:
                    capturing = True
                elif not capturing or line.strip() in skip_lines:
                    continue
                elif line.strip() == end_line:
                    capturing = False
                else:
                    result.append(line)
        if not result:
            raise ValueError('{!r} not found in {!r}'.format(start_line, path))
        if capturing:
            raise ValueError('{!r} not found in {!r}'.format(end_line, path))
        return result
    def check(name, manual, script):
        global errors
        if manual == script:
            return
        print('error: {} fragment in manual is different'.format(name))
        import difflib
        sys.stdout.writelines(difflib.unified_diff(
            manual, script, fromfile='manual', tofile='script'))
        errors += 1
    manual_abnf = extract_lines(manual_path,
                                '@c ABNF-START', '@end smallexample',
                                skip_lines=('@smallexample',))
    check('ABNF', diagnostics_abnf.splitlines(keepends=True)[1:], manual_abnf)
 # If the abnf module can be imported, run an additional check that the
 # 'line' production from the ABNF grammar matches --list-diagnostics
 # output lines.
 try:
    import abnf
 except ImportError:
    abnf = None
    print('info: skipping ABNF validation because the abnf module is missing')
 if abnf is not None:
    class Grammar(abnf.Rule):
        pass
    Grammar.load_grammar(diagnostics_abnf)
    def parse_abnf(line):
        global errors
        # Just verify that the line parses.
        try:
            Grammar('line').parse_all(line)
        except abnf.ParseError:
            print('error: ABNF parse error:', repr(line))
            errors += 1
 else:
    def parse_abnf(line):
        pass
 def parse_diagnostics(cmd):
    global errors
    diag_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
                              universal_newlines=True).stdout
    if diag_out[-1] != '\n':
        print('error: ld.so output does not end in newline')
        errors += 1
    PathType = collections.namedtuple('PathType',
                                      'has_index value_type original_line')
    # Mapping tuples of labels to PathType values.
    path_types = {}
    seen_subscripts = {}
    for line in diag_out.splitlines():
        parse_abnf(line)
        subscripts, value = parse_line(line)
        # Check for duplicates.
        if subscripts in seen_subscripts:
            print('error: duplicate value assignment:', repr(line))
            print('  previous line:,', repr(seen_subscripts[line]))
            errors += 1
        else:
            seen_subscripts[subscripts] = line
        # Compare types against the previously seen labels.
        labels = tuple([label for label, index in subscripts])
        has_index = tuple([index is not None for label, index in subscripts])
        value_type = type(value)
        if labels in path_types:
            previous_type = path_types[labels]
            if has_index != previous_type.has_index:
                print('error: line has mismatch of indexing:', repr(line))
                print('  index types:', has_index)
                print('  previous:   ', previous_type.has_index)
                print('  previous line:', repr(previous_type.original_line))
                errors += 1
            if value_type != previous_type.value_type:
                print('error: line has mismatch of value type:', repr(line))
                print('  value type:', value_type.__name__)
                print('  previous:  ', previous_type.value_type.__name__)
                print('  previous line:', repr(previous_type.original_line))
                errors += 1
        else:
            path_types[labels] = PathType(has_index, value_type, line)
        # Check that this line does not add indexing to a previous value.
        for idx in range(1, len(subscripts) - 1):
            if subscripts[:idx] in path_types:
                print('error: line assigns to atomic value:', repr(line))
                print('  previous line:', repr(previous_type.original_line))
                errors += 1
    if errors:
        sys.exit(1)
 def get_parser():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--manual',
                        help='path to .texi file for consistency checks')
    parser.add_argument('command',
                        help='comand to run')
    return parser
 def main(argv):
    parser = get_parser()
    opts = parser.parse_args(argv)
    if opts.manual:
        check_consistency_with_manual(opts.manual)
    # Remove the initial 'env' command.
    parse_diagnostics(opts.command.split()[1:])
    if errors:
        sys.exit(1)
 if __name__ == '__main__':
    main(sys.argv[1:])
--- a/manual/install.texi
+++ b/manual/install.texi
@ -632,6 +632,14 @@ GDB, and should be compatible with the Python version in your system.
 As of release time PExpect 4.8.0 is the newest verified to work to test
 the pretty printers.
@item
 The Python @code{abnf} module.
 This module is optional and used to verify some ABNF grammars in the
 manual.  Version 2.2.0 has been confirmed to work as expected.  A
 missing @code{abnf} module does not reduce the test coverage of the
 library itself.
@item
 GDB 7.8 or later with support for Python 2.7/3.4 or later