BaseTools/Plugin/CodeQL: Add CodeQL build plugin

Adds a CodeQL plugin that supports CodeQL in the build system. 1. CodeQlBuildPlugin - Generates a CodeQL database for a given build. 2. CodeQlAnalyzePlugin - Analyzes a CodeQL database and interprets results. 3. External dependencies - Assist with downloading the CodeQL CLI and making it available to the CodeQL plugins. 4. CodeQlQueries.qls - A C/C++ CodeQL query set run against the code. 5. Readme.md - A comprehensive readme file to help: - Platform integrators understand how to configure the plugin - Developers understand how to modify the plugin - Users understand how to use the plugin Read Readme.md for additional details. Cc: Bob Feng <bob.c.feng@intel.com> Cc: Liming Gao <gaoliming@byosoft.com.cn> Cc: Michael D Kinney <michael.d.kinney@intel.com> Cc: Rebecca Cran <rebecca@bsdio.com> Cc: Sean Brogan <sean.brogan@microsoft.com> Cc: Yuwei Chen <yuwei.chen@intel.com> Signed-off-by: Michael Kubacki <michael.kubacki@microsoft.com> Reviewed-by: Yuwei Chen <yuwei.chen@intel.com> Reviewed-by: Sean Brogan <sean.brogan@microsoft.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Acked-by: Michael D Kinney <michael.d.kinney@intel.com>
2023-09-25 12:11:13 -04:00
parent c1393bd486
commit 5464d0bed6
14 changed files with 1339 additions and 0 deletions
--- a/BaseTools/Plugin/CodeQL/analyze/__init__.py
+++ b/BaseTools/Plugin/CodeQL/analyze/__init__.py
--- a/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py
+++ b/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py
@@ -0,0 +1,184 @@
+# @file analyze_filter.py
+#
+# Filters results in a SARIF file.
+#
+#            Apache License
+#      Version 2.0, January 2004
+#   http://www.apache.org/licenses/
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file has been altered from its original form. Based on code in:
+#   https://github.com/advanced-security/filter-sarif
+#
+# It primarily contains modifications made to integrate with the CodeQL plugin.
+#
+# Specifically:
+#   https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
+#
+# View the full and complete license as provided by that repository here:
+#   https://github.com/advanced-security/filter-sarif/blob/main/LICENSE
+#
+# SPDX-License-Identifier: Apache-2.0
+##
+
+import json
+import logging
+import re
+from os import PathLike
+from typing import Iterable, List, Tuple
+
+from analyze.globber import match
+
+
+def _match_path_and_rule(
+    path: str, rule: str,
+    patterns: Iterable[Tuple[bool, str, str]]) -> bool:
+    """Returns whether a result for a given path and rule is included.
+
+    Every pattern is checked in order. The sign of the last pattern whose
+    rule pattern matches the rule and whose file pattern matches the path
+    decides the outcome, so later patterns override earlier ones.
+
+    Args:
+        path (str): A file path string.
+        rule (str): A rule ID string.
+        patterns (Iterable[Tuple[bool, str, str]]): An iterable of parsed
+            (sign, file pattern, rule pattern) tuples.
+
+    Returns:
+        bool: The sign of the last matching pattern. True if no pattern
+              matches.
+    """
+    included = True
+    for sign, file_pattern, rule_pattern in patterns:
+        if match(rule_pattern, rule) and match(file_pattern, path):
+            included = sign
+    return included
+
+
+def _parse_pattern(line: str) -> Tuple[bool, str, str]:
+    """Parses a given pattern line.
+
+    A line has the form "[+|-]<file pattern>[:<rule pattern>]". A backslash
+    escapes a following '+', '-', '\\' or ':' so that character is taken
+    literally. A backslash in front of any other character is kept as is.
+
+    Args:
+        line (str): The line string that contains the rule.
+
+    Returns:
+        Tuple[bool, str, str]: The parsed sign (False for a line that starts
+                               with '-', otherwise True), file pattern, and
+                               rule pattern from the line. The rule pattern
+                               is '**' when the line does not give one.
+
+    Raises:
+        ValueError: The line contains more than one unescaped separator.
+    """
+    sep_char = ':'
+    esc_char = '\\'
+    escapable = ('+', '-', esc_char, sep_char)
+
+    # A leading '-' marks an exclusion pattern. A leading '+' (or no sign at
+    # all) marks an inclusion pattern.
+    sign = not line.startswith('-')
+    body = line[1:] if line.startswith(('+', '-')) else line
+
+    # Characters before the separator belong to the file pattern and
+    # characters after it belong to the rule pattern.
+    file_chars = []
+    rule_chars = []
+    target = file_chars
+
+    i = 0
+    while i < len(body):
+        c = body[i]
+        i += 1
+        if c == sep_char:
+            if target is rule_chars:
+                raise ValueError(
+                    'Invalid pattern: "' + line + '" Contains more than one '
+                    'separator!')
+            target = rule_chars
+            continue
+        if c == esc_char and i < len(body) and body[i] in escapable:
+            # Drop the escape character and take the next one literally.
+            c = body[i]
+            i += 1
+        target.append(c)
+
+    file_pattern = ''.join(file_chars)
+    # A pattern without a rule part applies to every rule.
+    rule_pattern = ''.join(rule_chars) or '**'
+
+    return sign, file_pattern, rule_pattern
+
+
+def filter_sarif(input_sarif: PathLike,
+                 output_sarif: PathLike,
+                 patterns: List[str],
+                 split_lines: bool) -> None:
+    """Filters a SARIF file with a given set of filter patterns.
+
+    Each location of each result is checked against the patterns. Locations
+    that are excluded are removed, and a result that loses all of its
+    locations is removed as well. Results without locations, and locations
+    without a uri, cannot be matched and are always kept.
+
+    Args:
+        input_sarif (PathLike): Input SARIF file path.
+        output_sarif (PathLike): Output SARIF file path.
+        patterns (List[str]): List of filter pattern strings.
+        split_lines (bool): Whether to split each pattern string on line
+                            breaks into individual patterns.
+    """
+    if split_lines:
+        patterns = [
+            line for p in patterns for line in re.split('\r?\n', p)]
+
+    # Empty pattern strings are skipped.
+    parsed_patterns = [_parse_pattern(p) for p in patterns if p]
+
+    logging.debug('Given patterns:')
+    for sign, file_pattern, rule_pattern in parsed_patterns:
+        logging.debug(
+            'files: %s    rules: %s (%s)',
+            file_pattern, rule_pattern, 'positive' if sign else 'negative')
+
+    # SARIF log files are UTF-8 encoded, so do not rely on the locale default.
+    with open(input_sarif, 'r', encoding='utf-8') as f:
+        sarif = json.load(f)
+
+    for run in sarif.get('runs', []):
+        if not run.get('results', []):
+            continue
+
+        new_results = []
+        for result in run['results']:
+            if not result.get('locations', []):
+                # locations array doesn't exist or is empty, so we can't
+                # match on anything. Therefore, we include the result in
+                # the output.
+                new_results.append(result)
+                continue
+
+            # TODO: The ruleId field is optional and potentially
+            #       ambiguous. We might have to fetch the actual
+            #       ruleId from the rule metadata via the ruleIndex
+            #       field.
+            # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#rule-metadata)
+            #
+            # A missing ruleId is treated as an empty rule ID instead of
+            # raising KeyError, so only wildcard rule patterns (e.g. the
+            # default '**') can match it.
+            rule_id = result.get('ruleId') or ''
+
+            new_locations = []
+            for location in result['locations']:
+                # TODO: The uri field is optional. We might have to
+                #       fetch the actual uri from "artifacts" via
+                #       "index"
+                # (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#-linking-results-to-artifacts)
+                uri = location.get(
+                    'physicalLocation', {}).get(
+                        'artifactLocation', {}).get(
+                            'uri', None)
+
+                if (uri is None or
+                        _match_path_and_rule(uri, rule_id, parsed_patterns)):
+                    new_locations.append(location)
+
+            result['locations'] = new_locations
+            if new_locations:
+                new_results.append(result)
+        run['results'] = new_results
+
+    with open(output_sarif, 'w', encoding='utf-8') as f:
+        json.dump(sarif, f, indent=2)
--- a/BaseTools/Plugin/CodeQL/analyze/globber.py
+++ b/BaseTools/Plugin/CodeQL/analyze/globber.py
@@ -0,0 +1,127 @@
+# @file globber.py
+#
+# Provides glob pattern matching functionality for use by the CodeQL plugin.
+#
+# Copyright 2019 Jaakko Kangasharju
+#
+#            Apache License
+#      Version 2.0, January 2004
+#   http://www.apache.org/licenses/
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file has been altered from its original form. Based on code in:
+#   https://github.com/advanced-security/filter-sarif
+#
+# Specifically:
+#   https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
+#
+# It primarily contains modifications made to integrate with the CodeQL plugin.
+#
+# SPDX-License-Identifier: Apache-2.0
+##
+
+import re
+
+# A '**' must stand alone as a whole path component. Each expression below
+# detects one way in which a pattern can break that rule.
+
+# '**' directly preceded by a character other than '/' or '\'.
+_double_star_after_invalid_regex = re.compile(r'[^/\\]\*\*')
+# '**' at the start of the pattern directly followed by a non-'/' character.
+_double_star_first_before_invalid_regex = re.compile(r'^\*\*[^/]')
+# '**' not preceded by '\' and directly followed by a non-'/' character.
+_double_star_middle_before_invalid_regex = re.compile(r'[^\\]\*\*[^/]')
+
+
+def _match_component(pattern_component, file_name_component):
+    """Matches one pattern component against one file name component.
+
+    In the pattern, '*' matches any (possibly empty) run of characters, '?'
+    matches exactly one character, and a backslash makes the character that
+    follows it match literally.
+
+    Args:
+        pattern_component (str): A single component of a glob pattern.
+        file_name_component (str): A single component of a file name.
+
+    Returns:
+        bool: True if the whole file name component matches the whole
+              pattern component. Otherwise, False.
+    """
+    if not pattern_component:
+        # An exhausted pattern only matches an exhausted name.
+        return not file_name_component
+    if not file_name_component:
+        # Only a lone '*' can still match once the name is used up.
+        return pattern_component == '*'
+
+    head = pattern_component[0]
+    pattern_rest = pattern_component[1:]
+    name_rest = file_name_component[1:]
+
+    if head == '*':
+        # Either the '*' consumes one more character of the name, or it
+        # stops here and the rest of the pattern takes over.
+        return (_match_component(pattern_component, name_rest) or
+                _match_component(pattern_rest, file_name_component))
+    if head == '?':
+        return _match_component(pattern_rest, name_rest)
+    if head == '\\':
+        # The escaped character must exist and match literally.
+        return (len(pattern_component) >= 2 and
+                pattern_component[1] == file_name_component[0] and
+                _match_component(pattern_component[2:], name_rest))
+    if head != file_name_component[0]:
+        return False
+    return _match_component(pattern_rest, name_rest)
+
+
+def _match_components(pattern_components, file_name_components):
+    """Matches a list of pattern components against file name components.
+
+    A '**' pattern component matches zero or more file name components.
+    Every other pattern component must match exactly one file name
+    component.
+
+    Args:
+        pattern_components: The glob pattern split into components.
+        file_name_components: The file name split into components.
+
+    Returns:
+        bool: True if all file name components are matched by all pattern
+              components. Otherwise, False.
+    """
+    if not pattern_components:
+        # An exhausted pattern only matches an exhausted file name.
+        return not file_name_components
+    if not file_name_components:
+        # Only a single trailing '**' can match nothing at all.
+        return len(pattern_components) == 1 and pattern_components[0] == '**'
+
+    first_pattern = pattern_components[0]
+    other_patterns = pattern_components[1:]
+    other_names = file_name_components[1:]
+
+    if first_pattern == '**':
+        # Either '**' swallows one more file name component, or it stops
+        # here and the remaining pattern components take over.
+        return (_match_components(pattern_components, other_names)
+                or _match_components(other_patterns, file_name_components))
+
+    return (_match_component(first_pattern, file_name_components[0]) and
+            _match_components(other_patterns, other_names))
+
+
+def match(pattern: str, file_name: str) -> bool:
+    """Match a glob pattern against a file name.
+
+    Glob pattern matching is for file names, which do not need to exist as
+    files on the file system.
+
+    A file name is a sequence of directory names, possibly followed by the name
+    of a file, with the components separated by a path separator. A glob
+    pattern is similar, except it may contain special characters: A '?' matches
+    any character in a name. A '*' matches any sequence of characters (possibly
+    empty) in a name. Both of these match only within a single component, i.e.,
+    they will not match a path separator. A component in a pattern may also be
+    a literal '**', which matches zero or more components in the complete file
+    name. A backslash '\\' in a pattern acts as an escape character, and
+    indicates that the following character is to be matched literally, even if
+    it is a special character.
+
+    Args:
+        pattern (str): The pattern to match. The path separator in patterns is
+                       always '/'.
+        file_name (str): The file name to match against. Both '/' and '\\'
+                         are accepted as path separators in file names.
+
+    Returns:
+        bool: True if the pattern matches, False otherwise.
+
+    Raises:
+        ValueError: A '**' in the pattern is not alone between path
+                    separators.
+    """
+    invalid_double_star_checks = (
+        _double_star_after_invalid_regex,
+        _double_star_first_before_invalid_regex,
+        _double_star_middle_before_invalid_regex,
+    )
+    if any(regex.search(pattern) is not None
+           for regex in invalid_double_star_checks):
+        raise ValueError(
+            '** in {} not alone between path separators'.format(pattern))
+
+    pattern = pattern.rstrip('/')
+    # File names are split on both '/' and '\' below, so trailing separators
+    # of either kind are removed to avoid a spurious empty last component.
+    file_name = file_name.rstrip('/\\')
+
+    # Consecutive '**' components are equivalent to a single one.
+    while '**/**' in pattern:
+        pattern = pattern.replace('**/**', '**')
+
+    pattern_components = pattern.split('/')
+
+    # We split on '\' as well as '/' to support unix and windows-style paths
+    file_name_components = re.split(r'[\\/]', file_name)
+
+    return _match_components(pattern_components, file_name_components)