Python: Speedup our eight slowest pytests (one at a time please)

Feature description

At the end of our GitHub Actions build jobs, there is a list of the slowest ones.

Are there ways to speed up these tests without reducing our functionality or our code coverage?

Please only fix one algorithm per pull request.

  • 16.64s call neural_network/simple_neural_network.py::neural_network.simple_neural_network.forward_propagation
  • 6.15s call backtracking/word_search.py::backtracking.word_search.word_exists
  • 4.61s call digital_image_processing/test_digital_image_processing.py::test_local_binary_pattern
  • 2.59s call backtracking/power_sum.py::backtracking.power_sum.backtrack
  • 2.19s call machine_learning/xgboost_regressor.py::machine_learning.xgboost_regressor.main
  • 1.73s call maths/prime_numbers.py::maths.prime_numbers.slow_primes
  • 1.43s call backtracking/power_sum.py::backtracking.power_sum.solve
  • 1.00s call other/dijkstra_bankers_algorithm.py::other.dijkstra_bankers_algorithm.BankersAlgorithm.main ================= 1506 passed, 25 warnings in 71.23s (0:01:11) =================

Also, those 25 pytest warnings are worth fixing!!!

About this issue

  • Original URL
  • State: closed
  • Created 9 months ago
  • Comments: 19 (19 by maintainers)

Commits related to this issue

Most upvoted comments

@cclauss tried backtracking/word_search.py to measure the execution time of each option:

code: called each option 10 times
"""
Author  : Alexander Pantyukhin
Date    : November 24, 2022

Task:
Given an m x n grid of characters board and a string word,
return true if word exists in the grid.

The word can be constructed from letters of sequentially adjacent cells,
where adjacent cells are horizontally or vertically neighboring.
The same letter cell may not be used more than once.

Example:

Matrix:
---------
|A|B|C|E|
|S|F|C|S|
|A|D|E|E|
---------

Word:
"ABCCED"

Result:
True

Implementation notes: Use backtracking approach.
At each point, check all neighbors to try to find the next letter of the word.

leetcode: https://leetcode.com/problems/word-search/

"""


def get_point_key(len_board: int, len_board_column: int, row: int, column: int) -> int:
    """
    Returns the hash key of matrix indexes.

    >>> get_point_key(10, 20, 1, 0)
    200
    """

    return len_board * len_board_column * row + column


def exits_word(
    board: list[list[str]],
    word: str,
    row: int,
    column: int,
    word_index: int,
    visited_points_set: set[int],
) -> bool:
    """
    Return True if it's possible to search the word suffix
    starting from the word_index.

    >>> exits_word([["A"]], "B", 0, 0, 0, set())
    False
    """

    if board[row][column] != word[word_index]:
        return False

    if word_index == len(word) - 1:
        return True

    traverts_directions = [(0, 1), (0, -1), (-1, 0), (1, 0)]
    len_board = len(board)
    len_board_column = len(board[0])
    for direction in traverts_directions:
        next_i = row + direction[0]
        next_j = column + direction[1]
        if not (0 <= next_i < len_board and 0 <= next_j < len_board_column):
            continue

        key = get_point_key(len_board, len_board_column, next_i, next_j)
        if key in visited_points_set:
            continue

        visited_points_set.add(key)
        if exits_word(board, word, next_i, next_j, word_index + 1, visited_points_set):
            return True

        visited_points_set.remove(key)

    return False


def word_exists(board: list[list[str]], word: str) -> bool:
    """
    >>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "ABCCED")
    True
    >>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "SEE")
    True
    >>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "ABCB")
    False
    >>> word_exists([["A"]], "A")
    True
    >>> word_exists([["A","A","A","A","A","A"],
    ...              ["A","A","A","A","A","A"],
    ...              ["A","A","A","A","A","A"],
    ...              ["A","A","A","A","A","A"],
    ...              ["A","A","A","A","A","B"],
    ...              ["A","A","A","A","B","A"]],
    ...             "AAAAAAAAAAAAABB")
    False
    >>> word_exists([["A"]], 123)
    Traceback (most recent call last):
        ...
    ValueError: The word parameter should be a string of length greater than 0.
    >>> word_exists([["A"]], "")
    Traceback (most recent call last):
        ...
    ValueError: The word parameter should be a string of length greater than 0.
    >>> word_exists([[]], "AB")
    Traceback (most recent call last):
        ...
    ValueError: The board should be a non empty matrix of single chars strings.
    >>> word_exists([], "AB")
    Traceback (most recent call last):
        ...
    ValueError: The board should be a non empty matrix of single chars strings.
    >>> word_exists([["A"], [21]], "AB")
    Traceback (most recent call last):
        ...
    ValueError: The board should be a non empty matrix of single chars strings.
    """

    # Validate board
    board_error_message = (
        "The board should be a non empty matrix of single chars strings."
    )

    len_board = len(board)
    if not isinstance(board, list) or len(board) == 0:
        raise ValueError(board_error_message)

    for row in board:
        if not isinstance(row, list) or len(row) == 0:
            raise ValueError(board_error_message)

        for item in row:
            if not isinstance(item, str) or len(item) != 1:
                raise ValueError(board_error_message)

    # Validate word
    if not isinstance(word, str) or len(word) == 0:
        raise ValueError(
            "The word parameter should be a string of length greater than 0."
        )

    len_board_column = len(board[0])
    for i in range(len_board):
        for j in range(len_board_column):
            if exits_word(
                board, word, i, j, 0, {get_point_key(len_board, len_board_column, i, j)}
            ):
                return True

    return False


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    import timeit

    n = 10

    ABCCED = timeit.timeit('word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], '
                        '"ABCCED")', globals=globals(), number=n)

    SEE = timeit.timeit('word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]],'
                        ' "SEE")', globals=globals(), number=n)

    ABCB = timeit.timeit('word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], '
                        '"ABCB")', globals=globals(), number=n)

    A = timeit.timeit('word_exists([["A"]], "A")', globals=globals(), number=n)


    AAAAAAAAAAAAABB = timeit.timeit('word_exists([["A","A","A","A","A","A"], ["A","A","A","A","A","A"], '
                        '["A","A","A","A","A","A"], ["A","A","A","A","A","A"], ["A","A","A","A","A","B"]'
                        ', ["A","A","A","A","B","A"]],"AAAAAAAAAAAAABB")',
                        globals=globals(), number=n)

    print(f"{ABCCED} - ABCCED\n{SEE} - SEE\n{ABCB} - ABCB\n{A} - A\n"
          f"{AAAAAAAAAAAAABB} - AAAAAAAAAAAAABB")

The longest time, as expected, is ‘AAAAAAAAAAAAABB’, it is a gigantic number of times longer.

Output:

0.00034418300128891133 - ABCCED
0.0004890400014119223 - SEE
0.0004235089982103091 - ABCB
4.959400030202232e-05 - A
29.599618363001355 - AAAAAAAAAAAAABB

I pointed out in #8785 that neural_network/simple_neural_network.py has multiple issues (I don’t think it even implements a neural network) and could do with a rewrite. Ideally whatever new implementation replaces it should run a lot more quickly.