diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..cc9aef1
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,37 @@
+[flake8]
+extend-ignore =
+    # whitespace before ':' (currently conflicts with black formatting):
+    E203,
+    # missing docstring in public module:
+    D100,
+    # missing docstring in public class:
+    D101,
+    # missing docstring in public function:
+    D103,
+    # missing docstring in public package:
+    D104,
+    # missing docstring in magic method:
+    D105,
+    # missing docstring in __init__:
+    D107,
+    # 1 blank line required between summary line and description:
+    D205,
+    # first line should end with a period:
+    D400,
+    # first line should be in imperative mood:
+    D401,
+    # first line should not be the function's "signature":
+    D402,
+
+per-file-ignores =
+    mkl_random/__init__.py: F401
+    mkl_random/interfaces/__init__.py: F401
+
+filename = *.py, *.pyx, *.pxi, *.pxd
+max_line_length = 80
+max-doc-length = 80
+show-source = True
+
+# Print detailed statistics if any issue is detected
+count = True
+statistics = True
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 7417668..bb15c50 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -2,3 +2,6 @@
 
 # Sporadic fixes in test_random.py
 e76aa3a5a4b889c0434f0103ec102a50b93ab1ca
+
+# Added pre-commit hooks
+1982deb5c4c189e4bab5733972027a5f82b8868e
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
index 3d93966..a2979b0 100644
--- a/.github/workflows/build-docs.yml
+++ b/.github/workflows/build-docs.yml
@@ -53,7 +53,7 @@ jobs:
         run: |
           # Ensure that SYCL libraries are on LD_LIBRARY_PATH
           source /opt/intel/oneapi/setvars.sh
-          pip install --no-deps --no-build-isolation -e . --verbose	  
+          pip install --no-deps --no-build-isolation -e . --verbose
           python -c "import mkl_random; print(mkl_random.__version__)" || exit 1
           sphinx-build -M html docs/source docs/build
           mkdir -p ~/rendered_docs
diff --git a/.github/workflows/conda-package-cf.yml b/.github/workflows/conda-package-cf.yml
index e177342..7a6e1d6 100644
--- a/.github/workflows/conda-package-cf.yml
+++ b/.github/workflows/conda-package-cf.yml
@@ -110,7 +110,7 @@ jobs:
       - name: Install conda-build
         run: conda install -n base -y conda-build
       - name: Setup MSVC
-        uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0        
+        uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
       - name: Build conda package
         run: conda build --no-test --python ${{ matrix.python }} --numpy ${{ matrix.numpy }} -c conda-forge --override-channels conda-recipe-cf
       - name: Upload artifact
diff --git a/.github/workflows/pre-commit-autoupdate.yml b/.github/workflows/pre-commit-autoupdate.yml
new file mode 100644
index 0000000..e7d274e
--- /dev/null
+++ b/.github/workflows/pre-commit-autoupdate.yml
@@ -0,0 +1,49 @@
+name: Autoupdate pre-commit
+
+on:
+  workflow_dispatch:
+  # To guarantee Maintained check is occasionally updated. See
+  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
+  schedule:
+    - cron: '28 2 * * 6' # Saturday at 02:28 UTC
+
+permissions: read-all
+
+jobs:
+  autoupdate:
+    name: Autoupdate
+
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    permissions:
+      # Needed to create a PR with autoupdate changes
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: Install pre-commit
+        run: pip install pre-commit
+
+      - name: Run pre-commit autoupdate
+        run: pre-commit autoupdate
+
+      - name: Create a PR with autoupdate changes
+        uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
+        with:
+          commit-message: 'chore: update pre-commit hooks'
+          add-paths: .pre-commit-config.yaml
+          branch: 'bot/pre-commit-autoupdate'
+          delete-branch: true
+          title: Weekly pre-commit autoupdate
+          body: |
+            This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`.
+          labels: autoupdate
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..e7b2380
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,31 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+    branches: [master]
+
+permissions: read-all
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+    - name: Checkout repo
+      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+    - name: Set up python
+      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      with:
+        python-version: '3.14'
+
+    - name: Set up pip packages
+      uses: BSFishy/pip-action@8f2d471d809dc20b6ada98c91910b6ae6243f318 # v1
+      with:
+        packages: |
+          codespell
+          pylint
+
+    - name: Run pre-commit checks
+      uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
diff --git a/.gitignore b/.gitignore
index 38677c1..89fa448 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,3 @@ __pycache__/
 mkl_random/src/mklrand.c
 mkl_random/mklrand.cpp
 mkl_random/mklrand.cpython*.so
-
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..430281c
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,103 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v6.0.0
+  hooks:
+    - id: check-ast
+    - id: check-builtin-literals
+    - id: check-case-conflict
+    - id: check-executables-have-shebangs
+    - id: check-merge-conflict
+    - id: check-toml
+    - id: debug-statements
+    - id: destroyed-symlinks
+    - id: end-of-file-fixer
+    - id: fix-byte-order-marker
+    - id: mixed-line-ending
+    - id: trailing-whitespace
+
+-   repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+    -   id: python-check-blanket-noqa
+    -   id: python-check-blanket-type-ignore
+    -   id: python-check-mock-methods
+    -   id: python-no-eval
+    -   id: python-no-log-warn
+    -   id: python-use-type-annotations
+    -   id: rst-backticks
+    -   id: rst-directive-colons
+    -   id: rst-inline-touching-normal
+    -   id: text-unicode-replacement-char
+
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.4.1
+  hooks:
+  - id: codespell
+    args: ["-L", "nd,hart,elemente,wirth"]
+    additional_dependencies:
+    - tomli
+
+- repo: https://github.com/psf/black
+  rev: 26.1.0
+  hooks:
+  - id: black
+
+- repo: https://github.com/pocc/pre-commit-hooks
+  rev: v1.3.5
+  hooks:
+  - id: clang-format
+    args: ["-i"]
+
+- repo: https://github.com/MarcoGorelli/cython-lint
+  rev: v0.19.0
+  hooks:
+  - id: cython-lint
+  - id: double-quote-cython-strings
+
+- repo: https://github.com/pycqa/flake8
+  rev: 7.3.0
+  hooks:
+  - id: flake8
+    args: ["--config=.flake8"]
+    additional_dependencies:
+      - flake8-docstrings==1.7.0
+      - flake8-bugbear==25.11.29
+
+- repo: https://github.com/pycqa/isort
+  rev: 8.0.0
+  hooks:
+  - id: isort
+    name: isort (python)
+  - id: isort
+    name: isort (cython)
+    types: [cython]
+  - id: isort
+    name: isort (pyi)
+    types: [pyi]
+
+- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
+  rev: v2.16.0
+  hooks:
+  - id: pretty-format-toml
+    args: [--autofix]
+
+- repo: local
+  hooks:
+  - id: pylint
+    name: pylint
+    entry: pylint
+    language: system
+    types: [python]
+    require_serial: true
+    args:
+        [
+        "-rn", # Only display messages
+        "-sn", # Don't display the score
+        "--errors-only",
+        "--disable=import-error",
+        ]
+
+- repo: https://github.com/jumanjihouse/pre-commit-hooks
+  rev: 3.0.0
+  hooks:
+  - id: shellcheck
diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh
index 0909e03..f513746 100644
--- a/conda-recipe/build.sh
+++ b/conda-recipe/build.sh
@@ -11,7 +11,7 @@ read -r GLIBC_MAJOR GLIBC_MINOR <<<"$(conda list '^sysroot_linux-64$' \
 if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then
    $PYTHON -m pip wheel --no-build-isolation --no-deps .
    ${PYTHON} -m wheel tags --remove --platform-tag "manylinux_${GLIBC_MAJOR}_${GLIBC_MINOR}_x86_64" mkl_random*.whl
-   cp mkl_random*.whl ${WHEELS_OUTPUT_FOLDER}
+   cp mkl_random*.whl "${WHEELS_OUTPUT_FOLDER}"
 else
    # Build conda package
    $PYTHON -m pip install --no-build-isolation --no-deps .
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5feec0e..666e0b3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -6,10 +6,10 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
-project = 'mkl_random'
-copyright = '2017-2025, Intel Corp.'
-author = 'Intel Corp.'
-release = '1.4.0dev1'
+project = "mkl_random"
+copyright = "2017-2025, Intel Corp."
+author = "Intel Corp."
+release = "1.4.0dev1"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -26,16 +26,15 @@
     "sphinx.ext.viewcode",
     "sphinxcontrib.programoutput",
     # "sphinxcontrib.googleanalytics",
-    'sphinx_design',
+    "sphinx_design",
 ]
 
-templates_path = ['_templates']
+templates_path = ["_templates"]
 exclude_patterns = []
 
 
-
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = 'furo'
-html_static_path = ['_static']
+html_theme = "furo"
+html_static_path = ["_static"]
diff --git a/docs/source/how_to.rst b/docs/source/how_to.rst
index 3ae4b06..985d574 100644
--- a/docs/source/how_to.rst
+++ b/docs/source/how_to.rst
@@ -23,11 +23,11 @@ the internal state of the pseudo-random number generators.
         # draw some numbers as if computation were to continue
         post_draw = rs.gamma(5, 1, size=100)
 
-        # restore random state, and continue from 
+        # restore random state, and continue from
         restored_rs = pickle.loads(saved)
         resumed_draw = restored_rs.gamma(5, 1, size=100)
-        
-        # sample from restored stated is the same as sample 
+
+        # sample from restored state is the same as sample
         # from the original one
         assert np.array_equal(restored_rs, resumed_draw)
 
@@ -47,14 +47,14 @@ indistinguishable from independent.
 .. py:method:: skipahead(nskips)
     :canonical: mkl_random.RandomState.skipahead
 
-    Advance the state of the generator using skip-ahead method, or raise :code:`ValueError` 
-    exception if not supported. 
+    Advance the state of the generator using skip-ahead method, or raise :code:`ValueError`
+    exception if not supported.
 
     The argument `nskips` must be a positive Python integer.
 
-    The method is supported for :ref:`"philox4x32x10" <philox4x32x10_brng>`, :ref:`"mrg32k3a" <mrg32k3a_brng>`, 
+    The method is supported for :ref:`"philox4x32x10" <philox4x32x10_brng>`, :ref:`"mrg32k3a" <mrg32k3a_brng>`,
     :ref:`"mcg31m1" <mcg31m1_brng>`, :ref:`"mcg59" <mcg59_brng>`, :ref:`"wh" <wh_brng>`,
-    :ref:`"mt19937" <mt19937_brng>`, :ref:`"sfmt19937" <sfmt19937_brng>`, and :ref:`"ars5" <ars5_brng>` 
+    :ref:`"mt19937" <mt19937_brng>`, :ref:`"sfmt19937" <sfmt19937_brng>`, and :ref:`"ars5" <ars5_brng>`
     basic random number generators.
 
 .. note::
@@ -68,20 +68,20 @@ indistinguishable from independent.
     Initialize the state of the generator using leap-frog method, or raise :code:`ValueError`
     exception if not supported.
 
-    The leap-frog method partitions state trajectory into :code:`nstream` interleaved non-overlapping 
+    The leap-frog method partitions state trajectory into :code:`nstream` interleaved non-overlapping
     sub-sequences, and argument :code:`k` identifies the subsequence.
 
-    The method is supported for :ref:`"mcg31m1" <mcg31m1_brng>`, :ref:`"mcg59" <mcg59_brng>`, and :ref:`"wh" <wh_brng>` 
+    The method is supported for :ref:`"mcg31m1" <mcg31m1_brng>`, :ref:`"mcg59" <mcg59_brng>`, and :ref:`"wh" <wh_brng>`
     basic pseudo-random number generators.
 
 .. note::
-    When using :meth:`leapfrog` or :meth:`skipahead` methods one must remember that parallel tasks partition 
-    generators period and choose a generator with sufficiently long period to avoid cycling over the period 
+    When using :meth:`leapfrog` or :meth:`skipahead` methods one must remember that parallel tasks partition
+    generators period and choose a generator with sufficiently long period to avoid cycling over the period
     more than once, as doing so also breaks the assumption of statistical independence and may compromise
     correctness of the simulation.
 
 :mod:`mkl_random` also provides two families of basic pseudo-random number generators, :ref:`"mt2203" <mt2203_brng>` and
-:ref:`"wh" <wh_brng>`, with property that members from particular family, initialized equally, produce streams of 
+:ref:`"wh" <wh_brng>`, with property that members from particular family, initialized equally, produce streams of
 randomness stasistically indistunguishable from independent. To use such families in parallel computation, assign
 difference family generators to different parallel workers and sample those assigned generators in each parallel worker.
-Please refer to "examples/" folder in the `GitHub repo <https://github.com/IntelPython/mkl_random>`_ for more details.
\ No newline at end of file
+Please refer to "examples/" folder in the `GitHub repo <https://github.com/IntelPython/mkl_random>`_ for more details.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fd65e8e..f7f7e5c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,10 +1,10 @@
 :mod:`mkl_random`: random numbers fast
 ======================================
 
-:mod:`mkl_random` is Python package exposing :ref:`pseudo-random <pseudorandom_vs_truerandom>` and 
-:ref:`non-deterministic <pseudorandom_vs_truerandom>` 
-random  number generators and sampling from continuous and discrete distribution available in 
-Intel(R) oneAPI Math Kernel Library 
+:mod:`mkl_random` is Python package exposing :ref:`pseudo-random <pseudorandom_vs_truerandom>` and
+:ref:`non-deterministic <pseudorandom_vs_truerandom>`
+random  number generators and sampling from continuous and discrete distribution available in
+Intel(R) oneAPI Math Kernel Library
 (`oneMKL <https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html>`_).
 
 .. grid:: 2
diff --git a/docs/source/maintenance/index.rst b/docs/source/maintenance/index.rst
index d1b7b68..128b824 100644
--- a/docs/source/maintenance/index.rst
+++ b/docs/source/maintenance/index.rst
@@ -1,7 +1,7 @@
 Contributing
 ============
 
-:mod:`mkl_random` is an free and open source project. 
+:mod:`mkl_random` is a free and open source project.
 We welcome and appreciate your contributions.
 
 To contribute, fork the repo https://github.com/IntelPython/mkl_random.git,
@@ -13,7 +13,7 @@ clone it:
          git clone https://github.com/<fork-org>/mkl_random.git
 
 
-A working compiler is needed build :mod:`mkl_random`. 
+A working compiler is needed to build :mod:`mkl_random`.
 Both Gnu :code:`g++` and Intel LLVM :code:`icpx` are supported.
 
 Make sure to install Python packages required to build :mod:`mkl_random`:
@@ -47,4 +47,4 @@ To build documentation, install dependencies and running
 
       $ sphinx-build -M html docs/source docs/build
 
-Rendered documentation can be found in "docs/build/html".
\ No newline at end of file
+Rendered documentation can be found in "docs/build/html".
diff --git a/docs/source/reference/ars5.rst b/docs/source/reference/ars5.rst
index a3578ac..e4f118f 100644
--- a/docs/source/reference/ars5.rst
+++ b/docs/source/reference/ars5.rst
@@ -3,7 +3,7 @@
 ARS5 brng
 =========
 
-The ARS5 counter-based pseudo-random number generator based on AES encryption algorithm can be 
+The ARS5 counter-based pseudo-random number generator based on AES encryption algorithm can be
 initialized with either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/mcg31.rst b/docs/source/reference/mcg31.rst
index 7b06745..66f51d7 100644
--- a/docs/source/reference/mcg31.rst
+++ b/docs/source/reference/mcg31.rst
@@ -3,7 +3,7 @@
 MCG31 brng
 ==========
 
-The 31-bit multiplicative congruential pseudo-random number generator :math:`mcg(1132489760, 2^{31} -1)` can be 
+The 31-bit multiplicative congruential pseudo-random number generator :math:`mcg(1132489760, 2^{31} -1)` can be
 initialized with  either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/mcg59.rst b/docs/source/reference/mcg59.rst
index 947d319..d5a2b65 100644
--- a/docs/source/reference/mcg59.rst
+++ b/docs/source/reference/mcg59.rst
@@ -3,7 +3,7 @@
 MCG59 brng
 ==========
 
-The 59-bit multiplicative congruential pseudo-random number generator can be 
+The 59-bit multiplicative congruential pseudo-random number generator can be
 initialized with  either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/mrg32k3a.rst b/docs/source/reference/mrg32k3a.rst
index bfe71fb..433fcb8 100644
--- a/docs/source/reference/mrg32k3a.rst
+++ b/docs/source/reference/mrg32k3a.rst
@@ -3,7 +3,7 @@
 MRG32k3a brng
 =============
 
-The combined multiple recursive pseudo-random number generator MRG32k3a can be 
+The combined multiple recursive pseudo-random number generator MRG32k3a can be
 initialized with  either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/mt19937.rst b/docs/source/reference/mt19937.rst
index 4d47096..f165bff 100644
--- a/docs/source/reference/mt19937.rst
+++ b/docs/source/reference/mt19937.rst
@@ -3,7 +3,7 @@
 MT19937 brng
 ============
 
-The Mersenne Twister pseudo-random number generator can be initialized with either an integral seed, 
+The Mersenne Twister pseudo-random number generator can be initialized with either an integral seed,
 a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/mt2203.rst b/docs/source/reference/mt2203.rst
index 2a61b55..9b8d019 100644
--- a/docs/source/reference/mt2203.rst
+++ b/docs/source/reference/mt2203.rst
@@ -3,10 +3,10 @@
 MT2203 brng
 ===========
 
-Each generator from the set of `6024 Mersenne Twister pseudorandom number generators <philoxrng_>`_ can be 
+Each generator from the set of `6024 Mersenne Twister pseudorandom number generators <philoxrng_>`_ can be
 initialized with either an integral seed, a list of integral seeds, or automatically.
 
-An individual member of the set can be addressed by using a tuple to specify the generator 
+An individual member of the set can be addressed by using a tuple to specify the generator
 :code:`brng=("MT2203", set_id)` where :math:`0 \leq \text{set_id} < 6024`.
 
 .. code-block:: python
@@ -48,8 +48,8 @@ When seed is not specified, the generator is initialized using system clock, e.g
         isample = rs_def.randint(1, 6 + 1, size=1000)
 
 Different members of the set of generators initialized with the same seed are designed to generate
-statistically independent streams of randomness. This property makes MT2203 generator suitable for 
+statistically independent streams of randomness. This property makes MT2203 generator suitable for
 parallelizing stochastic algorithms. Please refer to "examples/" folder in the `GitHub repo
 <https://github.com/IntelPython/mkl_random>`_.
 
-.. _philoxrng: https://spec.oneapi.io/versions/1.0-rev-2/elements/oneMKL/source/domains/rng/mkl-rng-philox4x32x10.html
\ No newline at end of file
+.. _philoxrng: https://spec.oneapi.io/versions/1.0-rev-2/elements/oneMKL/source/domains/rng/mkl-rng-philox4x32x10.html
diff --git a/docs/source/reference/nondeterministic.rst b/docs/source/reference/nondeterministic.rst
index 3b3b752..13aa643 100644
--- a/docs/source/reference/nondeterministic.rst
+++ b/docs/source/reference/nondeterministic.rst
@@ -19,4 +19,4 @@ The generator with non-deterministic source of randomness, such as a hardware de
         esample = rs.uniform(0, 1, size=1000)
 
 Seed parameter provided to the constructor of :class:`mkl_random.RandomState`,
-or :meth:`mkl_random.RandomState.seed` is ignored.
\ No newline at end of file
+or :meth:`mkl_random.RandomState.seed` is ignored.
diff --git a/docs/source/reference/philox4x32x10.rst b/docs/source/reference/philox4x32x10.rst
index fcc0a52..0cb0f26 100644
--- a/docs/source/reference/philox4x32x10.rst
+++ b/docs/source/reference/philox4x32x10.rst
@@ -3,7 +3,7 @@
 Philox4x32x10 brng
 ==================
 
-The Philox 4x32x10 counter-based pseudo-random number generator can be 
+The Philox 4x32x10 counter-based pseudo-random number generator can be
 initialized with  either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/r250.rst b/docs/source/reference/r250.rst
index 36942c6..40ad6de 100644
--- a/docs/source/reference/r250.rst
+++ b/docs/source/reference/r250.rst
@@ -3,7 +3,7 @@
 R250 brng
 =========
 
-The 32-bit generalized feedback shift register pseudo-random number generator GFSR(250,103) can be 
+The 32-bit generalized feedback shift register pseudo-random number generator GFSR(250,103) can be
 initialized with  either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/sfmt19937.rst b/docs/source/reference/sfmt19937.rst
index 56ee04e..a7b736f 100644
--- a/docs/source/reference/sfmt19937.rst
+++ b/docs/source/reference/sfmt19937.rst
@@ -3,7 +3,7 @@
 SFMT19937 brng
 ==============
 
-The SIMD-oriented Mersenne Twister pseudo-random number generator can be initialized with 
+The SIMD-oriented Mersenne Twister pseudo-random number generator can be initialized with
 either an integral seed, a list of integral seeds, or automatically.
 
 .. code-block:: python
diff --git a/docs/source/reference/wichmann_hill.rst b/docs/source/reference/wichmann_hill.rst
index 9d49f4e..ca6d68e 100644
--- a/docs/source/reference/wichmann_hill.rst
+++ b/docs/source/reference/wichmann_hill.rst
@@ -3,8 +3,8 @@
 Wichmann-Hill brng
 ==================
 
-Each generator from the set of 273 Wichmann-Hill’s combined multiplicative congruential 
-`generators <whrng_>`_ can be initialized with  either an integral seed, a list of integral seeds, 
+Each generator from the set of 273 Wichmann-Hill’s combined multiplicative congruential
+`generators <whrng_>`_ can be initialized with  either an integral seed, a list of integral seeds,
 or automatically.
 
 An individual member of the set can be addressed by using a tuple to specify the generator as
@@ -49,8 +49,8 @@ When seed is not specified, the generator is initialized using system clock, e.g
         isample = rs_def.randint(1, 6 + 1, size=1000)
 
 Different members of the set of generators initialized with the same seed are designed to generate
-statistically independent streams of randomness. This property makes MT2203 generator suitable for 
+statistically independent streams of randomness. This property makes Wichmann-Hill generators suitable for
 parallelizing stochastic algorithms. Please refer to "examples/" folder in the `GitHub repo
 <https://github.com/IntelPython/mkl_random>`_.
 
-.. _whrng: https://spec.oneapi.io/versions/1.0-rev-2/elements/oneMKL/source/domains/rng/mkl-rng-wichmann_hill.html
\ No newline at end of file
+.. _whrng: https://spec.oneapi.io/versions/1.0-rev-2/elements/oneMKL/source/domains/rng/mkl-rng-wichmann_hill.html
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
index 1c72fd3..40d3d21 100644
--- a/docs/source/tutorials.rst
+++ b/docs/source/tutorials.rst
@@ -4,7 +4,7 @@ Beginner's guide
 Installation
 ------------
 
-The package :mod:`mkl_random` is available in `conda <https://docs.conda.io/en/latest/>`_ ecosystem on "conda-forge", "main", and 
+The package :mod:`mkl_random` is available in `conda <https://docs.conda.io/en/latest/>`_ ecosystem on "conda-forge", "main", and
 "intel" `channels <https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/channels.html>`_ (i.e. locations).
 
 .. code-block:: bash
@@ -39,16 +39,16 @@ The :mod:`mkl_random` is also distributed as part of `Intel® Distribution for P
 First steps
 -----------
 
-The :mod:`mkl_random` package has followed the design of :class:`numpy.random` package to 
+The :mod:`mkl_random` package has followed the design of :class:`numpy.random` package to
 make :mod:`mkl_random` easy to use for those already familiar with the :mod:`numpy.random` module.
 
 .. note::
-    Since the first release of :mod:`mkl_random`, NumPy introduced new classes :class:`numpy.random.Generator` and 
+    Since the first release of :mod:`mkl_random`, NumPy introduced new classes :class:`numpy.random.Generator` and
     :class:`numpy.random.BitGenerator`, while also retaining :class:`numpy.random.RandomState` for backwards
-    compatibility. :mod:`mkl_random`, at present, does not provide classes mirroring :class:`Generator` or 
+    compatibility. :mod:`mkl_random`, at present, does not provide classes mirroring :class:`Generator` or
     :class:`BitGenerators`.
 
-The state of pseudo-random number generator is stored in :class:`mkl_random.RandomState` class, 
+The state of pseudo-random number generator is stored in :class:`mkl_random.RandomState` class,
 so using :mod:`mkl_random` begins with creating an instance of this class:
 
 .. code-block:: python
@@ -64,8 +64,8 @@ Sampling from difference probability distribution is done by calling the class m
 
         s = rs.uniform(0, 1, size=1_000_000)
 
-Drawing samples updates the state of pseudo-random number generator so that next sample is statistically 
-independent from the previous one (with caveats of using pseudo-random generators implied). 
+Drawing samples updates the state of pseudo-random number generator so that next sample is statistically
+independent from the previous one (with caveats of using pseudo-random generators implied).
 
 Here is an example of estimating value of :math:`\pi` by using Monte-Carlo method:
 
@@ -74,7 +74,7 @@ Here is an example of estimating value of :math:`\pi` by using Monte-Carlo metho
 
         import numpy as np
         import mkl_random
- 
+
         rs = mkl_random.RandomState(seed=1234)
 
         sample_size = 10**8
@@ -86,7 +86,7 @@ Here is an example of estimating value of :math:`\pi` by using Monte-Carlo metho
             x = rs.uniform(0, 1, size=batch_size)
             y = rs.uniform(0, 1, size=batch_size)
             accepted += np.sum(x*x + y*y < 1.0)
-        
+
         print("Pi estimate: ", 4. * (accepted / sample_size))
 
 Sample output of running such an example:
@@ -103,11 +103,11 @@ Pseudo-random vs. non-deterministic generators
 
 .. _pseudorandom_vs_truerandom:
 
-Stochastic computations often need to work with *independent* samples 
-from either the same probability distribution, or a set of probability 
+Stochastic computations often need to work with *independent* samples
+from either the same probability distribution, or a set of probability
 distributions of interest.
 
-`True random generator <https://en.wikipedia.org/wiki/Hardware_random_number_generator>`_ relies on 
+`True random generator <https://en.wikipedia.org/wiki/Hardware_random_number_generator>`_ relies on
 laws of physics to provide those, leveraging dedicated hardware providing a source of entropy.
 
 `Psuedo-random generator <https://en.wikipedia.org/wiki/Pseudorandom_number_generator>`_ is an algorithm that outputs a sequence that emulates true randomness.
@@ -115,8 +115,8 @@ The quality of emulation is tested statistically through a battery of test, e.g.
 These tests check if various statistical tests can separate the pseudo-random sequence from a true random one.
 
 Pseudo-random generators usually have an internal state and require its initialization, also sometimes known as seeding.
-States initialization algorithms take user provided _seed_ value, usually an integer or a finite sequence of integers, and scramble it 
-to populate the internal state of the pseudo-random generator. 
+State initialization algorithms take a user-provided *seed* value, usually an integer or a finite sequence of integers, and scramble it
+to populate the internal state of the pseudo-random generator.
 
 The sequence from the pseudo-random generator, unlike from true random generator, is repeatable, provided the internal state can be
 saved and restored, or initialized to the same state.
diff --git a/examples/README.md b/examples/README.md
index 845b1d6..8aecf94 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -8,13 +8,13 @@ Each worker gets `rs` and `n` arguments, `rs` representing RandomState object as
 and `n` being the size of the problem. `rs` is used to generate samples of size `n`, perform Monte-Carlo
 estimate(s) based on the sample and return.
 
-After run is complete, a generator is returns that contains results of each worker. 
+After the run is complete, a generator is returned that contains the results of each worker.
 
 This data is post-processed as necessary for the application.
 
 ## Stick triangle problem
 
-Code is tested to estimate the probability that 3 segments, obtained by splitting a unit stick 
+Code is tested to estimate the probability that 3 segments, obtained by splitting a unit stick
 in two randomly chosen places, can be sides of a triangle. This probability is known in closed form to be $\frac{1}{4}$.
 
 Run python script "stick_triangle.py" to estimate this probability using parallel Monte-Carlo algorithm:
@@ -33,8 +33,8 @@ Execution time: 64.043 seconds
 
 ## Stick tetrahedron problem
 
-Code is used to estimate the probability that 6 segments, obtained by splitting a unit stick in 
-5 random chosen places, can be sides of a tetrahedron. 
+Code is used to estimate the probability that 6 segments, obtained by splitting a unit stick in
+5 randomly chosen places, can be sides of a tetrahedron.
 
 The probability is not known in closed form. See
 [math.stackexchange.com/questions/351913](https://math.stackexchange.com/questions/351913/probability-that-a-stick-randomly-broken-in-five-places-can-form-a-tetrahedron) for more details.
diff --git a/examples/arg_parsing.py b/examples/arg_parsing.py
index 754b7ae..f0a5ec8 100644
--- a/examples/arg_parsing.py
+++ b/examples/arg_parsing.py
@@ -1,13 +1,14 @@
 import argparse
 
-__all__ = ['parse_arguments']
+__all__ = ["parse_arguments"]
+
 
 def pos_int(s):
     v = int(s)
     if v > 0:
         return v
     else:
-        raise argparse.ArgumentTypeError('%r is not a positive integer' % s)
+        raise argparse.ArgumentTypeError("%r is not a positive integer" % s)
 
 
 def nonneg_int(s):
@@ -15,22 +16,61 @@ def nonneg_int(s):
     if v >= 0:
         return v
     else:
-        raise argparse.ArgumentTypeError('%r is not a non-negative integer' % s)
+        raise argparse.ArgumentTypeError("%r is not a non-negative integer" % s)
 
 
 def parse_arguments():
     argParser = argparse.ArgumentParser(
         prog="stick_tetrahedron.py",
-        description="Monte-Carlo estimation of probability that 6 segments of a stick randomly broken in 5 places can form a tetrahedron.",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-
-    argParser.add_argument('-s', '--seed',        default=7777,   type=pos_int,    help="Random seed to initialize algorithms from MT2203 family")
-    argParser.add_argument('-b', '--batch_size',  default=65536,  type=pos_int,    help="Batch size for the Monte-Carlo run")
-    argParser.add_argument('-n', '--batch_count', default=2048,   type=pos_int,    help="Number of batches executed in parallel")
-    argParser.add_argument('-p', '--processes',   default=-1,     type=int,        help="Number of processes used to execute batches")
-    argParser.add_argument('-d', '--id_offset',   default=0,      type=nonneg_int, help="Offset for the MT2203/WH algorithms id")
-    argParser.add_argument('-j', '--jump_size',   default=0,      type=nonneg_int, help="Jump size for skip-ahead")
-      
+        description="Monte-Carlo estimation of probability that 6 segments of "
+        "a stick randomly broken in 5 places can form a "
+        "tetrahedron.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    argParser.add_argument(
+        "-s",
+        "--seed",
+        default=7777,
+        type=pos_int,
+        help="Random seed to initialize algorithms from MT2203 family",
+    )
+    argParser.add_argument(
+        "-b",
+        "--batch_size",
+        default=65536,
+        type=pos_int,
+        help="Batch size for the Monte-Carlo run",
+    )
+    argParser.add_argument(
+        "-n",
+        "--batch_count",
+        default=2048,
+        type=pos_int,
+        help="Number of batches executed in parallel",
+    )
+    argParser.add_argument(
+        "-p",
+        "--processes",
+        default=-1,
+        type=int,
+        help="Number of processes used to execute batches",
+    )
+    argParser.add_argument(
+        "-d",
+        "--id_offset",
+        default=0,
+        type=nonneg_int,
+        help="Offset for the MT2203/WH algorithms id",
+    )
+    argParser.add_argument(
+        "-j",
+        "--jump_size",
+        default=0,
+        type=nonneg_int,
+        help="Jump size for skip-ahead",
+    )
+
     args = argParser.parse_args()
 
     return args
diff --git a/examples/parallel_mc.py b/examples/parallel_mc.py
index 04f11fd..de985fd 100644
--- a/examples/parallel_mc.py
+++ b/examples/parallel_mc.py
@@ -1,17 +1,21 @@
 import multiprocessing as mp
 from functools import partial
 
-__all__ = ['parallel_mc_run']
+__all__ = ["parallel_mc_run"]
+
 
 def worker_compute(w_id):
-    "Worker function executed on the spawned slave process"
-    global _local_rs, _worker_mc_compute_func
+    """Worker function executed on the spawned slave process"""
     return _worker_mc_compute_func(_local_rs)
 
 
 def init_worker(w_rs, mc_compute_func=None, barrier=None):
     """Assign process local random state variable `rs` the given value"""
-    assert not '_local_rs' in globals(), "Here comes trouble. Process is not expected to have global variable `_local_rs`"
+    assert "_local_rs" not in globals(), (
+        "Here comes trouble. Process is not "
+        "expected to have global variable "
+        "`_local_rs`"
+    )
 
     global _local_rs, _worker_mc_compute_func
     _local_rs = w_rs
@@ -19,21 +23,27 @@ def init_worker(w_rs, mc_compute_func=None, barrier=None):
     # wait to ensure that the assignment takes place for each worker
     barrier.wait()
 
+
 def parallel_mc_run(random_states, n_workers, n_batches, mc_func):
     """
-    Given iterable `random_states` of length `n_workers`, the number of batches `n_batches`,
-    and the function `worker_compute` to execute, return iterator with results returned by 
-    the supplied function. The function is expected to conform to signature f(worker_id), 
-    and has access to worker-local global variable `rs`, containing worker's random states.
+    Given iterable `random_states` of length `n_workers`, the number of batches
+    `n_batches`, and the function `mc_func` to execute, return iterator with
+    results returned by the supplied function. The function is expected to
+    conform to signature f(rs), where `rs` is the worker-local random state
+    assigned to the worker process by `init_worker`.
     """
     # use of Barrier ensures that every worker gets one
 
     with mp.Manager() as manager:
         b = manager.Barrier(n_workers)
-    
+
         with mp.Pool(processes=n_workers) as pool:
             # 1. map over every worker once to distribute RandomState instances
-            pool.map(partial(init_worker, mc_compute_func=mc_func, barrier=b), random_states, chunksize=1)
+            pool.map(
+                partial(init_worker, mc_compute_func=mc_func, barrier=b),
+                random_states,
+                chunksize=1,
+            )
             # 2. Perform computations on workers
             r = pool.map(worker_compute, range(n_batches), chunksize=1)
 
diff --git a/examples/parallel_random_states.py b/examples/parallel_random_states.py
index cdcaf72..e5f2be9 100644
--- a/examples/parallel_random_states.py
+++ b/examples/parallel_random_states.py
@@ -2,16 +2,21 @@
 
 
 def build_MT2203_random_states(seed, id0, n_workers):
-    # Create instances of RandomState for each worker process from MT2203 family of generators
-    return (rnd.RandomState(seed, brng=('MT2203', id0 + idx)) for idx in range(n_workers))
+    # Create instances of RandomState for each worker process from MT2203
+    # family of generators
+    return (
+        rnd.RandomState(seed, brng=("MT2203", id0 + idx))
+        for idx in range(n_workers)
+    )
 
 
 def build_SFMT19937_random_states(seed, jump_size, n_workers):
     import copy
-    # Create instances of RandomState for each worker process from MT2203 family of generators
-    rs = rnd.RandomState(seed, brng='SFMT19937')
+
+    # Create instances of RandomState for each worker process from a single
+    # SFMT19937 stream, separated by skip-ahead of `jump_size`
+    rs = rnd.RandomState(seed, brng="SFMT19937")
     yield copy.copy(rs)
     for _ in range(1, n_workers):
         rs.skipahead(jump_size)
         yield copy.copy(rs)
-
diff --git a/examples/stick_tetrahedron.py b/examples/stick_tetrahedron.py
index c01cf6b..a38d675 100644
--- a/examples/stick_tetrahedron.py
+++ b/examples/stick_tetrahedron.py
@@ -1,18 +1,20 @@
 import numpy as np
-from parallel_mc import parallel_mc_run, sequential_mc_run
+from arg_parsing import parse_arguments
+from parallel_mc import parallel_mc_run
 from parallel_random_states import build_MT2203_random_states
 from sticky_math import mc_six_piece_stick_tetrahedron_prob
-from arg_parsing import parse_arguments
+
 
 def mc_runner(rs, batch_size=None):
     return mc_six_piece_stick_tetrahedron_prob(rs, batch_size)
 
+
 def aggregate_mc_counts(counts, n_batches, batch_size):
     ps = counts / batch_size
     # compute sample estimator's mean and standard deviation
     p_est = ps.mean()
-    p_std = ps.std()/np.sqrt(batches)
-    
+    p_std = ps.std() / np.sqrt(n_batches)
+
     # compute parameters for Baysean posterior of the probability
     event_count = 0
     nonevent_count = 0
@@ -22,28 +24,37 @@ def aggregate_mc_counts(counts, n_batches, batch_size):
 
     assert event_count >= 0
     assert nonevent_count >= 0
-    return (p_est, p_std, event_count, nonevent_count) 
+    return (p_est, p_std, event_count, nonevent_count)
 
 
 def print_result(p_est, p_std, mc_size):
-    dig = 3 - int(np.log10(p_std)) # only show 3 digits past width of confidence interval
+    dig = 3 - int(
+        np.log10(p_std)
+    )  # only show 3 digits past width of confidence interval
     frm_str = "{0:0." + str(dig) + "f}"
 
     print(("Monte-Carlo estimate of probability: " + frm_str).format(p_est))
-    print(("Population estimate of the estimator's standard deviation: " + frm_str).format(p_std))
-    print(("Expected standard deviation of the estimator: " + frm_str).format(np.sqrt(p_est * (1-p_est)/mc_size)))
+    print(
+        (
+            "Population estimate of the estimator's standard deviation: "
+            + frm_str
+        ).format(p_std)
+    )
+    print(
+        ("Expected standard deviation of the estimator: " + frm_str).format(
+            np.sqrt(p_est * (1 - p_est) / mc_size)
+        )
+    )
     print("Total MC size: {}".format(mc_size))
-    
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     import multiprocessing as mp
-    from itertools import repeat
-    from timeit import default_timer as timer
-    import sys
     from functools import partial
+    from timeit import default_timer as timer
 
     args = parse_arguments()
-    
+
     seed = args.seed
     n_workers = args.processes
     if n_workers <= 0:
@@ -53,25 +64,35 @@ def print_result(p_est, p_std, mc_size):
     batches = args.batch_count
     id0 = args.id_offset
     print("Parallel Monte-Carlo estimation of stick tetrahedron probability")
-    print("Input parameters: -s {seed} -b {batchSize} -n {numBatches} -p {processes} -d {idOffset}".format(
-        seed=args.seed, batchSize=args.batch_size, numBatches=args.batch_count, processes=n_workers, idOffset=args.id_offset))
+    print(
+        f"Input parameters: -s {args.seed} -b {args.batch_size} -n "
+        f"{args.batch_count} -p {n_workers} -d {args.id_offset}"
+    )
     print("")
 
     t0 = timer()
 
     rss = build_MT2203_random_states(seed, id0, n_workers)
 
-    r = parallel_mc_run(rss, n_workers, batches, partial(mc_runner, batch_size=batch_size))
-    # r = sequential_mc_run(rss, n_workers, batches, partial(mc_runner, batch_size=batch_size))
+    r = parallel_mc_run(
+        rss, n_workers, batches, partial(mc_runner, batch_size=batch_size)
+    )
+    # r = sequential_mc_run(rss, n_workers, batches,
+    # partial(mc_runner, batch_size=batch_size))
 
     # retrieve values of estimates into numpy array
     counts = np.fromiter(r, dtype=np.double)
-    p_est, p_std, event_count, nonevent_count = aggregate_mc_counts(counts, batches, batch_size)
+    p_est, p_std, event_count, nonevent_count = aggregate_mc_counts(
+        counts, batches, batch_size
+    )
 
     t1 = timer()
 
     print_result(p_est, p_std, batches * batch_size)
     print("")
-    print("Bayesian posterior beta distribution parameters: ({0}, {1})".format(event_count, nonevent_count))
+    print(
+        "Bayesian posterior beta distribution parameters: "
+        f"({event_count}, {nonevent_count})"
+    )
     print("")
-    print("Execution time: {0:0.3f} seconds".format(t1-t0))
+    print(f"Execution time: {t1 - t0:0.3f} seconds")
diff --git a/examples/stick_triangle.py b/examples/stick_triangle.py
index 865fa9a..2bbbbd8 100644
--- a/examples/stick_triangle.py
+++ b/examples/stick_triangle.py
@@ -1,33 +1,44 @@
 import numpy as np
+
 import mkl_random as rnd
 
-__doc__ = """
-Let's solve a classic problem of MC-estimating a probability that 3 segments of a unit stick randomly broken in 2 places can form a triangle. 
-Let $u_1$ and $u_2$ be standard uniform random variables, denoting positions where the stick has been broken.
+__doc__ = r"""
+Let's solve a classic problem of MC-estimating a probability that 3 segments
+of a unit stick randomly broken in 2 places can form a triangle.
+Let $u_1$ and $u_2$ be standard uniform random variables, denoting positions
+where the stick has been broken.
 
-Let $w_1 = \min(u_1, u_2)$ and $w_2 = \max(u_1, u_2)$. Then, length of segments are $x_1 = w_1$, $x_2 = w_2-w_1$, $x_3 = 1-w_2$. 
+Let $w_1 = \min(u_1, u_2)$ and $w_2 = \max(u_1, u_2)$. Then, length of segments
+are $x_1 = w_1$, $x_2 = w_2-w_1$, $x_3 = 1-w_2$.
 These lengths must satisfy triangle inequality.
 
 The closed form result is known to be $\frac{1}{4}$.
 
 """
 
+
 def triangle_inequality(x1, x2, x3):
-    """Efficiently finds `np.less(x1,x2+x3)*np.less(x2,x1+x3)*np.less(x3,x1+x2)`"""
+    """
+    Efficiently finds
+    `np.less(x1, x2 + x3) * np.less(x2, x1 + x3) * np.less(x3, x1 + x2)`
+    """
     tmp_sum = x2 + x3
-    res = np.less(x1, tmp_sum)   # x1 < x2 + x3
+    res = np.less(x1, tmp_sum)  # x1 < x2 + x3
     np.add(x1, x3, out=tmp_sum)
-    buf = np.less(x2, tmp_sum)   # x2 < x1 + x3
+    buf = np.less(x2, tmp_sum)  # x2 < x1 + x3
     np.logical_and(res, buf, out=res)
     np.add(x1, x2, out=tmp_sum)
-    np.less(x3, tmp_sum, out=buf) # x3 < x1 + x2
+    np.less(x3, tmp_sum, out=buf)  # x3 < x1 + x2
     np.logical_and(res, buf, out=res)
     return res
 
 
 def mc_dist(rs, n):
-    """Monte Carlo estimate of probability on sample of size `n`, using given random state object `rs`"""
-    ws = np.sort(rs.rand(2,n), axis=0)
+    """
+    Monte Carlo estimate of probability on sample of size `n`, using given
+    random state object `rs`
+    """
+    ws = np.sort(rs.rand(2, n), axis=0)
     x2 = np.empty(n, dtype=np.double)
     x3 = np.empty(n, dtype=np.double)
 
@@ -41,7 +52,10 @@ def mc_dist(rs, n):
 
 def init_worker(w_rs, barrier=None):
     """Assign process local random state variable `rs` the given value"""
-    assert not 'rs' in globals(), "Here comes trouble. Process is not expected to have global variable `rs`"
+    assert "rs" not in globals(), (
+        "Here comes trouble. Process is not "
+        "expected to have global variable `rs`"
+    )
 
     global rs
     rs = w_rs
@@ -53,23 +67,28 @@ def worker_compute(w_id, batch_size=None):
     return mc_dist(rs, batch_size)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import multiprocessing as mp
-    from itertools import repeat
-    from timeit import default_timer as timer
     from functools import partial
+    from timeit import default_timer as timer
 
     seed = 77777
     n_workers = 12
     batch_size = 1024 * 256
     batches = 10000
     print("Parallel Monte-Carlo estimation of stick triangle probability")
-    print(f"Parameters: n_workers={n_workers}, batch_size={batch_size}, n_batches={batches}, seed={seed}")
+    print(
+        f"Parameters: n_workers={n_workers}, batch_size={batch_size}, "
+        f"n_batches={batches}, seed={seed}"
+    )
     print("")
 
     t0 = timer()
-    # Create instances of RandomState for each worker process from MT2203 family of generators
-    rss = [ rnd.RandomState(seed, brng=('MT2203', idx)) for idx in range(n_workers) ]
+    # Create instances of RandomState for each worker process from MT2203
+    # family of generators
+    rss = [
+        rnd.RandomState(seed, brng=("MT2203", idx)) for idx in range(n_workers)
+    ]
     with mp.Manager() as manager:
         # use of Barrier ensures that every worker gets one
         b = manager.Barrier(n_workers)
@@ -78,7 +97,11 @@ def worker_compute(w_id, batch_size=None):
             # map over every worker once to distribute RandomState instances
             pool.map(partial(init_worker, barrier=b), rss, chunksize=1)
             # Perform computations on workers
-            r = pool.map(partial(worker_compute, batch_size=batch_size), range(batches), chunksize=1)
+            r = pool.map(
+                partial(worker_compute, batch_size=batch_size),
+                range(batches),
+                chunksize=1,
+            )
 
     # retrieve values of estimates into numpy array
     ps = np.fromiter(r, dtype=np.double)
@@ -90,6 +113,15 @@ def worker_compute(w_id, batch_size=None):
     dig = 3 - int(np.log10(pop_std))
     frm_str = "{0:0." + str(dig) + "f}"
     print(("Monte-Carlo estimate of probability: " + frm_str).format(p_est))
-    print(("Population estimate of the estimator's standard deviation: " + frm_str).format(pop_std))
-    print(("Expected standard deviation of the estimator: " + frm_str).format(np.sqrt(p_est * (1-p_est)/batch_size)))
-    print("Execution time: {0:0.3f} seconds".format(t1-t0))
+    print(
+        (
+            "Population estimate of the estimator's standard deviation: "
+            + frm_str
+        ).format(pop_std)
+    )
+    print(
+        ("Expected standard deviation of the estimator: " + frm_str).format(
+            np.sqrt(p_est * (1 - p_est) / batch_size)
+        )
+    )
+    print("Execution time: {0:0.3f} seconds".format(t1 - t0))
diff --git a/examples/sticky_math.py b/examples/sticky_math.py
index 6a92be5..c851d16 100644
--- a/examples/sticky_math.py
+++ b/examples/sticky_math.py
@@ -1,42 +1,52 @@
 import numpy as np
 
 __doc__ = """
-https://math.stackexchange.com/questions/351913/probability-that-a-stick-randomly-broken-in-five-places-can-form-a-tetrahedron
-
-Choose 5 locations on a stick to break it into 6 pieces. What is the probability that these 6 pieces can be edge-lengths of a 
-tetrahedron (3D symplex).
+https://math.stackexchange.com/questions/351913/probability-that-a-stick-randomly-broken-in-five-places-can-form-a-tetrahedron  # noqa: E501,W505
 
+Choose 5 locations on a stick to break it into 6 pieces. What is the
+probability that these 6 pieces can be edge-lengths of a tetrahedron
+(3D simplex)?
 """
 
-__all__ = ['mc_three_piece_stick_triangle_prob', 'mc_six_piece_stick_tetrahedron_prob']
+__all__ = [
+    "mc_three_piece_stick_triangle_prob",
+    "mc_six_piece_stick_tetrahedron_prob",
+]
+
 
 def triangle_inequality_(x1, x2, x3):
-    """Efficiently finds `np.less(x1,x2+x3)*np.less(x2,x1+x3)*np.less(x3,x1+x2)`"""
+    """
+    Efficiently finds
+    `np.less(x1,x2+x3)*np.less(x2,x1+x3)*np.less(x3,x1+x2)`
+    """
     tmp_sum = x2 + x3
-    res = np.less(x1, tmp_sum)   # x1 < x2 + x3
+    res = np.less(x1, tmp_sum)  # x1 < x2 + x3
     np.add(x1, x3, out=tmp_sum)
-    buf = np.less(x2, tmp_sum)   # x2 < x1 + x3
+    buf = np.less(x2, tmp_sum)  # x2 < x1 + x3
     np.logical_and(res, buf, out=res)
     np.add(x1, x2, out=tmp_sum)
-    np.less(x3, tmp_sum, out=buf) # x3 < x1 + x2
+    np.less(x3, tmp_sum, out=buf)  # x3 < x1 + x2
     np.logical_and(res, buf, out=res)
     return res
 
 
 def triangle_inequality(x1, x2, x3, out=None):
-    """Efficiently finds `np.less(x1,x2+x3)*np.less(x2,x1+x3)*np.less(x3,x1+x2)`, 
-       logically ending this on top of out array, if any"""
+    """
+    Efficiently finds
+    `np.less(x1, x2 + x3) * np.less(x2, x1 + x3) * np.less(x3, x1 + x2)`,
+    logically AND-ing the result into the `out` array, if any
+    """
     if out is None:
         return triangle_inequality_(x1, x2, x3)
     res = out
     tmp_sum = x2 + x3
-    buf = np.less(x1, tmp_sum)   # x1 < x2 + x3
+    buf = np.less(x1, tmp_sum)  # x1 < x2 + x3
     np.logical_and(res, buf, out=res)
     np.add(x1, x3, out=tmp_sum)
-    np.less(x2, tmp_sum, out=buf)   # x2 < x1 + x3
+    np.less(x2, tmp_sum, out=buf)  # x2 < x1 + x3
     np.logical_and(res, buf, out=res)
     np.add(x1, x2, out=tmp_sum)
-    np.less(x3, tmp_sum, out=buf) # x3 < x1 + x2
+    np.less(x3, tmp_sum, out=buf)  # x3 < x1 + x2
     np.logical_and(res, buf, out=res)
     return res
 
@@ -44,13 +54,14 @@ def triangle_inequality(x1, x2, x3, out=None):
 def facial_tetrahedron(x, y, z, xb, yb, zb):
     """
     Computes boolean mask for facial tetrahedron condition for six side-lengths
-    This condition is necessary, but not sufficient for 3 sticks to form a tetrahedon yet,
-    it needs to be supplemented with positivity of Cayley-Manger determinant.
+    This condition is necessary, but not sufficient, for 6 sticks to form a
+    tetrahedron; it needs to be supplemented with positivity of the
+    Cayley-Menger determinant.
     """
-    success_mask = triangle_inequality(x, y, zb)        # x, y, zb
-    triangle_inequality(x, y, zb, out = success_mask)   # x, yb, z
-    triangle_inequality(xb, y, z, out = success_mask)   # xb, y, z
-    triangle_inequality(xb, yb, zb, out = success_mask) # xb, yb, zb
+    success_mask = triangle_inequality(x, y, zb)  # x, y, zb
+    triangle_inequality(x, yb, z, out=success_mask)  # x, yb, z
+    triangle_inequality(xb, y, z, out=success_mask)  # xb, y, z
+    triangle_inequality(xb, yb, zb, out=success_mask)  # xb, yb, zb
     return success_mask
 
 
@@ -58,25 +69,30 @@ def cayley_menger_mat(x2, y2, z2, xb2, yb2, zb2):
     """
     Menger's determinant.
 
-    If positive, there exist 4 points in R^3, with pair-wise distances squared equal to given 6 arguments.
+    If positive, there exist 4 points in R^3, with pair-wise distances squared
+    equal to given 6 arguments.
 
-    K. Wirth, A.S. Dreiding, Edge lengths determining tetrahedrons, Elemente der Mathematic, vol. 64 (2009) pp. 160-170.
+    K. Wirth, A.S. Dreiding, Edge lengths determining tetrahedrons,
+    Elemente der Mathematik, vol. 64 (2009) pp. 160-170.
     """
     one = np.ones_like(x2)
     zero = np.zeros_like(x2)
-    mat = np.array([[zero, x2, y2, z2, one], 
-                    [x2, zero, zb2, yb2, one], 
-                    [y2, zb2, zero, xb2, one], 
-                    [z2, yb2, xb2, zero, one], 
-                    [one, one, one, one, zero]
-    ]).T
+    mat = np.array(
+        [
+            [zero, x2, y2, z2, one],
+            [x2, zero, zb2, yb2, one],
+            [y2, zb2, zero, xb2, one],
+            [z2, yb2, xb2, zero, one],
+            [one, one, one, one, zero],
+        ]
+    ).T
     return mat
 
 
 def cayley_menger_det_no_linalg(x2, y2, z2, xb2, yb2, zb2):
     """
     D(S) = 2 * x2 * xb2 * (y2 + yb2 + z2 + zb2 - x2 - xb2) +
-           2 * y2 * yb2 * (z2 + zb2 + x2 + xb2 - y2 - yb2) + 
+           2 * y2 * yb2 * (z2 + zb2 + x2 + xb2 - y2 - yb2) +
            2 * z2 * zb2 * (x2 + xb2 + y2 + yb2 - z2 - zb2) +
            (x2 - xb2) * (y2 - yb2) * (z2 - zb2) -
            (x2 + xb2) * (x2 + xb2) * (z2 + zb2)
@@ -87,29 +103,29 @@ def cayley_menger_det_no_linalg(x2, y2, z2, xb2, yb2, zb2):
     buf1 = ys + zs
     buf1 -= xs
     buf2 = x2 * xb2
-    buf1 *= buf2 # buf1 has first term, halved
+    buf1 *= buf2  # buf1 has first term, halved
     np.multiply(y2, yb2, out=buf2)
     buf3 = xs + zs
     buf3 -= ys
-    buf2 *= buf3 # buf2 has second term
-    buf1 += buf2 # buf1 is sum of two terms, halved
+    buf2 *= buf3  # buf2 has second term
+    buf1 += buf2  # buf1 is sum of two terms, halved
     np.multiply(z2, zb2, out=buf3)
-    np.add(xs, ys, out=buf2) # reuse buf2
+    np.add(xs, ys, out=buf2)  # reuse buf2
     buf2 -= zs
-    buf3 *= buf2 # buf3 has third term
-    buf1 += buf3 # buf1 is sum of 3 first terms, halved
+    buf3 *= buf2  # buf3 has third term
+    buf1 += buf3  # buf1 is sum of 3 first terms, halved
     buf1 *= 2
     np.subtract(x2, xb2, out=buf2)
     np.subtract(y2, yb2, out=buf3)
     buf2 *= buf3
     np.subtract(z2, zb2, out=buf3)
     buf2 *= buf3
-    buf1 += buf2 # buf1 is sum of 4 first terms
+    buf1 += buf2  # buf1 is sum of 4 first terms
     np.multiply(xs, ys, out=buf3)
     buf3 *= zs
     buf1 -= buf3
     return buf1
-    
+
 
 def cayley_menger_cond(x2, y2, z2, xb2, yb2, zb2):
     # return np.linalg.det(cayley_menger_mat(x2, y2, z2, xb2, yb2, zb2)) > 0
@@ -118,17 +134,17 @@ def cayley_menger_cond(x2, y2, z2, xb2, yb2, zb2):
 
 def mc_six_piece_stick_tetrahedron_prob(rs, n):
     """
-    Monte-Carlo estimate of the probability that a unit stick, randomly broken in 5 places (making 6 pieces), 
-    can form a tetrahedron.
+    Monte-Carlo estimate of the probability that a unit stick, randomly broken
+    in 5 places (making 6 pieces), can form a tetrahedron.
 
-    Using provided random state instance `rs` routine generates `n` samples, and outputs the number of 
-    tetrahedral 6-tuples.
+    Using provided random state instance `rs` routine generates `n` samples,
+    and outputs the number of tetrahedral 6-tuples.
     """
-    u = rs.rand(6,n)
+    u = rs.rand(6, n)
     u[0, :] = 1
     np.log(u[1], out=u[1])
     u[1] /= 5
-    np.exp(u[1], out=u[1]) # np.power(u[1], 1/5, out=u[1])
+    np.exp(u[1], out=u[1])  # np.power(u[1], 1/5, out=u[1])
     np.sqrt(u[2], out=u[2])
     np.sqrt(u[2], out=u[2])
     np.cbrt(u[3], out=u[3])
@@ -141,21 +157,22 @@ def mc_six_piece_stick_tetrahedron_prob(rs, n):
     u[4] -= u[5]
 
     success_mask = facial_tetrahedron(u[0], u[1], u[2], u[3], u[4], u[5])
-    np.square(u, out=u) # only squares enter Cayler-Manger determinant
+    np.square(u, out=u)  # only squares enter Cayley-Menger determinant
     cm_mask = cayley_menger_cond(u[0], u[1], u[2], u[3], u[4], u[5])
     np.logical_and(success_mask, cm_mask, out=success_mask)
-    
+
     return success_mask.sum()
 
 
 def mc_three_piece_stick_triangle_prob(rs, n):
     """
-    Monte-Carlo estimate of probability that a unit stick, randomly broken in 2 places (making 3 pieces),
-    corresponds to a triple of sides of a triangle.
+    Monte-Carlo estimate of probability that a unit stick, randomly broken in 2
+    places (making 3 pieces), corresponds to a triple of sides of a triangle.
 
-    Using provided random state instance `rs` routine generates `n` samples, and outputs the number of 
-    triangular 3-tuples."""
-    ws = np.sort(rs.rand(2,n), axis=0)
+    Using provided random state instance `rs` routine generates `n` samples,
+    and outputs the number of triangular 3-tuples.
+    """
+    ws = np.sort(rs.rand(2, n), axis=0)
     x2 = np.empty(n, dtype=np.double)
     x3 = np.empty(n, dtype=np.double)
 
diff --git a/mkl_random/__init__.py b/mkl_random/__init__.py
index b15fec7..a26b3c6 100644
--- a/mkl_random/__init__.py
+++ b/mkl_random/__init__.py
@@ -24,72 +24,73 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from __future__ import division, absolute_import, print_function
+from __future__ import absolute_import, division, print_function
 
 from . import _init_helper
-
+from ._version import __version__
 from .mklrand import (
     MKLRandomState,
     RandomState,
-    seed,
-    get_state,
-    set_state,
-    random_sample,
-    choice,
-    randint,
+    beta,
+    binomial,
     bytes,
-    uniform,
+    chisquare,
+    choice,
+    dirichlet,
+    exponential,
+    f,
+    gamma,
+    geometric,
+    get_state,
+    gumbel,
+    hypergeometric,
+    laplace,
+    logistic,
+    lognormal,
+    logseries,
+    multinomial,
+    multinormal_cholesky,
+    multivariate_normal,
+    negative_binomial,
+    noncentral_chisquare,
+    noncentral_f,
+    normal,
+    pareto,
+    permutation,
+    poisson,
+    power,
     rand,
+    randint,
     randn,
     random_integers,
-    standard_normal,
-    normal,
-    beta,
-    exponential,
+    random_sample,
+    rayleigh,
+    seed,
+    set_state,
+    shuffle,
+    standard_cauchy,
     standard_exponential,
     standard_gamma,
-    gamma,
-    f,
-    noncentral_f,
-    chisquare,
-    noncentral_chisquare,
-    standard_cauchy,
+    standard_normal,
     standard_t,
+    triangular,
+    uniform,
     vonmises,
-    pareto,
-    weibull,
-    power,
-    laplace,
-    gumbel,
-    logistic,
-    lognormal,
-    rayleigh,
     wald,
-    triangular,
-    binomial,
-    negative_binomial,
-    poisson,
+    weibull,
     zipf,
-    geometric,
-    hypergeometric,
-    logseries,
-    multivariate_normal,
-    multinormal_cholesky,
-    multinomial,
-    dirichlet,
-    shuffle,
-    permutation,
 )
-from ._version import __version__
 
 try:
     from numpy.testing.nosetester import _numpy_tester
+
     test = _numpy_tester().test
     bench = _numpy_tester().bench
     del _numpy_tester
 except ModuleNotFoundError:
     # Pytest testing
     from numpy._pytesttester import PytestTester
+
     test = PytestTester(__name__)
     del PytestTester
 
diff --git a/mkl_random/_version.py b/mkl_random/_version.py
index 2d77b27..2e16e46 100644
--- a/mkl_random/_version.py
+++ b/mkl_random/_version.py
@@ -1 +1 @@
-__version__ = '1.4.0dev1'
+__version__ = "1.4.0dev1"
diff --git a/mkl_random/interfaces/_numpy_random.py b/mkl_random/interfaces/_numpy_random.py
index 2a4c7b6..c12b28b 100644
--- a/mkl_random/interfaces/_numpy_random.py
+++ b/mkl_random/interfaces/_numpy_random.py
@@ -32,7 +32,9 @@
 import mkl_random
 
 
-class RandomState(mkl_random.mklrand._MKLRandomState):
+class RandomState(
+    mkl_random.mklrand._MKLRandomState
+):  # pylint: disable=maybe-no-member
     """
     RandomState(seed=None)
 
@@ -52,9 +54,10 @@ class RandomState(mkl_random.mklrand._MKLRandomState):
 
     References
     -----
-    MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html
+    MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html  # noqa: E501,W505
 
     """
+
     def __init__(self, seed=None):
         super().__init__(seed=seed, brng="MT19937")
 
@@ -93,7 +96,7 @@ def set_state(self, state):
         set_state(state)
 
         Set the internal state of the generator.
-        
+
         For full documentation refer to `numpy.random.set_state`.
 
         *Compatibility Notice*
@@ -124,7 +127,7 @@ def randint(self, low, high=None, size=None, dtype=int):
         randint(low, high=None, size=None, dtype=int)
 
         Return random integers from `low` (inclusive) to `high` (exclusive).
-        
+
         For full documentation refer to `numpy.random.randint`.
 
         """
@@ -227,7 +230,11 @@ def beta(self, a, b, size=None):
         For full documentation refer to `numpy.random.beta`.
 
         """
-        return super().beta(a=a, b=b, size=size)
+        return super().beta(
+            a=a,
+            b=b,
+            size=size,
+        )
 
     def exponential(self, scale=1.0, size=None):
         """
@@ -354,7 +361,8 @@ def pareto(self, a, size=None):
         """
         pareto(a, size=None)
 
-        Draw samples from a Pareto II or Lomax distribution with a scale parameter of 1.
+        Draw samples from a Pareto II or Lomax distribution with a scale
+        parameter of 1.
 
         For full documentation refer to `numpy.random.pareto`.
 
@@ -425,7 +433,9 @@ def lognormal(self, mean=0.0, sigma=1.0, size=None):
         For full documentation refer to `numpy.random.lognormal`.
 
         """
-        return super().lognormal(mean=mean, sigma=sigma, size=size, method="ICDF")
+        return super().lognormal(
+            mean=mean, sigma=sigma, size=size, method="ICDF"
+        )
 
     def rayleigh(self, scale=1.0, size=None):
         """
@@ -524,7 +534,9 @@ def hypergeometric(self, ngood, nbad, nsample, size=None):
         For full documentation refer to `numpy.random.hypergeometric`.
 
         """
-        return super().hypergeometric(ngood=ngood, nbad=nbad, nsample=nsample, size=size)
+        return super().hypergeometric(
+            ngood=ngood, nbad=nbad, nsample=nsample, size=size
+        )
 
     def logseries(self, p, size=None):
         """
@@ -537,7 +549,9 @@ def logseries(self, p, size=None):
         """
         return super().logseries(p=p, size=size)
 
-    def multivariate_normal(self, mean, cov, size=None, check_valid='warn', tol=1e-8):
+    def multivariate_normal(
+        self, mean, cov, size=None, check_valid="warn", tol=1e-8
+    ):
         """
         multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8)
 
@@ -599,10 +613,10 @@ def __NPRandomState_ctor():
     """
     Return a RandomState instance.
     This function exists solely to assist (un)pickling.
-    Note that the state of the RandomState returned here is irrelevant, as this function's
-    entire purpose is to return a newly allocated RandomState whose state pickle can set.
-    Consequently the RandomState returned by this function is a freshly allocated copy
-    with a seed=0.
+    Note that the state of the RandomState returned here is irrelevant, as this
+    function's entire purpose is to return a newly allocated RandomState whose
+    state pickle can set. Consequently the RandomState returned by this
+    function is a freshly allocated copy with a seed=0.
     See https://github.com/numpy/numpy/issues/4763 for a detailed discussion
     """
     return RandomState(seed=0)
diff --git a/mkl_random/interfaces/numpy_random.py b/mkl_random/interfaces/numpy_random.py
index 2c9e780..4383b9f 100644
--- a/mkl_random/interfaces/numpy_random.py
+++ b/mkl_random/interfaces/numpy_random.py
@@ -26,53 +26,53 @@
 
 from ._numpy_random import (
     RandomState,
-    seed,
-    get_state,
-    set_state,
-    random_sample,
-    choice,
-    randint,
+    beta,
+    binomial,
     bytes,
-    uniform,
+    chisquare,
+    choice,
+    dirichlet,
+    exponential,
+    f,
+    gamma,
+    geometric,
+    get_state,
+    gumbel,
+    hypergeometric,
+    laplace,
+    logistic,
+    lognormal,
+    logseries,
+    multinomial,
+    multivariate_normal,
+    negative_binomial,
+    noncentral_chisquare,
+    noncentral_f,
+    normal,
+    pareto,
+    permutation,
+    poisson,
+    power,
     rand,
+    randint,
     randn,
     random_integers,
-    standard_normal,
-    normal,
-    beta,
-    exponential,
+    random_sample,
+    rayleigh,
+    seed,
+    set_state,
+    shuffle,
+    standard_cauchy,
     standard_exponential,
     standard_gamma,
-    gamma,
-    f,
-    noncentral_f,
-    chisquare,
-    noncentral_chisquare,
-    standard_cauchy,
+    standard_normal,
     standard_t,
+    triangular,
+    uniform,
     vonmises,
-    pareto,
-    weibull,
-    power,
-    laplace,
-    gumbel,
-    logistic,
-    lognormal,
-    rayleigh,
     wald,
-    triangular,
-    binomial,
-    negative_binomial,
-    poisson,
+    weibull,
     zipf,
-    geometric,
-    hypergeometric,
-    logseries,
-    multivariate_normal,
-    multinomial,
-    dirichlet,
-    shuffle,
-    permutation,
 )
 
 __all__ = [
diff --git a/mkl_random/mklrand.pyx b/mkl_random/mklrand.pyx
index 9c90360..53b2605 100644
--- a/mkl_random/mklrand.pyx
+++ b/mkl_random/mklrand.pyx
@@ -39,10 +39,10 @@ cdef extern from "Python.h":
 cdef extern from "numpy/npy_no_deprecated_api.h":
     pass
 
-cimport numpy as cnp
-from libc.string cimport memset, memcpy
 cimport cpython.tuple
-cimport cython
+cimport numpy as cnp
+from libc.string cimport memcpy, memset
+
 
 cdef extern from "math.h":
     double floor(double x)
@@ -56,9 +56,13 @@ cdef extern from "mklrand_py_helper.h":
     int is_bytes_object(object b)
 
 cdef extern from "numpy_multiiter_workaround.h":
-    cnp.npy_intp cnp_PyArray_MultiIter_SIZE "workaround_PyArray_MultiIter_SIZE"(cnp.broadcast multi) nogil
-    int cnp_PyArray_MultiIter_NDIM "workaround_PyArray_MultiIter_NDIM"(cnp.broadcast multi) nogil
-    cnp.npy_intp* cnp_PyArray_MultiIter_DIMS "workaround_PyArray_MultiIter_DIMS"(cnp.broadcast multi) nogil
+    cnp.npy_intp cnp_PyArray_MultiIter_SIZE \
+        "workaround_PyArray_MultiIter_SIZE"(cnp.broadcast multi) nogil
+    int cnp_PyArray_MultiIter_NDIM "workaround_PyArray_MultiIter_NDIM"(
+        cnp.broadcast multi
+    ) nogil
+    cnp.npy_intp* cnp_PyArray_MultiIter_DIMS \
+        "workaround_PyArray_MultiIter_DIMS"(cnp.broadcast multi) nogil
 
 cdef extern from "randomkit.h":
 
@@ -86,126 +90,410 @@ cdef extern from "randomkit.h":
     void irk_fill(void *buffer, size_t size, irk_state *state) noexcept nogil
 
     void irk_dealloc_stream(irk_state *state)
-    void irk_seed_mkl(irk_state * state, unsigned int seed, irk_brng_t brng, unsigned int stream_id)
-    void irk_seed_mkl_array(irk_state * state, unsigned int * seed_vec, int seed_len, irk_brng_t brng, unsigned int stream_id)
-    irk_error irk_randomseed_mkl(irk_state * state, irk_brng_t brng, unsigned int stream_id)
+    void irk_seed_mkl(
+        irk_state * state,
+        unsigned int seed,
+        irk_brng_t brng,
+        unsigned int stream_id
+    )
+    void irk_seed_mkl_array(
+        irk_state * state,
+        unsigned int * seed_vec,
+        int seed_len,
+        irk_brng_t brng,
+        unsigned int stream_id
+    )
+    irk_error irk_randomseed_mkl(
+        irk_state * state, irk_brng_t brng, unsigned int stream_id
+    )
     int irk_get_stream_size(irk_state * state) noexcept nogil
     void irk_get_state_mkl(irk_state * state, char * buf)
     int irk_set_state_mkl(irk_state * state, char * buf)
     int irk_get_brng_mkl(irk_state *state) noexcept nogil
-    int irk_get_brng_and_stream_mkl(irk_state *state, unsigned int * stream_id) noexcept nogil
-    int irk_leapfrog_stream_mkl(irk_state *state, int k, int nstreams) noexcept nogil
-    int irk_skipahead_stream_mkl(irk_state *state, long long int nskips) noexcept nogil
+    int irk_get_brng_and_stream_mkl(
+        irk_state *state, unsigned int * stream_id
+    ) noexcept nogil
+    int irk_leapfrog_stream_mkl(
+        irk_state *state, int k, int nstreams
+    ) noexcept nogil
+    int irk_skipahead_stream_mkl(
+        irk_state *state, long long int nskips
+    ) noexcept nogil
 
 
 cdef extern from "mkl_distributions.h":
-    void irk_double_vec(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-    void irk_uniform_vec(irk_state *state, cnp.npy_intp len, double *res, double dlow, double dhigh) noexcept nogil
-
-    void irk_normal_vec_BM1(irk_state *state, cnp.npy_intp len, double *res, double mean, double sigma) noexcept nogil
-    void irk_normal_vec_BM2(irk_state *state, cnp.npy_intp len, double *res, double mean, double sigma) noexcept nogil
-    void irk_normal_vec_ICDF(irk_state *state, cnp.npy_intp len, double *res, double mean, double sigma) noexcept nogil
-
-    void irk_standard_normal_vec_BM1(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-    void irk_standard_normal_vec_BM2(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-    void irk_standard_normal_vec_ICDF(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-
-    void irk_standard_exponential_vec(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-    void irk_exponential_vec(irk_state *state, cnp.npy_intp len, double *res, double scale) noexcept nogil
-
-    void irk_standard_cauchy_vec(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-    void irk_standard_gamma_vec(irk_state *state, cnp.npy_intp len, double *res, double shape) noexcept nogil
-    void irk_gamma_vec(irk_state *state, cnp.npy_intp len, double *res, double shape, double scale) noexcept nogil
-
-    void irk_beta_vec(irk_state *state, cnp.npy_intp len, double *res, double p, double q) noexcept nogil
-
-    void irk_chisquare_vec(irk_state *state, cnp.npy_intp len, double *res, double df) noexcept nogil
-    void irk_standard_t_vec(irk_state *state, cnp.npy_intp len, double *res, double df) noexcept nogil
-
-    void irk_rayleigh_vec(irk_state *state, cnp.npy_intp len, double *res, double sigma) noexcept nogil
-    void irk_pareto_vec(irk_state *state, cnp.npy_intp len, double *res, double alp) noexcept nogil
-    void irk_power_vec(irk_state *state, cnp.npy_intp len, double *res, double alp) noexcept nogil
-    void irk_weibull_vec(irk_state *state, cnp.npy_intp len, double *res, double alp) noexcept nogil
-    void irk_f_vec(irk_state *state, cnp.npy_intp len, double *res, double df_num, double df_den) noexcept nogil
-    void irk_noncentral_chisquare_vec(irk_state *state, cnp.npy_intp len, double *res, double df, double nonc) noexcept nogil
-    void irk_laplace_vec(irk_state *state, cnp.npy_intp len, double *res, double loc, double scale) noexcept nogil
-    void irk_gumbel_vec(irk_state *state, cnp.npy_intp len, double *res, double loc, double scale) noexcept nogil
-    void irk_logistic_vec(irk_state *state, cnp.npy_intp len, double *res, double loc, double scale) noexcept nogil
-    void irk_wald_vec(irk_state *state, cnp.npy_intp len, double *res, double mean, double scale) noexcept nogil
-    void irk_lognormal_vec_ICDF(irk_state *state, cnp.npy_intp len, double *res, double mean, double scale) noexcept nogil
-    void irk_lognormal_vec_BM(irk_state *state, cnp.npy_intp len, double *res, double mean, double scale) noexcept nogil
-    void irk_vonmises_vec(irk_state *state, cnp.npy_intp len, double *res, double mu, double kappa) noexcept nogil
-
-    void irk_noncentral_f_vec(irk_state *state, cnp.npy_intp len, double *res, double df_num, double df_den, double nonc) noexcept nogil
-    void irk_triangular_vec(irk_state *state, cnp.npy_intp len, double *res, double left, double mode, double right) noexcept nogil
-
-    void irk_geometric_vec(irk_state *state, cnp.npy_intp len, int *res, double p) noexcept nogil
-    void irk_negbinomial_vec(irk_state *state, cnp.npy_intp len, int *res, double a, double p) noexcept nogil
-    void irk_binomial_vec(irk_state *state, cnp.npy_intp len, int *res, int n, double p) noexcept nogil
-    void irk_multinomial_vec(irk_state *state, cnp.npy_intp len, int *res, int n, int d, double *pvec) noexcept nogil
-    void irk_hypergeometric_vec(irk_state *state, cnp.npy_intp len, int *res, int ls, int ss, int ms) noexcept nogil
-
-    void irk_poisson_vec_PTPE(irk_state *state, cnp.npy_intp len, int *res, double lam) noexcept nogil
-    void irk_poisson_vec_POISNORM(irk_state *state, cnp.npy_intp len, int *res, double lam) noexcept nogil
-    void irk_poisson_vec_V(irk_state *state, cnp.npy_intp len, int *res, double *lam_vec) noexcept nogil
-
-    void irk_zipf_long_vec(irk_state *state, cnp.npy_intp len, long *res, double alpha) noexcept nogil
-    void irk_logseries_vec(irk_state *state, cnp.npy_intp len, int *res, double theta) noexcept nogil
+    void irk_double_vec(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+    void irk_uniform_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double dlow,
+        double dhigh
+    ) noexcept nogil
+
+    void irk_normal_vec_BM1(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double sigma
+    ) noexcept nogil
+    void irk_normal_vec_BM2(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double sigma
+    ) noexcept nogil
+    void irk_normal_vec_ICDF(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double sigma
+    ) noexcept nogil
+
+    void irk_standard_normal_vec_BM1(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+    void irk_standard_normal_vec_BM2(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+    void irk_standard_normal_vec_ICDF(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+
+    void irk_standard_exponential_vec(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+    void irk_exponential_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double scale
+    ) noexcept nogil
+
+    void irk_standard_cauchy_vec(
+        irk_state *state, cnp.npy_intp len, double *res
+    ) noexcept nogil
+    void irk_standard_gamma_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double shape
+    ) noexcept nogil
+    void irk_gamma_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double shape,
+        double scale
+    ) noexcept nogil
+
+    void irk_beta_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double p, double q
+    ) noexcept nogil
+
+    void irk_chisquare_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double df
+    ) noexcept nogil
+    void irk_standard_t_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double df
+    ) noexcept nogil
+
+    void irk_rayleigh_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double sigma
+    ) noexcept nogil
+    void irk_pareto_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double alp
+    ) noexcept nogil
+    void irk_power_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double alp
+    ) noexcept nogil
+    void irk_weibull_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double alp
+    ) noexcept nogil
+    void irk_f_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double df_num,
+        double df_den
+    ) noexcept nogil
+    void irk_noncentral_chisquare_vec(
+        irk_state *state, cnp.npy_intp len, double *res, double df, double nonc
+    ) noexcept nogil
+    void irk_laplace_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double loc,
+        double scale
+    ) noexcept nogil
+    void irk_gumbel_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double loc,
+        double scale
+    ) noexcept nogil
+    void irk_logistic_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double loc,
+        double scale
+    ) noexcept nogil
+    void irk_wald_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double scale
+    ) noexcept nogil
+    void irk_lognormal_vec_ICDF(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double scale
+    ) noexcept nogil
+    void irk_lognormal_vec_BM(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mean,
+        double scale
+    ) noexcept nogil
+    void irk_vonmises_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double mu,
+        double kappa
+    ) noexcept nogil
+
+    void irk_noncentral_f_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double df_num,
+        double df_den,
+        double nonc
+    ) noexcept nogil
+    void irk_triangular_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        double left,
+        double mode,
+        double right
+    ) noexcept nogil
+
+    void irk_geometric_vec(
+        irk_state *state, cnp.npy_intp len, int *res, double p
+    ) noexcept nogil
+    void irk_negbinomial_vec(
+        irk_state *state, cnp.npy_intp len, int *res, double a, double p
+    ) noexcept nogil
+    void irk_binomial_vec(
+        irk_state *state, cnp.npy_intp len, int *res, int n, double p
+    ) noexcept nogil
+    void irk_multinomial_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        int *res,
+        int n,
+        int d,
+        double *pvec
+    ) noexcept nogil
+    void irk_hypergeometric_vec(
+        irk_state *state, cnp.npy_intp len, int *res, int ls, int ss, int ms
+    ) noexcept nogil
+
+    void irk_poisson_vec_PTPE(
+        irk_state *state, cnp.npy_intp len, int *res, double lam
+    ) noexcept nogil
+    void irk_poisson_vec_POISNORM(
+        irk_state *state, cnp.npy_intp len, int *res, double lam
+    ) noexcept nogil
+    void irk_poisson_vec_V(
+        irk_state *state, cnp.npy_intp len, int *res, double *lam_vec
+    ) noexcept nogil
+
+    void irk_zipf_long_vec(
+        irk_state *state, cnp.npy_intp len, long *res, double alpha
+    ) noexcept nogil
+    void irk_logseries_vec(
+        irk_state *state, cnp.npy_intp len, int *res, double theta
+    ) noexcept nogil
 
     # random integers madness
-    void irk_discrete_uniform_vec(irk_state *state, cnp.npy_intp len, int *res, int low, int high) noexcept nogil
-    void irk_discrete_uniform_long_vec(irk_state *state, cnp.npy_intp len, long *res, long low, long high) noexcept nogil
-    void irk_rand_bool_vec(irk_state *state, cnp.npy_intp len, cnp.npy_bool *res, cnp.npy_bool low, cnp.npy_bool high) noexcept nogil
-    void irk_rand_uint8_vec(irk_state *state, cnp.npy_intp len, cnp.npy_uint8 *res, cnp.npy_uint8 low, cnp.npy_uint8 high) noexcept nogil
-    void irk_rand_int8_vec(irk_state *state, cnp.npy_intp len, cnp.npy_int8 *res, cnp.npy_int8 low, cnp.npy_int8 high) noexcept nogil
-    void irk_rand_uint16_vec(irk_state *state, cnp.npy_intp len, cnp.npy_uint16 *res, cnp.npy_uint16 low, cnp.npy_uint16 high) noexcept nogil
-    void irk_rand_int16_vec(irk_state *state, cnp.npy_intp len, cnp.npy_int16 *res, cnp.npy_int16 low, cnp.npy_int16 high) noexcept nogil
-    void irk_rand_uint32_vec(irk_state *state, cnp.npy_intp len, cnp.npy_uint32 *res, cnp.npy_uint32 low, cnp.npy_uint32 high) noexcept nogil
-    void irk_rand_int32_vec(irk_state *state, cnp.npy_intp len, cnp.npy_int32 *res, cnp.npy_int32 low, cnp.npy_int32 high) noexcept nogil
-    void irk_rand_uint64_vec(irk_state *state, cnp.npy_intp len, cnp.npy_uint64 *res, cnp.npy_uint64 low, cnp.npy_uint64 high) noexcept nogil
-    void irk_rand_int64_vec(irk_state *state, cnp.npy_intp len, cnp.npy_int64 *res, cnp.npy_int64 low, cnp.npy_int64 high) noexcept nogil
-
-    void irk_long_vec(irk_state *state, cnp.npy_intp len, long *res) noexcept nogil
+    void irk_discrete_uniform_vec(
+        irk_state *state, cnp.npy_intp len, int *res, int low, int high
+    ) noexcept nogil
+    void irk_discrete_uniform_long_vec(
+        irk_state *state, cnp.npy_intp len, long *res, long low, long high
+    ) noexcept nogil
+    void irk_rand_bool_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_bool *res,
+        cnp.npy_bool low,
+        cnp.npy_bool high
+    ) noexcept nogil
+    void irk_rand_uint8_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_uint8 *res,
+        cnp.npy_uint8 low,
+        cnp.npy_uint8 high
+    ) noexcept nogil
+    void irk_rand_int8_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_int8 *res,
+        cnp.npy_int8 low,
+        cnp.npy_int8 high
+    ) noexcept nogil
+    void irk_rand_uint16_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_uint16 *res,
+        cnp.npy_uint16 low,
+        cnp.npy_uint16 high
+    ) noexcept nogil
+    void irk_rand_int16_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_int16 *res,
+        cnp.npy_int16 low,
+        cnp.npy_int16 high
+    ) noexcept nogil
+    void irk_rand_uint32_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_uint32 *res,
+        cnp.npy_uint32 low,
+        cnp.npy_uint32 high
+    ) noexcept nogil
+    void irk_rand_int32_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_int32 *res,
+        cnp.npy_int32 low,
+        cnp.npy_int32 high
+    ) noexcept nogil
+    void irk_rand_uint64_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_uint64 *res,
+        cnp.npy_uint64 low,
+        cnp.npy_uint64 high
+    ) noexcept nogil
+    void irk_rand_int64_vec(
+        irk_state *state,
+        cnp.npy_intp len,
+        cnp.npy_int64 *res,
+        cnp.npy_int64 low,
+        cnp.npy_int64 high
+    ) noexcept nogil
+
+    void irk_long_vec(
+        irk_state *state, cnp.npy_intp len, long *res
+    ) noexcept nogil
 
     ctypedef enum ch_st_enum:
         MATRIX = 0
         PACKED = 1
         DIAGONAL = 2
 
-    void irk_multinormal_vec_ICDF(irk_state *state, cnp.npy_intp len, double *res, int dim, double *mean_vec, double *ch, ch_st_enum storage_mode) noexcept nogil
-    void irk_multinormal_vec_BM1(irk_state *state, cnp.npy_intp len, double *res, int dim, double *mean_vec, double *ch, ch_st_enum storage_mode) noexcept nogil
-    void irk_multinormal_vec_BM2(irk_state *state, cnp.npy_intp len, double *res, int dim, double *mean_vec, double *ch, ch_st_enum storage_mode) noexcept nogil
-
-
-ctypedef void (* irk_cont0_vec)(irk_state *state, cnp.npy_intp len, double *res) noexcept nogil
-ctypedef void (* irk_cont1_vec)(irk_state *state, cnp.npy_intp len, double *res, double a) noexcept nogil
-ctypedef void (* irk_cont2_vec)(irk_state *state, cnp.npy_intp len, double *res, double a, double b) noexcept nogil
-ctypedef void (* irk_cont3_vec)(irk_state *state, cnp.npy_intp len, double *res, double a, double b, double c) noexcept nogil
-
-ctypedef void (* irk_disc0_vec)(irk_state *state, cnp.npy_intp len, int *res) noexcept nogil
-ctypedef void (* irk_disc0_vec_long)(irk_state *state, cnp.npy_intp len, long *res) noexcept nogil
-ctypedef void (* irk_discnp_vec)(irk_state *state, cnp.npy_intp len, int *res, int n, double a) noexcept nogil
-ctypedef void (* irk_discdd_vec)(irk_state *state, cnp.npy_intp len, int *res, double n, double p) noexcept nogil
-ctypedef void (* irk_discnmN_vec)(irk_state *state, cnp.npy_intp len, int *res, int n, int m, int N) noexcept nogil
-ctypedef void (* irk_discd_vec)(irk_state *state, cnp.npy_intp len, int *res, double a) noexcept nogil
-ctypedef void (* irk_discd_long_vec)(irk_state *state, cnp.npy_intp len, long *res, double a) noexcept nogil
-ctypedef void (* irk_discdptr_vec)(irk_state *state, cnp.npy_intp len, int *res, double *a) noexcept nogil
+    void irk_multinormal_vec_ICDF(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        int dim,
+        double *mean_vec,
+        double *ch,
+        ch_st_enum storage_mode
+    ) noexcept nogil
+    void irk_multinormal_vec_BM1(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        int dim,
+        double *mean_vec,
+        double *ch,
+        ch_st_enum storage_mode
+    ) noexcept nogil
+    void irk_multinormal_vec_BM2(
+        irk_state *state,
+        cnp.npy_intp len,
+        double *res,
+        int dim,
+        double *mean_vec,
+        double *ch,
+        ch_st_enum storage_mode
+    ) noexcept nogil
+
+
+ctypedef void (* irk_cont0_vec)(
+    irk_state *state, cnp.npy_intp len, double *res
+) noexcept nogil
+ctypedef void (* irk_cont1_vec)(
+    irk_state *state, cnp.npy_intp len, double *res, double a
+) noexcept nogil
+ctypedef void (* irk_cont2_vec)(
+    irk_state *state, cnp.npy_intp len, double *res, double a, double b
+) noexcept nogil
+ctypedef void (* irk_cont3_vec)(
+    irk_state *state,
+    cnp.npy_intp len,
+    double *res,
+    double a,
+    double b,
+    double c
+) noexcept nogil
+
+ctypedef void (* irk_disc0_vec)(
+    irk_state *state, cnp.npy_intp len, int *res
+) noexcept nogil
+ctypedef void (* irk_disc0_vec_long)(
+    irk_state *state, cnp.npy_intp len, long *res
+) noexcept nogil
+ctypedef void (* irk_discnp_vec)(
+    irk_state *state, cnp.npy_intp len, int *res, int n, double a
+) noexcept nogil
+ctypedef void (* irk_discdd_vec)(
+    irk_state *state, cnp.npy_intp len, int *res, double n, double p
+) noexcept nogil
+ctypedef void (* irk_discnmN_vec)(
+    irk_state *state, cnp.npy_intp len, int *res, int n, int m, int N
+) noexcept nogil
+ctypedef void (* irk_discd_vec)(
+    irk_state *state, cnp.npy_intp len, int *res, double a
+) noexcept nogil
+ctypedef void (* irk_discd_long_vec)(
+    irk_state *state, cnp.npy_intp len, long *res, double a
+) noexcept nogil
+ctypedef void (* irk_discdptr_vec)(
+    irk_state *state, cnp.npy_intp len, int *res, double *a
+) noexcept nogil
 
 
 cdef int r = cnp._import_array()
 if (r < 0):
     raise ImportError("Failed to import NumPy")
 
-import numpy as np
 import operator
 import warnings
+
+import numpy as np
+
 try:
     from threading import Lock
 except ImportError:
     from dummy_threading import Lock
 
-cdef object vec_cont0_array(irk_state *state, irk_cont0_vec func, object size,
-                        object lock):
+cdef object vec_cont0_array(
+    irk_state *state, irk_cont0_vec func, object size, object lock
+):
     cdef double *array_data
     cdef double res
     cdef cnp.ndarray array "arrayObject"
@@ -223,8 +511,9 @@ cdef object vec_cont0_array(irk_state *state, irk_cont0_vec func, object size,
 
         return array
 
-cdef object vec_cont1_array_sc(irk_state *state, irk_cont1_vec func, object size, double a,
-                        object lock):
+cdef object vec_cont1_array_sc(
+    irk_state *state, irk_cont1_vec func, object size, double a, object lock
+):
     cdef double *array_data
     cdef double res
     cdef cnp.ndarray array "arrayObject"
@@ -243,8 +532,13 @@ cdef object vec_cont1_array_sc(irk_state *state, irk_cont1_vec func, object size
         return array
 
 
-cdef object vec_cont1_array(irk_state *state, irk_cont1_vec func, object size,
-                        cnp.ndarray oa, object lock):
+cdef object vec_cont1_array(
+    irk_state *state,
+    irk_cont1_vec func,
+    object size,
+    cnp.ndarray oa,
+    object lock
+):
     cdef double *array_data
     cdef double *oa_data
     cdef cnp.ndarray array "arrayObject"
@@ -257,20 +551,30 @@ cdef object vec_cont1_array(irk_state *state, irk_cont1_vec func, object size,
     cdef cnp.npy_intp *multi_dims
 
     if size is None:
-        array = <cnp.ndarray>cnp.PyArray_SimpleNew(cnp.PyArray_NDIM(oa),
-                cnp.PyArray_DIMS(oa) , cnp.NPY_DOUBLE)
+        array = <cnp.ndarray>cnp.PyArray_SimpleNew(
+                    cnp.PyArray_NDIM(oa),
+                    cnp.PyArray_DIMS(oa),
+                    cnp.NPY_DOUBLE
+                )
         imax = cnp.PyArray_SIZE(array)
         array_data = <double *>cnp.PyArray_DATA(array)
         itera = <cnp.flatiter>cnp.PyArray_IterNew(<object>oa)
         with lock, nogil:
             for i from 0 <= i < imax:
-                func(state, 1, array_data + i, (<double *>(cnp.PyArray_ITER_DATA(itera)))[0])
+                func(
+                    state,
+                    1,
+                    array_data + i,
+                    (<double *>(cnp.PyArray_ITER_DATA(itera)))[0]
+                )
                 cnp.PyArray_ITER_NEXT(itera)
         arr_obj = <object> array
     else:
         array = <cnp.ndarray>np.empty(size, np.float64)
         array_data = <double *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(2, <void *>array, <void *>oa)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            2, <void *>array, <void *>oa
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
@@ -288,20 +592,28 @@ cdef object vec_cont1_array(irk_state *state, irk_cont1_vec func, object size,
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
-cdef object vec_cont2_array_sc(irk_state *state, irk_cont2_vec func, object size, double a,
-                        double b, object lock):
+cdef object vec_cont2_array_sc(
+    irk_state *state,
+    irk_cont2_vec func,
+    object size,
+    double a,
+    double b,
+    object lock
+):
     cdef double *array_data
     cdef double res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, a, b)
@@ -315,8 +627,14 @@ cdef object vec_cont2_array_sc(irk_state *state, irk_cont2_vec func, object size
 
         return array
 
-cdef object vec_cont2_array(irk_state *state, irk_cont2_vec func, object size,
-                        cnp.ndarray oa, cnp.ndarray ob, object lock):
+cdef object vec_cont2_array(
+    irk_state *state,
+    irk_cont2_vec func,
+    object size,
+    cnp.ndarray oa,
+    cnp.ndarray ob,
+    object lock
+):
     cdef double *array_data
     cdef double *oa_data
     cdef double *ob_data
@@ -329,7 +647,9 @@ cdef object vec_cont2_array(irk_state *state, irk_cont2_vec func, object size,
     cdef cnp.npy_intp *multi_dims
 
     if size is None:
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>oa, <void *>ob)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>oa, <void *>ob
+        )
         array = <cnp.ndarray> cnp.PyArray_SimpleNew(
             cnp_PyArray_MultiIter_NDIM(multi),
             cnp_PyArray_MultiIter_DIMS(multi),
@@ -346,12 +666,16 @@ cdef object vec_cont2_array(irk_state *state, irk_cont2_vec func, object size,
     else:
         array = <cnp.ndarray>np.empty(size, np.float64)
         array_data = <double *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast >cnp.PyArray_MultiIterNew(3, <void*>array, <void *>oa, <void *>ob)
+        multi = <cnp.broadcast >cnp.PyArray_MultiIterNew(
+            3, <void*>array, <void *>oa, <void *>ob
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
 
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>oa, <void *>ob)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>oa, <void *>ob
+        )
         imax = cnp_PyArray_MultiIter_SIZE(multi)
         n = res_size // imax
         with lock, nogil:
@@ -365,21 +689,30 @@ cdef object vec_cont2_array(irk_state *state, irk_cont2_vec func, object size,
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
 
-cdef object vec_cont3_array_sc(irk_state *state, irk_cont3_vec func, object size, double a,
-                        double b, double c, object lock):
+cdef object vec_cont3_array_sc(
+    irk_state *state,
+    irk_cont3_vec func,
+    object size,
+    double a,
+    double b,
+    double c,
+    object lock
+):
     cdef double *array_data
     cdef double res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, a, b, c)
@@ -393,8 +726,15 @@ cdef object vec_cont3_array_sc(irk_state *state, irk_cont3_vec func, object size
 
         return array
 
-cdef object vec_cont3_array(irk_state *state, irk_cont3_vec func, object size,
-                        cnp.ndarray oa, cnp.ndarray ob, cnp.ndarray oc, object lock):
+cdef object vec_cont3_array(
+    irk_state *state,
+    irk_cont3_vec func,
+    object size,
+    cnp.ndarray oa,
+    cnp.ndarray ob,
+    cnp.ndarray oc,
+    object lock
+):
     cdef double *array_data
     cdef double *oa_data
     cdef double *ob_data
@@ -408,27 +748,38 @@ cdef object vec_cont3_array(irk_state *state, irk_cont3_vec func, object size,
     cdef cnp.npy_intp *multi_dims
 
     if size is None:
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(3, <void *>oa, <void *>ob, <void *>oc)
-        array = <cnp.ndarray> cnp.PyArray_SimpleNew(cnp_PyArray_MultiIter_NDIM(multi), cnp_PyArray_MultiIter_DIMS(multi), cnp.NPY_DOUBLE)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            3, <void *>oa, <void *>ob, <void *>oc
+        )
+        array = <cnp.ndarray> cnp.PyArray_SimpleNew(
+            cnp_PyArray_MultiIter_NDIM(multi),
+            cnp_PyArray_MultiIter_DIMS(multi),
+            cnp.NPY_DOUBLE
+        )
         array_data = <double *>cnp.PyArray_DATA(array)
         with lock, nogil:
             for i from 0 <= i < cnp_PyArray_MultiIter_SIZE(multi):
                 oa_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 0)
                 ob_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 1)
                 oc_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 2)
-                func(state, 1, &array_data[i], oa_data[0], ob_data[0], oc_data[0])
+                func(
+                    state, 1, &array_data[i], oa_data[0], ob_data[0], oc_data[0]
+                )
                 cnp.PyArray_MultiIter_NEXT(multi)
         arr_obj = <object>array
     else:
         array = <cnp.ndarray>np.empty(size, np.float64)
         array_data = <double *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(4, <void*>array, <void *>oa,
-                                                <void *>ob, <void *>oc)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            4, <void*>array, <void *>oa, <void *>ob, <void *>oc
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
 
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(3, <void *>oa, <void *>ob, <void *>oc)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            3, <void *>oa, <void *>ob, <void *>oc
+        )
         imax = cnp_PyArray_MultiIter_SIZE(multi)
         n = res_size // imax
         with lock, nogil:
@@ -436,17 +787,27 @@ cdef object vec_cont3_array(irk_state *state, irk_cont3_vec func, object size,
                 oa_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 0)
                 ob_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 1)
                 oc_data = <double *>cnp.PyArray_MultiIter_DATA(multi, 2)
-                func(state, n, array_data + n*i, oa_data[0], ob_data[0], oc_data[0])
+                func(
+                    state,
+                    n,
+                    array_data + n * i,
+                    oa_data[0],
+                    ob_data[0],
+                    oc_data[0]
+                )
                 cnp.PyArray_MultiIter_NEXT(multi)
         arr_obj = <object>array
         multi_nd = cnp_PyArray_MultiIter_NDIM(multi)
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
@@ -459,7 +820,6 @@ cdef object vec_long_disc0_array(
     cdef long res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res)
@@ -474,14 +834,17 @@ cdef object vec_long_disc0_array(
 
 
 cdef object vec_discnp_array_sc(
-    irk_state *state, irk_discnp_vec func, object size,
-    int n, double p, object lock
+    irk_state *state,
+    irk_discnp_vec func,
+    object size,
+    int n,
+    double p,
+    object lock
 ):
     cdef int *array_data
     cdef int res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, n, p)
@@ -495,11 +858,16 @@ cdef object vec_discnp_array_sc(
         return array
 
 
-cdef object vec_discnp_array(irk_state *state, irk_discnp_vec func, object size,
-                         cnp.ndarray on, cnp.ndarray op, object lock):
+cdef object vec_discnp_array(
+    irk_state *state,
+    irk_discnp_vec func,
+    object size,
+    cnp.ndarray on,
+    cnp.ndarray op,
+    object lock
+):
     cdef int *array_data
     cdef cnp.ndarray array "arrayObject"
-    cdef cnp.npy_intp length
     cdef cnp.npy_intp i, n, imax, res_size
     cdef double *op_data
     cdef int *on_data
@@ -511,10 +879,14 @@ cdef object vec_discnp_array(irk_state *state, irk_discnp_vec func, object size,
     cdef int multi_nd_i
 
     if size is None:
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>on, <void *>op)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>on, <void *>op
+        )
         multi_nd_i = cnp_PyArray_MultiIter_NDIM(multi)
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
-        array = <cnp.ndarray> cnp.PyArray_SimpleNew(multi_nd_i, multi_dims, cnp.NPY_INT)
+        array = <cnp.ndarray> cnp.PyArray_SimpleNew(
+            multi_nd_i, multi_dims, cnp.NPY_INT
+        )
         array_data = <int *>cnp.PyArray_DATA(array)
         with lock, nogil:
             for i from 0 <= i < cnp_PyArray_MultiIter_SIZE(multi):
@@ -526,12 +898,16 @@ cdef object vec_discnp_array(irk_state *state, irk_discnp_vec func, object size,
     else:
         array = <cnp.ndarray>np.empty(size, np.intc)
         array_data = <int *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(3, <void*>array, <void *>on, <void *>op)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            3, <void*>array, <void *>on, <void *>op
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
 
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>on, <void *>op)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>on, <void *>op
+        )
         imax = cnp_PyArray_MultiIter_SIZE(multi)
         n = res_size // imax
         with lock, nogil:
@@ -545,21 +921,29 @@ cdef object vec_discnp_array(irk_state *state, irk_discnp_vec func, object size,
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
 
-cdef object vec_discdd_array_sc(irk_state *state, irk_discdd_vec func, object size,
-                            double n, double p, object lock):
+cdef object vec_discdd_array_sc(
+    irk_state *state,
+    irk_discdd_vec func,
+    object size,
+    double n,
+    double p,
+    object lock
+):
     cdef int *array_data
     cdef int res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, n, p)
@@ -574,8 +958,14 @@ cdef object vec_discdd_array_sc(irk_state *state, irk_discdd_vec func, object si
         return array
 
 
-cdef object vec_discdd_array(irk_state *state, irk_discdd_vec func, object size,
-                         cnp.ndarray on, cnp.ndarray op, object lock):
+cdef object vec_discdd_array(
+    irk_state *state,
+    irk_discdd_vec func,
+    object size,
+    cnp.ndarray on,
+    cnp.ndarray op,
+    object lock
+):
     cdef int *array_data
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp i, imax, n, res_size
@@ -588,8 +978,14 @@ cdef object vec_discdd_array(irk_state *state, irk_discdd_vec func, object size,
     cdef cnp.npy_intp *multi_dims
 
     if size is None:
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>on, <void *>op)
-        array = <cnp.ndarray> cnp.PyArray_SimpleNew(cnp_PyArray_MultiIter_NDIM(multi), cnp_PyArray_MultiIter_DIMS(multi), cnp.NPY_INT)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>on, <void *>op
+        )
+        array = <cnp.ndarray> cnp.PyArray_SimpleNew(
+            cnp_PyArray_MultiIter_NDIM(multi),
+            cnp_PyArray_MultiIter_DIMS(multi),
+            cnp.NPY_INT
+        )
         array_data = <int *>cnp.PyArray_DATA(array)
         with lock, nogil:
             for i from 0 <= i < cnp_PyArray_MultiIter_SIZE(multi):
@@ -602,11 +998,15 @@ cdef object vec_discdd_array(irk_state *state, irk_discdd_vec func, object size,
         array = <cnp.ndarray>np.empty(size, np.intc)
         array_data = <int *>cnp.PyArray_DATA(array)
         res_size = cnp.PyArray_SIZE(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(3, <void*>array, <void *>on, <void *>op)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            3, <void*>array, <void *>on, <void *>op
+        )
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
 
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(2, <void *>on, <void *>op)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            2, <void *>on, <void *>op
+        )
         imax = cnp_PyArray_MultiIter_SIZE(multi)
         n = res_size // imax
         with lock, nogil:
@@ -620,21 +1020,30 @@ cdef object vec_discdd_array(irk_state *state, irk_discdd_vec func, object size,
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
 
-cdef object vec_discnmN_array_sc(irk_state *state, irk_discnmN_vec func, object size,
-                             int n, int m, int N, object lock):
+cdef object vec_discnmN_array_sc(
+    irk_state *state,
+    irk_discnmN_vec func,
+    object size,
+    int n,
+    int m,
+    int N,
+    object lock
+):
     cdef int *array_data
     cdef int res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, n, m, N)
@@ -648,15 +1057,22 @@ cdef object vec_discnmN_array_sc(irk_state *state, irk_discnmN_vec func, object
         return array
 
 
-cdef object vec_discnmN_array(irk_state *state, irk_discnmN_vec func, object size,
-                          cnp.ndarray on, cnp.ndarray om, cnp.ndarray oN, object lock):
+cdef object vec_discnmN_array(
+    irk_state *state,
+    irk_discnmN_vec func,
+    object size,
+    cnp.ndarray on,
+    cnp.ndarray om,
+    cnp.ndarray oN,
+    object lock
+):
     cdef int *array_data
     cdef int *on_data
     cdef int *om_data
     cdef int *oN_data
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp i
-    cdef cnp.broadcast multi, multi2
+    cdef cnp.broadcast multi
     cdef cnp.npy_intp imax, n, res_size
     cdef object arr_obj
     cdef Py_ssize_t multi_nd
@@ -664,27 +1080,43 @@ cdef object vec_discnmN_array(irk_state *state, irk_discnmN_vec func, object siz
     cdef cnp.npy_intp *multi_dims
 
     if size is None:
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(3, <void *>on, <void *>om, <void *>oN)
-        array = <cnp.ndarray> cnp.PyArray_SimpleNew(cnp_PyArray_MultiIter_NDIM(multi), cnp_PyArray_MultiIter_DIMS(multi), cnp.NPY_INT)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            3, <void *>on, <void *>om, <void *>oN
+        )
+        array = <cnp.ndarray> cnp.PyArray_SimpleNew(
+            cnp_PyArray_MultiIter_NDIM(multi),
+            cnp_PyArray_MultiIter_DIMS(multi),
+            cnp.NPY_INT
+        )
         array_data = <int *>cnp.PyArray_DATA(array)
         with lock, nogil:
             for i from 0 <= i < cnp_PyArray_MultiIter_SIZE(multi):
                 on_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 0)
                 om_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 1)
                 oN_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 2)
-                func(state, 1, array_data + i, on_data[0], om_data[0], oN_data[0])
+                func(
+                    state,
+                    1,
+                    array_data + i,
+                    on_data[0],
+                    om_data[0],
+                    oN_data[0]
+                )
                 cnp.PyArray_MultiIter_NEXT(multi)
         arr_obj = <object>array
     else:
         array = <cnp.ndarray>np.empty(size, np.intc)
         array_data = <int *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(4, <void*>array, <void *>on, <void *>om,
-                                                <void *>oN)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            4, <void*>array, <void *>on, <void *>om, <void *>oN
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
 
-        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(3, <void *>on, <void *>om, <void *>oN)
+        multi = <cnp.broadcast> cnp.PyArray_MultiIterNew(
+            3, <void *>on, <void *>om, <void *>oN
+        )
         imax = cnp_PyArray_MultiIter_SIZE(multi)
         n = res_size // imax
         with lock, nogil:
@@ -692,27 +1124,37 @@ cdef object vec_discnmN_array(irk_state *state, irk_discnmN_vec func, object siz
                 on_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 0)
                 om_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 1)
                 oN_data = <int *>cnp.PyArray_MultiIter_DATA(multi, 2)
-                func(state, n, array_data + n*i, on_data[0], om_data[0], oN_data[0])
+                func(
+                    state,
+                    n,
+                    array_data + n*i,
+                    on_data[0],
+                    om_data[0],
+                    oN_data[0]
+                )
                 cnp.PyArray_MultiIter_NEXT(multi)
         arr_obj = <object>array
         multi_nd = cnp_PyArray_MultiIter_NDIM(multi)
         multi_dims = cnp_PyArray_MultiIter_DIMS(multi)
         multi_shape = cpython.tuple.PyTuple_New(multi_nd)
         for i from 0 <= i < multi_nd:
-            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i]) 
+            cpython.tuple.PyTuple_SetItem(multi_shape, i, multi_dims[i])
         arr_obj.shape = (multi_shape + arr_obj.shape)[:arr_obj.ndim]
         multi_ndim = len(multi_shape)
-        arr_obj = arr_obj.transpose(tuple(range(multi_ndim, arr_obj.ndim)) + tuple(range(0, multi_ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(multi_ndim, arr_obj.ndim))
+            + tuple(range(0, multi_ndim))
+        )
 
     return arr_obj
 
-cdef object vec_discd_array_sc(irk_state *state, irk_discd_vec func, object size,
-                           double a, object lock):
+cdef object vec_discd_array_sc(
+    irk_state *state, irk_discd_vec func, object size, double a, object lock
+):
     cdef int *array_data
     cdef int res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, a)
@@ -726,13 +1168,17 @@ cdef object vec_discd_array_sc(irk_state *state, irk_discd_vec func, object size
 
         return array
 
-cdef object vec_long_discd_array_sc(irk_state *state, irk_discd_long_vec func, object size,
-                           double a, object lock):
+cdef object vec_long_discd_array_sc(
+    irk_state *state,
+    irk_discd_long_vec func,
+    object size,
+    double a,
+    object lock
+):
     cdef long *array_data
     cdef long res
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length
-    cdef cnp.npy_intp i
 
     if size is None:
         func(state, 1, &res, a)
@@ -746,8 +1192,13 @@ cdef object vec_long_discd_array_sc(irk_state *state, irk_discd_long_vec func, o
 
         return array
 
-cdef object vec_discd_array(irk_state *state, irk_discd_vec func, object size, cnp.ndarray oa,
-                        object lock):
+cdef object vec_discd_array(
+    irk_state *state,
+    irk_discd_vec func,
+    object size,
+    cnp.ndarray oa,
+    object lock
+):
     cdef int *array_data
     cdef double *oa_data
     cdef cnp.ndarray array "arrayObject"
@@ -758,20 +1209,30 @@ cdef object vec_discd_array(irk_state *state, irk_discd_vec func, object size, c
     cdef object arr_obj
 
     if size is None:
-        array = <cnp.ndarray>cnp.PyArray_SimpleNew(cnp.PyArray_NDIM(oa),
-                cnp.PyArray_DIMS(oa), cnp.NPY_INT32)
+        array = <cnp.ndarray>cnp.PyArray_SimpleNew(
+            cnp.PyArray_NDIM(oa),
+            cnp.PyArray_DIMS(oa),
+            cnp.NPY_INT32
+        )
         length = cnp.PyArray_SIZE(array)
         array_data = <int *>cnp.PyArray_DATA(array)
         itera = <cnp.flatiter>cnp.PyArray_IterNew(<object>oa)
         with lock, nogil:
             for i from 0 <= i < length:
-                func(state, 1, &array_data[i], (<double *>(cnp.PyArray_ITER_DATA(itera)))[0])
+                func(
+                    state,
+                    1,
+                    &array_data[i],
+                    (<double *>(cnp.PyArray_ITER_DATA(itera)))[0]
+                )
                 cnp.PyArray_ITER_NEXT(itera)
         arr_obj = <object>array
     else:
         array = <cnp.ndarray>np.empty(size, np.intc)
         array_data = <int *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(2, <void *>array, <void *>oa)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            2, <void *>array, <void *>oa
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
@@ -784,13 +1245,21 @@ cdef object vec_discd_array(irk_state *state, irk_discd_vec func, object size, c
                 func(state, n, array_data + n*i, oa_data[0])
                 cnp.PyArray_MultiIter_NEXTi(multi, 1)
         arr_obj = <object>array
-        arr_obj.shape = ((<object >oa).shape + arr_obj.shape)[:arr_obj.ndim]
-        arr_obj = arr_obj.transpose(tuple(range(oa.ndim, arr_obj.ndim)) + tuple(range(0, oa.ndim)))
+        arr_obj.shape = ((<object>oa).shape + arr_obj.shape)[:arr_obj.ndim]
+        arr_obj = arr_obj.transpose(
+            tuple(range(oa.ndim, arr_obj.ndim))
+            + tuple(range(0, oa.ndim))
+        )
 
     return arr_obj
 
-cdef object vec_long_discd_array(irk_state *state, irk_discd_long_vec func, object size, cnp.ndarray oa,
-                        object lock):
+cdef object vec_long_discd_array(
+    irk_state *state,
+    irk_discd_long_vec func,
+    object size,
+    cnp.ndarray oa,
+    object lock
+):
     cdef long *array_data
     cdef double *oa_data
     cdef cnp.ndarray array "arrayObject"
@@ -801,20 +1270,28 @@ cdef object vec_long_discd_array(irk_state *state, irk_discd_long_vec func, obje
     cdef object arr_obj
 
     if size is None:
-        array = <cnp.ndarray>cnp.PyArray_SimpleNew(cnp.PyArray_NDIM(oa),
-                cnp.PyArray_DIMS(oa), cnp.NPY_LONG)
+        array = <cnp.ndarray>cnp.PyArray_SimpleNew(
+            cnp.PyArray_NDIM(oa), cnp.PyArray_DIMS(oa), cnp.NPY_LONG
+        )
         length = cnp.PyArray_SIZE(array)
         array_data = <long *>cnp.PyArray_DATA(array)
         itera = <cnp.flatiter>cnp.PyArray_IterNew(<object>oa)
         with lock, nogil:
             for i from 0 <= i < length:
-                func(state, 1, array_data + i, (<double *>(cnp.PyArray_ITER_DATA(itera)))[0])
+                func(
+                    state,
+                    1,
+                    array_data + i,
+                    (<double *>(cnp.PyArray_ITER_DATA(itera)))[0]
+                )
                 cnp.PyArray_ITER_NEXT(itera)
         arr_obj = <object>array
     else:
         array = <cnp.ndarray>np.empty(size, np.dtype("long"))
         array_data = <long *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(2, <void *>array, <void *>oa)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            2, <void *>array, <void *>oa
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
@@ -828,24 +1305,33 @@ cdef object vec_long_discd_array(irk_state *state, irk_discd_long_vec func, obje
                 cnp.PyArray_MultiIter_NEXTi(multi, 1)
         arr_obj = <object>array
         arr_obj.shape = ((<object> oa).shape + arr_obj.shape)[:arr_obj.ndim]
-        arr_obj = arr_obj.transpose(tuple(range(oa.ndim, arr_obj.ndim)) + tuple(range(0, oa.ndim)))
+        arr_obj = arr_obj.transpose(
+            tuple(range(oa.ndim, arr_obj.ndim))
+            + tuple(range(0, oa.ndim))
+        )
 
     return arr_obj
 
-cdef object vec_Poisson_array(irk_state *state, irk_discdptr_vec func1, irk_discd_vec func2, object size, cnp.ndarray olambda,
-                        object lock):
+cdef object vec_Poisson_array(
+    irk_state *state,
+    irk_discdptr_vec func1,
+    irk_discd_vec func2,
+    object size,
+    cnp.ndarray olambda,
+    object lock
+):
     cdef int *array_data
     cdef double *oa_data
     cdef cnp.ndarray array "arrayObject"
     cdef cnp.npy_intp length, res_size
     cdef cnp.npy_intp i, imax, n
     cdef cnp.broadcast multi
-    cdef cnp.flatiter itera
     cdef object arr_obj
 
     if size is None:
-        array = <cnp.ndarray>cnp.PyArray_SimpleNew(cnp.PyArray_NDIM(olambda),
-                cnp.PyArray_DIMS(olambda), cnp.NPY_INT)
+        array = <cnp.ndarray>cnp.PyArray_SimpleNew(
+            cnp.PyArray_NDIM(olambda), cnp.PyArray_DIMS(olambda), cnp.NPY_INT
+        )
         length = cnp.PyArray_SIZE(array)
         array_data = <int *>cnp.PyArray_DATA(array)
         oa_data = <double *>cnp.PyArray_DATA(olambda)
@@ -855,7 +1341,9 @@ cdef object vec_Poisson_array(irk_state *state, irk_discdptr_vec func1, irk_disc
     else:
         array = <cnp.ndarray>np.empty(size, np.intc)
         array_data = <int *>cnp.PyArray_DATA(array)
-        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(2, <void *>array, <void *>olambda)
+        multi = <cnp.broadcast>cnp.PyArray_MultiIterNew(
+            2, <void *>array, <void *>olambda
+        )
         res_size = cnp.PyArray_SIZE(array)
         if (cnp_PyArray_MultiIter_SIZE(multi) != res_size):
             raise ValueError("size is not compatible with inputs")
@@ -869,8 +1357,13 @@ cdef object vec_Poisson_array(irk_state *state, irk_discdptr_vec func1, irk_disc
                     func2(state, n, array_data + n*i, oa_data[0])
                     cnp.PyArray_MultiIter_NEXTi(multi, 1)
             arr_obj = <object>array
-            arr_obj.shape = ((<object>olambda).shape + arr_obj.shape)[:arr_obj.ndim]
-            arr_obj = arr_obj.transpose(tuple(range(olambda.ndim, arr_obj.ndim)) + tuple(range(0, olambda.ndim)))
+            arr_obj.shape = (
+                (<object>olambda).shape + arr_obj.shape
+            )[:arr_obj.ndim]
+            arr_obj = arr_obj.transpose(
+                tuple(range(olambda.ndim, arr_obj.ndim))
+                + tuple(range(0, olambda.ndim))
+            )
         else:
             oa_data = <double *>cnp.PyArray_DATA(olambda)
             with lock, nogil:
@@ -893,7 +1386,9 @@ cdef double kahan_sum(double *darr, cnp.npy_intp n) nogil:
         sum = t
     return sum
 
-# computes dim*(dim + 1)/2  -- number of elements in lower-triangular part of a square matrix of shape (dim, dim)
+
+# computes dim*(dim + 1)/2  -- number of elements in lower-triangular part
+# of a square matrix of shape (dim, dim)
 cdef inline int packed_cholesky_size(int dim):
     cdef int dh, lsb
 
@@ -901,16 +1396,18 @@ cdef inline int packed_cholesky_size(int dim):
     lsb = (dim & 1)
     return (lsb + dh) * (dim + (1 - lsb))
 
+
 def _shape_from_size(size, d):
     if size is None:
         shape = (d,)
     else:
         try:
-           shape = (operator.index(size), d)
+            shape = (operator.index(size), d)
         except TypeError:
-           shape = tuple(size) + (d,)
+            shape = tuple(size) + (d,)
     return shape
 
+
 # sampling methods enum
 ICDF = 0
 BOXMULLER = 1
@@ -918,14 +1415,26 @@ BOXMULLER2 = 2
 POISNORM = 3
 PTPE = 4
 
-_method_alias_dict_gaussian = {'ICDF': ICDF, 'Inversion': ICDF,
-                      'BoxMuller': BOXMULLER, 'Box-Muller': BOXMULLER,
-                      'BoxMuller2': BOXMULLER2, 'Box-Muller2': BOXMULLER2}
+_method_alias_dict_gaussian = {
+    "ICDF": ICDF,
+    "Inversion": ICDF,
+    "BoxMuller": BOXMULLER,
+    "Box-Muller": BOXMULLER,
+    "BoxMuller2": BOXMULLER2,
+    "Box-Muller2": BOXMULLER2
+}
+
+_method_alias_dict_gaussian_short = {
+    "ICDF": ICDF,
+    "Inversion": ICDF,
+    "BoxMuller": BOXMULLER,
+    "Box-Muller": BOXMULLER
+}
 
-_method_alias_dict_gaussian_short = {'ICDF': ICDF, 'Inversion': ICDF,
-                            'BoxMuller': BOXMULLER, 'Box-Muller': BOXMULLER}
+_method_alias_dict_poisson = {
+    "PTPE" : PTPE, "Poisson-Normal": POISNORM, "POISNORM" : POISNORM
+}
 
-_method_alias_dict_poisson = {'PTPE' : PTPE, 'Poisson-Normal': POISNORM, 'POISNORM' : POISNORM}
 
 def choose_method(method, mlist, alias_dict = None):
     if (method not in mlist):
@@ -940,20 +1449,21 @@ def choose_method(method, mlist, alias_dict = None):
     else:
         return method
 
+
 _brng_dict = {
-    'MT19937' : MT19937,
-    'SFMT19937' : SFMT19937,
-    'WH' : WH,
-    'MT2203': MT2203,
-    'MCG31' : MCG31,
-    'R250' : R250,
-    'MRG32K3A' : MRG32K3A,
-    'MCG59' : MCG59,
-    'PHILOX4X32X10' : PHILOX4X32X10,
-    'NONDETERM' : NONDETERM,
-    'NONDETERMINISTIC' : NONDETERM,
-    'NON_DETERMINISTIC' : NONDETERM,
-    'ARS5' : ARS5
+    "MT19937" : MT19937,
+    "SFMT19937" : SFMT19937,
+    "WH" : WH,
+    "MT2203": MT2203,
+    "MCG31" : MCG31,
+    "R250" : R250,
+    "MRG32K3A" : MRG32K3A,
+    "MCG59" : MCG59,
+    "PHILOX4X32X10" : PHILOX4X32X10,
+    "NONDETERM" : NONDETERM,
+    "NONDETERMINISTIC" : NONDETERM,
+    "NON_DETERMINISTIC" : NONDETERM,
+    "ARS5" : ARS5
 }
 
 _brng_dict_stream_max = {
@@ -972,12 +1482,15 @@ _brng_dict_stream_max = {
 
 cdef irk_brng_t _default_fallback_brng_token_(brng):
     cdef irk_brng_t brng_token
-    warnings.warn(("The basic random generator specification {given} is not recognized. "
-                   "\"MT19937\" will be used instead").format(given=brng),
-                  UserWarning)
+    warnings.warn(
+        f"The basic random generator specification {brng} is not recognized. "
+        "\"MT19937\" will be used instead",
+        UserWarning
+    )
     brng_token = MT19937
     return brng_token
 
+
 cdef irk_brng_t _parse_brng_token_(brng):
     cdef irk_brng_t brng_token
 
@@ -994,26 +1507,30 @@ cdef irk_brng_t _parse_brng_token_(brng):
 
     return brng_token
 
+
 def _parse_brng_argument(brng):
     cdef irk_brng_t brng_token
     cdef unsigned int stream_id = 0
 
     if isinstance(brng, (list, tuple)) and len(brng) == 2:
-        bt, s = brng;
+        bt, s = brng
         brng_token = _parse_brng_token_(bt)
         smax = _brng_dict_stream_max[brng_token]
         if isinstance(s, int):
             s = s % smax
             if (s != brng[1]):
-                warnings.warn(("The generator index {actual} is not between 0 and {max}, "
-                        "index {choice} will be used.").format(actual=brng[-1], max=smax-1, choice=s),
-                        UserWarning)
+                warnings.warn(
+                    f"The generator index {brng[-1]} is not between 0 and "
+                    f"{smax-1}, index {s} will be used.",
+                    UserWarning
+                )
             stream_id = s
     else:
         brng_token = _parse_brng_token_(brng)
 
     return (brng_token, stream_id)
 
+
 def _brng_id_to_name(int brng_id):
     cdef object nm
     cdef object brng_name = None
@@ -1028,7 +1545,7 @@ cdef class _MKLRandomState:
     cdef irk_state *internal_state
     cdef object lock
 
-    def __init__(self, seed=None, brng='MT19937'):
+    def __init__(self, seed=None, brng="MT19937"):
         self.internal_state = <irk_state*>PyMem_Malloc(sizeof(irk_state))
         memset(self.internal_state, 0, sizeof(irk_state))
 
@@ -1042,35 +1559,46 @@ cdef class _MKLRandomState:
             self.internal_state = NULL
 
     def _seed_impl(self, seed=None, brng=None):
-        cdef irk_error errcode
+        cdef irk_error _errcode
         cdef irk_brng_t brng_token = MT19937
         cdef unsigned int stream_id
         cdef cnp.ndarray obj "arrayObject_obj"
 
         if (brng):
-            brng_token, stream_id = _parse_brng_argument(brng);
+            brng_token, stream_id = _parse_brng_argument(brng)
         else:
-            brng_token = <irk_brng_t> irk_get_brng_and_stream_mkl(self.internal_state, &stream_id)
+            brng_token = <irk_brng_t> irk_get_brng_and_stream_mkl(
+                self.internal_state, &stream_id
+            )
         try:
             if seed is None:
                 with self.lock:
-                    errcode = irk_randomseed_mkl(self.internal_state, brng_token, stream_id)
+                    _errcode = irk_randomseed_mkl(
+                        self.internal_state, brng_token, stream_id
+                    )
             else:
                 idx = operator.index(seed)
                 if idx > int(2**32 - 1) or idx < 0:
                     raise ValueError("Seed must be between 0 and 4294967295")
                 with self.lock:
-                    irk_seed_mkl(self.internal_state, idx, brng_token, stream_id)
+                    irk_seed_mkl(
+                        self.internal_state, idx, brng_token, stream_id
+                    )
         except TypeError:
             obj = np.asarray(seed)
-            if not obj.dtype is np.dtype('uint64'):
-                obj = obj.astype(np.int64, casting='safe')
+            if obj.dtype is not np.dtype("uint64"):
+                obj = obj.astype(np.int64, casting="safe")
             if ((obj > int(2**32 - 1)) | (obj < 0)).any():
                 raise ValueError("Seed must be between 0 and 4294967295")
-            obj = obj.astype('uint32', casting='unsafe')
+            obj = obj.astype("uint32", casting="unsafe")
             with self.lock:
-                irk_seed_mkl_array(self.internal_state, <unsigned int *>cnp.PyArray_DATA(obj),
-                                        cnp.PyArray_DIM(obj, 0), brng_token, stream_id)
+                irk_seed_mkl_array(
+                    self.internal_state,
+                    <unsigned int *>cnp.PyArray_DATA(obj),
+                    cnp.PyArray_DIM(obj, 0),
+                    brng_token,
+                    stream_id
+                )
 
     def seed(self, seed=None, brng=None):
         """
@@ -1079,7 +1607,8 @@ cdef class _MKLRandomState:
         Seed the generator.
 
         This method is called when `MKLRandomState` is initialized. It can be
-        called again to re-seed the generator. For details, see `MKLRandomState`.
+        called again to re-seed the generator. For details, see
+        `MKLRandomState`.
 
         Parameters
         ----------
@@ -1089,9 +1618,10 @@ cdef class _MKLRandomState:
         brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31',
                 'MCG59', 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM',
                 'ARS5', None}, optional
-            basic pseudo-random number generation algorithms, or non-deterministic
-            hardware-based generator, provided by Intel MKL. Use `brng==None` to keep
-            the `brng` specified during construction of this class instance.
+            basic pseudo-random number generation algorithms, or
+            non-deterministic hardware-based generator, provided by Intel MKL.
+            Use `brng==None` to keep the `brng` specified during construction
+            of this class instance.
 
         See Also
         --------
@@ -1099,12 +1629,11 @@ cdef class _MKLRandomState:
 
         References
         --------
-        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html
+        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html  # no-cython-lint
 
         """
         self._seed_impl(seed, brng)
 
-
     def get_state(self, legacy=True):
         """
         get_state(legacy=True)
@@ -1135,12 +1664,12 @@ cdef class _MKLRandomState:
         Notes
         -----
         `set_state` and `get_state` are not needed to work with any of the
-        random distributions in NumPy. If the internal state is manually altered,
-        the user should know exactly what he/she is doing.
+        random distributions in NumPy. If the internal state is manually
+        altered, the user should know exactly what he/she is doing.
 
         References
         -----
-        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html
+        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html  # no-cython-lint
 
         """
         cdef int state_buffer_size
@@ -1196,38 +1725,52 @@ cdef class _MKLRandomState:
         Notes
         -----
         `set_state` and `get_state` are not needed to work with any of the
-        random distributions in NumPy. If the internal state is manually altered,
-        the user should know exactly what he/she is doing.
+        random distributions in NumPy. If the internal state is manually
+        altered, the user should know exactly what he/she is doing.
 
         For backwards compatibility, the form (str, array of 624 uints, int) is
-        also accepted although in such a case keys are used to seed the generator,
-        and position index pos is ignored: ``state = ('MT19937', keys, pos)``.
+        also accepted although in such a case keys are used to seed the
+        generator, and position index pos is ignored:
+        ``state = ('MT19937', keys, pos)``.
 
         References
         ----------
-        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html
+        MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html  # no-cython-lint
 
         """
         cdef char *bytes_ptr
         cdef int brng_id
         cdef cnp.ndarray obj "arrayObject_obj"
 
-
         if isinstance(state, (tuple, list)):
             state_len = len(state)
             if (state_len != 2):
                 if (state_len == 3 or state_len == 5):
-                    algo_name, key, pos = state[:3]
-                    if algo_name != 'MT19937':
-                        raise ValueError("The legacy state input algorithm must be 'MT19937'")
+                    algo_name, key, _pos = state[:3]
+                    if algo_name != "MT19937":
+                        raise ValueError(
+                            "The legacy state input algorithm must be 'MT19937'"
+                        )
                     try:
-                        obj = <cnp.ndarray> cnp.PyArray_ContiguousFromObject(key, cnp.NPY_ULONG, 1, 1)
+                        obj = <cnp.ndarray> cnp.PyArray_ContiguousFromObject(
+                            key,
+                            cnp.NPY_ULONG,
+                            1,
+                            1
+                        )
                     except TypeError:
                         # compatibility -- could be an older pickle
-                        obj = <cnp.ndarray> cnp.PyArray_ContiguousFromObject(key, cnp.NPY_LONG, 1, 1)
+                        obj = <cnp.ndarray> cnp.PyArray_ContiguousFromObject(
+                            key,
+                            cnp.NPY_LONG,
+                            1,
+                            1
+                        )
                     self.seed(obj, brng = algo_name)
                     return
-                raise ValueError("The argument to set_state must be a list of 2 elements")
+                raise ValueError(
+                    "The argument to set_state must be a list of 2 elements"
+                )
         elif isinstance(state, dict):
             try:
                 state = (state["bit_generator"], state["state"]["mkl_stream"])
@@ -1238,21 +1781,26 @@ cdef class _MKLRandomState:
 
         algorithm_name = state[0]
         if algorithm_name not in _brng_dict.keys():
-            raise ValueError("basic number generator algorithm must be one of ['" + "',".join(_brng_dict.keys()) + "']")
+            raise ValueError(
+                "basic number generator algorithm must be one of ['"
+                + "', '".join(_brng_dict.keys()) + "']"
+            )
 
         stream_buf = state[1]
         if not is_bytes_object(stream_buf):
-            raise ValueError('state is expected to be bytes')
+            raise ValueError("state is expected to be bytes")
 
         bytes_ptr = py_bytes_DataPtr(stream_buf)
 
         with self.lock:
             err = irk_set_state_mkl(self.internal_state, bytes_ptr)
             if(err):
-                raise ValueError('The stream state buffer is corrupted')
+                raise ValueError("The stream state buffer is corrupted")
             brng_id = irk_get_brng_mkl(self.internal_state)
             if (_brng_dict[algorithm_name] != brng_id):
-                raise ValueError('The algorithm name does not match content of the buffer')
+                raise ValueError(
+                    "The algorithm name does not match content of the buffer"
+                )
 
     # Pickling support:
     def __getstate__(self):
@@ -1284,8 +1832,8 @@ cdef class _MKLRandomState:
         Returns
         -------
         out : float or ndarray of floats
-            Array of random floats of shape `size` (unless ``size=None``, in which
-            case a single float is returned).
+            Array of random floats of shape `size` (unless ``size=None``, in
+            which case a single float is returned).
 
         Examples
         --------
@@ -1304,7 +1852,9 @@ cdef class _MKLRandomState:
                [-1.23204345, -1.75224494]])
 
         """
-        return vec_cont0_array(self.internal_state, irk_double_vec, size, self.lock)
+        return vec_cont0_array(
+            self.internal_state, irk_double_vec, size, self.lock
+        )
 
     # Set up dictionary of integer types and relevant functions.
     #
@@ -1319,19 +1869,19 @@ cdef class _MKLRandomState:
 
     def _choose_randint_type(self, dtype):
         _randint_type = {
-            'bool': (0, 2, self._rand_bool),
-            'int8': (-2**7, 2**7, self._rand_int8),
-            'int16': (-2**15, 2**15, self._rand_int16),
-            'int32': (-2**31, 2**31, self._rand_int32),
-            'int64': (-2**63, 2**63, self._rand_int64),
-            'uint8': (0, 2**8, self._rand_uint8),
-            'uint16': (0, 2**16, self._rand_uint16),
-            'uint32': (0, 2**32, self._rand_uint32),
-            'uint64': (0, 2**64, self._rand_uint64)
+            "bool": (0, 2, self._rand_bool),
+            "int8": (-2**7, 2**7, self._rand_int8),
+            "int16": (-2**15, 2**15, self._rand_int16),
+            "int32": (-2**31, 2**31, self._rand_int32),
+            "int64": (-2**63, 2**63, self._rand_int64),
+            "uint8": (0, 2**8, self._rand_uint8),
+            "uint16": (0, 2**16, self._rand_uint16),
+            "uint32": (0, 2**32, self._rand_uint32),
+            "uint64": (0, 2**64, self._rand_uint64)
         }
 
         key = np.dtype(dtype).name
-        if not key in _randint_type:
+        if key not in _randint_type:
             raise TypeError('Unsupported dtype "%s" for randint' % key)
         return _randint_type[key]
 
@@ -1359,7 +1909,6 @@ cdef class _MKLRandomState:
                 irk_rand_bool_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_int8(self, cnp.npy_int8 low, cnp.npy_int8 high, size):
         """
         _rand_int8(low, high, size)
@@ -1383,7 +1932,6 @@ cdef class _MKLRandomState:
                 irk_rand_int8_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_int16(self, cnp.npy_int16 low, cnp.npy_int16 high, size):
         """
         _rand_int16(low, high, size)
@@ -1407,7 +1955,6 @@ cdef class _MKLRandomState:
                 irk_rand_int16_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_int32(self, cnp.npy_int32 low, cnp.npy_int32 high, size):
         """
         _rand_int32(self, low, high, size)
@@ -1452,7 +1999,6 @@ cdef class _MKLRandomState:
                 irk_rand_int32_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_int64(self, cnp.npy_int64 low, cnp.npy_int64 high, size):
         """
         _rand_int64(low, high, size)
@@ -1499,7 +2045,6 @@ cdef class _MKLRandomState:
                 irk_rand_uint8_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_uint16(self, cnp.npy_uint16 low, cnp.npy_uint16 high, size):
         """
         _rand_uint16(low, high, size)
@@ -1507,7 +2052,7 @@ cdef class _MKLRandomState:
         See `_rand_int32` for documentation, only the return type changes.
 
         """
-        cdef cnp.npy_uint16 off, rng, buf
+        cdef cnp.npy_uint16 buf
         cdef cnp.npy_uint16 *out
         cdef cnp.ndarray array "arrayObject"
         cdef cnp.npy_intp cnt
@@ -1523,7 +2068,6 @@ cdef class _MKLRandomState:
                 irk_rand_uint16_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_uint32(self, cnp.npy_uint32 low, cnp.npy_uint32 high, size):
         """
         _rand_uint32(self, low, high, size)
@@ -1547,7 +2091,6 @@ cdef class _MKLRandomState:
                 irk_rand_uint32_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def _rand_uint64(self, cnp.npy_uint64 low, cnp.npy_uint64 high, size):
         """
         _rand_uint64(low, high, size)
@@ -1571,7 +2114,6 @@ cdef class _MKLRandomState:
                 irk_rand_uint64_vec(self.internal_state, cnt, out, low, high)
             return array
 
-
     def randint(self, low, high=None, size=None, dtype=int):
         """
         randint(low, high=None, size=None, dtype=int)
@@ -1637,9 +2179,13 @@ cdef class _MKLRandomState:
         lowbnd, highbnd, randfunc = self._choose_randint_type(dtype)
 
         if low < lowbnd:
-            raise ValueError("low is out of bounds for %s" % (np.dtype(dtype).name,))
+            raise ValueError(
+                f"low is out of bounds for {np.dtype(dtype).name}"
+            )
         if high > highbnd:
-            raise ValueError("high is out of bounds for %s" % (np.dtype(dtype).name,))
+            raise ValueError(
+                f"high is out of bounds for {np.dtype(dtype).name}"
+            )
         if low >= high:
             raise ValueError("low >= high")
 
@@ -1680,7 +2226,6 @@ cdef class _MKLRandomState:
             irk_fill(bytes, length, self.internal_state)
         return bytestring
 
-
     def choice(self, a, size=None, replace=True, p=None):
         """
         choice(a, size=None, replace=True, p=None)
@@ -1785,7 +2330,9 @@ cdef class _MKLRandomState:
                 if np.issubdtype(p.dtype, np.floating):
                     atol = max(atol, np.sqrt(np.finfo(p.dtype).eps))
 
-            p = <cnp.ndarray>cnp.PyArray_ContiguousFromObject(p, cnp.NPY_DOUBLE, 1, 1)
+            p = <cnp.ndarray>cnp.PyArray_ContiguousFromObject(
+                p, cnp.NPY_DOUBLE, 1, 1
+            )
             pix = <double*>cnp.PyArray_DATA(p)
 
             if p.ndim != 1:
@@ -1809,8 +2356,8 @@ cdef class _MKLRandomState:
                 cdf = p.cumsum()
                 cdf /= cdf[-1]
                 uniform_samples = self.random_sample(shape)
-                idx = cdf.searchsorted(uniform_samples, side='right')
-                idx = np.asarray(idx) # searchsorted returns a scalar
+                idx = cdf.searchsorted(uniform_samples, side="right")
+                idx = np.asarray(idx)  # searchsorted returns a scalar
             else:
                 idx = self.randint(0, pop_size, size=shape)
         else:
@@ -1823,7 +2370,9 @@ cdef class _MKLRandomState:
                     raise ValueError("Fewer non-zero entries in p than size")
                 n_uniq = 0
                 p = p.copy()
-                found = np.zeros(tuple() if shape is None else shape, dtype=np.int64)
+                found = np.zeros(
+                    tuple() if shape is None else shape, dtype=np.int64
+                )
                 flat_found = found.ravel()
                 while n_uniq < size:
                     x = self.rand(size - n_uniq)
@@ -1831,7 +2380,7 @@ cdef class _MKLRandomState:
                         p[flat_found[0:n_uniq]] = 0
                     cdf = np.cumsum(p)
                     cdf /= cdf[-1]
-                    new = cdf.searchsorted(x, side='right')
+                    new = cdf.searchsorted(x, side="right")
                     _, unique_indices = np.unique(new, return_index=True)
                     unique_indices.sort()
                     new = new.take(unique_indices)
@@ -1847,7 +2396,7 @@ cdef class _MKLRandomState:
             # In most cases a scalar will have been made an array
             idx = idx.item(0)
 
-        #Use samples as indices for a if a is array-like
+        # Use samples as indices for a if a is array-like
         if a.ndim == 0:
             return idx
 
@@ -1863,7 +2412,6 @@ cdef class _MKLRandomState:
 
         return a[idx]
 
-
     def uniform(self, low=0.0, high=1.0, size=None):
         """
         uniform(low=0.0, high=1.0, size=None)
@@ -1936,31 +2484,41 @@ cdef class _MKLRandomState:
         """
         cdef cnp.ndarray olow, ohigh
         cdef double flow, fhigh
-        cdef object temp
 
         flow = PyFloat_AsDouble(low)
         fhigh = PyFloat_AsDouble(high)
         if not npy_isfinite(flow) or not npy_isfinite(fhigh):
-            raise OverflowError('Range exceeds valid bounds')
+            raise OverflowError("Range exceeds valid bounds")
         if flow >= fhigh:
             raise ValueError("low >= high")
 
         if not PyErr_Occurred():
-            return vec_cont2_array_sc(self.internal_state, irk_uniform_vec, size, flow,
-                                  fhigh, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_uniform_vec,
+                size,
+                flow,
+                fhigh,
+                self.lock
+            )
 
         PyErr_Clear()
-        olow = <cnp.ndarray>cnp.PyArray_FROM_OTF(low, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        ohigh = <cnp.ndarray>cnp.PyArray_FROM_OTF(high, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        olow = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            low, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        ohigh = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            high, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
 
         if not np.all(np.isfinite(olow)) or not np.all(np.isfinite(ohigh)):
-            raise OverflowError('Range exceeds valid bounds')
+            raise OverflowError("Range exceeds valid bounds")
 
         if np.any(olow >= ohigh):
             raise ValueError("low >= high")
 
-        return vec_cont2_array(self.internal_state, irk_uniform_vec, size, olow, ohigh,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_uniform_vec, size, olow, ohigh, self.lock
+        )
 
     def rand(self, *args):
         """
@@ -2063,7 +2621,6 @@ cdef class _MKLRandomState:
         else:
             return self.standard_normal(args)
 
-
     def random_integers(self, low, high=None, size=None):
         """
         random_integers(low, high=None, size=None)
@@ -2152,11 +2709,13 @@ cdef class _MKLRandomState:
             low = 1
 
         else:
-            warnings.warn(("This function is deprecated. Please call "
-                           "randint({low}, {high} + 1) instead".format(
-                low=low, high=high)), DeprecationWarning)
+            warnings.warn(
+                f"This function is deprecated. Please call "
+                f"randint({low}, {high} + 1) instead",
+                DeprecationWarning
+            )
 
-        return self.randint(low, high + 1, size=size, dtype='l')
+        return self.randint(low, high + 1, size=size, dtype="l")
 
     # Complicated, continuous distributions:
     def standard_normal(self, size=None, method=ICDF):
@@ -2193,13 +2752,30 @@ cdef class _MKLRandomState:
         (3, 4, 2)
 
         """
-        method = choose_method(method, [ICDF, BOXMULLER, BOXMULLER2], _method_alias_dict_gaussian)
+        method = choose_method(
+            method, [ICDF, BOXMULLER, BOXMULLER2], _method_alias_dict_gaussian
+        )
         if method is ICDF:
-            return vec_cont0_array(self.internal_state, irk_standard_normal_vec_ICDF, size, self.lock)
+            return vec_cont0_array(
+                self.internal_state,
+                irk_standard_normal_vec_ICDF,
+                size,
+                self.lock
+            )
         elif method is BOXMULLER2:
-            return vec_cont0_array(self.internal_state, irk_standard_normal_vec_BM2, size, self.lock)
+            return vec_cont0_array(
+                self.internal_state,
+                irk_standard_normal_vec_BM2,
+                size,
+                self.lock
+            )
         else:
-            return vec_cont0_array(self.internal_state, irk_standard_normal_vec_BM1, size, self.lock);
+            return vec_cont0_array(
+                self.internal_state,
+                irk_standard_normal_vec_BM1,
+                size,
+                self.lock
+            )
 
     def normal(self, loc=0.0, scale=1.0, size=None, method=ICDF):
         """
@@ -2295,27 +2871,76 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            method = choose_method(method, [ICDF, BOXMULLER, BOXMULLER2], _method_alias_dict_gaussian)
+            method = choose_method(
+                method,
+                [ICDF, BOXMULLER, BOXMULLER2],
+                _method_alias_dict_gaussian
+            )
             if method is ICDF:
-                return vec_cont2_array_sc(self.internal_state, irk_normal_vec_ICDF, size, floc, fscale, self.lock)
+                return vec_cont2_array_sc(
+                    self.internal_state,
+                    irk_normal_vec_ICDF,
+                    size,
+                    floc,
+                    fscale,
+                    self.lock
+                )
             elif method is BOXMULLER2:
-                return vec_cont2_array_sc(self.internal_state, irk_normal_vec_BM2, size, floc, fscale, self.lock)
+                return vec_cont2_array_sc(
+                    self.internal_state,
+                    irk_normal_vec_BM2,
+                    size,
+                    floc,
+                    fscale,
+                    self.lock
+                )
             else:
-                return vec_cont2_array_sc(self.internal_state, irk_normal_vec_BM1, size, floc, fscale, self.lock)
+                return vec_cont2_array_sc(
+                    self.internal_state,
+                    irk_normal_vec_BM1,
+                    size,
+                    floc,
+                    fscale,
+                    self.lock
+                )
 
         PyErr_Clear()
 
-        oloc = <cnp.ndarray>cnp.PyArray_FROM_OTF(loc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oloc = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            loc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0)):
             raise ValueError("scale <= 0")
-        method = choose_method(method, [ICDF, BOXMULLER, BOXMULLER2], _method_alias_dict_gaussian)
+        method = choose_method(
+            method, [ICDF, BOXMULLER, BOXMULLER2], _method_alias_dict_gaussian
+        )
         if method is ICDF:
-            return vec_cont2_array(self.internal_state, irk_normal_vec_ICDF, size, oloc, oscale, self.lock)
+            return vec_cont2_array(
+                self.internal_state,
+                irk_normal_vec_ICDF,
+                size,
+                oloc,
+                oscale, self.lock
+            )
         elif method is BOXMULLER2:
-            return vec_cont2_array(self.internal_state, irk_normal_vec_BM2, size, oloc, oscale, self.lock)
+            return vec_cont2_array(
+                self.internal_state,
+                irk_normal_vec_BM2,
+                size,
+                oloc,
+                oscale, self.lock
+            )
         else:
-            return vec_cont2_array(self.internal_state, irk_normal_vec_BM1, size, oloc, oscale, self.lock)
+            return vec_cont2_array(
+                self.internal_state,
+                irk_normal_vec_BM1,
+                size,
+                oloc,
+                oscale, self.lock
+            )
 
     def beta(self, a, b, size=None):
         """
@@ -2365,19 +2990,25 @@ cdef class _MKLRandomState:
                 raise ValueError("a <= 0")
             if fb <= 0:
                 raise ValueError("b <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_beta_vec, size, fa, fb,
-                                  self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state, irk_beta_vec, size, fa, fb, self.lock
+            )
 
         PyErr_Clear()
 
-        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        ob = <cnp.ndarray>cnp.PyArray_FROM_OTF(b, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        ob = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            b, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oa, 0)):
             raise ValueError("a <= 0")
         if np.any(np.less_equal(ob, 0)):
             raise ValueError("b <= 0")
-        return vec_cont2_array(self.internal_state, irk_beta_vec, size, oa, ob,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_beta_vec, size, oa, ob, self.lock
+        )
 
     def exponential(self, scale=1.0, size=None):
         """
@@ -2387,7 +3018,9 @@ cdef class _MKLRandomState:
 
         Its probability density function is
 
-        .. math:: f(x; \\frac{1}{\\beta}) = \\frac{1}{\\beta} \\exp(-\\frac{x}{\\beta}),
+        .. math::
+            f(x; \\frac{1}{\\beta}) = \\frac{1}{\\beta}
+            \\exp(-\\frac{x}{\\beta}),
 
         for ``x > 0`` and 0 elsewhere. :math:`\\beta` is the scale parameter,
         which is the inverse of the rate parameter :math:`\\lambda = 1/\\beta`.
@@ -2425,17 +3058,24 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_exponential_vec, size,
-                                  fscale, self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state,
+                irk_exponential_vec,
+                size,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        oscale = <cnp.ndarray> cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE,
-                                            cnp.NPY_ARRAY_ALIGNED)
+        oscale = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0")
-        return vec_cont1_array(self.internal_state, irk_exponential_vec, size, oscale,
-                           self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_exponential_vec, size, oscale, self.lock
+        )
 
     def standard_exponential(self, size=None):
         """
@@ -2465,8 +3105,9 @@ cdef class _MKLRandomState:
         >>> n = mkl_random.standard_exponential((3, 8000))
 
         """
-        return vec_cont0_array(self.internal_state, irk_standard_exponential_vec, size,
-                           self.lock)
+        return vec_cont0_array(
+            self.internal_state, irk_standard_exponential_vec, size, self.lock
+        )
 
     def standard_gamma(self, shape, size=None):
         """
@@ -2543,16 +3184,27 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fshape <= 0:
                 raise ValueError("shape <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_standard_gamma_vec,
-                                  size, fshape, self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state,
+                irk_standard_gamma_vec,
+                size,
+                fshape,
+                self.lock
+            )
 
         PyErr_Clear()
-        oshape = <cnp.ndarray> cnp.PyArray_FROM_OTF(shape, cnp.NPY_DOUBLE,
-                                            cnp.NPY_ARRAY_ALIGNED)
+        oshape = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            shape, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oshape, 0.0)):
             raise ValueError("shape <= 0")
-        return vec_cont1_array(self.internal_state, irk_standard_gamma_vec, size,
-                           oshape, self.lock)
+        return vec_cont1_array(
+            self.internal_state,
+            irk_standard_gamma_vec,
+            size,
+            oshape,
+            self.lock
+        )
 
     def gamma(self, shape, scale=1.0, size=None):
         """
@@ -2635,18 +3287,29 @@ cdef class _MKLRandomState:
                 raise ValueError("shape <= 0")
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_gamma_vec, size, fshape,
-                                  fscale, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_gamma_vec,
+                size,
+                fshape,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
-        oshape = <cnp.ndarray>cnp.PyArray_FROM_OTF(shape, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oshape = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            shape, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oshape, 0.0)):
             raise ValueError("shape <= 0")
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0")
-        return vec_cont2_array(self.internal_state, irk_gamma_vec, size, oshape, oscale,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_gamma_vec, size, oshape, oscale, self.lock
+        )
 
     def f(self, dfnum, dfden, size=None):
         """
@@ -2740,19 +3403,25 @@ cdef class _MKLRandomState:
                 raise ValueError("shape <= 0")
             if fdfden <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_f_vec, size, fdfnum,
-                                  fdfden, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state, irk_f_vec, size, fdfnum, fdfden, self.lock
+            )
 
         PyErr_Clear()
 
-        odfnum = <cnp.ndarray>cnp.PyArray_FROM_OTF(dfnum, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        odfden = <cnp.ndarray>cnp.PyArray_FROM_OTF(dfden, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        odfnum = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            dfnum, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        odfden = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            dfden, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(odfnum, 0.0)):
             raise ValueError("dfnum <= 0")
         if np.any(np.less_equal(odfden, 0.0)):
             raise ValueError("dfden <= 0")
-        return vec_cont2_array(self.internal_state, irk_f_vec, size, odfnum, odfden,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_f_vec, size, odfnum, odfden, self.lock
+        )
 
     def noncentral_f(self, dfnum, dfden, nonc, size=None):
         """
@@ -2832,14 +3501,27 @@ cdef class _MKLRandomState:
                 raise ValueError("dfden <= 0")
             if fnonc < 0:
                 raise ValueError("nonc < 0")
-            return vec_cont3_array_sc(self.internal_state, irk_noncentral_f_vec, size,
-                                  fdfnum, fdfden, fnonc, self.lock)
+            return vec_cont3_array_sc(
+                self.internal_state,
+                irk_noncentral_f_vec,
+                size,
+                fdfnum,
+                fdfden,
+                fnonc,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        odfnum = <cnp.ndarray>cnp.PyArray_FROM_OTF(dfnum, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        odfden = <cnp.ndarray>cnp.PyArray_FROM_OTF(dfden, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        ononc = <cnp.ndarray>cnp.PyArray_FROM_OTF(nonc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        odfnum = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            dfnum, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        odfden = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            dfden, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        ononc = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            nonc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
 
         if np.any(np.less_equal(odfnum, 1.0)):
             raise ValueError("dfnum <= 1")
@@ -2847,8 +3529,15 @@ cdef class _MKLRandomState:
             raise ValueError("dfden <= 0")
         if np.any(np.less(ononc, 0.0)):
             raise ValueError("nonc < 0")
-        return vec_cont3_array(self.internal_state, irk_noncentral_f_vec, size, odfnum,
-                           odfden, ononc, self.lock)
+        return vec_cont3_array(
+            self.internal_state,
+            irk_noncentral_f_vec,
+            size,
+            odfnum,
+            odfden,
+            ononc,
+            self.lock
+        )
 
     def chisquare(self, df, size=None):
         """
@@ -2920,16 +3609,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fdf <= 0:
                 raise ValueError("df <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_chisquare_vec, size, fdf,
-                                  self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_chisquare_vec, size, fdf, self.lock
+            )
 
         PyErr_Clear()
 
-        odf = <cnp.ndarray>cnp.PyArray_FROM_OTF(df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        odf = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(odf, 0.0)):
             raise ValueError("df <= 0")
-        return vec_cont1_array(self.internal_state, irk_chisquare_vec, size, odf,
-                           self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_chisquare_vec, size, odf, self.lock
+        )
 
     def noncentral_chisquare(self, df, nonc, size=None):
         """
@@ -2989,8 +3682,11 @@ cdef class _MKLRandomState:
         and compare to a chisquare.
 
         >>> plt.figure()
-        >>> values = plt.hist(mkl_random.noncentral_chisquare(3, .0000001, 100000),
-        ...                   bins=np.arange(0., 25, .1), normed=True)
+        >>> values = plt.hist(
+        ...     mkl_random.noncentral_chisquare(3, .0000001, 100000),
+        ...     bins=np.arange(0., 25, .1),
+        ...     normed=True,
+        ... )
         >>> values2 = plt.hist(mkl_random.chisquare(3, 100000),
         ...                    bins=np.arange(0., 25, .1), normed=True)
         >>> plt.plot(values[1][0:-1], values[0]-values2[0], 'ob')
@@ -3015,19 +3711,35 @@ cdef class _MKLRandomState:
                 raise ValueError("df <= 0")
             if fnonc < 0:
                 raise ValueError("nonc < 0")
-            return vec_cont2_array_sc(self.internal_state, irk_noncentral_chisquare_vec,
-                                  size, fdf, fnonc, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_noncentral_chisquare_vec,
+                size,
+                fdf,
+                fnonc,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        odf = <cnp.ndarray>cnp.PyArray_FROM_OTF(df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        ononc = <cnp.ndarray>cnp.PyArray_FROM_OTF(nonc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        odf = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        ononc = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            nonc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(odf, 0.0)):
             raise ValueError("df <= 0")
         if np.any(np.less(ononc, 0.0)):
             raise ValueError("nonc < 0")
-        return vec_cont2_array(self.internal_state, irk_noncentral_chisquare_vec, size,
-                           odf, ononc, self.lock)
+        return vec_cont2_array(
+            self.internal_state,
+            irk_noncentral_chisquare_vec,
+            size,
+            odf,
+            ononc,
+            self.lock
+        )
 
     def standard_cauchy(self, size=None):
         """
@@ -3090,8 +3802,9 @@ cdef class _MKLRandomState:
         >>> plt.show()
 
         """
-        return vec_cont0_array(self.internal_state, irk_standard_cauchy_vec, size,
-                           self.lock)
+        return vec_cont0_array(
+            self.internal_state, irk_standard_cauchy_vec, size, self.lock
+        )
 
     def standard_t(self, df, size=None):
         """
@@ -3122,8 +3835,10 @@ cdef class _MKLRandomState:
         -----
         The probability density function for the t distribution is
 
-        .. math:: P(x, df) = \\frac{\\Gamma(\\frac{df+1}{2})}{\\sqrt{\\pi df}
-                  \\Gamma(\\frac{df}{2})}\\Bigl( 1+\\frac{x^2}{df} \\Bigr)^{-(df+1)/2}
+        .. math::
+            P(x, df) = \\frac{\\Gamma(\\frac{df+1}{2})}{\\sqrt{\\pi df}
+            \\Gamma(\\frac{df}{2})}\\Bigl( 1+\\frac{x^2}{df}
+            \\Bigr)^{-(df+1)/2}
 
         The t test is based on an assumption that the data come from a
         Normal distribution. The t test provides a way to test whether
@@ -3147,8 +3862,8 @@ cdef class _MKLRandomState:
         From Dalgaard page 83 [1]_, suppose the daily energy intake for 11
         women in Kj is:
 
-        >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \\
-        ...                    7515, 8230, 8770])
+        >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, \\
+        ...                    7515, 7515, 8230, 8770])
 
         Does their energy intake deviate systematically from the recommended
         value of 7725 kJ?
@@ -3187,16 +3902,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fdf <= 0:
                 raise ValueError("df <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_standard_t_vec, size,
-                                  fdf, self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_standard_t_vec, size, fdf, self.lock
+            )
 
         PyErr_Clear()
 
-        odf = <cnp.ndarray> cnp.PyArray_FROM_OTF(df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        odf = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            df, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(odf, 0.0)):
             raise ValueError("df <= 0")
-        return vec_cont1_array(self.internal_state, irk_standard_t_vec, size, odf,
-                           self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_standard_t_vec, size, odf, self.lock
+        )
 
     def vonmises(self, mu, kappa, size=None):
         """
@@ -3283,18 +4002,28 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fkappa < 0:
                 raise ValueError("kappa < 0")
-            return vec_cont2_array_sc(self.internal_state, irk_vonmises_vec, size, fmu,
-                                  fkappa, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_vonmises_vec,
+                size,
+                fmu,
+                fkappa,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        omu = <cnp.ndarray> cnp.PyArray_FROM_OTF(mu, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        okappa = <cnp.ndarray> cnp.PyArray_FROM_OTF(kappa, cnp.NPY_DOUBLE,
-                                            cnp.NPY_ARRAY_ALIGNED)
+        omu = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            mu, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        okappa = <cnp.ndarray> cnp.PyArray_FROM_OTF(
+            kappa, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less(okappa, 0.0)):
             raise ValueError("kappa < 0")
-        return vec_cont2_array(self.internal_state, irk_vonmises_vec, size, omu, okappa,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_vonmises_vec, size, omu, okappa, self.lock
+        )
 
     def pareto(self, a, size=None):
         """
@@ -3390,15 +4119,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fa <= 0:
                 raise ValueError("a <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_pareto_vec, size, fa,
-                                  self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_pareto_vec, size, fa, self.lock
+            )
 
         PyErr_Clear()
 
-        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oa, 0.0)):
             raise ValueError("a <= 0")
-        return vec_cont1_array(self.internal_state, irk_pareto_vec, size, oa, self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_pareto_vec, size, oa, self.lock
+        )
 
     def weibull(self, a, size=None):
         """
@@ -3498,16 +4232,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fa <= 0:
                 raise ValueError("a <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_weibull_vec, size, fa,
-                                  self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_weibull_vec, size, fa, self.lock
+            )
 
         PyErr_Clear()
 
-        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oa, 0.0)):
             raise ValueError("a <= 0")
-        return vec_cont1_array(self.internal_state, irk_weibull_vec, size, oa,
-                           self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_weibull_vec, size, oa, self.lock
+        )
 
     def power(self, a, size=None):
         """
@@ -3610,15 +4348,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fa <= 0:
                 raise ValueError("a <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_power_vec, size, fa,
-                                  self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_power_vec, size, fa, self.lock
+            )
 
         PyErr_Clear()
 
-        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oa, 0.0)):
             raise ValueError("a <= 0")
-        return vec_cont1_array(self.internal_state, irk_power_vec, size, oa, self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_power_vec, size, oa, self.lock
+        )
 
     def laplace(self, loc=0.0, scale=1.0, size=None):
         """
@@ -3651,8 +4394,9 @@ cdef class _MKLRandomState:
         -----
         It has the probability density function
 
-        .. math:: f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda}
-                                       \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right).
+        .. math::
+            f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda}
+            \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right).
 
         The first law of Laplace, from 1774, states that the frequency
         of an error can be expressed as an exponential function of the
@@ -3705,16 +4449,25 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_laplace_vec, size, floc,
-                                  fscale, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_laplace_vec,
+                size,
+                floc,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
         oloc = cnp.PyArray_FROM_OTF(loc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oscale = cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0")
-        return vec_cont2_array(self.internal_state, irk_laplace_vec, size, oloc, oscale,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_laplace_vec, size, oloc, oscale, self.lock
+        )
 
     def gumbel(self, loc=0.0, scale=1.0, size=None):
         """
@@ -3758,8 +4511,9 @@ cdef class _MKLRandomState:
 
         The probability density for the Gumbel distribution is
 
-        .. math:: p(x) = \\frac{e^{-(x - \\mu)/ \\beta}}{\\beta} e^{ -e^{-(x - \\mu)/
-                  \\beta}},
+        .. math::
+            p(x) = \\frac{e^{-(x - \\mu)/ \\beta}}{\\beta} e^{ -e^{-(x - \\mu)/
+            \\beta}},
 
         where :math:`\\mu` is the mode, a location parameter, and
         :math:`\\beta` is the scale parameter.
@@ -3834,16 +4588,25 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_gumbel_vec, size, floc,
-                                  fscale, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_gumbel_vec,
+                size,
+                floc,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
         oloc = cnp.PyArray_FROM_OTF(loc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oscale = cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0")
-        return vec_cont2_array(self.internal_state, irk_gumbel_vec, size, oloc, oscale,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_gumbel_vec, size, oloc, oscale, self.lock
+        )
 
     def logistic(self, loc=0.0, scale=1.0, size=None):
         """
@@ -3925,16 +4688,30 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_logistic_vec, size, floc,
-                                  fscale, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_logistic_vec,
+                size,
+                floc,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
         oloc = cnp.PyArray_FROM_OTF(loc, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oscale = cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0")
-        return vec_cont2_array(self.internal_state, irk_logistic_vec, size, oloc,
-                           oscale, self.lock)
+        return vec_cont2_array(
+            self.internal_state,
+            irk_logistic_vec,
+            size,
+            oloc,
+            oscale,
+            self.lock
+        )
 
     def lognormal(self, mean=0.0, sigma=1.0, size=None, method=ICDF):
         """
@@ -4052,29 +4829,60 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fsigma <= 0:
                 raise ValueError("sigma <= 0")
-            method = choose_method(method, [ICDF, BOXMULLER], _method_alias_dict_gaussian_short)
+            method = choose_method(
+                method, [ICDF, BOXMULLER], _method_alias_dict_gaussian_short
+            )
             if method is ICDF:
-                return vec_cont2_array_sc(self.internal_state, irk_lognormal_vec_ICDF, size,
-                                  fmean, fsigma, self.lock)
+                return vec_cont2_array_sc(
+                    self.internal_state,
+                    irk_lognormal_vec_ICDF,
+                    size,
+                    fmean,
+                    fsigma,
+                    self.lock
+                )
             else:
-                return vec_cont2_array_sc(self.internal_state, irk_lognormal_vec_BM, size,
-                                  fmean, fsigma, self.lock)
+                return vec_cont2_array_sc(
+                    self.internal_state,
+                    irk_lognormal_vec_BM,
+                    size,
+                    fmean,
+                    fsigma,
+                    self.lock
+                )
 
         PyErr_Clear()
 
-        omean = cnp.PyArray_FROM_OTF(mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        osigma = cnp.PyArray_FROM_OTF(sigma, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        omean = cnp.PyArray_FROM_OTF(
+            mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        osigma = cnp.PyArray_FROM_OTF(
+            sigma, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(osigma, 0.0)):
             raise ValueError("sigma <= 0.0")
 
-        method = choose_method(method, [ICDF, BOXMULLER], _method_alias_dict_gaussian_short)
+        method = choose_method(
+            method, [ICDF, BOXMULLER], _method_alias_dict_gaussian_short
+        )
         if method is ICDF:
-            return vec_cont2_array(self.internal_state, irk_lognormal_vec_ICDF, size,
-                                  omean, osigma, self.lock)
+            return vec_cont2_array(
+                self.internal_state,
+                irk_lognormal_vec_ICDF,
+                size,
+                omean,
+                osigma,
+                self.lock
+            )
         else:
-            return vec_cont2_array(self.internal_state, irk_lognormal_vec_BM, size,
-                                  omean, osigma, self.lock)
-
+            return vec_cont2_array(
+                self.internal_state,
+                irk_lognormal_vec_BM,
+                size,
+                omean,
+                osigma,
+                self.lock
+            )
 
     def rayleigh(self, scale=1.0, size=None):
         """
@@ -4098,7 +4906,8 @@ cdef class _MKLRandomState:
         -----
         The probability density function for the Rayleigh distribution is
 
-        .. math:: P(x;scale) = \\frac{x}{scale^2}e^{\\frac{-x^2}{2 \\cdotp scale^2}}
+        .. math::
+            P(x;scale) = \\frac{x}{scale^2}e^{\\frac{-x^2}{2 \\cdotp scale^2}}
 
         The Rayleigh distribution would arise, for example, if the East
         and North components of the wind velocity had identical zero-mean
@@ -4140,16 +4949,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont1_array_sc(self.internal_state, irk_rayleigh_vec, size,
-                                  fscale, self.lock)
+            return vec_cont1_array_sc(
+                self.internal_state, irk_rayleigh_vec, size, fscale, self.lock
+            )
 
         PyErr_Clear()
 
-        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oscale = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0.0")
-        return vec_cont1_array(self.internal_state, irk_rayleigh_vec, size, oscale,
-                           self.lock)
+        return vec_cont1_array(
+            self.internal_state, irk_rayleigh_vec, size, oscale, self.lock
+        )
 
     def wald(self, mean, scale, size=None):
         """
@@ -4223,18 +5036,29 @@ cdef class _MKLRandomState:
                 raise ValueError("mean <= 0")
             if fscale <= 0:
                 raise ValueError("scale <= 0")
-            return vec_cont2_array_sc(self.internal_state, irk_wald_vec, size, fmean,
-                                  fscale, self.lock)
+            return vec_cont2_array_sc(
+                self.internal_state,
+                irk_wald_vec,
+                size,
+                fmean,
+                fscale,
+                self.lock
+            )
 
         PyErr_Clear()
-        omean = cnp.PyArray_FROM_OTF(mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oscale = cnp.PyArray_FROM_OTF(scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        if np.any(np.less_equal(omean,0.0)):
+        omean = cnp.PyArray_FROM_OTF(
+            mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        oscale = cnp.PyArray_FROM_OTF(
+            scale, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        if np.any(np.less_equal(omean, 0.0)):
             raise ValueError("mean <= 0.0")
-        elif np.any(np.less_equal(oscale,0.0)):
+        elif np.any(np.less_equal(oscale, 0.0)):
             raise ValueError("scale <= 0.0")
-        return vec_cont2_array(self.internal_state, irk_wald_vec, size, omean, oscale,
-                           self.lock)
+        return vec_cont2_array(
+            self.internal_state, irk_wald_vec, size, omean, oscale, self.lock
+        )
 
     def triangular(self, left, mode, right, size=None):
         """
@@ -4270,11 +5094,12 @@ cdef class _MKLRandomState:
         -----
         The probability density function for the triangular distribution is
 
-        .. math:: P(x;l, m, r) = \\begin{cases}
-                  \\frac{2(x-l)}{(r-l)(m-l)}& \\text{for $l \\leq x \\leq m$},\\\\
-                  \\frac{2(r-x)}{(r-l)(r-m)}& \\text{for $m \\leq x \\leq r$},\\\\
-                  0& \\text{otherwise}.
-                  \\end{cases}
+        .. math::
+            P(x;l, m, r) = \\begin{cases}
+            \\frac{2(x-l)}{(r-l)(m-l)}& \\text{for $l \\leq x \\leq m$},\\\\
+            \\frac{2(r-x)}{(r-l)(r-m)}& \\text{for $m \\leq x \\leq r$},\\\\
+            0& \\text{otherwise}.
+            \\end{cases}
 
         The triangular distribution is often used in ill-defined
         problems where the underlying distribution is not known, but
@@ -4309,13 +5134,26 @@ cdef class _MKLRandomState:
                 raise ValueError("mode > right")
             if fleft == fright:
                 raise ValueError("left == right")
-            return vec_cont3_array_sc(self.internal_state, irk_triangular_vec, size,
-                                  fleft, fmode, fright, self.lock)
+            return vec_cont3_array_sc(
+                self.internal_state,
+                irk_triangular_vec,
+                size,
+                fleft,
+                fmode,
+                fright,
+                self.lock
+            )
 
         PyErr_Clear()
-        oleft = <cnp.ndarray>cnp.PyArray_FROM_OTF(left, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        omode = <cnp.ndarray>cnp.PyArray_FROM_OTF(mode, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
-        oright = <cnp.ndarray>cnp.PyArray_FROM_OTF(right, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        oleft = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            left, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        omode = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            mode, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
+        oright = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            right, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
 
         if np.any(np.greater(oleft, omode)):
             raise ValueError("left > mode")
@@ -4323,8 +5161,15 @@ cdef class _MKLRandomState:
             raise ValueError("mode > right")
         if np.any(np.equal(oleft, oright)):
             raise ValueError("left == right")
-        return vec_cont3_array(self.internal_state, irk_triangular_vec, size, oleft,
-                           omode, oright, self.lock)
+        return vec_cont3_array(
+            self.internal_state,
+            irk_triangular_vec,
+            size,
+            oleft,
+            omode,
+            oright,
+            self.lock
+        )
 
     # Complicated, discrete distributions:
     def binomial(self, n, p, size=None):
@@ -4427,13 +5272,23 @@ cdef class _MKLRandomState:
             if n > int(2**31-1):
                 raise ValueError("n > 2147483647")
             else:
-                return vec_discnp_array_sc(self.internal_state, irk_binomial_vec, size, <int> ln,
-                            fp, self.lock)
+                return vec_discnp_array_sc(
+                    self.internal_state,
+                    irk_binomial_vec,
+                    size,
+                    <int> ln,
+                    fp,
+                    self.lock
+                )
 
         PyErr_Clear()
 
-        on = <cnp.ndarray>cnp.PyArray_FROM_OTF(n, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY)
-        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        on = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            n, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY
+        )
+        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less(n, 0)):
             raise ValueError("n < 0")
         if np.any(np.less(op, 0)):
@@ -4443,9 +5298,10 @@ cdef class _MKLRandomState:
         if np.any(np.greater(n, int(2**31-1))):
             raise ValueError("n > 2147483647")
 
-        on = on.astype(np.int32, casting='unsafe')
-        return vec_discnp_array(self.internal_state, irk_binomial_vec, size, on, op,
-                            self.lock)
+        on = on.astype(np.int32, casting="unsafe")
+        return vec_discnp_array(
+            self.internal_state, irk_binomial_vec, size, on, op, self.lock
+        )
 
     def negative_binomial(self, n, p, size=None):
         """
@@ -4509,8 +5365,12 @@ cdef class _MKLRandomState:
 
         >>> s = mkl_random.negative_binomial(1, 0.1, 100000)
         >>> for i in range(1, 11):
-        ...    probability = sum(s<i) / 100000.
-        ...    print i, "wells drilled, probability of one success =", probability
+        ...    probability = sum(s < i) / 100000.
+        ...    print(
+        ...        i,
+        ...        "wells drilled, probability of one success =",
+        ...        probability,
+        ...    )
 
         """
         cdef cnp.ndarray on
@@ -4527,21 +5387,32 @@ cdef class _MKLRandomState:
                 raise ValueError("p < 0")
             elif fp > 1:
                 raise ValueError("p > 1")
-            return vec_discdd_array_sc(self.internal_state, irk_negbinomial_vec,
-                                   size, fn, fp, self.lock)
+            return vec_discdd_array_sc(
+                self.internal_state,
+                irk_negbinomial_vec,
+                size,
+                fn,
+                fp,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        on = <cnp.ndarray>cnp.PyArray_FROM_OTF(n, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
-        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        on = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            n, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
+        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less_equal(n, 0)):
             raise ValueError("n <= 0")
         if np.any(np.less(p, 0)):
             raise ValueError("p < 0")
         if np.any(np.greater(p, 1)):
             raise ValueError("p > 1")
-        return vec_discdd_array(self.internal_state, irk_negbinomial_vec, size,
-                            on, op, self.lock)
+        return vec_discdd_array(
+            self.internal_state, irk_negbinomial_vec, size, on, op, self.lock
+        )
 
     def poisson(self, lam=1.0, size=None, method=POISNORM):
         """
@@ -4613,7 +5484,7 @@ cdef class _MKLRandomState:
         """
         cdef cnp.ndarray olam
         cdef double flam
-        poisson_lam_max = np.iinfo('l').max - np.sqrt(np.iinfo('l').max)*10
+        poisson_lam_max = np.iinfo("l").max - np.sqrt(np.iinfo("l").max)*10
 
         flam = PyFloat_AsDouble(lam)
         if not PyErr_Occurred():
@@ -4621,25 +5492,55 @@ cdef class _MKLRandomState:
                 raise ValueError("lam < 0")
             if lam > poisson_lam_max:
                 raise ValueError("lam value too large")
-            method = choose_method(method, [POISNORM, PTPE], _method_alias_dict_poisson);
+            method = choose_method(
+                method, [POISNORM, PTPE], _method_alias_dict_poisson
+            )
             if method is POISNORM:
-                return vec_discd_array_sc(self.internal_state, irk_poisson_vec_POISNORM, size, flam, self.lock)
+                return vec_discd_array_sc(
+                    self.internal_state,
+                    irk_poisson_vec_POISNORM,
+                    size,
+                    flam,
+                    self.lock
+                )
             else:
-                return vec_discd_array_sc(self.internal_state, irk_poisson_vec_PTPE, size, flam, self.lock)
+                return vec_discd_array_sc(
+                    self.internal_state,
+                    irk_poisson_vec_PTPE,
+                    size,
+                    flam,
+                    self.lock
+                )
 
         PyErr_Clear()
 
-        olam = <cnp.ndarray>cnp.PyArray_FROM_OTF(lam, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        olam = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            lam, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less(olam, 0)):
             raise ValueError("lam < 0")
         if np.any(np.greater(olam, poisson_lam_max)):
             raise ValueError("lam value too large.")
-        method = choose_method(method, [POISNORM, PTPE], _method_alias_dict_poisson);
+        method = choose_method(
+            method, [POISNORM, PTPE], _method_alias_dict_poisson
+        )
         if method is POISNORM:
-            return vec_Poisson_array(self.internal_state, irk_poisson_vec_V, irk_poisson_vec_POISNORM, size, olam, self.lock)
+            return vec_Poisson_array(
+                self.internal_state,
+                irk_poisson_vec_V,
+                irk_poisson_vec_POISNORM,
+                size,
+                olam,
+                self.lock
+            )
         else:
-            return vec_discd_array(self.internal_state, irk_poisson_vec_PTPE, size, olam, self.lock)
-
+            return vec_discd_array(
+                self.internal_state,
+                irk_poisson_vec_PTPE,
+                size,
+                olam,
+                self.lock
+            )
 
     def zipf(self, a, size=None):
         """
@@ -4719,15 +5620,20 @@ cdef class _MKLRandomState:
         if not PyErr_Occurred():
             if fa <= 1.0:
                 raise ValueError("a <= 1.0")
-            return vec_long_discd_array_sc(self.internal_state, irk_zipf_long_vec, size, fa,
-                                  self.lock)
+            return vec_long_discd_array_sc(
+                self.internal_state, irk_zipf_long_vec, size, fa, self.lock
+            )
 
         PyErr_Clear()
 
-        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        oa = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            a, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less_equal(oa, 1.0)):
             raise ValueError("a <= 1.0")
-        return vec_long_discd_array(self.internal_state, irk_zipf_long_vec, size, oa, self.lock)
+        return vec_long_discd_array(
+            self.internal_state, irk_zipf_long_vec, size, oa, self.lock
+        )
 
     def geometric(self, p, size=None):
         """
@@ -4784,19 +5690,22 @@ cdef class _MKLRandomState:
                 raise ValueError("p <= 0.0")
             if fp > 1.0:
                 raise ValueError("p > 1.0")
-            return vec_discd_array_sc(self.internal_state, irk_geometric_vec, size, fp,
-                                  self.lock)
+            return vec_discd_array_sc(
+                self.internal_state, irk_geometric_vec, size, fp, self.lock
+            )
 
         PyErr_Clear()
 
-
-        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less_equal(op, 0.0)):
             raise ValueError("p < 0.0")
         if np.any(np.greater(op, 1.0)):
             raise ValueError("p > 1.0")
-        return vec_discd_array(self.internal_state, irk_geometric_vec, size, op,
-                           self.lock)
+        return vec_discd_array(
+            self.internal_state, irk_geometric_vec, size, op, self.lock
+        )
 
     def hypergeometric(self, ngood, nbad, nsample, size=None):
         """
@@ -4820,8 +5729,8 @@ cdef class _MKLRandomState:
             ``ngood + nbad``.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(d1, d2, d3)``, then
-            ``d1 * d2 * d3`` samples are drawn.  Default is None, in which case a
-            single value is returned.
+            ``d1 * d2 * d3`` samples are drawn.  Default is None, in which case
+            a single value is returned.
 
         Returns
         -------
@@ -4841,8 +5750,8 @@ cdef class _MKLRandomState:
 
         where :math:`0 \\le x \\le m` and :math:`n+m-N \\le x \\le n`
 
-        for P(x) the probability of x successes, m = ngood, N = ngood + nbad, and
-        n = number of samples.
+        for P(x) the probability of x successes, m = ngood, N = ngood + nbad,
+        and n = number of samples.
 
         Consider an urn with black and white marbles in it, ngood of them
         black and nbad are white. If you draw nsample balls without
@@ -4897,36 +5806,62 @@ cdef class _MKLRandomState:
                 raise ValueError("nbad < 0")
             if lnsample < 1:
                 raise ValueError("nsample < 1")
-            if ((<int> lngood) != lngood) or ((<int> lnbad) != lnbad) or ((<int> lnsample) != lnsample):
+            if (
+                ((<int> lngood) != lngood) or
+                ((<int> lnbad) != lnbad) or
+                ((<int> lnsample) != lnsample)
+            ):
                 raise ValueError("All parameters should not exceed 2147483647")
             lntot = lngood + lnbad
             if lntot < lnsample:
                 raise ValueError("ngood + nbad < nsample")
-            return vec_discnmN_array_sc(self.internal_state, irk_hypergeometric_vec,
-                                    size, lntot, lnsample, lngood, self.lock)
+            return vec_discnmN_array_sc(
+                self.internal_state,
+                irk_hypergeometric_vec,
+                size,
+                lntot,
+                lnsample,
+                lngood,
+                self.lock
+            )
 
         PyErr_Clear()
 
-        ongood = <cnp.ndarray>cnp.PyArray_FROM_OTF(ngood, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY)
-        onbad = <cnp.ndarray>cnp.PyArray_FROM_OTF(nbad, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY)
-        onsample = <cnp.ndarray>cnp.PyArray_FROM_OTF(nsample, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY)
+        ongood = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            ngood, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY
+        )
+        onbad = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            nbad, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY
+        )
+        onsample = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            nsample, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY
+        )
         if np.any(np.less(ongood, 0)):
             raise ValueError("ngood < 0")
         if np.any(np.less(onbad, 0)):
             raise ValueError("nbad < 0")
         if np.any(np.less(onsample, 1)):
             raise ValueError("nsample < 1")
-        otot = np.asarray(np.add(ongood, onbad));
+        otot = np.asarray(np.add(ongood, onbad))
         if np.any(np.less_equal(otot, 0)):
-            raise ValueError("Number of balls in each urn should not exceed 2147483647")
-        if np.any(np.less(otot,onsample)):
+            raise ValueError(
+                "Number of balls in each urn should not exceed 2147483647"
+            )
+        if np.any(np.less(otot, onsample)):
             raise ValueError("ngood + nbad < nsample")
 
-        otot = otot.astype(np.int32, casting='unsafe')
-        onsample = onsample.astype(np.int32, casting='unsafe')
-        ongood = ongood.astype(np.int32, casting='unsafe')
-        return vec_discnmN_array(self.internal_state, irk_hypergeometric_vec, size,
-                             otot, onsample, ongood, self.lock)
+        otot = otot.astype(np.int32, casting="unsafe")
+        onsample = onsample.astype(np.int32, casting="unsafe")
+        ongood = ongood.astype(np.int32, casting="unsafe")
+        return vec_discnmN_array(
+            self.internal_state,
+            irk_hypergeometric_vec,
+            size,
+            otot,
+            onsample,
+            ongood,
+            self.lock
+        )
 
     def logseries(self, p, size=None):
         """
@@ -5012,21 +5947,27 @@ cdef class _MKLRandomState:
                 raise ValueError("p <= 0.0")
             if fp >= 1.0:
                 raise ValueError("p >= 1.0")
-            return vec_discd_array_sc(self.internal_state, irk_logseries_vec, size, fp,
-                                  self.lock)
+            return vec_discd_array_sc(
+                self.internal_state, irk_logseries_vec, size, fp, self.lock
+            )
 
         PyErr_Clear()
 
-        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        op = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            p, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if np.any(np.less_equal(op, 0.0)):
             raise ValueError("p <= 0.0")
         if np.any(np.greater_equal(op, 1.0)):
             raise ValueError("p >= 1.0")
-        return vec_discd_array(self.internal_state, irk_logseries_vec, size, op,
-                           self.lock)
+        return vec_discd_array(
+            self.internal_state, irk_logseries_vec, size, op, self.lock
+        )
 
     # Multivariate distributions:
-    def multivariate_normal(self, mean, cov, size=None, check_valid="warn", tol=1e-8):
+    def multivariate_normal(
+        self, mean, cov, size=None, check_valid="warn", tol=1e-8
+    ):
         """
         multivariate_normal(mean, cov[, size, check_valid, tol])
 
@@ -5140,11 +6081,11 @@ cdef class _MKLRandomState:
             shape = size
 
         if len(mean.shape) != 1:
-               raise ValueError("mean must be 1 dimensional")
+            raise ValueError("mean must be 1 dimensional")
         if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
-               raise ValueError("cov must be 2 dimensional and square")
+            raise ValueError("cov must be 2 dimensional and square")
         if mean.shape[0] != cov.shape[0]:
-               raise ValueError("mean and cov must have same length")
+            raise ValueError("mean and cov must have same length")
 
         # Compute shape of output and create a matrix of independent
         # standard normally distributed random numbers. The matrix has rows
@@ -5170,7 +6111,7 @@ cdef class _MKLRandomState:
 
         # ensure double to make tol meaningful
         cov = cov.astype(np.double)
-        (u, s, v) = svd(cov)
+        (_u, s, v) = svd(cov)
 
         if check_valid != "ignore":
             if check_valid != "warn" and check_valid != "raise":
@@ -5180,8 +6121,10 @@ cdef class _MKLRandomState:
             psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
             if not psd:
                 if check_valid == "warn":
-                    warnings.warn("covariance is not symmetric positive-semidefinite.",
-                        RuntimeWarning)
+                    warnings.warn(
+                        "covariance is not symmetric positive-semidefinite.",
+                        RuntimeWarning
+                    )
                 else:
                     raise ValueError(
                         "covariance is not symmetric positive-semidefinite.")
@@ -5273,12 +6216,12 @@ cdef class _MKLRandomState:
         cdef cnp.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
         cdef double *pix
         cdef int *mnix
-        cdef cnp.npy_intp i, j, sz
-        cdef double Sum
-        cdef int dn
+        cdef cnp.npy_intp sz
 
         d = len(pvals)
-        parr = <cnp.ndarray>cnp.PyArray_ContiguousFromObject(pvals, cnp.NPY_DOUBLE, 1, 1)
+        parr = <cnp.ndarray>cnp.PyArray_ContiguousFromObject(
+            pvals, cnp.NPY_DOUBLE, 1, 1
+        )
         pix = <double*>cnp.PyArray_DATA(parr)
 
         if kahan_sum(pix, d-1) > (1.0 + 1e-12):
@@ -5295,7 +6238,6 @@ cdef class _MKLRandomState:
 
         return multin
 
-
     def dirichlet(self, object alpha, size=None):
         """
         dirichlet(alpha, size=None)
@@ -5356,47 +6298,48 @@ cdef class _MKLRandomState:
         >>> plt.title("Lengths of Strings")
 
         """
-        #=================
-        # Pure python algo
-        #=================
-        #alpha   = N.atleast_1d(alpha)
-        #k       = alpha.size
-
-        #if n == 1:
-        #    val = N.zeros(k)
-        #    for i in range(k):
-        #        val[i]   = sgamma(alpha[i], n)
-        #    val /= N.sum(val)
-        #else:
-        #    val = N.zeros((k, n))
-        #    for i in range(k):
-        #        val[i]   = sgamma(alpha[i], n)
-        #    val /= N.sum(val, axis = 0)
-        #    val = val.T
-
-        #return val
-        cdef cnp.npy_intp   k
-        cdef cnp.npy_intp   totsize
-        cdef cnp.ndarray    alpha_arr, val_arr
-        cdef double     *alpha_data
-        cdef double     *val_data
-        cdef cnp.npy_intp   i, j
-        cdef double     invacc, acc
-        cdef cnp.broadcast  multi1, multi2
-
-        alpha_arr = <cnp.ndarray>cnp.PyArray_FROM_OTF(alpha, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED)
+        # =================
+        #  Pure python algo
+        # =================
+        # alpha = N.atleast_1d(alpha)
+        # k = alpha.size
+
+        # if n == 1:
+        #     val = N.zeros(k)
+        #     for i in range(k):
+        #         val[i]   = sgamma(alpha[i], n)
+        #     val /= N.sum(val)
+        # else:
+        #     val = N.zeros((k, n))
+        #     for i in range(k):
+        #         val[i]   = sgamma(alpha[i], n)
+        #     val /= N.sum(val, axis = 0)
+        #     val = val.T
+        # return val
+        cdef cnp.npy_intp k
+        cdef cnp.npy_intp totsize
+        cdef cnp.ndarray alpha_arr, val_arr
+        cdef double *val_data
+        cdef cnp.npy_intp i, j
+        cdef double invacc, acc
+        cdef cnp.broadcast multi1, multi2
+
+        alpha_arr = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            alpha, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_ALIGNED
+        )
         if (alpha_arr.ndim != 1):
             raise ValueError("Parameter alpha is not a vector")
 
-        k     = len(alpha)
+        k = len(alpha)
         shape = _shape_from_size(size, k)
 
-        diric    = self.standard_gamma(alpha_arr, shape)
+        diric = self.standard_gamma(alpha_arr, shape)
 
-        val_arr  = <cnp.ndarray>diric
+        val_arr = <cnp.ndarray>diric
         totsize = cnp.PyArray_SIZE(val_arr)
 
-        # Use of iterators is faster than calling PyArray_ContiguousFromObject and iterating in C
+        # Use of iterators is faster than calling PyArray_ContiguousFromObject
+        # and iterating in C
         multi1 = cnp.PyArray_MultiIterNew(2, <void *>val_arr, <void *>alpha_arr)
         multi2 = cnp.PyArray_MultiIterNew(2, <void *>val_arr, <void *>alpha_arr)
 
@@ -5475,22 +6418,26 @@ cdef class _MKLRandomState:
             # of bytes for the swaps to avoid leaving one of the objects
             # within the buffer and erroneously decrementing it's refcount
             # when the function exits.
-            buf = np.empty(itemsize, dtype=np.int8) # GC'd at function exit
+            buf = np.empty(itemsize, dtype=np.int8)  # GC'd at function exit
             buf_ptr = <char*><size_t>buf.ctypes.data
             with self.lock:
                 # We trick gcc into providing a specialized implementation for
                 # the most common case, yielding a ~33% performance improvement.
                 # Note that apparently, only one branch can ever be specialized.
                 if itemsize == sizeof(cnp.npy_intp):
-                    self._shuffle_raw(n, sizeof(cnp.npy_intp), stride, x_ptr, buf_ptr, u_data)
+                    self._shuffle_raw(
+                        n, sizeof(cnp.npy_intp), stride, x_ptr, buf_ptr, u_data
+                    )
                 else:
-                    self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr, u_data)
+                    self._shuffle_raw(
+                        n, itemsize, stride, x_ptr, buf_ptr, u_data
+                    )
         elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size:
             # Multidimensional ndarrays require a bounce buffer.
             buf = np.empty_like(x[0])
             with self.lock:
                 for i in reversed(range(1, n)):
-                    j = <cnp.npy_intp>floor( (i + 1) * u_data[i - 1])
+                    j = <cnp.npy_intp>floor((i + 1) * u_data[i - 1])
                     if (j < i):
                         buf[...] = x[j]
                         x[j] = x[i]
@@ -5499,14 +6446,21 @@ cdef class _MKLRandomState:
             # Untyped path.
             with self.lock:
                 for i in reversed(range(1, n)):
-                    j = <cnp.npy_intp>floor( (i + 1) * u_data[i - 1])
+                    j = <cnp.npy_intp>floor((i + 1) * u_data[i - 1])
                     x[i], x[j] = x[j], x[i]
 
-    cdef inline _shuffle_raw(self, cnp.npy_intp n, cnp.npy_intp itemsize,
-                             cnp.npy_intp stride, char* data, char* buf, double* udata):
+    cdef inline _shuffle_raw(
+        self,
+        cnp.npy_intp n,
+        cnp.npy_intp itemsize,
+        cnp.npy_intp stride,
+        char* data,
+        char* buf,
+        double* udata
+    ):
         cdef cnp.npy_intp i, j
         for i in reversed(range(1, n)):
-            j = <cnp.npy_intp>floor( (i + 1) * udata[i - 1])
+            j = <cnp.npy_intp>floor((i + 1) * udata[i - 1])
             memcpy(buf, data + j * stride, itemsize)
             memcpy(data + j * stride, data + i * stride, itemsize)
             memcpy(data + i * stride, buf, itemsize)
@@ -5589,7 +6543,7 @@ cdef class MKLRandomState(_MKLRandomState):
     brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31', 'MCG59',
             'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', 'ARS5'}, optional
         basic pseudo-random number generation algorithms, or non-deterministic
-        hardware-based generator, provided by Intel MKL. The default choice is 
+        hardware-based generator, provided by Intel MKL. The default choice is
         'MT19937' - the Mersenne Twister generator.
 
     Notes
@@ -5602,7 +6556,7 @@ cdef class MKLRandomState(_MKLRandomState):
 
     References
     -----
-    MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html
+    MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html  # no-cython-lint
 
     """
 
@@ -5620,14 +6574,16 @@ cdef class MKLRandomState(_MKLRandomState):
         """
         cdef int err, brng_id
 
-        err = irk_leapfrog_stream_mkl(self.internal_state, k, nstreams);
+        err = irk_leapfrog_stream_mkl(self.internal_state, k, nstreams)
 
         if err == -1:
-            raise ValueError('The stream state buffer is corrupted')
+            raise ValueError("The stream state buffer is corrupted")
         elif err == 1:
             with self.lock:
                 brng_id = irk_get_brng_mkl(self.internal_state)
-            raise ValueError("Leap-frog method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id)))
+            raise ValueError(
+                "Leap-frog method of stream initialization is not supported "
+                f"for {str(_brng_id_to_name(brng_id))}")
 
     def skipahead(self, long long int nskips):
         """
@@ -5639,14 +6595,17 @@ cdef class MKLRandomState(_MKLRandomState):
         """
         cdef int err, brng_id
 
-        err = irk_skipahead_stream_mkl(self.internal_state, nskips);
+        err = irk_skipahead_stream_mkl(self.internal_state, nskips)
 
         if err == -1:
-            raise ValueError('The stream state buffer is corrupted')
+            raise ValueError("The stream state buffer is corrupted")
         elif err == 1:
             with self.lock:
                 brng_id = irk_get_brng_mkl(self.internal_state)
-            raise ValueError("Skip-ahead method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id)))
+            raise ValueError(
+                "Skip-ahead method of stream initialization is not supported "
+                f"for {str(_brng_id_to_name(brng_id))}"
+                )
 
     def tomaxint(self, size=None):
         """
@@ -5691,7 +6650,9 @@ cdef class MKLRandomState(_MKLRandomState):
                 [ True,  True]]], dtype=bool)
 
         """
-        return vec_long_disc0_array(self.internal_state, irk_long_vec, size, self.lock)
+        return vec_long_disc0_array(
+            self.internal_state, irk_long_vec, size, self.lock
+        )
 
     def randint_untyped(self, low, high=None, size=None):
         """
@@ -5764,25 +6725,37 @@ cdef class MKLRandomState(_MKLRandomState):
 
         if ((<int> lo) == lo) and ((<int>hi) == hi):
             if size is None:
-                irk_discrete_uniform_vec(self.internal_state, 1, &rv_int, <int>lo, <int>hi)
+                irk_discrete_uniform_vec(
+                    self.internal_state, 1, &rv_int, <int>lo, <int>hi
+                )
                 return rv_int
             else:
                 array = <cnp.ndarray>np.empty(size, np.int32)
                 length = cnp.PyArray_SIZE(array)
                 array_int_data = <int*>cnp.PyArray_DATA(array)
                 with self.lock, nogil:
-                    irk_discrete_uniform_vec(self.internal_state, length, array_int_data, <int>lo, <int>hi)
+                    irk_discrete_uniform_vec(
+                        self.internal_state,
+                        length,
+                        array_int_data,
+                        <int>lo,
+                        <int>hi
+                    )
                 return array
         else:
             if size is None:
-                irk_discrete_uniform_long_vec(self.internal_state, 1, &rv_long, lo, hi)
+                irk_discrete_uniform_long_vec(
+                    self.internal_state, 1, &rv_long, lo, hi
+                )
                 return rv_long
             else:
                 array = <cnp.ndarray>np.empty(size, int)
                 length = cnp.PyArray_SIZE(array)
                 array_long_data = <long*>cnp.PyArray_DATA(array)
                 with self.lock, nogil:
-                    irk_discrete_uniform_long_vec(self.internal_state, length, array_long_data, lo, hi)
+                    irk_discrete_uniform_long_vec(
+                        self.internal_state, length, array_long_data, lo, hi
+                    )
                 return array
 
     def multinormal_cholesky(self, mean, ch, size=None, method=ICDF):
@@ -5804,8 +6777,8 @@ cdef class MKLRandomState(_MKLRandomState):
         mean : 1-D array_like, of length N
             Mean of the N-dimensional distribution.
         ch : 2-D array_like, of shape (N, N)
-            Cholesky factor of the covariance matrix of the distribution. Only lower-triangular
-            part of the matrix is actually used.
+            Cholesky factor of the covariance matrix of the distribution. Only
+            lower-triangular part of the matrix is actually used.
         size : int or tuple of ints, optional
             Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are
             generated, and packed in an `m`-by-`n`-by-`k` arrangement.  Because
@@ -5895,8 +6868,12 @@ cdef class MKLRandomState(_MKLRandomState):
         cdef ch_st_enum storage_mode
 
         # Check preconditions on arguments
-        marr = <cnp.ndarray>cnp.PyArray_FROM_OTF(mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
-        tarr = <cnp.ndarray>cnp.PyArray_FROM_OTF(ch, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY)
+        marr = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
+        tarr = <cnp.ndarray>cnp.PyArray_FROM_OTF(
+            ch, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY
+        )
 
         if size is None:
             shape = []
@@ -5906,23 +6883,33 @@ cdef class MKLRandomState(_MKLRandomState):
             shape = size
 
         if marr.ndim != 1:
-               raise ValueError("mean must be 1 dimensional")
-        dim = marr.shape[0];
+            raise ValueError("mean must be 1 dimensional")
+        dim = marr.shape[0]
         if (tarr.ndim == 2):
             storage_mode = MATRIX
             if (tarr.shape[0] != tarr.shape[1]):
-                   raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such")
+                raise ValueError(
+                    "ch must be a square lower triangular 2-dimensional array "
+                    "or a row-packed one-dimensional representation of such"
+                )
             if dim != tarr.shape[0]:
-                   raise ValueError("mean and ch must have consistent shapes")
+                raise ValueError("mean and ch must have consistent shapes")
         elif (tarr.ndim == 1):
             if (tarr.shape[0] == dim):
                 storage_mode = DIAGONAL
             elif (tarr.shape[0] == packed_cholesky_size(dim)):
                 storage_mode = PACKED
             else:
-                raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such")
+                raise ValueError(
+                    "ch must be a square lower triangular "
+                    "2-dimensional array or a row-packed one-dimensional "
+                    "representation of such"
+                )
         else:
-            raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such")
+            raise ValueError(
+                "ch must be a square lower triangular 2-dimensional array or a "
+                "row-packed one-dimensional representation of such"
+            )
 
         # Compute shape of output and create a matrix of independent
         # standard normally distributed random numbers. The matrix has rows
@@ -5938,13 +6925,39 @@ cdef class MKLRandomState(_MKLRandomState):
 
         n = cnp.PyArray_SIZE(resarr) // dim
 
-        method = choose_method(method, [ICDF, BOXMULLER2, BOXMULLER], _method_alias_dict_gaussian)
+        method = choose_method(
+            method, [ICDF, BOXMULLER2, BOXMULLER], _method_alias_dict_gaussian
+        )
         if (method is ICDF):
-            irk_multinormal_vec_ICDF(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode)
+            irk_multinormal_vec_ICDF(
+                self.internal_state,
+                n,
+                res_data,
+                dim,
+                mean_data,
+                t_data,
+                storage_mode
+            )
         elif (method is BOXMULLER2):
-            irk_multinormal_vec_BM2(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode)
+            irk_multinormal_vec_BM2(
+                self.internal_state,
+                n,
+                res_data,
+                dim,
+                mean_data,
+                t_data,
+                storage_mode
+            )
         else:
-            irk_multinormal_vec_BM1(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode)
+            irk_multinormal_vec_BM1(
+                self.internal_state,
+                n,
+                res_data,
+                dim,
+                mean_data,
+                t_data,
+                storage_mode
+            )
 
         return resarr
 
@@ -5980,10 +6993,10 @@ def __MKLRandomState_ctor():
     """
     Return a MKLRandomState instance.
     This function exists solely to assist (un)pickling.
-    Note that the state of the MKLRandomState returned here is irrelevant, as this function's
-    entire purpose is to return a newly allocated MKLRandomState whose state pickle can set.
-    Consequently the MKLRandomState returned by this function is a freshly allocated copy
-    with a seed=0.
+    Note that the state of the MKLRandomState returned here is irrelevant, as
+    this function's entire purpose is to return a newly allocated
+    MKLRandomState whose state pickle can set. Consequently the MKLRandomState
+    returned by this function is a freshly allocated copy with a seed=0.
     See https://github.com/numpy/numpy/issues/4763 for a detailed discussion
     """
     return MKLRandomState(seed=0)
@@ -5993,10 +7006,10 @@ def __RandomState_ctor():
     """
     Return a RandomState instance.
     This function exists solely to assist (un)pickling.
-    Note that the state of the RandomState returned here is irrelevant, as this function's
-    entire purpose is to return a newly allocated RandomState whose state pickle can set.
-    Consequently the RandomState returned by this function is a freshly allocated copy
-    with a seed=0.
+    Note that the state of the RandomState returned here is irrelevant, as this
+    function's entire purpose is to return a newly allocated RandomState whose
+    state pickle can set. Consequently the RandomState returned by this
+    function is a freshly allocated copy with a seed=0.
     See https://github.com/numpy/numpy/issues/4763 for a detailed discussion
     """
     return RandomState(seed=0)
diff --git a/mkl_random/src/generate_mklrand_c.py b/mkl_random/src/generate_mklrand_c.py
index bb6fd20..744223e 100644
--- a/mkl_random/src/generate_mklrand_c.py
+++ b/mkl_random/src/generate_mklrand_c.py
@@ -1,42 +1,48 @@
 #!/usr/bin/env python
-from __future__ import division, absolute_import, print_function
+from __future__ import absolute_import, division, print_function
 
-import sys
-import re
 import os
+import re
+import sys
 
-unused_internal_funcs = ['__Pyx_PrintItem',
-                         '__Pyx_PrintNewline',
-                         '__Pyx_ReRaise',
-                         #'__Pyx_GetExcValue',
-                         '__Pyx_ArgTypeTest',
-                         '__Pyx_SetVtable',
-                         '__Pyx_GetVtable',
-                         '__Pyx_CreateClass']
+unused_internal_funcs = [
+    "__Pyx_PrintItem",
+    "__Pyx_PrintNewline",
+    "__Pyx_ReRaise",
+    # '__Pyx_GetExcValue',
+    "__Pyx_ArgTypeTest",
+    "__Pyx_SetVtable",
+    "__Pyx_GetVtable",
+    "__Pyx_CreateClass",
+]
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # Use cython here so that long docstrings are broken up.
     # This is needed for some VC++ compilers.
-    os.system('cython mklrand.pyx')
-    mklrand_c = open('mklrand.c', 'r')
-    processed = open('mklrand_pp.c', 'w')
-    unused_funcs_str = '(' + '|'.join(unused_internal_funcs) + ')'
-    uifpat = re.compile(r'static \w+ \*?'+unused_funcs_str+r'.*/\*proto\*/')
+    os.system("cython mklrand.pyx")
+    mklrand_c = open("mklrand.c", "r")
+    processed = open("mklrand_pp.c", "w")
+    unused_funcs_str = "(" + "|".join(unused_internal_funcs) + ")"
+    uifpat = re.compile(r"static \w+ \*?" + unused_funcs_str + r".*/\*proto\*/")
     linepat = re.compile(r'/\* ".*/mklrand.pyx":')
     for linenum, line in enumerate(mklrand_c):
-        m = re.match(r'^(\s+arrayObject\w*\s*=\s*[(])[(]PyObject\s*[*][)]',
-                     line)
+        m = re.match(
+            r"^(\s+arrayObject\w*\s*=\s*[(])[(]PyObject\s*[*][)]", line
+        )
         if m:
-            line = '%s(PyArrayObject *)%s' % (m.group(1), line[m.end():])
+            line = "%s(PyArrayObject *)%s" % (m.group(1), line[m.end() :])
         m = uifpat.match(line)
         if m:
-            line = ''
+            line = ""
         m = re.search(unused_funcs_str, line)
         if m:
-            print("%s was declared unused, but is used at line %d" % (m.group(),
-                                                                    linenum+1), file=sys.stderr)
+            print(
+                "%s was declared unused, but is used at line %d"
+                % (m.group(), linenum + 1),
+                file=sys.stderr,
+            )
         line = linepat.sub(r'/* "mklrand.pyx":', line)
         processed.write(line)
     mklrand_c.close()
     processed.close()
-    os.rename('mklrand_pp.c', 'mklrand.c')
+    os.rename("mklrand_pp.c", "mklrand.c")
diff --git a/mkl_random/src/mkl_distributions.cpp b/mkl_random/src/mkl_distributions.cpp
index 77910f6..d4de0f6 100644
--- a/mkl_random/src/mkl_distributions.cpp
+++ b/mkl_random/src/mkl_distributions.cpp
@@ -25,6 +25,7 @@
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+// clang-format off
 #include <stddef.h> /* for nullptr */
 #include <limits.h> /* for ULONG_MAX */
 #include <assert.h>
@@ -37,6 +38,7 @@
 #include "mkl_distributions.h"
 #include "Python.h"
 #include "numpy/npy_common.h" /* npy_intp */
+// clang-format on
 
 #define MKL_INT_MAX ((npy_intp)(~((MKL_UINT)0) >> 1))
 
@@ -58,793 +60,796 @@
 #define DIST_ASSUME_ALIGNED(p, b)
 #endif
 
-void irk_double_vec(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_double_vec(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream,
+                       MKL_INT_MAX, res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_uniform_vec(irk_state *state, npy_intp len, double *res, const double low, const double high)
-{
-    int err = 0;
+void irk_uniform_vec(irk_state *state, npy_intp len, double *res,
+                     const double low, const double high) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, MKL_INT_MAX, res, low, high);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream,
+                       MKL_INT_MAX, res, low, high);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, low, high);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, low, high);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_normal_vec_ICDF(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_normal_vec_ICDF(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream,
+                        MKL_INT_MAX, res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res,
+                      d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_normal_vec_ICDF(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
+void irk_normal_vec_ICDF(irk_state *state, npy_intp len, double *res,
+                         const double loc, const double scale) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, MKL_INT_MAX, res, loc, scale);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream,
+                        MKL_INT_MAX, res, loc, scale);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res, loc, scale);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res,
+                      loc, scale);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_normal_vec_BM1(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_normal_vec_BM1(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream,
+                        MKL_INT_MAX, res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, len,
+                      res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_normal_vec_BM1(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
+void irk_normal_vec_BM1(irk_state *state, npy_intp len, double *res,
+                        const double loc, const double scale) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, MKL_INT_MAX, res, loc, scale);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream,
+                        MKL_INT_MAX, res, loc, scale);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, len, res, loc, scale);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, state->stream, len,
+                      res, loc, scale);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_normal_vec_BM2(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_normal_vec_BM2(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream,
+                        MKL_INT_MAX, res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, len,
+                      res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_normal_vec_BM2(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
+void irk_normal_vec_BM2(irk_state *state, npy_intp len, double *res,
+                        const double loc, const double scale) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, MKL_INT_MAX, res, loc, scale);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream,
+                        MKL_INT_MAX, res, loc, scale);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, len, res, loc, scale);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, state->stream, len,
+                      res, loc, scale);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_exponential_vec(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_exponential_vec(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                           state->stream, MKL_INT_MAX, res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                         state->stream, len, res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_exponential_vec(irk_state *state, npy_intp len, double *res, const double scale)
-{
-    int err = 0;
-    const double d_zero = 0.0;
+void irk_exponential_vec(irk_state *state, npy_intp len, double *res,
+                         const double scale) {
+  int err = 0;
+  const double d_zero = 0.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, MKL_INT_MAX, res, d_zero, scale);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                           state->stream, MKL_INT_MAX, res, d_zero, scale);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, len, res, d_zero, scale);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                         state->stream, len, res, d_zero, scale);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_cauchy_vec(irk_state *state, npy_intp len, double *res)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_cauchy_vec(irk_state *state, npy_intp len, double *res) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngCauchy(VSL_RNG_METHOD_CAUCHY_ICDF, state->stream, MKL_INT_MAX, res, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngCauchy(VSL_RNG_METHOD_CAUCHY_ICDF, state->stream, MKL_INT_MAX,
+                      res, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngCauchy(VSL_RNG_METHOD_CAUCHY_ICDF, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngCauchy(VSL_RNG_METHOD_CAUCHY_ICDF, state->stream, len, res, d_zero,
+                    d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_standard_gamma_vec(irk_state *state, npy_intp len, double *res, const double shape)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_standard_gamma_vec(irk_state *state, npy_intp len, double *res,
+                            const double shape) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, MKL_INT_MAX, res, shape, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream,
+                     MKL_INT_MAX, res, shape, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res,
+                   shape, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_gamma_vec(irk_state *state, npy_intp len, double *res, const double shape, const double scale)
-{
-    int err = 0;
-    const double d_zero = 0.0;
+void irk_gamma_vec(irk_state *state, npy_intp len, double *res,
+                   const double shape, const double scale) {
+  int err = 0;
+  const double d_zero = 0.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, MKL_INT_MAX, res, shape, d_zero, scale);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream,
+                     MKL_INT_MAX, res, shape, d_zero, scale);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, scale);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res,
+                   shape, d_zero, scale);
+  assert(err == VSL_STATUS_OK);
 }
 
 /*  X ~ Z * (G*(2/df))**-0.5 */
-void irk_standard_t_vec(irk_state *state, npy_intp len, double *res, const double df)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
-    double shape = df / 2;
-    double *sn = nullptr;
-
-    if (len < 1)
-        return;
-
-    while (len > MKL_INT_MAX)
-    {
-        irk_standard_t_vec(state, MKL_INT_MAX, res, df);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+void irk_standard_t_vec(irk_state *state, npy_intp len, double *res,
+                        const double df) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+  double shape = df / 2;
+  double *sn = nullptr;
 
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, 1.0 / shape);
-    assert(err == VSL_STATUS_OK);
+  if (len < 1)
+    return;
 
-    vmdInvSqrt(len, res, res, VML_HA);
+  while (len > MKL_INT_MAX) {
+    irk_standard_t_vec(state, MKL_INT_MAX, res, df);
 
-    sn = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(sn != nullptr);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, sn, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res,
+                   shape, d_zero, 1.0 / shape);
+  assert(err == VSL_STATUS_OK);
+
+  vmdInvSqrt(len, res, res, VML_HA);
+
+  sn = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(sn != nullptr);
 
-    vmdMul(len, res, sn, res, VML_HA);
-    mkl_free(sn);
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, sn,
+                      d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+
+  vmdMul(len, res, sn, res, VML_HA);
+  mkl_free(sn);
 }
 
 /* chisquare(df) ~ G(df/2, 2) */
-void irk_chisquare_vec(irk_state *state, npy_intp len, double *res, const double df)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_two = 2.0;
-    double shape = 0.5 * df;
+void irk_chisquare_vec(irk_state *state, npy_intp len, double *res,
+                       const double df) {
+  int err = 0;
+  const double d_zero = 0.0, d_two = 2.0;
+  double shape = 0.5 * df;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_chisquare_vec(state, MKL_INT_MAX, res, df);
+  while (len > MKL_INT_MAX) {
+    irk_chisquare_vec(state, MKL_INT_MAX, res, df);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, d_two);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res,
+                   shape, d_zero, d_two);
+  assert(err == VSL_STATUS_OK);
 }
 
 /*    P ~ U^(-1/a) - 1 =  */
-void irk_pareto_vec(irk_state *state, npy_intp len, double *res, const double alp)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_zero = 0.0, d_one = 1.0;
-    double neg_rec_alp = -1.0 / alp;
-
-    if (len < 1)
-        return;
-
-    while (len > MKL_INT_MAX)
-    {
-        irk_pareto_vec(state, MKL_INT_MAX, res, alp);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+void irk_pareto_vec(irk_state *state, npy_intp len, double *res,
+                    const double alp) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+  double neg_rec_alp = -1.0 / alp;
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  if (len < 1)
+    return;
 
-    /* res[i] = pow(res[i], neg_rec_alp) */
-    vmdPowx(len, res, neg_rec_alp, res, VML_HA);
+  while (len > MKL_INT_MAX) {
+    irk_pareto_vec(state, MKL_INT_MAX, res, alp);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] -= 1.0;
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+
+  /* res[i] = pow(res[i], neg_rec_alp) */
+  vmdPowx(len, res, neg_rec_alp, res, VML_HA);
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] -= 1.0;
 }
 
 /*  W ~ E^(1/alp) */
-void irk_weibull_vec(irk_state *state, npy_intp len, double *res, const double alp)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
-    double rec_alp = 1.0 / alp;
+void irk_weibull_vec(irk_state *state, npy_intp len, double *res,
+                     const double alp) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+  double rec_alp = 1.0 / alp;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_weibull_vec(state, MKL_INT_MAX, res, alp);
+  while (len > MKL_INT_MAX) {
+    irk_weibull_vec(state, MKL_INT_MAX, res, alp);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                         state->stream, len, res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 
-    vmdPowx(len, res, rec_alp, res, VML_HA);
+  vmdPowx(len, res, rec_alp, res, VML_HA);
 }
 
 /*  pow(1 - exp(-E(1))), 1./a) == pow(U, 1./a) */
-void irk_power_vec(irk_state *state, npy_intp len, double *res, const double alp)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
-    double rec_alp = 1.0 / alp;
+void irk_power_vec(irk_state *state, npy_intp len, double *res,
+                   const double alp) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+  double rec_alp = 1.0 / alp;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_power_vec(state, MKL_INT_MAX, res, alp);
+  while (len > MKL_INT_MAX) {
+    irk_power_vec(state, MKL_INT_MAX, res, alp);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 
-    /* res[i] = pow(res[i], rec_alp) */
-    vmdPowx(len, res, rec_alp, res, VML_HA);
+  /* res[i] = pow(res[i], rec_alp) */
+  vmdPowx(len, res, rec_alp, res, VML_HA);
 }
 
 /*  scale * sqrt(2.0 * E(1))  */
-void irk_rayleigh_vec(irk_state *state, npy_intp len, double *res, const double scale)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_zero = 0.0, d_two = 2.0;
+void irk_rayleigh_vec(irk_state *state, npy_intp len, double *res,
+                      const double scale) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_zero = 0.0, d_two = 2.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_rayleigh_vec(state, MKL_INT_MAX, res, scale);
+  while (len > MKL_INT_MAX) {
+    irk_rayleigh_vec(state, MKL_INT_MAX, res, scale);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE, state->stream, len, res, d_zero, d_two);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngExponential(VSL_RNG_METHOD_EXPONENTIAL_ICDF_ACCURATE,
+                         state->stream, len, res, d_zero, d_two);
+  assert(err == VSL_STATUS_OK);
 
-    vmdSqrt(len, res, res, VML_HA);
+  vmdSqrt(len, res, res, VML_HA);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] *= scale;
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] *= scale;
 }
 
-void irk_beta_vec(irk_state *state, npy_intp len, double *res, const double a, const double b)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_beta_vec(irk_state *state, npy_intp len, double *res, const double a,
+                  const double b) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngBeta(VSL_RNG_METHOD_BETA_CJA_ACCURATE, state->stream, MKL_INT_MAX, res, a, b, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = vdRngBeta(VSL_RNG_METHOD_BETA_CJA_ACCURATE, state->stream,
+                    MKL_INT_MAX, res, a, b, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngBeta(VSL_RNG_METHOD_BETA_CJA_ACCURATE, state->stream, len, res, a, b, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngBeta(VSL_RNG_METHOD_BETA_CJA_ACCURATE, state->stream, len, res, a,
+                  b, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 }
 
 /*  F(df_num, df_den) ~ G( df_num/2, 2/df_num) / G(df_den/2, 2/df_den))  */
-void irk_f_vec(irk_state *state, npy_intp len, double *res, const double df_num, const double df_den)
-{
-    int err = 0;
-    const double d_zero = 0.0;
-    double shape = 0.5 * df_num, scale = 2.0 / df_num;
-    double *den = nullptr;
-
-    if (len < 1)
-        return;
-
-    while (len > MKL_INT_MAX)
-    {
-        irk_f_vec(state, MKL_INT_MAX, res, df_num, df_den);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
-
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, scale);
-    assert(err == VSL_STATUS_OK);
-
-    den = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(den != nullptr);
-
-    shape = 0.5 * df_den;
-    scale = 2.0 / df_den;
-    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, den, shape, d_zero, scale);
-    assert(err == VSL_STATUS_OK);
-
-    vmdDiv(len, res, den, res, VML_HA);
-    mkl_free(den);
+void irk_f_vec(irk_state *state, npy_intp len, double *res, const double df_num,
+               const double df_den) {
+  int err = 0;
+  const double d_zero = 0.0;
+  double shape = 0.5 * df_num, scale = 2.0 / df_num;
+  double *den = nullptr;
+
+  if (len < 1)
+    return;
+
+  while (len > MKL_INT_MAX) {
+    irk_f_vec(state, MKL_INT_MAX, res, df_num, df_den);
+
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res,
+                   shape, d_zero, scale);
+  assert(err == VSL_STATUS_OK);
+
+  den = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(den != nullptr);
+
+  shape = 0.5 * df_den;
+  scale = 2.0 / df_den;
+  err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, den,
+                   shape, d_zero, scale);
+  assert(err == VSL_STATUS_OK);
+
+  vmdDiv(len, res, den, res, VML_HA);
+  mkl_free(den);
 }
 
 /*
    for df > 1, X ~ Chi2(df - 1) + ( sqrt(nonc) + Z)^2
    for df <=1, X ~ Chi2( df + 2*I), where I ~ Poisson( nonc/2.0)
 */
-void irk_noncentral_chisquare_vec(irk_state *state, npy_intp len, double *res, const double df, const double nonc)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_zero = 0.0, d_one = 1.0, d_two = 2.0;
-    double shape, loc;
-
-    if (len < 1)
-        return;
-
-    while (len > MKL_INT_MAX)
-    {
-        irk_noncentral_chisquare_vec(state, MKL_INT_MAX, res, df, nonc);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
-
-    if (df > 1)
-    {
-        double *nvec;
+void irk_noncentral_chisquare_vec(irk_state *state, npy_intp len, double *res,
+                                  const double df, const double nonc) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_zero = 0.0, d_one = 1.0, d_two = 2.0;
+  double shape, loc;
 
-        shape = 0.5 * (df - 1.0);
-        /* res has chi^2 with (df - 1) */
-        err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len, res, shape, d_zero, d_two);
+  if (len < 1)
+    return;
 
-        nvec = (double *)mkl_malloc(len * sizeof(double), 64);
-        assert(nvec != nullptr);
+  while (len > MKL_INT_MAX) {
+    irk_noncentral_chisquare_vec(state, MKL_INT_MAX, res, df, nonc);
 
-        loc = sqrt(nonc);
-        err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, nvec, loc, d_one);
-        assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-        /* squaring could result in an overflow */
-        vmdSqr(len, nvec, nvec, VML_HA);
-        vmdAdd(len, res, nvec, res, VML_HA);
+  if (df > 1) {
+    double *nvec;
 
-        mkl_free(nvec);
-    }
-    else
-    {
-        if (df == 0.)
-        {
-            return irk_chisquare_vec(state, len, res, df);
-        }
-        if (df < 1)
-        {
-            /* noncentral_chisquare(df, nonc) ~ G( df/2 + Poisson(nonc/2), 2) */
-            double lambda;
-            int *pvec = (int *)mkl_malloc(len * sizeof(int), 64);
-
-            assert(pvec != nullptr);
-
-            lambda = 0.5 * nonc;
-            err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, len, pvec, lambda);
-            assert(err == VSL_STATUS_OK);
-
-            shape = 0.5 * df;
-
-            if (0.125 * len > sqrt(lambda))
-            {
-                int *idx = nullptr;
-                double *tmp = nullptr;
-
-                idx = (int *)mkl_malloc(len * sizeof(int), 64);
-                assert(idx != nullptr);
-
-                DIST_PRAGMA_VECTOR
-                for (i = 0; i < len; ++i)
-                    idx[i] = (int)i;
-
-                std::sort(idx, idx + len, [pvec](int i1, int i2)
-                          { return pvec[i1] < pvec[i2]; });
-                /* idx now contains original indexes of ordered Poisson outputs */
-
-                /* allocate workspace to store samples of gamma, enough to hold entire output */
-                tmp = (double *)mkl_malloc(len * sizeof(double), 64);
-                assert(tmp != nullptr);
-
-                for (i = 0; i < len;)
-                {
-                    int cv = pvec[idx[i]];
-                    npy_intp k = 0, j = 0;
-
-                    for (j = i + 1; (j < len) && (pvec[idx[j]] == cv); ++j)
-                    {
-                    }
-
-                    assert(j > i);
-                    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, j - i, tmp,
-                                     shape + cv, d_zero, d_two);
-                    assert(err == VSL_STATUS_OK);
-
-                    DIST_PRAGMA_VECTOR
-                    for (k = 0; k < j - i; ++k)
-                        res[idx[k + i]] = tmp[k];
-
-                    i = j;
-                }
-
-                mkl_free(tmp);
-                mkl_free(idx);
-            }
-            else
-            {
-
-                for (i = 0; i < len; ++i)
-                {
-                    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, 1,
-                                     res + i, shape + pvec[i], d_zero, d_two);
-                    assert(err == VSL_STATUS_OK);
-                }
-            }
-
-            mkl_free(pvec);
-        }
-        else
-        {
-            /* noncentral_chisquare(1, nonc) ~ (Z + sqrt(nonc))**2 for df == 1 */
-            loc = sqrt(nonc);
-            err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res, loc, d_one);
-            assert(err == VSL_STATUS_OK);
-            /* squaring could result in an overflow */
-            vmdSqr(len, res, res, VML_HA);
-        }
-    }
-}
+    shape = 0.5 * (df - 1.0);
+    /* res has chi^2 with (df - 1) */
+    err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream, len,
+                     res, shape, d_zero, d_two);
 
-void irk_laplace_vec(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
+    nvec = (double *)mkl_malloc(len * sizeof(double), 64);
+    assert(nvec != nullptr);
 
-    if (len < 1)
-        return;
+    loc = sqrt(nonc);
+    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, nvec,
+                        loc, d_one);
+    assert(err == VSL_STATUS_OK);
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngLaplace(VSL_RNG_METHOD_LAPLACE_ICDF, state->stream, MKL_INT_MAX, res, loc, scale);
-        assert(err == VSL_STATUS_OK);
+    /* squaring could result in an overflow */
+    vmdSqr(len, nvec, nvec, VML_HA);
+    vmdAdd(len, res, nvec, res, VML_HA);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
+    mkl_free(nvec);
+  } else {
+    if (df == 0.) {
+      return irk_chisquare_vec(state, len, res, df);
     }
+    if (df < 1) {
+      /* noncentral_chisquare(df, nonc) ~ G( df/2 + Poisson(nonc/2), 2) */
+      double lambda;
+      int *pvec = (int *)mkl_malloc(len * sizeof(int), 64);
 
-    err = vdRngLaplace(VSL_RNG_METHOD_LAPLACE_ICDF, state->stream, len, res, loc, scale);
-    assert(err == VSL_STATUS_OK);
-}
+      assert(pvec != nullptr);
 
-void irk_gumbel_vec(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
+      lambda = 0.5 * nonc;
+      err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, len, pvec,
+                         lambda);
+      assert(err == VSL_STATUS_OK);
 
-    if (len < 1)
-        return;
+      shape = 0.5 * df;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGumbel(VSL_RNG_METHOD_GUMBEL_ICDF, state->stream, MKL_INT_MAX, res, loc, scale);
-        assert(err == VSL_STATUS_OK);
+      if (0.125 * len > sqrt(lambda)) {
+        int *idx = nullptr;
+        double *tmp = nullptr;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+        idx = (int *)mkl_malloc(len * sizeof(int), 64);
+        assert(idx != nullptr);
 
-    err = vdRngGumbel(VSL_RNG_METHOD_GUMBEL_ICDF, state->stream, len, res, loc, scale);
-    assert(err == VSL_STATUS_OK);
-}
+        DIST_PRAGMA_VECTOR
+        for (i = 0; i < len; ++i)
+          idx[i] = (int)i;
 
-/*   Logistic(loc, scale) ~ loc + scale * log(u/(1.0 - u)) */
-void irk_logistic_vec(irk_state *state, npy_intp len, double *res, const double loc, const double scale)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_one = 1.0, d_zero = 0.0;
+        std::sort(idx, idx + len,
+                  [pvec](int i1, int i2) { return pvec[i1] < pvec[i2]; });
+        /* idx now contains original indexes of ordered Poisson outputs */
 
-    if (len < 1)
-        return;
+        /* allocate workspace to store samples of gamma, enough to hold entire
+         * output */
+        tmp = (double *)mkl_malloc(len * sizeof(double), 64);
+        assert(tmp != nullptr);
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_logistic_vec(state, MKL_INT_MAX, res, loc, scale);
+        for (i = 0; i < len;) {
+          int cv = pvec[idx[i]];
+          npy_intp k = 0, j = 0;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+          for (j = i + 1; (j < len) && (pvec[idx[j]] == cv); ++j) {
+          }
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+          assert(j > i);
+          err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream,
+                           j - i, tmp, shape + cv, d_zero, d_two);
+          assert(err == VSL_STATUS_OK);
 
-    /* can MKL optimize computation of the logit function  p \mapsto \ln(p/(1-p)) */
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] = log(res[i] / (1.0 - res[i]));
+          DIST_PRAGMA_VECTOR
+          for (k = 0; k < j - i; ++k)
+            res[idx[k + i]] = tmp[k];
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] = loc + scale * res[i];
-}
+          i = j;
+        }
 
-void irk_lognormal_vec_ICDF(irk_state *state, npy_intp len, double *res, const double mean, const double sigma)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+        mkl_free(tmp);
+        mkl_free(idx);
+      } else {
 
-    if (len < 1)
-        return;
+        for (i = 0; i < len; ++i) {
+          err = vdRngGamma(VSL_RNG_METHOD_GAMMA_GNORM_ACCURATE, state->stream,
+                           1, res + i, shape + pvec[i], d_zero, d_two);
+          assert(err == VSL_STATUS_OK);
+        }
+      }
+
+      mkl_free(pvec);
+    } else {
+      /* noncentral_chisquare(1, nonc) ~ (Z + sqrt(nonc))**2 for df == 1 */
+      loc = sqrt(nonc);
+      err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res,
+                          loc, d_one);
+      assert(err == VSL_STATUS_OK);
+      /* squaring could result in an overflow */
+      vmdSqr(len, res, res, VML_HA);
+    }
+  }
+}
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF_ACCURATE, state->stream, MKL_INT_MAX, res, mean, sigma, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+void irk_laplace_vec(irk_state *state, npy_intp len, double *res,
+                     const double loc, const double scale) {
+  int err = 0;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  if (len < 1)
+    return;
 
-    err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF_ACCURATE, state->stream, len, res, mean, sigma, d_zero, d_one);
+  while (len > MKL_INT_MAX) {
+    err = vdRngLaplace(VSL_RNG_METHOD_LAPLACE_ICDF, state->stream, MKL_INT_MAX,
+                       res, loc, scale);
     assert(err == VSL_STATUS_OK);
-}
 
-void irk_lognormal_vec_BM(irk_state *state, npy_intp len, double *res, const double mean, const double sigma)
-{
-    int err = 0;
-    const double d_zero = 0.0, d_one = 1.0;
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (len < 1)
-        return;
+  err = vdRngLaplace(VSL_RNG_METHOD_LAPLACE_ICDF, state->stream, len, res, loc,
+                     scale);
+  assert(err == VSL_STATUS_OK);
+}
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2_ACCURATE, state->stream, MKL_INT_MAX, res, mean, sigma, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+void irk_gumbel_vec(irk_state *state, npy_intp len, double *res,
+                    const double loc, const double scale) {
+  int err = 0;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  if (len < 1)
+    return;
 
-    err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2_ACCURATE, state->stream, len, res, mean, sigma, d_zero, d_one);
+  while (len > MKL_INT_MAX) {
+    err = vdRngGumbel(VSL_RNG_METHOD_GUMBEL_ICDF, state->stream, MKL_INT_MAX,
+                      res, loc, scale);
     assert(err == VSL_STATUS_OK);
+
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngGumbel(VSL_RNG_METHOD_GUMBEL_ICDF, state->stream, len, res, loc,
+                    scale);
+  assert(err == VSL_STATUS_OK);
 }
 
-/* direct transformation method */
-void irk_wald_vec(irk_state *state, npy_intp len, double *res, const double mean, const double scale)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_zero = 0., d_one = 1.0;
-    double *uvec = nullptr;
-    double gsc = sqrt(0.5 * mean / scale);
-
-    if (len < 1)
-        return;
-
-    while (len > MKL_INT_MAX)
-    {
-        irk_wald_vec(state, MKL_INT_MAX, res, mean, scale);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+/*   Logistic(loc, scale) ~ loc + scale * log(u/(1.0 - u)) */
+void irk_logistic_vec(irk_state *state, npy_intp len, double *res,
+                      const double loc, const double scale) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_one = 1.0, d_zero = 0.0;
+
+  if (len < 1)
+    return;
+
+  while (len > MKL_INT_MAX) {
+    irk_logistic_vec(state, MKL_INT_MAX, res, loc, scale);
+
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+
+  /* can MKL optimize computation of the logit function  p \mapsto \ln(p/(1-p))
+   */
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = log(res[i] / (1.0 - res[i]));
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = loc + scale * res[i];
+}
 
-    err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res, d_zero, gsc);
+void irk_lognormal_vec_ICDF(irk_state *state, npy_intp len, double *res,
+                            const double mean, const double sigma) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+
+  if (len < 1)
+    return;
+
+  while (len > MKL_INT_MAX) {
+    err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF_ACCURATE, state->stream,
+                         MKL_INT_MAX, res, mean, sigma, d_zero, d_one);
     assert(err == VSL_STATUS_OK);
 
-    /* Y = mean/(2 scale) * Z^2 */
-    vmdSqr(len, res, res, VML_HA);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-    {
-        if (res[i] <= 2.0)
-        {
-            res[i] = 1.0 + res[i] + sqrt(res[i] * (res[i] + 2.0));
-        }
-        else
-        {
-            res[i] = 1.0 + res[i] * (1.0 + sqrt(1.0 + 2.0 / res[i]));
-        }
-    }
+  err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF_ACCURATE, state->stream,
+                       len, res, mean, sigma, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+}
+
+void irk_lognormal_vec_BM(irk_state *state, npy_intp len, double *res,
+                          const double mean, const double sigma) {
+  int err = 0;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    uvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(uvec != nullptr);
+  if (len < 1)
+    return;
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, uvec, d_zero, d_one);
+  while (len > MKL_INT_MAX) {
+    err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2_ACCURATE,
+                         state->stream, MKL_INT_MAX, res, mean, sigma, d_zero,
+                         d_one);
     assert(err == VSL_STATUS_OK);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-    {
-        if (uvec[i] * (1.0 + res[i]) <= res[i])
-            res[i] = mean / res[i];
-        else
-            res[i] = mean * res[i];
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2_ACCURATE,
+                       state->stream, len, res, mean, sigma, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+}
+
+/* direct transformation method */
+void irk_wald_vec(irk_state *state, npy_intp len, double *res,
+                  const double mean, const double scale) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_zero = 0., d_one = 1.0;
+  double *uvec = nullptr;
+  double gsc = sqrt(0.5 * mean / scale);
+
+  if (len < 1)
+    return;
+
+  while (len > MKL_INT_MAX) {
+    irk_wald_vec(state, MKL_INT_MAX, res, mean, scale);
+
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  err = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, state->stream, len, res,
+                      d_zero, gsc);
+  assert(err == VSL_STATUS_OK);
+
+  /* Y = mean/(2 scale) * Z^2 */
+  vmdSqr(len, res, res, VML_HA);
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i) {
+    if (res[i] <= 2.0) {
+      res[i] = 1.0 + res[i] + sqrt(res[i] * (res[i] + 2.0));
+    } else {
+      res[i] = 1.0 + res[i] * (1.0 + sqrt(1.0 + 2.0 / res[i]));
+    }
+  }
+
+  uvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(uvec != nullptr);
+
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     uvec, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i) {
+    if (uvec[i] * (1.0 + res[i]) <= res[i])
+      res[i] = mean / res[i];
+    else
+      res[i] = mean * res[i];
+  }
 
-    mkl_free(uvec);
+  mkl_free(uvec);
 }
 
 #ifndef M_PI
@@ -858,1200 +863,1156 @@ void irk_wald_vec(irk_state *state, npy_intp len, double *res, const double mean
    http://cg.scs.carleton.ca/~luc/rnbookindex.html
    (but corrected to match the algorithm in R and Python)
 */
-static void
-irk_vonmises_vec_small_kappa(irk_state *state, npy_intp len, double *res, const double mu, const double kappa)
-{
-    int err = 0;
-    npy_intp n = 0, i = 0, size = 0;
-    double rho_over_kappa, rho, r, s_kappa, Z, W, Y, V;
-    double *Uvec = nullptr, *Vvec = nullptr;
-    float *VFvec = nullptr;
-    const double d_zero = 0.0, d_one = 1.0;
-
-    assert(0. < kappa <= 1.0);
-
-    r = 1 + sqrt(1 + 4 * kappa * kappa);
-    rho_over_kappa = (2) / (r + sqrt(2 * r));
-    rho = rho_over_kappa * kappa;
-
-    /* s times kappa */
-    s_kappa = (1 + rho * rho) / (2 * rho_over_kappa);
-
-    Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Uvec != nullptr);
-    Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Vvec != nullptr);
-
-    for (n = 0; n < len;)
-    {
-        size = len - n;
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, size, Uvec, d_zero, M_PI);
-        assert(err == VSL_STATUS_OK);
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, size, Vvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
-
-        for (i = 0; i < size; ++i)
-        {
-            Z = cos(Uvec[i]);
-            V = Vvec[i];
-            W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
-            Y = s_kappa - kappa * W;
-            if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y))
-            {
-                res[n++] = acos(W);
-            }
-        }
+static void irk_vonmises_vec_small_kappa(irk_state *state, npy_intp len,
+                                         double *res, const double mu,
+                                         const double kappa) {
+  int err = 0;
+  npy_intp n = 0, i = 0, size = 0;
+  double rho_over_kappa, rho, r, s_kappa, Z, W, Y, V;
+  double *Uvec = nullptr, *Vvec = nullptr;
+  float *VFvec = nullptr;
+  const double d_zero = 0.0, d_one = 1.0;
+
+  assert(0. < kappa <= 1.0);
+
+  r = 1 + sqrt(1 + 4 * kappa * kappa);
+  rho_over_kappa = (2) / (r + sqrt(2 * r));
+  rho = rho_over_kappa * kappa;
+
+  /* s times kappa */
+  s_kappa = (1 + rho * rho) / (2 * rho_over_kappa);
+
+  Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Uvec != nullptr);
+  Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Vvec != nullptr);
+
+  for (n = 0; n < len;) {
+    size = len - n;
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, size, Uvec,
+                       d_zero, M_PI);
+    assert(err == VSL_STATUS_OK);
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, size,
+                       Vvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
+
+    for (i = 0; i < size; ++i) {
+      Z = cos(Uvec[i]);
+      V = Vvec[i];
+      W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
+      Y = s_kappa - kappa * W;
+      if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y)) {
+        res[n++] = acos(W);
+      }
     }
+  }
 
-    mkl_free(Uvec);
+  mkl_free(Uvec);
 
-    VFvec = (float *)Vvec;
-    err = vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, VFvec, (float)d_zero, (float)d_one);
-    assert(err == VSL_STATUS_OK);
+  VFvec = (float *)Vvec;
+  err = vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, VFvec,
+                     (float)d_zero, (float)d_one);
+  assert(err == VSL_STATUS_OK);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-    {
-        double mod, resi;
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i) {
+    double mod, resi;
 
-        resi = (VFvec[i] < 0.5) ? mu - res[i] : mu + res[i];
-        mod = fabs(resi);
-        mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
-        res[i] = (resi < 0) ? -mod : mod;
-    }
+    resi = (VFvec[i] < 0.5) ? mu - res[i] : mu + res[i];
+    mod = fabs(resi);
+    mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
+    res[i] = (resi < 0) ? -mod : mod;
+  }
 
-    mkl_free(Vvec);
+  mkl_free(Vvec);
 }
 
-static void
-irk_vonmises_vec_large_kappa(irk_state *state, npy_intp len, double *res, const double mu, const double kappa)
-{
-    int err = 0;
-    npy_intp i = 0, n = 0, size = 0;
-    double r_over_two_kappa, recip_two_kappa;
-    double s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
-    double *Uvec = nullptr, *Vvec = nullptr;
-    float *VFvec = nullptr;
-    const double d_zero = 0.0, d_one = 1.0;
-
-    assert(kappa > 1.0);
-
-    recip_two_kappa = 1 / (2 * kappa);
-
-    /* variables here are dwindling to zero as kappa grows */
-    hpt = sqrt(1 + recip_two_kappa * recip_two_kappa);
-    r_over_two_kappa_minus_one = recip_two_kappa * (1 + recip_two_kappa / (1 + hpt));
-    r_over_two_kappa = 1 + r_over_two_kappa_minus_one;
-    rho_minus_one = r_over_two_kappa_minus_one - sqrt(2 * r_over_two_kappa * recip_two_kappa);
-    s_minus_one = rho_minus_one * (0.5 * rho_minus_one / (1 + rho_minus_one));
-
-    Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Uvec != nullptr);
-    Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Vvec != nullptr);
-
-    for (n = 0; n < len;)
-    {
-        size = len - n;
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, size, Uvec, d_zero, 0.5 * M_PI);
-        assert(err == VSL_STATUS_OK);
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, size, Vvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
-
-        for (i = 0; i < size; ++i)
-        {
-            double sn, cn, sn2, cn2;
-            double neg_W_minus_one, V, Y;
-
-            sn = sin(Uvec[i]);
-            cn = cos(Uvec[i]);
-            V = Vvec[i];
-            sn2 = sn * sn;
-            cn2 = cn * cn;
-
-            neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
-            Y = kappa * (s_minus_one + neg_W_minus_one);
-
-            if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y))
-            {
-                Y = neg_W_minus_one * (2 - neg_W_minus_one);
-                if (Y < 0)
-                    Y = 0.;
-                else if (Y > 1.0)
-                    Y = 1.0;
-
-                res[n++] = asin(sqrt(Y));
-            }
-        }
-    }
+static void irk_vonmises_vec_large_kappa(irk_state *state, npy_intp len,
+                                         double *res, const double mu,
+                                         const double kappa) {
+  int err = 0;
+  npy_intp i = 0, n = 0, size = 0;
+  double r_over_two_kappa, recip_two_kappa;
+  double s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
+  double *Uvec = nullptr, *Vvec = nullptr;
+  float *VFvec = nullptr;
+  const double d_zero = 0.0, d_one = 1.0;
+
+  assert(kappa > 1.0);
+
+  recip_two_kappa = 1 / (2 * kappa);
+
+  /* variables here are dwindling to zero as kappa grows */
+  hpt = sqrt(1 + recip_two_kappa * recip_two_kappa);
+  r_over_two_kappa_minus_one =
+      recip_two_kappa * (1 + recip_two_kappa / (1 + hpt));
+  r_over_two_kappa = 1 + r_over_two_kappa_minus_one;
+  rho_minus_one =
+      r_over_two_kappa_minus_one - sqrt(2 * r_over_two_kappa * recip_two_kappa);
+  s_minus_one = rho_minus_one * (0.5 * rho_minus_one / (1 + rho_minus_one));
+
+  Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Uvec != nullptr);
+  Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Vvec != nullptr);
+
+  for (n = 0; n < len;) {
+    size = len - n;
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, size, Uvec,
+                       d_zero, 0.5 * M_PI);
+    assert(err == VSL_STATUS_OK);
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, size,
+                       Vvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-    mkl_free(Uvec);
+    for (i = 0; i < size; ++i) {
+      double sn, cn, sn2, cn2;
+      double neg_W_minus_one, V, Y;
 
-    VFvec = (float *)Vvec;
-    err = vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, VFvec, (float)d_zero, (float)d_one);
-    assert(err == VSL_STATUS_OK);
+      sn = sin(Uvec[i]);
+      cn = cos(Uvec[i]);
+      V = Vvec[i];
+      sn2 = sn * sn;
+      cn2 = cn * cn;
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-    {
-        double mod, resi;
+      neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
+      Y = kappa * (s_minus_one + neg_W_minus_one);
+
+      if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y)) {
+        Y = neg_W_minus_one * (2 - neg_W_minus_one);
+        if (Y < 0)
+          Y = 0.;
+        else if (Y > 1.0)
+          Y = 1.0;
 
-        resi = (VFvec[i] < 0.5) ? mu - res[i] : mu + res[i];
-        mod = fabs(resi);
-        mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
-        res[i] = (resi < 0) ? -mod : mod;
+        res[n++] = asin(sqrt(Y));
+      }
     }
+  }
 
-    mkl_free(Vvec);
+  mkl_free(Uvec);
+
+  VFvec = (float *)Vvec;
+  err = vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, VFvec,
+                     (float)d_zero, (float)d_one);
+  assert(err == VSL_STATUS_OK);
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i) {
+    double mod, resi;
+
+    resi = (VFvec[i] < 0.5) ? mu - res[i] : mu + res[i];
+    mod = fabs(resi);
+    mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI);
+    res[i] = (resi < 0) ? -mod : mod;
+  }
+
+  mkl_free(Vvec);
 }
 
-void irk_vonmises_vec(irk_state *state, npy_intp len, double *res, const double mu, const double kappa)
-{
-    if (len < 1)
-        return;
+void irk_vonmises_vec(irk_state *state, npy_intp len, double *res,
+                      const double mu, const double kappa) {
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_vonmises_vec(state, MKL_INT_MAX, res, mu, kappa);
+  while (len > MKL_INT_MAX) {
+    irk_vonmises_vec(state, MKL_INT_MAX, res, mu, kappa);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (kappa > 1.0)
-        irk_vonmises_vec_large_kappa(state, len, res, mu, kappa);
-    else
-        irk_vonmises_vec_small_kappa(state, len, res, mu, kappa);
+  if (kappa > 1.0)
+    irk_vonmises_vec_large_kappa(state, len, res, mu, kappa);
+  else
+    irk_vonmises_vec_small_kappa(state, len, res, mu, kappa);
 }
 
-void irk_noncentral_f_vec(irk_state *state, npy_intp len, double *res, const double df_num, const double df_den, const double nonc)
-{
-    npy_intp i;
-    double *den = nullptr, fctr;
+void irk_noncentral_f_vec(irk_state *state, npy_intp len, double *res,
+                          const double df_num, const double df_den,
+                          const double nonc) {
+  npy_intp i;
+  double *den = nullptr, fctr;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    if (nonc == 0.)
-        return irk_f_vec(state, len, res, df_num, df_den);
+  if (nonc == 0.)
+    return irk_f_vec(state, len, res, df_num, df_den);
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_noncentral_f_vec(state, MKL_INT_MAX, res, df_num, df_den, nonc);
+  while (len > MKL_INT_MAX) {
+    irk_noncentral_f_vec(state, MKL_INT_MAX, res, df_num, df_den, nonc);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    irk_noncentral_chisquare_vec(state, len, res, df_num, nonc);
+  irk_noncentral_chisquare_vec(state, len, res, df_num, nonc);
 
-    den = (double *)mkl_malloc(len * sizeof(double), 64);
+  den = (double *)mkl_malloc(len * sizeof(double), 64);
 
-    if (den == nullptr)
-        return;
+  if (den == nullptr)
+    return;
 
-    irk_noncentral_chisquare_vec(state, len, den, df_den, nonc);
+  irk_noncentral_chisquare_vec(state, len, den, df_den, nonc);
 
-    vmdDiv(len, res, den, res, VML_HA);
+  vmdDiv(len, res, den, res, VML_HA);
 
-    mkl_free(den);
-    fctr = df_den / df_num;
+  mkl_free(den);
+  fctr = df_den / df_num;
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] *= fctr;
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] *= fctr;
 }
 
-void irk_triangular_vec(irk_state *state, npy_intp len, double *res, const double x_min, const double x_mode, const double x_max)
-{
-    int err = 0;
-    npy_intp i = 0;
-    const double d_zero = 0.0, d_one = 1.0;
-    double ratio, lpr, rpr;
+void irk_triangular_vec(irk_state *state, npy_intp len, double *res,
+                        const double x_min, const double x_mode,
+                        const double x_max) {
+  int err = 0;
+  npy_intp i = 0;
+  const double d_zero = 0.0, d_one = 1.0;
+  double ratio, lpr, rpr;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_triangular_vec(state, MKL_INT_MAX, res, x_min, x_mode, x_max);
+  while (len > MKL_INT_MAX) {
+    irk_triangular_vec(state, MKL_INT_MAX, res, x_min, x_mode, x_max);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len, res, d_zero, d_one);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, len,
+                     res, d_zero, d_one);
+  assert(err == VSL_STATUS_OK);
 
-    {
-        double wtot, wl, wr;
+  {
+    double wtot, wl, wr;
 
-        wtot = x_max - x_min;
-        wl = x_mode - x_min;
-        wr = x_max - x_mode;
+    wtot = x_max - x_min;
+    wl = x_mode - x_min;
+    wr = x_max - x_mode;
 
-        ratio = wl / wtot;
-        lpr = wl * wtot;
-        rpr = wr * wtot;
-    }
+    ratio = wl / wtot;
+    lpr = wl * wtot;
+    rpr = wr * wtot;
+  }
 
-    assert(0 <= ratio && ratio <= 1);
+  assert(0 <= ratio && ratio <= 1);
 
-    if (ratio <= 0)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-        {
-            /* U and 1 - U are equal in distribution */
-            res[i] = x_max - sqrt(res[i] * rpr);
-        }
+  if (ratio <= 0) {
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i) {
+      /* U and 1 - U are equal in distribution */
+      res[i] = x_max - sqrt(res[i] * rpr);
     }
-    else if (ratio >= 1)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-        {
-            res[i] = x_min + sqrt(res[i] * lpr);
-        }
+  } else if (ratio >= 1) {
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i) {
+      res[i] = x_min + sqrt(res[i] * lpr);
     }
-    else
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-        {
-            double ui = res[i];
-            res[i] = (ui > ratio) ? x_max - sqrt((1.0 - ui) * rpr) : x_min + sqrt(ui * lpr);
-        }
+  } else {
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i) {
+      double ui = res[i];
+      res[i] = (ui > ratio) ? x_max - sqrt((1.0 - ui) * rpr)
+                            : x_min + sqrt(ui * lpr);
     }
+  }
 }
 
-void irk_binomial_vec(irk_state *state, npy_intp len, int *res, const int n, const double p)
-{
-    int err = 0;
+void irk_binomial_vec(irk_state *state, npy_intp len, int *res, const int n,
+                      const double p) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    if (n == 0)
-    {
-        memset(res, 0, len * sizeof(int));
+  if (n == 0) {
+    memset(res, 0, len * sizeof(int));
+  } else {
+    while (len > MKL_INT_MAX) {
+      err = viRngBinomial(VSL_RNG_METHOD_BINOMIAL_BTPE, state->stream,
+                          MKL_INT_MAX, res, n, p);
+      assert(err == VSL_STATUS_OK);
+      res += MKL_INT_MAX;
+      len -= MKL_INT_MAX;
     }
-    else
-    {
-        while (len > MKL_INT_MAX)
-        {
-            err = viRngBinomial(VSL_RNG_METHOD_BINOMIAL_BTPE, state->stream, MKL_INT_MAX, res, n, p);
-            assert(err == VSL_STATUS_OK);
-            res += MKL_INT_MAX;
-            len -= MKL_INT_MAX;
-        }
 
-        err = viRngBinomial(VSL_RNG_METHOD_BINOMIAL_BTPE, state->stream, len, res, n, p);
-        assert(err == VSL_STATUS_OK);
-    }
+    err = viRngBinomial(VSL_RNG_METHOD_BINOMIAL_BTPE, state->stream, len, res,
+                        n, p);
+    assert(err == VSL_STATUS_OK);
+  }
 }
 
-void irk_multinomial_vec(irk_state *state, npy_intp len, int *res, const int n, const int k, const double *pvec)
-{
-    int err = 0;
-
-    if (len < 1)
-        return;
+void irk_multinomial_vec(irk_state *state, npy_intp len, int *res, const int n,
+                         const int k, const double *pvec) {
+  int err = 0;
 
-    if (n == 0)
-    {
-        memset(res, 0, len * k * sizeof(int));
-    }
-    else
-    {
-        while (len > MKL_INT_MAX)
-        {
-            err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, state->stream, MKL_INT_MAX, res, n, k, pvec);
-            assert(err == VSL_STATUS_OK);
-            res += k * MKL_INT_MAX;
-            len -= k * MKL_INT_MAX;
-        }
+  if (len < 1)
+    return;
 
-        err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, state->stream, len, res, n, k, pvec);
-        assert(err == VSL_STATUS_OK);
+  if (n == 0) {
+    memset(res, 0, len * k * sizeof(int));
+  } else {
+    while (len > MKL_INT_MAX) {
+      err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON,
+                             state->stream, MKL_INT_MAX, res, n, k, pvec);
+      assert(err == VSL_STATUS_OK);
+      res += k * MKL_INT_MAX;
+      len -= k * MKL_INT_MAX;
     }
-}
-
-void irk_geometric_vec(irk_state *state, npy_intp len, int *res, const double p)
-{
-    int err = 0;
-
-    if (len < 1)
-        return;
-
-    if ((0.0 < p) && (p < 1.0))
-    {
-        while (len > MKL_INT_MAX)
-        {
-            err = viRngGeometric(VSL_RNG_METHOD_GEOMETRIC_ICDF, state->stream, MKL_INT_MAX, res, p);
-            assert(err == VSL_STATUS_OK);
-
-            res += MKL_INT_MAX;
-            len -= MKL_INT_MAX;
-        }
 
-        err = viRngGeometric(VSL_RNG_METHOD_GEOMETRIC_ICDF, state->stream, len, res, p);
-        assert(err == VSL_STATUS_OK);
-    }
-    else
-    {
-        if (p == 1.0)
-        {
-            npy_intp i;
-            for (i = 0; i < len; ++i)
-                res[i] = 0;
-        }
-        else
-        {
-            assert(p >= 0.0);
-            assert(p <= 1.0);
-        }
-    }
+    err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON,
+                           state->stream, len, res, n, k, pvec);
+    assert(err == VSL_STATUS_OK);
+  }
 }
 
-void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res, const double a, const double p)
-{
-    int err = 0;
+void irk_geometric_vec(irk_state *state, npy_intp len, int *res,
+                       const double p) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngNegbinomial(VSL_RNG_METHOD_NEGBINOMIAL_NBAR, state->stream, MKL_INT_MAX, res, a, p);
-        assert(err == VSL_STATUS_OK);
+  if ((0.0 < p) && (p < 1.0)) {
+    while (len > MKL_INT_MAX) {
+      err = viRngGeometric(VSL_RNG_METHOD_GEOMETRIC_ICDF, state->stream,
+                           MKL_INT_MAX, res, p);
+      assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
+      res += MKL_INT_MAX;
+      len -= MKL_INT_MAX;
     }
 
-    err = viRngNegbinomial(VSL_RNG_METHOD_NEGBINOMIAL_NBAR, state->stream, len, res, a, p);
+    err = viRngGeometric(VSL_RNG_METHOD_GEOMETRIC_ICDF, state->stream, len, res,
+                         p);
     assert(err == VSL_STATUS_OK);
+  } else {
+    if (p == 1.0) {
+      npy_intp i;
+      for (i = 0; i < len; ++i)
+        res[i] = 0;
+    } else {
+      assert(p >= 0.0);
+      assert(p <= 1.0);
+    }
+  }
 }
 
-void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res, const int lot_s,
-                            const int sampling_s, const int marked_s)
-{
-    int err = 0;
+void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res,
+                         const double a, const double p) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngHypergeometric(VSL_RNG_METHOD_HYPERGEOMETRIC_H2PE, state->stream, MKL_INT_MAX, res,
-                                  lot_s, sampling_s, marked_s);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = viRngNegbinomial(VSL_RNG_METHOD_NEGBINOMIAL_NBAR, state->stream,
+                           MKL_INT_MAX, res, a, p);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = viRngHypergeometric(VSL_RNG_METHOD_HYPERGEOMETRIC_H2PE, state->stream, len, res,
-                              lot_s, sampling_s, marked_s);
-    assert(err == VSL_STATUS_OK);
+  err = viRngNegbinomial(VSL_RNG_METHOD_NEGBINOMIAL_NBAR, state->stream, len,
+                         res, a, p);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_poisson_vec_PTPE(irk_state *state, npy_intp len, int *res, const double lambda)
-{
-    int err = 0;
-
-    if (len < 1)
-        return;
+void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res,
+                            const int lot_s, const int sampling_s,
+                            const int marked_s) {
+  int err = 0;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, MKL_INT_MAX, res, lambda);
-        assert(err == VSL_STATUS_OK);
+  if (len < 1)
+    return;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
-
-    err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, len, res, lambda);
+  while (len > MKL_INT_MAX) {
+    err = viRngHypergeometric(VSL_RNG_METHOD_HYPERGEOMETRIC_H2PE, state->stream,
+                              MKL_INT_MAX, res, lot_s, sampling_s, marked_s);
     assert(err == VSL_STATUS_OK);
-}
 
-void irk_poisson_vec_POISNORM(irk_state *state, npy_intp len, int *res, const double lambda)
-{
-    int err = 0;
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (len < 1)
-        return;
+  err = viRngHypergeometric(VSL_RNG_METHOD_HYPERGEOMETRIC_H2PE, state->stream,
+                            len, res, lot_s, sampling_s, marked_s);
+  assert(err == VSL_STATUS_OK);
+}
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, state->stream, MKL_INT_MAX, res, lambda);
-        assert(err == VSL_STATUS_OK);
+void irk_poisson_vec_PTPE(irk_state *state, npy_intp len, int *res,
+                          const double lambda) {
+  int err = 0;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  if (len < 1)
+    return;
 
-    err = viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, state->stream, len, res, lambda);
+  while (len > MKL_INT_MAX) {
+    err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, MKL_INT_MAX,
+                       res, lambda);
     assert(err == VSL_STATUS_OK);
-}
 
-void irk_poisson_vec_V(irk_state *state, npy_intp len, int *res, double *lambdas)
-{
-    int err = 0;
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (len < 1)
-        return;
+  err = viRngPoisson(VSL_RNG_METHOD_POISSON_PTPE, state->stream, len, res,
+                     lambda);
+  assert(err == VSL_STATUS_OK);
+}
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngPoissonV(VSL_RNG_METHOD_POISSONV_POISNORM, state->stream, MKL_INT_MAX, res, lambdas);
-        assert(err == VSL_STATUS_OK);
+void irk_poisson_vec_POISNORM(irk_state *state, npy_intp len, int *res,
+                              const double lambda) {
+  int err = 0;
 
-        res += MKL_INT_MAX;
-        lambdas += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  if (len < 1)
+    return;
 
-    err = viRngPoissonV(VSL_RNG_METHOD_POISSONV_POISNORM, state->stream, len, res, lambdas);
+  while (len > MKL_INT_MAX) {
+    err = viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, state->stream,
+                       MKL_INT_MAX, res, lambda);
     assert(err == VSL_STATUS_OK);
-}
 
-void irk_zipf_long_vec(irk_state *state, npy_intp len, long *res, const double a)
-{
-    int err = 0;
-    npy_intp i = 0, n_accepted = 0, batch_size = 0;
-    double T, U, V, am1, b;
-    double *Uvec = nullptr, *Vvec = nullptr;
-    long X;
-    const double d_zero = 0.0, d_one = 1.0;
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (len < 1)
-        return;
+  err = viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, state->stream, len, res,
+                     lambda);
+  assert(err == VSL_STATUS_OK);
+}
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_zipf_long_vec(state, MKL_INT_MAX, res, a);
+void irk_poisson_vec_V(irk_state *state, npy_intp len, int *res,
+                       double *lambdas) {
+  int err = 0;
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  if (len < 1)
+    return;
 
-    am1 = a - d_one;
-    b = pow(2.0, am1);
+  while (len > MKL_INT_MAX) {
+    err = viRngPoissonV(VSL_RNG_METHOD_POISSONV_POISNORM, state->stream,
+                        MKL_INT_MAX, res, lambdas);
+    assert(err == VSL_STATUS_OK);
 
-    Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Uvec != nullptr);
-    Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Vvec != nullptr);
+    res += MKL_INT_MAX;
+    lambdas += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    for (n_accepted = 0; n_accepted < len;)
-    {
-        batch_size = len - n_accepted;
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, batch_size, Uvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batch_size, Vvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  err = viRngPoissonV(VSL_RNG_METHOD_POISSONV_POISNORM, state->stream, len, res,
+                      lambdas);
+  assert(err == VSL_STATUS_OK);
+}
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < batch_size; ++i)
-        {
-            U = d_one - Uvec[i];
-            V = Vvec[i];
-            X = (long)floor(pow(U, (-1.0) / am1));
-            /* The real result may be above what can be represented in a signed
-             * long. It will get casted to -sys.maxint-1. Since this is
-             * a straightforward rejection algorithm, we can just reject this value
-             * in the rejection condition below. This function then models a Zipf
-             * distribution truncated to sys.maxint.
-             */
-            T = pow(d_one + d_one / X, am1);
-            if ((X > 0) && ((V * X) * (T - d_one) / (b - d_one) <= T / b))
-            {
-                res[n_accepted++] = X;
-            }
-        }
-    }
+void irk_zipf_long_vec(irk_state *state, npy_intp len, long *res,
+                       const double a) {
+  int err = 0;
+  npy_intp i = 0, n_accepted = 0, batch_size = 0;
+  double T, U, V, am1, b;
+  double *Uvec = nullptr, *Vvec = nullptr;
+  long X;
+  const double d_zero = 0.0, d_one = 1.0;
+
+  if (len < 1)
+    return;
+
+  while (len > MKL_INT_MAX) {
+    irk_zipf_long_vec(state, MKL_INT_MAX, res, a);
+
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
+
+  am1 = a - d_one;
+  b = pow(2.0, am1);
+
+  Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Uvec != nullptr);
+  Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Vvec != nullptr);
+
+  for (n_accepted = 0; n_accepted < len;) {
+    batch_size = len - n_accepted;
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream,
+                       batch_size, Uvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batch_size,
+                       Vvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-    mkl_free(Vvec);
-    mkl_free(Uvec);
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < batch_size; ++i) {
+      U = d_one - Uvec[i];
+      V = Vvec[i];
+      X = (long)floor(pow(U, (-1.0) / am1));
+      /* The real result may be above what can be represented in a signed
+       * long. It will get casted to -sys.maxint-1. Since this is
+       * a straightforward rejection algorithm, we can just reject this value
+       * in the rejection condition below. This function then models a Zipf
+       * distribution truncated to sys.maxint.
+       */
+      T = pow(d_one + d_one / X, am1);
+      if ((X > 0) && ((V * X) * (T - d_one) / (b - d_one) <= T / b)) {
+        res[n_accepted++] = X;
+      }
+    }
+  }
+
+  mkl_free(Vvec);
+  mkl_free(Uvec);
 }
 
-void irk_logseries_vec(irk_state *state, npy_intp len, int *res, const double theta)
-{
-    int err = 0;
-    npy_intp i = 0, n_accepted = 0, batch_size = 0;
-    double q, r, V;
-    double *Uvec = nullptr, *Vvec = nullptr;
-    int result;
-    const double d_zero = 0.0, d_one = 1.0;
+void irk_logseries_vec(irk_state *state, npy_intp len, int *res,
+                       const double theta) {
+  int err = 0;
+  npy_intp i = 0, n_accepted = 0, batch_size = 0;
+  double q, r, V;
+  double *Uvec = nullptr, *Vvec = nullptr;
+  int result;
+  const double d_zero = 0.0, d_one = 1.0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_logseries_vec(state, MKL_INT_MAX, res, theta);
+  while (len > MKL_INT_MAX) {
+    irk_logseries_vec(state, MKL_INT_MAX, res, theta);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    r = log(d_one - theta);
+  r = log(d_one - theta);
 
-    Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Uvec != nullptr);
-    Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
-    assert(Vvec != nullptr);
+  Uvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Uvec != nullptr);
+  Vvec = (double *)mkl_malloc(len * sizeof(double), 64);
+  assert(Vvec != nullptr);
 
-    for (n_accepted = 0; n_accepted < len;)
-    {
-        batch_size = len - n_accepted;
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batch_size, Uvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
-        err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream, batch_size, Vvec, d_zero, d_one);
-        assert(err == VSL_STATUS_OK);
+  for (n_accepted = 0; n_accepted < len;) {
+    batch_size = len - n_accepted;
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batch_size,
+                       Uvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
+    err = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, state->stream,
+                       batch_size, Vvec, d_zero, d_one);
+    assert(err == VSL_STATUS_OK);
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < batch_size; ++i)
-        {
-            V = Vvec[i];
-            if (V >= theta)
-            {
-                res[n_accepted++] = 1;
-            }
-            else
-            {
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < batch_size; ++i) {
+      V = Vvec[i];
+      if (V >= theta) {
+        res[n_accepted++] = 1;
+      } else {
 #if __cplusplus > 199711L
-                q = -expm1(r * Uvec[i]);
+        q = -expm1(r * Uvec[i]);
 #else
-                /*  exp(x) - 1 == 2 * exp(x/2) * sinh(x/2)  */
-                q = r * Uvec[i];
-                if (q > 1.)
-                {
-                    q = 1.0 - exp(q);
-                }
-                else
-                {
-                    q = 0.5 * q;
-                    q = -2.0 * exp(q) * sinh(q);
-                }
+        /*  exp(x) - 1 == 2 * exp(x/2) * sinh(x/2)  */
+        q = r * Uvec[i];
+        if (q > 1.) {
+          q = 1.0 - exp(q);
+        } else {
+          q = 0.5 * q;
+          q = -2.0 * exp(q) * sinh(q);
+        }
 #endif
-                if (V <= q * q)
-                {
-                    result = (int)floor(1 + log(V) / log(q));
-                    if (result > 0)
-                    {
-                        res[n_accepted++] = result;
-                    }
-                }
-                else
-                {
-                    res[n_accepted++] = (V < q) ? 2 : 1;
-                }
-            }
+        if (V <= q * q) {
+          result = (int)floor(1 + log(V) / log(q));
+          if (result > 0) {
+            res[n_accepted++] = result;
+          }
+        } else {
+          res[n_accepted++] = (V < q) ? 2 : 1;
         }
+      }
     }
+  }
 
-    mkl_free(Vvec);
+  mkl_free(Vvec);
 }
 
 /* samples discrete uniforms from [low, high) */
-void irk_discrete_uniform_vec(irk_state *state, npy_intp len, int *res, const int low, const int high)
-{
-    int err = 0;
+void irk_discrete_uniform_vec(irk_state *state, npy_intp len, int *res,
+                              const int low, const int high) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, MKL_INT_MAX, res, low, high);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) {
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, MKL_INT_MAX,
+                       res, low, high);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, res, low, high);
-    assert(err == VSL_STATUS_OK);
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, res, low,
+                     high);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_discrete_uniform_long_vec(irk_state *state, npy_intp len, long *res, const long low, const long high)
-{
-    int err = 0;
-    unsigned long max;
-    npy_intp i = 0;
+void irk_discrete_uniform_long_vec(irk_state *state, npy_intp len, long *res,
+                                   const long low, const long high) {
+  int err = 0;
+  unsigned long max;
+  npy_intp i = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_discrete_uniform_long_vec(state, MKL_INT_MAX, res, low, high);
+  while (len > MKL_INT_MAX) {
+    irk_discrete_uniform_long_vec(state, MKL_INT_MAX, res, low, high);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    max = ((unsigned long)high) - ((unsigned long)low) - 1UL;
-    if (max == 0)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = low;
+  max = ((unsigned long)high) - ((unsigned long)low) - 1UL;
+  if (max == 0) {
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] = low;
 
-        return;
-    }
+    return;
+  }
 
-    if (max <= (unsigned long)INT_MAX)
-    {
-        int *buf = (int *)mkl_malloc(len * sizeof(int), 64);
-        assert(buf != nullptr);
+  if (max <= (unsigned long)INT_MAX) {
+    int *buf = (int *)mkl_malloc(len * sizeof(int), 64);
+    assert(buf != nullptr);
 
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, -1, (int)max);
-        assert(err == VSL_STATUS_OK);
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, -1,
+                       (int)max);
+    assert(err == VSL_STATUS_OK);
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = low + ((long)buf[i]) + 1L;
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] = low + ((long)buf[i]) + 1L;
 
-        mkl_free(buf);
-    }
-    else
-    {
-        unsigned long mask = max;
-        unsigned long *buf = nullptr;
-        int n_accepted;
-
-        /* Smallest bit mask >= max */
-        mask |= mask >> 1;
-        mask |= mask >> 2;
-        mask |= mask >> 4;
-        mask |= mask >> 8;
-        mask |= mask >> 16;
+    mkl_free(buf);
+  } else {
+    unsigned long mask = max;
+    unsigned long *buf = nullptr;
+    int n_accepted;
+
+    /* Smallest bit mask >= max */
+    mask |= mask >> 1;
+    mask |= mask >> 2;
+    mask |= mask >> 4;
+    mask |= mask >> 8;
+    mask |= mask >> 16;
 #if ULONG_MAX > 0xffffffffUL
-        mask |= mask >> 32;
+    mask |= mask >> 32;
 #endif
 
-        buf = (unsigned long *)mkl_malloc(len * sizeof(long), 64);
-        assert(buf != nullptr);
-        n_accepted = 0;
-
-        while (n_accepted < len)
-        {
-            int k, batchSize = len - n_accepted;
-
-            err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batchSize, (unsigned MKL_INT64 *)buf);
-            assert(err == VSL_STATUS_OK);
-
-            for (k = 0; k < batchSize; ++k)
-            {
-                unsigned long value = buf[k] & mask;
-                if (value <= max)
-                {
-                    res[n_accepted++] = low + value;
-                }
-            }
-        }
+    buf = (unsigned long *)mkl_malloc(len * sizeof(long), 64);
+    assert(buf != nullptr);
+    n_accepted = 0;
 
-        mkl_free(buf);
+    while (n_accepted < len) {
+      int k, batchSize = len - n_accepted;
+
+      err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORM_STD, state->stream,
+                               batchSize, (unsigned MKL_INT64 *)buf);
+      assert(err == VSL_STATUS_OK);
+
+      for (k = 0; k < batchSize; ++k) {
+        unsigned long value = buf[k] & mask;
+        if (value <= max) {
+          res[n_accepted++] = low + value;
+        }
+      }
     }
+
+    mkl_free(buf);
+  }
 }
 
-void irk_ulong_vec(irk_state *state, npy_intp len, unsigned long *res)
-{
-    int err = 0;
+void irk_ulong_vec(irk_state *state, npy_intp len, unsigned long *res) {
+  int err = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        irk_ulong_vec(state, MKL_INT_MAX, res);
+  while (len > MKL_INT_MAX) {
+    irk_ulong_vec(state, MKL_INT_MAX, res);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
 #if ULONG_MAX <= 0xffffffffUL
-    err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, len, (unsigned int *)res);
+  err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, len,
+                           (unsigned int *)res);
 #else
-    err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORMBITS64_STD, state->stream, len, (unsigned MKL_INT64 *)res);
+  err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORMBITS64_STD, state->stream, len,
+                           (unsigned MKL_INT64 *)res);
 #endif
 
-    assert(err == VSL_STATUS_OK);
+  assert(err == VSL_STATUS_OK);
 }
 
-void irk_long_vec(irk_state *state, npy_intp len, long *res)
-{
-    npy_intp i = 0;
-    unsigned long *ulptr = (unsigned long *)res;
+void irk_long_vec(irk_state *state, npy_intp len, long *res) {
+  npy_intp i = 0;
+  unsigned long *ulptr = (unsigned long *)res;
 
-    irk_ulong_vec(state, len, ulptr);
+  irk_ulong_vec(state, len, ulptr);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] = (long)(ulptr[i] >> 1);
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (long)(ulptr[i] >> 1);
 }
 
-void irk_rand_bool_vec(irk_state *state, npy_intp len, npy_bool *res, const npy_bool lo, const npy_bool hi)
-{
-    int err = 0;
-    npy_intp i = 0;
-    int *buf = nullptr;
-
-    if (len < 1)
-        return;
-
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_bool_vec(state, MKL_INT_MAX, res, lo, hi);
-
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
-
-    if (lo == hi)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+void irk_rand_bool_vec(irk_state *state, npy_intp len, npy_bool *res,
+                       const npy_bool lo, const npy_bool hi) {
+  int err = 0;
+  npy_intp i = 0;
+  int *buf = nullptr;
 
-        return;
-    }
+  if (len < 1)
+    return;
 
-    assert((lo == 0) && (hi == 1));
-    buf = (int *)mkl_malloc(len * sizeof(int), 64);
-    assert(buf != nullptr);
+  if (len > MKL_INT_MAX) {
+    irk_rand_bool_vec(state, MKL_INT_MAX, res, lo, hi);
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, (int)lo, (int)hi + 1);
-    assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
+  if (lo == hi) {
     DIST_PRAGMA_VECTOR
     for (i = 0; i < len; ++i)
-        res[i] = (npy_bool)buf[i];
+      res[i] = lo;
 
-    mkl_free(buf);
-}
+    return;
+  }
 
-void irk_rand_uint8_vec(irk_state *state, npy_intp len, npy_uint8 *res, const npy_uint8 lo, const npy_uint8 hi)
-{
-    int err = 0;
-    npy_intp i = 0;
-    int *buf = nullptr;
+  assert((lo == 0) && (hi == 1));
+  buf = (int *)mkl_malloc(len * sizeof(int), 64);
+  assert(buf != nullptr);
 
-    if (len < 1)
-        return;
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf,
+                     (int)lo, (int)hi + 1);
+  assert(err == VSL_STATUS_OK);
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_uint8_vec(state, MKL_INT_MAX, res, lo, hi);
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (npy_bool)buf[i];
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  mkl_free(buf);
+}
 
-    if (lo == hi)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+void irk_rand_uint8_vec(irk_state *state, npy_intp len, npy_uint8 *res,
+                        const npy_uint8 lo, const npy_uint8 hi) {
+  int err = 0;
+  npy_intp i = 0;
+  int *buf = nullptr;
 
-        return;
-    }
+  if (len < 1)
+    return;
 
-    assert(lo < hi);
-    buf = (int *)mkl_malloc(len * sizeof(int), 64);
-    assert(buf != nullptr);
+  if (len > MKL_INT_MAX) {
+    irk_rand_uint8_vec(state, MKL_INT_MAX, res, lo, hi);
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, (int)lo, (int)hi + 1);
-    assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
+  if (lo == hi) {
     DIST_PRAGMA_VECTOR
     for (i = 0; i < len; ++i)
-        res[i] = (npy_uint8)buf[i];
+      res[i] = lo;
 
-    mkl_free(buf);
-}
+    return;
+  }
 
-void irk_rand_int8_vec(irk_state *state, npy_intp len, npy_int8 *res, const npy_int8 lo, const npy_int8 hi)
-{
-    int err = 0;
-    npy_intp i = 0;
-    int *buf = nullptr;
+  assert(lo < hi);
+  buf = (int *)mkl_malloc(len * sizeof(int), 64);
+  assert(buf != nullptr);
 
-    if (len < 1)
-        return;
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf,
+                     (int)lo, (int)hi + 1);
+  assert(err == VSL_STATUS_OK);
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_int8_vec(state, MKL_INT_MAX, res, lo, hi);
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (npy_uint8)buf[i];
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  mkl_free(buf);
+}
 
-    if (lo == hi)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+void irk_rand_int8_vec(irk_state *state, npy_intp len, npy_int8 *res,
+                       const npy_int8 lo, const npy_int8 hi) {
+  int err = 0;
+  npy_intp i = 0;
+  int *buf = nullptr;
 
-        return;
-    }
+  if (len < 1)
+    return;
 
-    assert(lo < hi);
-    buf = (int *)mkl_malloc(len * sizeof(int), 64);
-    assert(buf != nullptr);
+  if (len > MKL_INT_MAX) {
+    irk_rand_int8_vec(state, MKL_INT_MAX, res, lo, hi);
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, (int)lo, (int)hi + 1);
-    assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
+  if (lo == hi) {
     DIST_PRAGMA_VECTOR
     for (i = 0; i < len; ++i)
-        res[i] = (npy_int8)buf[i];
+      res[i] = lo;
 
-    mkl_free(buf);
-}
+    return;
+  }
 
-void irk_rand_uint16_vec(irk_state *state, npy_intp len, npy_uint16 *res, const npy_uint16 lo, const npy_uint16 hi)
-{
-    int err = 0;
-    npy_intp i = 0;
-    int *buf = nullptr;
+  assert(lo < hi);
+  buf = (int *)mkl_malloc(len * sizeof(int), 64);
+  assert(buf != nullptr);
 
-    if (len < 1)
-        return;
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf,
+                     (int)lo, (int)hi + 1);
+  assert(err == VSL_STATUS_OK);
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_uint16_vec(state, MKL_INT_MAX, res, lo, hi);
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (npy_int8)buf[i];
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  mkl_free(buf);
+}
 
-    if (lo == hi)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+void irk_rand_uint16_vec(irk_state *state, npy_intp len, npy_uint16 *res,
+                         const npy_uint16 lo, const npy_uint16 hi) {
+  int err = 0;
+  npy_intp i = 0;
+  int *buf = nullptr;
 
-        return;
-    }
+  if (len < 1)
+    return;
 
-    assert(lo < hi);
-    buf = (int *)mkl_malloc(len * sizeof(int), 64);
-    assert(buf != nullptr);
+  if (len > MKL_INT_MAX) {
+    irk_rand_uint16_vec(state, MKL_INT_MAX, res, lo, hi);
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, (int)lo, (int)hi + 1);
-    assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
+  if (lo == hi) {
     DIST_PRAGMA_VECTOR
     for (i = 0; i < len; ++i)
-        res[i] = (npy_uint16)buf[i];
+      res[i] = lo;
 
-    mkl_free(buf);
-}
+    return;
+  }
 
-void irk_rand_int16_vec(irk_state *state, npy_intp len, npy_int16 *res, const npy_int16 lo, const npy_int16 hi)
-{
-    int err = 0;
-    npy_intp i = 0;
-    int *buf = nullptr;
+  assert(lo < hi);
+  buf = (int *)mkl_malloc(len * sizeof(int), 64);
+  assert(buf != nullptr);
 
-    if (len < 1)
-        return;
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf,
+                     (int)lo, (int)hi + 1);
+  assert(err == VSL_STATUS_OK);
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_int16_vec(state, MKL_INT_MAX, res, lo, hi);
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (npy_uint16)buf[i];
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+  mkl_free(buf);
+}
 
-    if (lo == hi)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+void irk_rand_int16_vec(irk_state *state, npy_intp len, npy_int16 *res,
+                        const npy_int16 lo, const npy_int16 hi) {
+  int err = 0;
+  npy_intp i = 0;
+  int *buf = nullptr;
 
-        return;
-    }
+  if (len < 1)
+    return;
 
-    assert(lo < hi);
-    buf = (int *)mkl_malloc(len * sizeof(int), 64);
-    assert(buf != nullptr);
+  if (len > MKL_INT_MAX) {
+    irk_rand_int16_vec(state, MKL_INT_MAX, res, lo, hi);
 
-    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, (int)lo, (int)hi + 1);
-    assert(err == VSL_STATUS_OK);
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
+  if (lo == hi) {
     DIST_PRAGMA_VECTOR
     for (i = 0; i < len; ++i)
-        res[i] = (npy_int16)buf[i];
+      res[i] = lo;
 
-    mkl_free(buf);
+    return;
+  }
+
+  assert(lo < hi);
+  buf = (int *)mkl_malloc(len * sizeof(int), 64);
+  assert(buf != nullptr);
+
+  err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf,
+                     (int)lo, (int)hi + 1);
+  assert(err == VSL_STATUS_OK);
+
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i)
+    res[i] = (npy_int16)buf[i];
+
+  mkl_free(buf);
 }
 
-void irk_rand_uint32_vec(irk_state *state, npy_intp len, npy_uint32 *res, const npy_uint32 lo, const npy_uint32 hi)
-{
-    int err = 0;
-    unsigned int intm = INT_MAX;
+void irk_rand_uint32_vec(irk_state *state, npy_intp len, npy_uint32 *res,
+                         const npy_uint32 lo, const npy_uint32 hi) {
+  int err = 0;
+  unsigned int intm = INT_MAX;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_uint32_vec(state, MKL_INT_MAX, res, lo, hi);
+  if (len > MKL_INT_MAX) {
+    irk_rand_uint32_vec(state, MKL_INT_MAX, res, lo, hi);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    /* optimization for lo = 0 and hi = 2**32-1 */
-    if (!(lo || ~hi))
-    {
-        err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, len, (unsigned int *)res);
-        assert(err == VSL_STATUS_OK);
+  /* optimization for lo = 0 and hi = 2**32-1 */
+  if (!(lo || ~hi)) {
+    err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream,
+                             len, (unsigned int *)res);
+    assert(err == VSL_STATUS_OK);
 
-        return;
-    }
+    return;
+  }
 
-    if (hi >= intm)
-    {
+  if (hi >= intm) {
 
-        npy_int32 shft = ((npy_uint32)intm) + ((npy_uint32)1);
-        int i;
+    npy_int32 shift = ((npy_uint32)intm) + ((npy_uint32)1);
+    int i;
 
-        /* if lo is non-zero, shift one more to accommodate possibility of hi being ULONG_MAX */
-        if (lo)
-            shft++;
+    /* if lo is non-zero, shift one more to accommodate possibility of hi being
+     * ULONG_MAX */
+    if (lo)
+      shift++;
 
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, (int *)res, (int)(lo - shft), (int)(hi - shft + 1U));
-        assert(err == VSL_STATUS_OK);
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len,
+                       (int *)res, (int)(lo - shift), (int)(hi - shift + 1U));
+    assert(err == VSL_STATUS_OK);
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] += shft;
-    }
-    else
-    {
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, (int *)res, (int)lo, (int)hi + 1);
-        assert(err == VSL_STATUS_OK);
-    }
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] += shift;
+  } else {
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len,
+                       (int *)res, (int)lo, (int)hi + 1);
+    assert(err == VSL_STATUS_OK);
+  }
 }
 
-void irk_rand_int32_vec(irk_state *state, npy_intp len, npy_int32 *res, const npy_int32 lo, const npy_int32 hi)
-{
-    int err = 0;
-    int intm = INT_MAX;
+void irk_rand_int32_vec(irk_state *state, npy_intp len, npy_int32 *res,
+                        const npy_int32 lo, const npy_int32 hi) {
+  int err = 0;
+  int intm = INT_MAX;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_int32_vec(state, MKL_INT_MAX, res, lo, hi);
+  if (len > MKL_INT_MAX) {
+    irk_rand_int32_vec(state, MKL_INT_MAX, res, lo, hi);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    if (hi >= intm)
-    {
-        int i;
+  if (hi >= intm) {
+    int i;
 
-        irk_rand_uint32_vec(state, len, (npy_uint32 *)res, 0U, (npy_uint32)(hi - lo));
+    irk_rand_uint32_vec(state, len, (npy_uint32 *)res, 0U,
+                        (npy_uint32)(hi - lo));
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] += lo;
-    }
-    else
-    {
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, (int *)res, (int)lo, (int)hi + 1);
-        assert(err == VSL_STATUS_OK);
-    }
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] += lo;
+  } else {
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len,
+                       (int *)res, (int)lo, (int)hi + 1);
+    assert(err == VSL_STATUS_OK);
+  }
 }
 
-void irk_rand_uint64_vec(irk_state *state, npy_intp len, npy_uint64 *res, const npy_uint64 lo, const npy_uint64 hi)
-{
-    npy_uint64 rng;
-    int err = 0;
-    npy_intp i = 0;
+void irk_rand_uint64_vec(irk_state *state, npy_intp len, npy_uint64 *res,
+                         const npy_uint64 lo, const npy_uint64 hi) {
+  npy_uint64 rng;
+  int err = 0;
+  npy_intp i = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    if (len > MKL_INT_MAX)
-    {
-        irk_rand_uint64_vec(state, MKL_INT_MAX, res, lo, hi);
+  while (len > MKL_INT_MAX) { /* emit every full MKL_INT_MAX-sized chunk */
+    irk_rand_uint64_vec(state, MKL_INT_MAX, res, lo, hi);
 
-        res += MKL_INT_MAX;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX;
+    len -= MKL_INT_MAX;
+  }
 
-    /* optimization for lo = 0 and hi = 2**64-1 */
-    if (!(lo || ~hi))
-    {
-        err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORMBITS64_STD, state->stream, len, (unsigned MKL_INT64 *)res);
-        assert(err == VSL_STATUS_OK);
+  /* optimization for lo = 0 and hi = 2**64-1 */
+  if (!(lo || ~hi)) {
+    err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORMBITS64_STD, state->stream,
+                             len, (unsigned MKL_INT64 *)res);
+    assert(err == VSL_STATUS_OK);
 
-        return;
-    }
+    return;
+  }
 
-    rng = hi - lo;
-    if (!rng)
-    {
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo;
+  rng = hi - lo;
+  if (!rng) { /* lo == hi: the only admissible value is lo */
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] = lo;
 
-        return;
-    }
+    return;
+  }
 
-    rng++;
+  rng++; /* rng is now the count of admissible values: hi - lo + 1 */
 
-    if (rng <= (npy_uint64)INT_MAX)
-    {
-        int *buf = (int *)mkl_malloc(len * sizeof(int), 64);
-        assert(buf != nullptr);
+  if (rng <= (npy_uint64)INT_MAX) {
+    int *buf = (int *)mkl_malloc(len * sizeof(int), 64);
+    assert(buf != nullptr);
 
-        err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, 0, (int)rng);
-        assert(err == VSL_STATUS_OK);
+    err = viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, state->stream, len, buf, 0,
+                       (int)rng);
+    assert(err == VSL_STATUS_OK);
 
-        DIST_PRAGMA_VECTOR
-        for (i = 0; i < len; ++i)
-            res[i] = lo + ((npy_uint64)buf[i]);
+    DIST_PRAGMA_VECTOR
+    for (i = 0; i < len; ++i)
+      res[i] = lo + ((npy_uint64)buf[i]);
 
-        mkl_free(buf);
-    }
-    else
-    {
-        npy_uint64 mask = rng;
-        npy_uint64 *buf = nullptr;
-        npy_intp n_accepted = 0;
-
-        mask |= mask >> 1;
-        mask |= mask >> 2;
-        mask |= mask >> 4;
-        mask |= mask >> 8;
-        mask |= mask >> 16;
-        mask |= mask >> 32;
-
-        buf = (npy_uint64 *)mkl_malloc(len * sizeof(npy_uint64), 64);
-        assert(buf != nullptr);
-
-        while (n_accepted < len)
-        {
-            npy_intp k = 0;
-            npy_intp batchSize = len - n_accepted;
-
-            err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORM_STD, state->stream, batchSize, (unsigned MKL_INT64 *)buf);
-            assert(err == VSL_STATUS_OK);
-
-            for (k = 0; k < batchSize; ++k)
-            {
-                npy_uint64 value = buf[k] & mask;
-                if (value <= rng)
-                {
-                    res[n_accepted++] = lo + value;
-                }
-            }
-        }
+    mkl_free(buf);
+  } else {
+    npy_uint64 mask = rng;
+    npy_uint64 *buf = nullptr;
+    npy_intp n_accepted = 0;
+
+    mask |= mask >> 1; /* smear the top set bit down: mask = 2**k - 1 >= rng */
+    mask |= mask >> 2;
+    mask |= mask >> 4;
+    mask |= mask >> 8;
+    mask |= mask >> 16;
+    mask |= mask >> 32;
+
+    buf = (npy_uint64 *)mkl_malloc(len * sizeof(npy_uint64), 64);
+    assert(buf != nullptr);
+
+    while (n_accepted < len) { /* rejection sampling until res is full */
+      npy_intp k = 0;
+      npy_intp batchSize = len - n_accepted;
+
+      err = viRngUniformBits64(VSL_RNG_METHOD_UNIFORMBITS64_STD, state->stream,
+                               batchSize, (unsigned MKL_INT64 *)buf);
+      assert(err == VSL_STATUS_OK);
 
-        mkl_free(buf);
+      for (k = 0; k < batchSize; ++k) {
+        npy_uint64 value = buf[k] & mask;
+        if (value < rng) { /* rng is a count: only [0, rng) maps to [lo, hi] */
+          res[n_accepted++] = lo + value;
+        }
+      }
     }
+
+    mkl_free(buf);
+  }
 }
 
-void irk_rand_int64_vec(irk_state *state, npy_intp len, npy_int64 *res, const npy_int64 lo, const npy_int64 hi)
-{
-    npy_uint64 rng = 0;
-    npy_intp i = 0;
+void irk_rand_int64_vec(irk_state *state, npy_intp len, npy_int64 *res,
+                        const npy_int64 lo, const npy_int64 hi) {
+  npy_uint64 rng = 0;
+  npy_intp i = 0;
 
-    if (len < 1)
-        return;
+  if (len < 1)
+    return;
 
-    rng = ((npy_uint64)hi) - ((npy_uint64)lo);
+  rng = ((npy_uint64)hi) - ((npy_uint64)lo); /* width of [lo, hi], unsigned */
 
-    irk_rand_uint64_vec(state, len, (npy_uint64 *)res, 0, rng);
+  irk_rand_uint64_vec(state, len, (npy_uint64 *)res, 0, rng);
 
-    DIST_PRAGMA_VECTOR
-    for (i = 0; i < len; ++i)
-        res[i] = res[i] + lo;
+  DIST_PRAGMA_VECTOR
+  for (i = 0; i < len; ++i) /* shift in unsigned math: no signed overflow */
+    res[i] = (npy_int64)((npy_uint64)res[i] + (npy_uint64)lo);
 }
 
-const MKL_INT cholesky_storage_flags[3] = {
-    VSL_MATRIX_STORAGE_FULL,
-    VSL_MATRIX_STORAGE_PACKED,
-    VSL_MATRIX_STORAGE_DIAGONAL};
+const MKL_INT cholesky_storage_flags[3] = {VSL_MATRIX_STORAGE_FULL,
+                                           VSL_MATRIX_STORAGE_PACKED,
+                                           VSL_MATRIX_STORAGE_DIAGONAL};
 
-void irk_multinormal_vec_ICDF(irk_state *state, npy_intp len, double *res, const int dim, double *mean_vec, double *ch,
-                              const ch_st_enum storage_flag)
-{
-    int err = 0;
-    const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
+void irk_multinormal_vec_ICDF(irk_state *state, npy_intp len, double *res,
+                              const int dim, double *mean_vec, double *ch,
+                              const ch_st_enum storage_flag) {
+  int err = 0; /* NOTE(review): no len<1 guard or chunking, unlike BM1/BM2 */
+  const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
 
-    err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_ICDF, state->stream, len, res, dim, storage_mode, mean_vec, ch);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_ICDF, state->stream, len, res,
+                        dim, storage_mode, mean_vec, ch);
+  assert(err == VSL_STATUS_OK); /* no-op when NDEBUG is defined */
 }
 
-void irk_multinormal_vec_BM1(irk_state *state, npy_intp len, double *res, const int dim, double *mean_vec, double *ch,
-                             const ch_st_enum storage_flag)
-{
-    int err = 0;
-    const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
+void irk_multinormal_vec_BM1(irk_state *state, npy_intp len, double *res,
+                             const int dim, double *mean_vec, double *ch,
+                             const ch_st_enum storage_flag) {
+  int err = 0;
+  const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
 
-    if (len < 1)
-        return;
+  if (len < 1) /* nothing to generate */
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, state->stream, MKL_INT_MAX, res, dim, storage_mode, mean_vec, ch);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) { /* full MKL_INT_MAX-sized batches first */
+    err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, state->stream,
+                          MKL_INT_MAX, res, dim, storage_mode, mean_vec, ch);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX * dim;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX * dim; /* step past MKL_INT_MAX samples of dim doubles */
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, state->stream, len, res, dim, storage_mode, mean_vec, ch);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, state->stream, len,
+                        res, dim, storage_mode, mean_vec, ch);
+  assert(err == VSL_STATUS_OK); /* no-op when NDEBUG is defined */
 }
 
-void irk_multinormal_vec_BM2(irk_state *state, npy_intp len, double *res, const int dim, double *mean_vec, double *ch,
-                             const ch_st_enum storage_flag)
-{
-    int err = 0;
-    const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
+void irk_multinormal_vec_BM2(irk_state *state, npy_intp len, double *res,
+                             const int dim, double *mean_vec, double *ch,
+                             const ch_st_enum storage_flag) {
+  int err = 0;
+  const MKL_INT storage_mode = cholesky_storage_flags[storage_flag];
 
-    if (len < 1)
-        return;
+  if (len < 1) /* nothing to generate */
+    return;
 
-    while (len > MKL_INT_MAX)
-    {
-        err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2, state->stream, MKL_INT_MAX, res, dim, storage_mode, mean_vec, ch);
-        assert(err == VSL_STATUS_OK);
+  while (len > MKL_INT_MAX) { /* full MKL_INT_MAX-sized batches first */
+    err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2, state->stream,
+                          MKL_INT_MAX, res, dim, storage_mode, mean_vec, ch);
+    assert(err == VSL_STATUS_OK);
 
-        res += MKL_INT_MAX * dim;
-        len -= MKL_INT_MAX;
-    }
+    res += MKL_INT_MAX * dim; /* step past MKL_INT_MAX samples of dim doubles */
+    len -= MKL_INT_MAX;
+  }
 
-    err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2, state->stream, len, res, dim, storage_mode, mean_vec, ch);
-    assert(err == VSL_STATUS_OK);
+  err = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2, state->stream,
+                        len, res, dim, storage_mode, mean_vec, ch);
+  assert(err == VSL_STATUS_OK); /* no-op when NDEBUG is defined */
 }
 
-/* This code is taken from distribution.c, and is currently unused. It is retained here for
-   possible future optimization of sampling from multinomial */
+/* This code is taken from distribution.c, and is currently unused. It is
+   retained here for possible future optimization of sampling from multinomial
+ */
 
-static double irk_double(irk_state *state)
-{
-    double res;
+static double irk_double(irk_state *state) {
+  double res; /* receives the single value produced below */
 
-    irk_double_vec(state, 1, &res);
+  irk_double_vec(state, 1, &res); /* vector generator called with len == 1 */
 
-    return res;
+  return res;
 }
diff --git a/mkl_random/src/mkl_distributions.h b/mkl_random/src/mkl_distributions.h
index 802d994..94dc462 100644
--- a/mkl_random/src/mkl_distributions.h
+++ b/mkl_random/src/mkl_distributions.h
@@ -25,8 +25,10 @@
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+// clang-format off
 #include <stddef.h>
 #include "randomkit.h"
+// clang-format on
 
 #ifndef _MKL_DISTRIBUTIONS_H_
 #define _MKL_DISTRIBUTIONS_H_
@@ -35,103 +37,155 @@
 extern "C" {
 #endif
 
-typedef enum {
-    MATRIX = 0,
-    PACKED = 1,
-    DIAGONAL = 2
-} ch_st_enum;
+typedef enum { MATRIX = 0, PACKED = 1, DIAGONAL = 2 } ch_st_enum;
 
 extern void irk_double_vec(irk_state *state, npy_intp len, double *res);
-extern void irk_uniform_vec(irk_state *state, npy_intp len, double *res, const double low, const double high);
-extern void irk_standard_normal_vec_ICDF(irk_state *state, npy_intp len, double *res);
-extern void irk_standard_normal_vec_BM1(irk_state *state, npy_intp len, double *res);
-extern void irk_standard_normal_vec_BM2(irk_state *state, npy_intp len, double *res);
-
-extern void irk_normal_vec_ICDF(irk_state *state, npy_intp len, double *res, const double loc, const double scale);
-extern void irk_normal_vec_BM1(irk_state *state, npy_intp len, double *res, const double loc, const double scale);
-extern void irk_normal_vec_BM2(irk_state *state, npy_intp len, double *res, const double loc, const double scale);
-
-extern void irk_standard_t_vec(irk_state *state, npy_intp len, double *res, const double df);
-extern void irk_chisquare_vec(irk_state *state, npy_intp len, double *res, const double df);
-
-extern void irk_standard_exponential_vec(irk_state *state, npy_intp len, double *res);
-extern void irk_standard_cauchy_vec(irk_state *state, npy_intp len, double *res);
-
-extern void irk_standard_gamma_vec(irk_state *state, npy_intp len, double *res, const double shape);
-extern void irk_exponential_vec(irk_state *state, npy_intp len, double *res, const double scale);
-extern void irk_gamma_vec(irk_state *state, npy_intp len, double *res, const double shape, const double scale);
-
-extern void irk_pareto_vec(irk_state *state, npy_intp len, double *res, const double alph);
-extern void irk_power_vec(irk_state *state, npy_intp len, double *res, const double alph);
-
-extern void irk_weibull_vec(irk_state *state, npy_intp len, double *res, const double alph);
-
-extern void irk_rayleigh_vec(irk_state *state, npy_intp len, double *res, const double scale);
-
-extern void irk_beta_vec(irk_state *state, npy_intp len, double *res, const double p, const double q);
-extern void irk_f_vec(irk_state *state, npy_intp len, double *res, const double df_num, const double df_den);
-
-extern void irk_noncentral_chisquare_vec(irk_state *state, npy_intp len, double *res, const double df, const double nonc);
-
-extern void irk_laplace_vec(irk_state *vec, npy_intp len, double *res, const double loc, const double scale);
-extern void irk_gumbel_vec(irk_state *vec, npy_intp len, double *res, const double loc, const double scale);
-extern void irk_logistic_vec(irk_state *vec, npy_intp len, double *res, const double loc, const double scale);
-
-extern void irk_lognormal_vec_ICDF(irk_state *state, npy_intp len, double *res, const double mean, const double sigma);
-extern void irk_lognormal_vec_BM(irk_state *state, npy_intp len, double *res, const double mean, const double sigma);
-
-extern void irk_wald_vec(irk_state *state, npy_intp len, double *res, const double mean, const double scale);
-
-extern void irk_vonmises_vec(irk_state *state, npy_intp len, double *res, const double mu, const double kappa);
-
-extern void irk_noncentral_f_vec(irk_state *state, npy_intp len, double *res, double df_num, double df_den, double nonc);
-extern void irk_triangular_vec(irk_state *state, npy_intp len, double *res, double left, double mode, double right);
-
-extern void irk_binomial_vec(irk_state *state, npy_intp len, int *res, const int n, const double p);
-
-extern void irk_multinomial_vec(irk_state *state, npy_intp len, int *res, const int n, const int k, const double* pvec);
-
-extern void irk_geometric_vec(irk_state *state, npy_intp len, int *res, const double p);
-extern void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res, const double a, const double p);
-extern void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res, const int ls, const int ss, const int ms);
-extern void irk_poisson_vec_PTPE(irk_state *state, npy_intp len, int *res, const double lambda);
-extern void irk_poisson_vec_POISNORM(irk_state *state, npy_intp len, int *res, const double lambda);
-
-extern void irk_poisson_vec_V(irk_state *state, npy_intp len, int *res, double *lambdas);
-
-extern void irk_zipf_long_vec(irk_state *state, npy_intp len, long *res, const double alp);
-
-extern void irk_logseries_vec(irk_state *state, npy_intp len, int *res, const double alp);
-
-extern void irk_discrete_uniform_vec(irk_state *state, npy_intp len, int *res, const int low, const int high);
-
-extern void irk_discrete_uniform_long_vec(irk_state *state, npy_intp len, long *res, const long low, const long high);
-
-extern void irk_rand_int64_vec(irk_state *state, npy_intp len, npy_int64 *res, const npy_int64 lo, const npy_int64 hi);
-extern void irk_rand_uint64_vec(irk_state *state, npy_intp len, npy_uint64 *res, const npy_uint64 lo, const npy_uint64 hi);
-extern void irk_rand_int32_vec(irk_state *state, npy_intp len, npy_int32 *res, const npy_int32 lo, const npy_int32 hi);
-extern void irk_rand_uint32_vec(irk_state *state, npy_intp len, npy_uint32 *res, const npy_uint32 lo, const npy_uint32 hi);
-extern void irk_rand_int16_vec(irk_state *state, npy_intp len, npy_int16 *res, const npy_int16 lo, const npy_int16 hi);
-extern void irk_rand_uint16_vec(irk_state *state, npy_intp len, npy_uint16 *res, const npy_uint16 lo, const npy_uint16 hi);
-extern void irk_rand_int8_vec(irk_state *state, npy_intp len, npy_int8 *res, const npy_int8 lo, const npy_int8 hi);
-extern void irk_rand_uint8_vec(irk_state *state, npy_intp len, npy_uint8 *res, const npy_uint8 lo, const npy_uint8 hi);
-extern void irk_rand_bool_vec(irk_state *state, npy_intp len, npy_bool *res, const npy_bool lo, const npy_bool hi);
+extern void irk_uniform_vec(irk_state *state, npy_intp len, double *res,
+                            const double low, const double high);
+extern void irk_standard_normal_vec_ICDF(irk_state *state, npy_intp len,
+                                         double *res);
+extern void irk_standard_normal_vec_BM1(irk_state *state, npy_intp len,
+                                        double *res);
+extern void irk_standard_normal_vec_BM2(irk_state *state, npy_intp len,
+                                        double *res);
+
+extern void irk_normal_vec_ICDF(irk_state *state, npy_intp len, double *res,
+                                const double loc, const double scale);
+extern void irk_normal_vec_BM1(irk_state *state, npy_intp len, double *res,
+                               const double loc, const double scale);
+extern void irk_normal_vec_BM2(irk_state *state, npy_intp len, double *res,
+                               const double loc, const double scale);
+
+extern void irk_standard_t_vec(irk_state *state, npy_intp len, double *res,
+                               const double df);
+extern void irk_chisquare_vec(irk_state *state, npy_intp len, double *res,
+                              const double df);
+
+extern void irk_standard_exponential_vec(irk_state *state, npy_intp len,
+                                         double *res);
+extern void irk_standard_cauchy_vec(irk_state *state, npy_intp len,
+                                    double *res);
+
+extern void irk_standard_gamma_vec(irk_state *state, npy_intp len, double *res,
+                                   const double shape);
+extern void irk_exponential_vec(irk_state *state, npy_intp len, double *res,
+                                const double scale);
+extern void irk_gamma_vec(irk_state *state, npy_intp len, double *res,
+                          const double shape, const double scale);
+
+extern void irk_pareto_vec(irk_state *state, npy_intp len, double *res,
+                           const double alp);
+extern void irk_power_vec(irk_state *state, npy_intp len, double *res,
+                          const double alp);
+
+extern void irk_weibull_vec(irk_state *state, npy_intp len, double *res,
+                            const double alp);
+
+extern void irk_rayleigh_vec(irk_state *state, npy_intp len, double *res,
+                             const double scale);
+
+extern void irk_beta_vec(irk_state *state, npy_intp len, double *res,
+                         const double p, const double q);
+extern void irk_f_vec(irk_state *state, npy_intp len, double *res,
+                      const double df_num, const double df_den);
+
+extern void irk_noncentral_chisquare_vec(irk_state *state, npy_intp len,
+                                         double *res, const double df,
+                                         const double nonc);
+
+extern void irk_laplace_vec(irk_state *state, npy_intp len, double *res,
+                            const double loc, const double scale);
+extern void irk_gumbel_vec(irk_state *state, npy_intp len, double *res,
+                           const double loc, const double scale);
+extern void irk_logistic_vec(irk_state *state, npy_intp len, double *res,
+                             const double loc, const double scale);
+
+extern void irk_lognormal_vec_ICDF(irk_state *state, npy_intp len, double *res,
+                                   const double mean, const double sigma);
+extern void irk_lognormal_vec_BM(irk_state *state, npy_intp len, double *res,
+                                 const double mean, const double sigma);
+
+extern void irk_wald_vec(irk_state *state, npy_intp len, double *res,
+                         const double mean, const double scale);
+
+extern void irk_vonmises_vec(irk_state *state, npy_intp len, double *res,
+                             const double mu, const double kappa);
+
+extern void irk_noncentral_f_vec(irk_state *state, npy_intp len, double *res,
+                                 double df_num, double df_den, double nonc);
+extern void irk_triangular_vec(irk_state *state, npy_intp len, double *res,
+                               double left, double mode, double right);
+
+extern void irk_binomial_vec(irk_state *state, npy_intp len, int *res,
+                             const int n, const double p);
+
+extern void irk_multinomial_vec(irk_state *state, npy_intp len, int *res,
+                                const int n, const int k, const double *pvec);
+
+extern void irk_geometric_vec(irk_state *state, npy_intp len, int *res,
+                              const double p);
+extern void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res,
+                                const double a, const double p);
+extern void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res,
+                                   const int ls, const int ss, const int ms);
+extern void irk_poisson_vec_PTPE(irk_state *state, npy_intp len, int *res,
+                                 const double lambda);
+extern void irk_poisson_vec_POISNORM(irk_state *state, npy_intp len, int *res,
+                                     const double lambda);
+
+extern void irk_poisson_vec_V(irk_state *state, npy_intp len, int *res,
+                              double *lambdas);
+
+extern void irk_zipf_long_vec(irk_state *state, npy_intp len, long *res,
+                              const double alp);
+
+extern void irk_logseries_vec(irk_state *state, npy_intp len, int *res,
+                              const double theta);
+
+extern void irk_discrete_uniform_vec(irk_state *state, npy_intp len, int *res,
+                                     const int low, const int high);
+
+extern void irk_discrete_uniform_long_vec(irk_state *state, npy_intp len,
+                                          long *res, const long low,
+                                          const long high);
+
+extern void irk_rand_int64_vec(irk_state *state, npy_intp len, npy_int64 *res,
+                               const npy_int64 lo, const npy_int64 hi);
+extern void irk_rand_uint64_vec(irk_state *state, npy_intp len, npy_uint64 *res,
+                                const npy_uint64 lo, const npy_uint64 hi);
+extern void irk_rand_int32_vec(irk_state *state, npy_intp len, npy_int32 *res,
+                               const npy_int32 lo, const npy_int32 hi);
+extern void irk_rand_uint32_vec(irk_state *state, npy_intp len, npy_uint32 *res,
+                                const npy_uint32 lo, const npy_uint32 hi);
+extern void irk_rand_int16_vec(irk_state *state, npy_intp len, npy_int16 *res,
+                               const npy_int16 lo, const npy_int16 hi);
+extern void irk_rand_uint16_vec(irk_state *state, npy_intp len, npy_uint16 *res,
+                                const npy_uint16 lo, const npy_uint16 hi);
+extern void irk_rand_int8_vec(irk_state *state, npy_intp len, npy_int8 *res,
+                              const npy_int8 lo, const npy_int8 hi);
+extern void irk_rand_uint8_vec(irk_state *state, npy_intp len, npy_uint8 *res,
+                               const npy_uint8 lo, const npy_uint8 hi);
+extern void irk_rand_bool_vec(irk_state *state, npy_intp len, npy_bool *res,
+                              const npy_bool lo, const npy_bool hi);
 
 extern void irk_ulong_vec(irk_state *state, npy_intp len, unsigned long *res);
 extern void irk_long_vec(irk_state *state, npy_intp len, long *res);
 
-extern void irk_multinormal_vec_ICDF(irk_state *state, npy_intp len, double *res, const int dim,
-    double *mean_vec, double *ch, const ch_st_enum storage_mode);
+extern void irk_multinormal_vec_ICDF(irk_state *state, npy_intp len,
+                                     double *res, const int dim,
+                                     double *mean_vec, double *ch,
+                                     const ch_st_enum storage_mode);
 
-extern void irk_multinormal_vec_BM1(irk_state *state, npy_intp len, double *res, const int dim,
-    double *mean_vec, double *ch, const ch_st_enum storage_mode);
+extern void irk_multinormal_vec_BM1(irk_state *state, npy_intp len, double *res,
+                                    const int dim, double *mean_vec, double *ch,
+                                    const ch_st_enum storage_mode);
 
-extern void irk_multinormal_vec_BM2(irk_state *state, npy_intp len, double *res, const int dim,
-    double *mean_vec, double *ch, const ch_st_enum storage_mode);
+extern void irk_multinormal_vec_BM2(irk_state *state, npy_intp len, double *res,
+                                    const int dim, double *mean_vec, double *ch,
+                                    const ch_st_enum storage_mode);
 
 #ifdef __cplusplus
 }
 #endif
 
-
 #endif
diff --git a/mkl_random/src/mklrand_py_helper.h b/mkl_random/src/mklrand_py_helper.h
index 28d713d..c66b2d0 100644
--- a/mkl_random/src/mklrand_py_helper.h
+++ b/mkl_random/src/mklrand_py_helper.h
@@ -1,40 +1,39 @@
+// clang-format off
 #ifndef _MKLRAND_PY_HELPER_H_
 #define _MKLRAND_PY_HELPER_H_
 
 #include <Python.h>
+// clang-format on
 
-static PyObject *empty_py_bytes(npy_intp length, void **bytesVec)
-{
-    PyObject *b;
+static PyObject *empty_py_bytes(npy_intp length, void **bytesVec) {
+  PyObject *b; /* created below; tested for NULL before it is used */
 #if PY_MAJOR_VERSION >= 3
-    b = PyBytes_FromStringAndSize(NULL, length);
-    if (b) {
-        *bytesVec = PyBytes_AS_STRING(b);
-    }
+  b = PyBytes_FromStringAndSize(NULL, length);
+  if (b) {
+    *bytesVec = PyBytes_AS_STRING(b); /* hand out the object's data pointer */
+  }
 #else
-    b = PyString_FromStringAndSize(NULL, length);
-    if (b) {
-        *bytesVec = PyString_AS_STRING(b);
-    }
+  b = PyString_FromStringAndSize(NULL, length);
+  if (b) {
+    *bytesVec = PyString_AS_STRING(b); /* Python 2 spelling of the same */
+  }
 #endif
-    return b;
+  return b; /* *bytesVec is left untouched when b is NULL */
 }
 
-static char *py_bytes_DataPtr(PyObject *b)
-{
+static char *py_bytes_DataPtr(PyObject *b) {
 #if PY_MAJOR_VERSION >= 3
-    return PyBytes_AS_STRING(b);
+  return PyBytes_AS_STRING(b); /* data pointer of b; b is not checked here */
 #else
-    return PyString_AS_STRING(b);
+  return PyString_AS_STRING(b); /* Python 2 spelling of the same */
 #endif
 }
 
-static int is_bytes_object(PyObject *b)
-{
+static int is_bytes_object(PyObject *b) {
 #if PY_MAJOR_VERSION >= 3
-    return PyBytes_Check(b);
+  return PyBytes_Check(b); /* result of the bytes type check on b */
 #else
-    return PyString_Check(b);
+  return PyString_Check(b); /* Python 2 spelling of the same */
 #endif
 }
 
diff --git a/mkl_random/src/numpy_multiiter_workaround.h b/mkl_random/src/numpy_multiiter_workaround.h
index f9f801b..05ccb4c 100644
--- a/mkl_random/src/numpy_multiiter_workaround.h
+++ b/mkl_random/src/numpy_multiiter_workaround.h
@@ -25,17 +25,19 @@
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+// clang-format off
 #include "Python.h"
 #include "numpy/arrayobject.h"
+// clang-format on
 
 /* This header file is a work-around for issue
- *   https://github.com/numpy/numpy/issues/26990 
+ *   https://github.com/numpy/numpy/issues/26990
  *
  * It is included once in mklrandom.pyx
- * 
- * The work-around is needed to support building with 
+ *
+ * The work-around is needed to support building with
  * NumPy < 2.0.0
- * 
+ *
  * Once building transitions to using NumPy 2.0 only
  * this file can be removed and corresponding changes
  * in mklrand.pyx can be applied to always use
@@ -44,41 +46,40 @@
  */
 
 #if (defined(NPY_2_0_API_VERSION) && (NPY_API_VERSION >= NPY_2_0_API_VERSION))
-    #define WORKAROUND_NEEDED 
+#define WORKAROUND_NEEDED
 #endif
 
 #if !defined(WORKAROUND_NEEDED)
 typedef struct {
-    PyObject_HEAD
-    int numiter;
-    npy_intp size;
-    npy_intp index;
-    int nd;
-    npy_intp dimensions[32];
-    void **iters;
+  PyObject_HEAD int numiter; /* PyObject_HEAD is a macro, then field numiter */
+  npy_intp size; /* read by workaround_PyArray_MultiIter_SIZE */
+  npy_intp index;
+  int nd; /* read by workaround_PyArray_MultiIter_NDIM */
+  npy_intp dimensions[32]; /* returned by workaround_PyArray_MultiIter_DIMS */
+  void **iters;
 } multi_iter_proxy_st;
 #endif
 
 npy_intp workaround_PyArray_MultiIter_SIZE(PyArrayMultiIterObject *multi) {
 #if defined(WORKAROUND_NEEDED)
-    return PyArray_MultiIter_SIZE(multi);
+  return PyArray_MultiIter_SIZE(multi); /* NumPy's own accessor */
 #else
-    return ((multi_iter_proxy_st *)(multi))->size;
+  return ((multi_iter_proxy_st *)(multi))->size; /* read via proxy layout */
 #endif
 }
 
 int workaround_PyArray_MultiIter_NDIM(PyArrayMultiIterObject *multi) {
 #if defined(WORKAROUND_NEEDED)
-    return PyArray_MultiIter_NDIM(multi);
+  return PyArray_MultiIter_NDIM(multi); /* NumPy's own accessor */
 #else
-    return ((multi_iter_proxy_st *)(multi))->nd;
+  return ((multi_iter_proxy_st *)(multi))->nd; /* read via proxy layout */
 #endif
 }
 
-npy_intp* workaround_PyArray_MultiIter_DIMS(PyArrayMultiIterObject *multi) {
+npy_intp *workaround_PyArray_MultiIter_DIMS(PyArrayMultiIterObject *multi) {
 #if defined(WORKAROUND_NEEDED)
-    return PyArray_MultiIter_DIMS(multi);
+  return PyArray_MultiIter_DIMS(multi); /* NumPy's own accessor */
 #else
-    return (((multi_iter_proxy_st *)(multi))->dimensions);
+  return (((multi_iter_proxy_st *)(multi))->dimensions); /* proxy's array */
 #endif
 }
diff --git a/mkl_random/src/randomkit.cpp b/mkl_random/src/randomkit.cpp
index a7f8bb4..cb4eb71 100644
--- a/mkl_random/src/randomkit.cpp
+++ b/mkl_random/src/randomkit.cpp
@@ -28,6 +28,8 @@
  Adopted from NumPy's Random kit 1.3,
  Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org)
  */
+
+// clang-format off
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -89,352 +91,311 @@
 #ifndef RK_DEV_RANDOM
 #define RK_DEV_RANDOM "/dev/random"
 #endif
+// clang-format on
 
-const char *irk_strerror[RK_ERR_MAX] =
-    {
-        "no error",
-        "random device unavailable"};
+const char *irk_strerror[RK_ERR_MAX] = {"no error",
+                                        "random device unavailable"};
 
 /* static functions */
 static unsigned long irk_hash(unsigned long key);
 
-void irk_dealloc_stream(irk_state *state)
-{
-    VSLStreamStatePtr stream = state->stream;
+void irk_dealloc_stream(irk_state *state) {
+  VSLStreamStatePtr stream = state->stream;
 
-    if (stream)
-    {
-        vslDeleteStream(&stream);
-    }
+  if (stream) {
+    vslDeleteStream(&stream);
+  }
 }
 
 const MKL_INT brng_list[BRNG_KINDS] = {
-    VSL_BRNG_MT19937,
-    VSL_BRNG_SFMT19937,
-    VSL_BRNG_WH,
-    VSL_BRNG_MT2203,
-    VSL_BRNG_MCG31,
-    VSL_BRNG_R250,
-    VSL_BRNG_MRG32K3A,
-    VSL_BRNG_MCG59,
-    VSL_BRNG_PHILOX4X32X10,
-    VSL_BRNG_NONDETERM,
-    VSL_BRNG_ARS5};
+    VSL_BRNG_MT19937,   VSL_BRNG_SFMT19937, VSL_BRNG_WH,
+    VSL_BRNG_MT2203,    VSL_BRNG_MCG31,     VSL_BRNG_R250,
+    VSL_BRNG_MRG32K3A,  VSL_BRNG_MCG59,     VSL_BRNG_PHILOX4X32X10,
+    VSL_BRNG_NONDETERM, VSL_BRNG_ARS5};
 
 /* Mersenne-Twister 2203 algorithm and Wichmann-Hill algorithm
  * each have a parameter which produces a family of BRNG algorithms,
- * MKL identifies individual members of these families by VSL_BRNG_ALGO + family_id
+ * MKL identifies individual members of these families by VSL_BRNG_ALGO +
+ * family_id
  */
 #define SIZE_OF_MT2203_FAMILY 6024
 #define SIZE_OF_WH_FAMILY 273
 
-int irk_get_brng_mkl(irk_state *state)
-{
-    int i, mkl_brng_id = vslGetStreamStateBrng(state->stream);
+int irk_get_brng_mkl(irk_state *state) {
+  int i, mkl_brng_id = vslGetStreamStateBrng(state->stream);
 
-    if ((VSL_BRNG_MT2203 <= mkl_brng_id) && (mkl_brng_id < VSL_BRNG_MT2203 + SIZE_OF_MT2203_FAMILY))
-        mkl_brng_id = VSL_BRNG_MT2203;
-    else if ((VSL_BRNG_WH <= mkl_brng_id) && (mkl_brng_id < VSL_BRNG_WH + SIZE_OF_WH_FAMILY))
-        mkl_brng_id = VSL_BRNG_WH;
+  if ((VSL_BRNG_MT2203 <= mkl_brng_id) &&
+      (mkl_brng_id < VSL_BRNG_MT2203 + SIZE_OF_MT2203_FAMILY))
+    mkl_brng_id = VSL_BRNG_MT2203;
+  else if ((VSL_BRNG_WH <= mkl_brng_id) &&
+           (mkl_brng_id < VSL_BRNG_WH + SIZE_OF_WH_FAMILY))
+    mkl_brng_id = VSL_BRNG_WH;
 
-    for (i = 0; i < BRNG_KINDS; i++)
-        if (mkl_brng_id == brng_list[i])
-            return i;
+  for (i = 0; i < BRNG_KINDS; i++)
+    if (mkl_brng_id == brng_list[i])
+      return i;
 
-    return -1;
+  return -1;
 }
 
-int irk_get_brng_and_stream_mkl(irk_state *state, unsigned int *stream_id)
-{
-    int i, mkl_brng_id = vslGetStreamStateBrng(state->stream);
-
-    if ((VSL_BRNG_MT2203 <= mkl_brng_id) && (mkl_brng_id < VSL_BRNG_MT2203 + SIZE_OF_MT2203_FAMILY))
-    {
-        *stream_id = (unsigned int)(mkl_brng_id - VSL_BRNG_MT2203);
-        mkl_brng_id = VSL_BRNG_MT2203;
-    }
-    else if ((VSL_BRNG_WH <= mkl_brng_id) && (mkl_brng_id < VSL_BRNG_WH + SIZE_OF_WH_FAMILY))
-    {
-        *stream_id = (unsigned int)(mkl_brng_id - VSL_BRNG_WH);
-        mkl_brng_id = VSL_BRNG_WH;
+int irk_get_brng_and_stream_mkl(irk_state *state, unsigned int *stream_id) {
+  int i, mkl_brng_id = vslGetStreamStateBrng(state->stream);
+
+  if ((VSL_BRNG_MT2203 <= mkl_brng_id) &&
+      (mkl_brng_id < VSL_BRNG_MT2203 + SIZE_OF_MT2203_FAMILY)) {
+    *stream_id = (unsigned int)(mkl_brng_id - VSL_BRNG_MT2203);
+    mkl_brng_id = VSL_BRNG_MT2203;
+  } else if ((VSL_BRNG_WH <= mkl_brng_id) &&
+             (mkl_brng_id < VSL_BRNG_WH + SIZE_OF_WH_FAMILY)) {
+    *stream_id = (unsigned int)(mkl_brng_id - VSL_BRNG_WH);
+    mkl_brng_id = VSL_BRNG_WH;
+  }
+
+  for (i = 0; i < BRNG_KINDS; i++)
+    if (mkl_brng_id == brng_list[i]) {
+      *stream_id = (unsigned int)(0);
+      return i;
     }
 
-    for (i = 0; i < BRNG_KINDS; i++)
-        if (mkl_brng_id == brng_list[i])
-        {
-            *stream_id = (unsigned int)(0);
-            return i;
-        }
-
-    return -1;
+  return -1;
 }
 
-void irk_seed_mkl(irk_state *state, const unsigned int seed, const irk_brng_t brng, const unsigned int stream_id)
-{
-    VSLStreamStatePtr stream_loc;
-    int err = VSL_STATUS_OK;
-    const MKL_INT mkl_brng = brng_list[brng];
+void irk_seed_mkl(irk_state *state, const unsigned int seed,
+                  const irk_brng_t brng, const unsigned int stream_id) {
+  VSLStreamStatePtr stream_loc;
+  int err = VSL_STATUS_OK;
+  const MKL_INT mkl_brng = brng_list[brng];
 
-    if (NULL == state->stream)
-    {
-        err = vslNewStream(&(state->stream), mkl_brng + stream_id, seed);
+  if (NULL == state->stream) {
+    err = vslNewStream(&(state->stream), mkl_brng + stream_id, seed);
 
-        assert(err == VSL_STATUS_OK);
-    }
-    else
-    {
-        err = vslNewStream(&stream_loc, mkl_brng + stream_id, seed);
-        assert(err == VSL_STATUS_OK);
+    assert(err == VSL_STATUS_OK);
+  } else {
+    err = vslNewStream(&stream_loc, mkl_brng + stream_id, seed);
+    assert(err == VSL_STATUS_OK);
 
-        err = vslDeleteStream(&(state->stream));
-        assert(err == VSL_STATUS_OK);
+    err = vslDeleteStream(&(state->stream));
+    assert(err == VSL_STATUS_OK);
 
-        state->stream = stream_loc;
-    }
-    if (err)
-    {
-        printf(
-            "irk_seed_mkl: encountered error when calling Intel(R) MKL\n");
-    }
+    state->stream = stream_loc;
+  }
+  if (err) {
+    printf("irk_seed_mkl: encountered error when calling Intel(R) MKL\n");
+  }
 }
 
-void irk_seed_mkl_array(irk_state *state, const unsigned int seed_vec[], const int seed_len,
-                        const irk_brng_t brng, const unsigned int stream_id)
-{
-    VSLStreamStatePtr stream_loc;
-    int err = VSL_STATUS_OK;
-    const MKL_INT mkl_brng = brng_list[brng];
+void irk_seed_mkl_array(irk_state *state, const unsigned int seed_vec[],
+                        const int seed_len, const irk_brng_t brng,
+                        const unsigned int stream_id) {
+  VSLStreamStatePtr stream_loc;
+  int err = VSL_STATUS_OK;
+  const MKL_INT mkl_brng = brng_list[brng];
 
-    if (NULL == state->stream)
-    {
+  if (NULL == state->stream) {
 
-        err = vslNewStreamEx(&(state->stream), mkl_brng + stream_id, (MKL_INT)seed_len, seed_vec);
+    err = vslNewStreamEx(&(state->stream), mkl_brng + stream_id,
+                         (MKL_INT)seed_len, seed_vec);
 
-        assert(err == VSL_STATUS_OK);
-    }
-    else
-    {
+    assert(err == VSL_STATUS_OK);
+  } else {
 
-        err = vslNewStreamEx(&stream_loc, mkl_brng + stream_id, (MKL_INT)seed_len, seed_vec);
-        if (err == VSL_STATUS_OK)
-        {
+    err = vslNewStreamEx(&stream_loc, mkl_brng + stream_id, (MKL_INT)seed_len,
+                         seed_vec);
+    if (err == VSL_STATUS_OK) {
 
-            err = vslDeleteStream(&(state->stream));
-            assert(err == VSL_STATUS_OK);
+      err = vslDeleteStream(&(state->stream));
+      assert(err == VSL_STATUS_OK);
 
-            state->stream = stream_loc;
-        }
+      state->stream = stream_loc;
     }
+  }
 }
 
-irk_error
-irk_randomseed_mkl(irk_state *state, const irk_brng_t brng, const unsigned int stream_id)
-{
+irk_error irk_randomseed_mkl(irk_state *state, const irk_brng_t brng,
+                             const unsigned int stream_id) {
 #ifndef _WIN32
-    struct timeval tv;
+  struct timeval tv;
 #else
-    struct _timeb tv;
+  struct _timeb tv;
 #endif
-    int no_err;
-    unsigned int *seed_array;
-    size_t buf_size = 624;
-    size_t seed_array_len = buf_size * sizeof(unsigned int);
-
-    seed_array = (unsigned int *)malloc(seed_array_len);
-    no_err = irk_devfill(seed_array, seed_array_len, 0) == RK_NOERR;
-
-    if (no_err)
-    {
-        /* ensures non-zero seed */
-        seed_array[0] |= 0x80000000UL;
-        irk_seed_mkl_array(state, seed_array, buf_size, brng, stream_id);
-        free(seed_array);
-
-        return RK_NOERR;
-    }
-    else
-    {
-        free(seed_array);
-    }
+  int no_err;
+  unsigned int *seed_array;
+  size_t buf_size = 624;
+  size_t seed_array_len = buf_size * sizeof(unsigned int);
+
+  seed_array = (unsigned int *)malloc(seed_array_len);
+  no_err = irk_devfill(seed_array, seed_array_len, 0) == RK_NOERR;
+
+  if (no_err) {
+    /* ensures non-zero seed */
+    seed_array[0] |= 0x80000000UL;
+    irk_seed_mkl_array(state, seed_array, buf_size, brng, stream_id);
+    free(seed_array);
+
+    return RK_NOERR;
+  } else {
+    free(seed_array);
+  }
 
 #ifndef _WIN32
-    gettimeofday(&tv, NULL);
-    irk_seed_mkl(state, irk_hash(getpid()) ^ irk_hash(tv.tv_sec) ^ irk_hash(tv.tv_usec) ^ irk_hash(clock()), brng, stream_id);
+  gettimeofday(&tv, NULL);
+  irk_seed_mkl(state,
+               irk_hash(getpid()) ^ irk_hash(tv.tv_sec) ^ irk_hash(tv.tv_usec) ^
+                   irk_hash(clock()),
+               brng, stream_id);
 #else
-    _FTIME(&tv);
-    irk_seed_mkl(state, irk_hash(tv.time) ^ irk_hash(tv.millitm) ^ irk_hash(clock()), brng, stream_id);
+  _FTIME(&tv);
+  irk_seed_mkl(state,
+               irk_hash(tv.time) ^ irk_hash(tv.millitm) ^ irk_hash(clock()),
+               brng, stream_id);
 #endif
 
-    return RK_ENODEV;
+  return RK_ENODEV;
 }
 
 /*
  *  Python needs this to determine the amount memory to allocate for the buffer
  */
-int irk_get_stream_size(irk_state *state)
-{
-    return vslGetStreamSize(state->stream);
+int irk_get_stream_size(irk_state *state) {
+  return vslGetStreamSize(state->stream);
 }
 
-void irk_get_state_mkl(irk_state *state, char *buf)
-{
-    int err = vslSaveStreamM(state->stream, buf);
+void irk_get_state_mkl(irk_state *state, char *buf) {
+  int err = vslSaveStreamM(state->stream, buf);
 
-    if (err != VSL_STATUS_OK)
-    {
-        assert(err == VSL_STATUS_OK);
-        printf(
-            "irk_get_state_mkl encountered error when calling Intel(R) MKL\n");
-    }
+  if (err != VSL_STATUS_OK) {
+    assert(err == VSL_STATUS_OK);
+    printf("irk_get_state_mkl encountered error when calling Intel(R) MKL\n");
+  }
 }
 
-int irk_set_state_mkl(irk_state *state, char *buf)
-{
-    int err = vslLoadStreamM(&(state->stream), buf);
+int irk_set_state_mkl(irk_state *state, char *buf) {
+  int err = vslLoadStreamM(&(state->stream), buf);
 
-    return (err == VSL_STATUS_OK) ? 0 : 1;
+  return (err == VSL_STATUS_OK) ? 0 : 1;
 }
 
-int irk_leapfrog_stream_mkl(irk_state *state, const MKL_INT k, const MKL_INT nstreams)
-{
-    int err;
+int irk_leapfrog_stream_mkl(irk_state *state, const MKL_INT k,
+                            const MKL_INT nstreams) {
+  int err;
 
-    err = vslLeapfrogStream(state->stream, k, nstreams);
+  err = vslLeapfrogStream(state->stream, k, nstreams);
 
-    switch (err)
-    {
-    case VSL_STATUS_OK:
-        return 0;
-    case VSL_RNG_ERROR_LEAPFROG_UNSUPPORTED:
-        return 1;
-    default:
-        return -1;
-    }
+  switch (err) {
+  case VSL_STATUS_OK:
+    return 0;
+  case VSL_RNG_ERROR_LEAPFROG_UNSUPPORTED:
+    return 1;
+  default:
+    return -1;
+  }
 }
 
-int irk_skipahead_stream_mkl(irk_state *state, const long long int nskip)
-{
-    int err;
+int irk_skipahead_stream_mkl(irk_state *state, const long long int nskip) {
+  int err;
 
-    err = vslSkipAheadStream(state->stream, nskip);
+  err = vslSkipAheadStream(state->stream, nskip);
 
-    switch (err)
-    {
-    case VSL_STATUS_OK:
-        return 0;
-    case VSL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED:
-        return 1;
-    default:
-        return -1;
-    }
+  switch (err) {
+  case VSL_STATUS_OK:
+    return 0;
+  case VSL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED:
+    return 1;
+  default:
+    return -1;
+  }
 }
 
 /* Thomas Wang 32 bits integer hash function */
-static unsigned long
-irk_hash(unsigned long key)
-{
-    key += ~(key << 15);
-    key ^= (key >> 10);
-    key += (key << 3);
-    key ^= (key >> 6);
-    key += ~(key << 11);
-    key ^= (key >> 16);
-    return key;
+static unsigned long irk_hash(unsigned long key) {
+  key += ~(key << 15);
+  key ^= (key >> 10);
+  key += (key << 3);
+  key ^= (key >> 6);
+  key += ~(key << 11);
+  key ^= (key >> 16);
+  return key;
 }
 
-void irk_random_vec(irk_state *state, const int len, unsigned int *res)
-{
-    viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, state->stream, len, res);
+void irk_random_vec(irk_state *state, const int len, unsigned int *res) {
+  viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, state->stream, len, res);
 }
 
-void irk_fill(void *buffer, size_t size, irk_state *state)
-{
-    unsigned int r;
-    unsigned char *buf = reinterpret_cast<unsigned char *>(buffer);
-    int err, len;
-
-    /* len = size / 4 */
-    len = (size >> 2);
-    err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, len, (unsigned int *)buf);
-    assert(err == VSL_STATUS_OK);
-
-    /* size = size % 4 */
-    size &= 0x03;
-    if (!size)
-    {
-        return;
-    }
-
-    buf += (len << 2);
-    err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, 1, &r);
-    assert(err == VSL_STATUS_OK);
-
-    for (; size; r >>= 8, size--)
-    {
-        *(buf++) = (unsigned char)(r & 0xFF);
-    }
-    if (err)
-        printf("irk_fill: error encountered when calling Intel(R) MKL \n");
+void irk_fill(void *buffer, size_t size, irk_state *state) {
+  unsigned int r;
+  unsigned char *buf = reinterpret_cast<unsigned char *>(buffer);
+  int err, len;
+
+  /* len = size / 4 */
+  len = (size >> 2);
+  err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, len,
+                           (unsigned int *)buf);
+  assert(err == VSL_STATUS_OK);
+
+  /* size = size % 4 */
+  size &= 0x03;
+  if (!size) {
+    return;
+  }
+
+  buf += (len << 2);
+  err = viRngUniformBits32(VSL_RNG_METHOD_UNIFORMBITS32_STD, state->stream, 1,
+                           &r);
+  assert(err == VSL_STATUS_OK);
+
+  for (; size; r >>= 8, size--) {
+    *(buf++) = (unsigned char)(r & 0xFF);
+  }
+  if (err)
+    printf("irk_fill: error encountered when calling Intel(R) MKL \n");
 }
 
-irk_error
-irk_devfill(void *buffer, size_t size, int strong)
-{
+irk_error irk_devfill(void *buffer, size_t size, int strong) {
 #ifndef _WIN32
-    FILE *rfile;
-    int done;
-
-    if (strong)
-    {
-        rfile = fopen(RK_DEV_RANDOM, "rb");
-    }
-    else
-    {
-        rfile = fopen(RK_DEV_URANDOM, "rb");
-    }
-    if (rfile == NULL)
-    {
-        return RK_ENODEV;
-    }
-    done = fread(buffer, size, 1, rfile);
-    fclose(rfile);
-    if (done)
-    {
-        return RK_NOERR;
-    }
+  FILE *rfile;
+  int done;
+
+  if (strong) {
+    rfile = fopen(RK_DEV_RANDOM, "rb");
+  } else {
+    rfile = fopen(RK_DEV_URANDOM, "rb");
+  }
+  if (rfile == NULL) {
+    return RK_ENODEV;
+  }
+  done = fread(buffer, size, 1, rfile);
+  fclose(rfile);
+  if (done) {
+    return RK_NOERR;
+  }
 #else
 
 #ifndef RK_NO_WINCRYPT
-    HCRYPTPROV hCryptProv;
-    BOOL done;
-
-    if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
-                             CRYPT_VERIFYCONTEXT) ||
-        !hCryptProv)
-    {
-        return RK_ENODEV;
-    }
-    done = CryptGenRandom(hCryptProv, size, (unsigned char *)buffer);
-    CryptReleaseContext(hCryptProv, 0);
-    if (done)
-    {
-        return RK_NOERR;
-    }
+  HCRYPTPROV hCryptProv;
+  BOOL done;
+
+  if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
+                           CRYPT_VERIFYCONTEXT) ||
+      !hCryptProv) {
+    return RK_ENODEV;
+  }
+  done = CryptGenRandom(hCryptProv, size, (unsigned char *)buffer);
+  CryptReleaseContext(hCryptProv, 0);
+  if (done) {
+    return RK_NOERR;
+  }
 #endif
 
 #endif
-    return RK_ENODEV;
+  return RK_ENODEV;
 }
 
-irk_error
-irk_altfill(void *buffer, size_t size, int strong, irk_state *state)
-{
-    irk_error err;
+irk_error irk_altfill(void *buffer, size_t size, int strong, irk_state *state) {
+  irk_error err;
 
-    err = irk_devfill(buffer, size, strong);
-    if (err)
-    {
-        irk_fill(buffer, size, state);
-    }
-    return err;
+  err = irk_devfill(buffer, size, strong);
+  if (err) {
+    irk_fill(buffer, size, state);
+  }
+  return err;
 }
diff --git a/mkl_random/src/randomkit.h b/mkl_random/src/randomkit.h
index e01f187..dcf3a09 100644
--- a/mkl_random/src/randomkit.h
+++ b/mkl_random/src/randomkit.h
@@ -25,43 +25,43 @@
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+// clang-format off
 #include <stddef.h>
 #include "mkl_vsl.h"
 #include "Python.h"
 #include "numpy/npy_common.h"
+// clang-format on
 
 #ifndef _I_RANDOMKIT_
 #define _I_RANDOMKIT_
 
-typedef struct irk_state_
-{
-    VSLStreamStatePtr stream;
+typedef struct irk_state_ {
+  VSLStreamStatePtr stream;
 } irk_state;
 
 typedef enum {
-    RK_NOERR = 0, /* no error */
-    RK_ENODEV = 1, /* no RK_DEV_RANDOM device */
-    RK_ERR_MAX = 2
+  RK_NOERR = 0,  /* no error */
+  RK_ENODEV = 1, /* no RK_DEV_RANDOM device */
+  RK_ERR_MAX = 2
 } irk_error;
 
 /* if changing this, also adjust brng_list[BRNG_KINDS] in randomkit.c */
 #define BRNG_KINDS 11
 
 typedef enum {
-    MT19937       = 0,
-    SFMT19937     = 1,
-    WH            = 2,
-    MT2203        = 3,
-    MCG31         = 4,
-    R250          = 5,
-    MRG32K3A      = 6,
-    MCG59         = 7,
-    PHILOX4X32X10 = 8,
-    NONDETERM     = 9,
-    ARS5          = 10
+  MT19937 = 0,
+  SFMT19937 = 1,
+  WH = 2,
+  MT2203 = 3,
+  MCG31 = 4,
+  R250 = 5,
+  MRG32K3A = 6,
+  MCG59 = 7,
+  PHILOX4X32X10 = 8,
+  NONDETERM = 9,
+  ARS5 = 10
 } irk_brng_t;
 
-
 /* error strings */
 extern const char *irk_strerror[RK_ERR_MAX];
 
@@ -76,7 +76,6 @@ extern "C" {
  * Initialize the RNG state using the given seed.
  */
 
-
 /*
  * Initialize the RNG state using a random seed.
  * Uses /dev/random or, when unavailable, the clock (see randomkit.c).
@@ -90,18 +89,24 @@ extern "C" {
  * Initialize the RNG state using the given seed.
  */
 extern void irk_dealloc_stream(irk_state *state);
-extern void irk_seed_mkl(irk_state *state, const unsigned int seed, const irk_brng_t brng, const unsigned int stream_id);
+extern void irk_seed_mkl(irk_state *state, const unsigned int seed,
+                         const irk_brng_t brng, const unsigned int stream_id);
 extern void irk_seed_mkl_array(irk_state *state, const unsigned int *seed_vec,
-    const int seed_len, const irk_brng_t brng, const unsigned int stream_id);
-extern irk_error irk_randomseed_mkl(irk_state *state, const irk_brng_t brng, const unsigned int stream_id);
+                               const int seed_len, const irk_brng_t brng,
+                               const unsigned int stream_id);
+extern irk_error irk_randomseed_mkl(irk_state *state, const irk_brng_t brng,
+                                    const unsigned int stream_id);
 extern int irk_get_stream_size(irk_state *state);
-extern void irk_get_state_mkl(irk_state *state, char * buf);
-extern int irk_set_state_mkl(irk_state *state, char * buf);
+extern void irk_get_state_mkl(irk_state *state, char *buf);
+extern int irk_set_state_mkl(irk_state *state, char *buf);
 extern int irk_get_brng_mkl(irk_state *state);
-extern int irk_get_brng_and_stream_mkl(irk_state *state, unsigned int* stream_id);
+extern int irk_get_brng_and_stream_mkl(irk_state *state,
+                                       unsigned int *stream_id);
 
-extern int irk_leapfrog_stream_mkl(irk_state *state, const int k, const int nstreams);
-extern int irk_skipahead_stream_mkl(irk_state *state, const long long int nskip);
+extern int irk_leapfrog_stream_mkl(irk_state *state, const int k,
+                                   const int nstreams);
+extern int irk_skipahead_stream_mkl(irk_state *state,
+                                    const long long int nskip);
 
 /*
  * fill the buffer with size random bytes
@@ -125,7 +130,7 @@ extern irk_error irk_devfill(void *buffer, size_t size, int strong);
  * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is
  */
 extern irk_error irk_altfill(void *buffer, size_t size, int strong,
-                            irk_state *state);
+                             irk_state *state);
 
 #ifdef __cplusplus
 }
diff --git a/mkl_random/tests/test_random.py b/mkl_random/tests/test_random.py
index f961ac2..d01d289 100644
--- a/mkl_random/tests/test_random.py
+++ b/mkl_random/tests/test_random.py
@@ -24,25 +24,34 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import sys
+import warnings
 from typing import NamedTuple
 
 import numpy as np
-import mkl_random as rnd
+import pytest
 from numpy.testing import (
-        assert_, assert_raises, assert_equal,
-        suppress_warnings, assert_no_warnings)
-import sys
-import warnings
+    assert_,
+    assert_equal,
+    assert_no_warnings,
+    assert_raises,
+    suppress_warnings,
+)
 
-import pytest
+import mkl_random as rnd
 
 
 def test_zero_scalar_seed():
     evs_zero_seed = {
-        'MT19937': 844, 'SFMT19937': 857,
-        'WH': 0,        'MT2203': 890,
-        'MCG31': 0,     'R250': 229,
-        'MRG32K3A': 0,  'MCG59': 0}
+        "MT19937": 844,
+        "SFMT19937": 857,
+        "WH": 0,
+        "MT2203": 890,
+        "MCG31": 0,
+        "R250": 229,
+        "MRG32K3A": 0,
+        "MCG59": 0,
+    }
     for brng_algo in evs_zero_seed:
         s = rnd.MKLRandomState(0, brng=brng_algo)
         assert_equal(s.get_state()[0], brng_algo)
@@ -51,10 +60,15 @@ def test_zero_scalar_seed():
 
 def test_max_scalar_seed():
     evs_max_seed = {
-        'MT19937': 635,  'SFMT19937': 25,
-        'WH': 100,       'MT2203': 527,
-        'MCG31': 0,      'R250': 229,
-        'MRG32K3A': 961, 'MCG59': 0}
+        "MT19937": 635,
+        "SFMT19937": 25,
+        "WH": 100,
+        "MT2203": 527,
+        "MCG31": 0,
+        "R250": 229,
+        "MRG32K3A": 961,
+        "MCG59": 0,
+    }
     for brng_algo in evs_max_seed:
         s = rnd.MKLRandomState(4294967295, brng=brng_algo)
         assert_equal(s.get_state()[0], brng_algo)
@@ -62,13 +76,13 @@ def test_max_scalar_seed():
 
 
 def test_array_seed():
-    s = rnd.MKLRandomState(range(10), brng='MT19937')
+    s = rnd.MKLRandomState(range(10), brng="MT19937")
     assert_equal(s.randint(1000), 410)
-    s = rnd.MKLRandomState(np.arange(10), brng='MT19937')
+    s = rnd.MKLRandomState(np.arange(10), brng="MT19937")
     assert_equal(s.randint(1000), 410)
-    s = rnd.MKLRandomState([0], brng='MT19937')
+    s = rnd.MKLRandomState([0], brng="MT19937")
     assert_equal(s.randint(1000), 844)
-    s = rnd.MKLRandomState([4294967295], brng='MT19937')
+    s = rnd.MKLRandomState([4294967295], brng="MT19937")
     assert_equal(s.randint(1000), 635)
 
 
@@ -88,7 +102,7 @@ def test_invalid_array_seed():
 
 
 def test_non_deterministic_brng():
-    rs = rnd.MKLRandomState(brng='nondeterministic')
+    rs = rnd.MKLRandomState(brng="nondeterministic")
     v = rs.rand(10)
     assert isinstance(v, np.ndarray)
     v = rs.randint(0, 10)
@@ -96,8 +110,8 @@ def test_non_deterministic_brng():
 
 
 def test_binomial_n_zero():
-    zeros = np.zeros(2, dtype='int32')
-    for p in [0, .5, 1]:
+    zeros = np.zeros(2, dtype="int32")
+    for p in [0, 0.5, 1]:
         assert rnd.binomial(0, p) == 0
         actual = rnd.binomial(zeros, p)
         np.testing.assert_allclose(actual, zeros)
@@ -211,8 +225,15 @@ class RandIntData(NamedTuple):
 def randint():
     rfunc_method = rnd.randint
     integral_dtypes = [
-        np.bool_, np.int8, np.uint8, np.int16, np.uint16,
-        np.int32, np.uint32, np.int64, np.uint64
+        np.bool_,
+        np.int8,
+        np.uint8,
+        np.int16,
+        np.uint16,
+        np.int32,
+        np.uint32,
+        np.int64,
+        np.uint64,
     ]
     return RandIntData(rfunc_method, integral_dtypes)
 
@@ -239,7 +260,7 @@ def test_randint_rng_zero_and_extremes(randint):
         assert_equal(randint.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt)
         tgt = lbnd
         assert_equal(randint.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt)
-        tgt = lbnd + ((ubnd - lbnd)//2)
+        tgt = lbnd + ((ubnd - lbnd) // 2)
         assert_equal(randint.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt)
 
 
@@ -251,31 +272,34 @@ def test_randint_in_bounds_fuzz(randint):
             vals = randint.rfunc(2, ubnd, size=2**16, dtype=dt)
             assert_(vals.max() < ubnd)
             assert_(vals.min() >= 2)
-    vals = randint.rfunc(0, 2, size=2**16, dtype='bool')
-    assert (vals.max() < 2)
-    assert (vals.min() >= 0)
+    vals = randint.rfunc(0, 2, size=2**16, dtype="bool")
+    assert vals.max() < 2
+    assert vals.min() >= 0
 
 
 def test_randint_repeatability(randint):
     import hashlib
+
     # We use a md5 hash of generated sequences of 1000 samples
     # in the range [0, 6) for all but np.bool, where the range
     # is [0, 2). Hashes are for little endian numbers.
-    tgt = {'bool': '4fee98a6885457da67c39331a9ec336f',
-           'int16': '80a5ff69c315ab6f80b03da1d570b656',
-           'int32': '15a3c379b6c7b0f296b162194eab68bc',
-           'int64': 'ea9875f9334c2775b00d4976b85a1458',
-           'int8': '0f56333af47de94930c799806158a274',
-           'uint16': '80a5ff69c315ab6f80b03da1d570b656',
-           'uint32': '15a3c379b6c7b0f296b162194eab68bc',
-           'uint64': 'ea9875f9334c2775b00d4976b85a1458',
-           'uint8': '0f56333af47de94930c799806158a274'}
+    tgt = {
+        "bool": "4fee98a6885457da67c39331a9ec336f",
+        "int16": "80a5ff69c315ab6f80b03da1d570b656",
+        "int32": "15a3c379b6c7b0f296b162194eab68bc",
+        "int64": "ea9875f9334c2775b00d4976b85a1458",
+        "int8": "0f56333af47de94930c799806158a274",
+        "uint16": "80a5ff69c315ab6f80b03da1d570b656",
+        "uint32": "15a3c379b6c7b0f296b162194eab68bc",
+        "uint64": "ea9875f9334c2775b00d4976b85a1458",
+        "uint8": "0f56333af47de94930c799806158a274",
+    }
 
     for dt in randint.itype[1:]:
-        rnd.seed(1234, brng='MT19937')
+        rnd.seed(1234, brng="MT19937")
 
         # view as little endian for hash
-        if sys.byteorder == 'little':
+        if sys.byteorder == "little":
             val = randint.rfunc(0, 6, size=1000, dtype=dt)
         else:
             val = randint.rfunc(0, 6, size=1000, dtype=dt).byteswap()
@@ -284,10 +308,10 @@ def test_randint_repeatability(randint):
         assert tgt[np.dtype(dt).name] == res
 
     # bools do not depend on endianness
-    rnd.seed(1234, brng='MT19937')
-    val = randint.rfunc(0, 2, size=1000, dtype='bool').view(np.int8)
+    rnd.seed(1234, brng="MT19937")
+    val = randint.rfunc(0, 2, size=1000, dtype="bool").view(np.int8)
     res = hashlib.md5(val).hexdigest()
-    assert (tgt[np.dtype('bool').name] == res)
+    assert tgt[np.dtype("bool").name] == res
 
 
 def test_randint_respect_dtype_singleton(randint):
@@ -305,8 +329,8 @@ def test_randint_respect_dtype_singleton(randint):
 
         # gh-7284: Ensure that we get Python data types
         sample = randint.rfunc(lbnd, ubnd, dtype=dt)
-        assert not hasattr(sample, 'dtype')
-        assert (type(sample) is dt)
+        assert not hasattr(sample, "dtype")
+        assert type(sample) is dt
 
 
 class RandomDistData(NamedTuple):
@@ -316,7 +340,7 @@ class RandomDistData(NamedTuple):
 
 @pytest.fixture
 def randomdist():
-    return RandomDistData(seed=1234567890, brng='SFMT19937')
+    return RandomDistData(seed=1234567890, brng="SFMT19937")
 
 
 # Make sure the random distribution returns the correct value for a
@@ -326,21 +350,30 @@ def randomdist():
 # indistinguishable to the end user, that is no computationally feasible
 # statistical experiment can detect the difference.
 
+
 def test_randomdist_rand(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.rand(3, 2)
-    desired = np.array([[0.9838694715872407, 0.019142669625580311],
-                        [0.1767608025111258, 0.70966427633538842],
-                        [0.518550637178123, 0.98780936631374061]])
+    desired = np.array(
+        [
+            [0.9838694715872407, 0.019142669625580311],
+            [0.1767608025111258, 0.70966427633538842],
+            [0.518550637178123, 0.98780936631374061],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_randn(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.randn(3, 2)
-    desired = np.array([[2.1411609928913298, -2.0717866791744819],
-                        [-0.92778018318550248, 0.55240420724917727],
-                        [0.04651632135517459, 2.2510674226058036]])
+    desired = np.array(
+        [
+            [2.1411609928913298, -2.0717866791744819],
+            [-0.92778018318550248, 0.55240420724917727],
+            [0.04651632135517459, 2.2510674226058036],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10)
 
 
@@ -370,9 +403,9 @@ def test_random_integers_max_int():
     # to generate this integer.
     with suppress_warnings() as sup:
         w = sup.record(DeprecationWarning)
-        actual = rnd.random_integers(np.iinfo('l').max, np.iinfo('l').max)
+        actual = rnd.random_integers(np.iinfo("l").max, np.iinfo("l").max)
         assert len(w) == 1
-    desired = np.iinfo('l').max
+    desired = np.iinfo("l").max
     np.testing.assert_equal(actual, desired)
 
 
@@ -382,24 +415,28 @@ def test_random_integers_deprecated():
 
         # DeprecationWarning raised with high == None
         assert_raises(
-            DeprecationWarning, rnd.random_integers, np.iinfo('l').max
+            DeprecationWarning, rnd.random_integers, np.iinfo("l").max
         )
 
         # DeprecationWarning raised with high != None
         assert_raises(
             DeprecationWarning,
             rnd.random_integers,
-            np.iinfo('l').max,
-            np.iinfo('l').max
+            np.iinfo("l").max,
+            np.iinfo("l").max,
         )
 
 
 def test_randomdist_random_sample(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.random_sample((3, 2))
-    desired = np.array([[0.9838694715872407, 0.01914266962558031],
-                        [0.1767608025111258, 0.7096642763353884],
-                        [0.518550637178123, 0.9878093663137406]])
+    desired = np.array(
+        [
+            [0.9838694715872407, 0.01914266962558031],
+            [0.1767608025111258, 0.7096642763353884],
+            [0.518550637178123, 0.9878093663137406],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
@@ -433,15 +470,15 @@ def test_randomdist_choice_nonuniform_noreplace(randomdist):
 
 def test_randomdist_choice_noninteger(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.choice(['a', 'b', 'c', 'd'], 4)
-    desired = np.array(['d', 'a', 'a', 'c'])
+    actual = rnd.choice(["a", "b", "c", "d"], 4)
+    desired = np.array(["d", "a", "a", "c"])
     np.testing.assert_array_equal(actual, desired)
 
 
 def test_choice_exceptions():
     sample = rnd.choice
     pytest.raises(ValueError, sample, -1, 3)
-    pytest.raises(ValueError, sample, 3., 3)
+    pytest.raises(ValueError, sample, 3.0, 3)
     pytest.raises(ValueError, sample, [[1, 2], [3, 4]], 3)
     pytest.raises(ValueError, sample, [], 3)
     pytest.raises(
@@ -451,9 +488,7 @@ def test_choice_exceptions():
     pytest.raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1])
     pytest.raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4])
     pytest.raises(ValueError, sample, [1, 2, 3], 4, replace=False)
-    pytest.raises(
-        ValueError, sample, [1, 2, 3], 2, replace=False, p=[1, 0, 0]
-    )
+    pytest.raises(ValueError, sample, [1, 2, 3], 2, replace=False, p=[1, 0, 0])
 
 
 def test_choice_return_shape():
@@ -471,7 +506,7 @@ def test_choice_return_shape():
     assert rnd.choice(arr, replace=True) is a
 
     # Check 0-d array
-    s = tuple()
+    s = ()
     assert not np.isscalar(rnd.choice(2, s, replace=True))
     assert not np.isscalar(rnd.choice(2, s, replace=False))
     assert not np.isscalar(rnd.choice(2, s, replace=True, p=p))
@@ -496,27 +531,28 @@ def test_choice_return_shape():
 def test_randomdist_bytes(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.bytes(10)
-    desired = b'\xa4\xde\xde{\xb4\x88\xe6\x84*2'
+    desired = b"\xa4\xde\xde{\xb4\x88\xe6\x84*2"
     np.testing.assert_equal(actual, desired)
 
 
 def test_randomdist_shuffle(randomdist):
     # Test lists, arrays (of various dtypes), and multidimensional versions
     # of both, c-contiguous or not:
-    for conv in [lambda x: np.array([]),
-                 lambda x: x,
-                 lambda x: np.asarray(x).astype(np.int8),
-                 lambda x: np.asarray(x).astype(np.float32),
-                 lambda x: np.asarray(x).astype(np.complex64),
-                 lambda x: np.asarray(x).astype(object),
-                 lambda x: [(i, i) for i in x],
-                 lambda x: np.asarray([[i, i] for i in x]),
-                 lambda x: np.vstack([x, x]).T,
-                 # gh-4270
-                 lambda x: np.asarray(
-                            [(i, i) for i in x],
-                            [("a", object, (1,)),
-                             ("b", np.int32, (1,))])]:
+    for conv in [
+        lambda x: np.array([]),
+        lambda x: x,
+        lambda x: np.asarray(x).astype(np.int8),
+        lambda x: np.asarray(x).astype(np.float32),
+        lambda x: np.asarray(x).astype(np.complex64),
+        lambda x: np.asarray(x).astype(object),
+        lambda x: [(i, i) for i in x],
+        lambda x: np.asarray([[i, i] for i in x]),
+        lambda x: np.vstack([x, x]).T,
+        # gh-4270
+        lambda x: np.asarray(
+            [(i, i) for i in x], [("a", object, (1,)), ("b", np.int32, (1,))]
+        ),
+    ]:
         rnd.seed(randomdist.seed, brng=randomdist.brng)
         alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0])
         rnd.shuffle(alist)
@@ -531,28 +567,29 @@ def test_shuffle_masked():
     b = np.ma.masked_values(np.arange(20) % 3 - 1, -1)
     a_orig = a.copy()
     b_orig = b.copy()
-    for i in range(50):
+    for _ in range(50):
         rnd.shuffle(a)
-        assert_equal(
-            sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask]))
+        assert_equal(sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask]))
         rnd.shuffle(b)
-        assert_equal(
-            sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask]))
+        assert_equal(sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask]))
 
 
 def test_randomdist_beta(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.beta(.1, .9, size=(3, 2))
+    actual = rnd.beta(0.1, 0.9, size=(3, 2))
     desired = np.array(
-        [[0.9856952034381025, 4.35869375658114e-08],
+        [
+            [0.9856952034381025, 4.35869375658114e-08],
             [0.0014230232791189966, 1.4981856288121975e-06],
-            [1.426135763875603e-06, 4.5801786040477326e-07]])
+            [1.426135763875603e-06, 4.5801786040477326e-07],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_binomial(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.binomial(100.123, .456, size=(3, 2))
+    actual = rnd.binomial(100.123, 0.456, size=(3, 2))
     desired = np.array([[43, 48], [55, 48], [46, 53]])
     np.testing.assert_array_equal(actual, desired)
 
@@ -560,9 +597,13 @@ def test_randomdist_binomial(randomdist):
 def test_randomdist_chisquare(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.chisquare(50, size=(3, 2))
-    desired = np.array([[50.955833609920589, 50.133178918244099],
-                        [61.513615847062013, 50.757127871422448],
-                        [52.79816819717081, 49.973023331993552]])
+    desired = np.array(
+        [
+            [50.955833609920589, 50.133178918244099],
+            [61.513615847062013, 50.757127871422448],
+            [52.79816819717081, 49.973023331993552],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
@@ -570,12 +611,22 @@ def test_randomdist_dirichlet(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     alpha = np.array([51.72840233779265162, 39.74494232180943953])
     actual = rnd.dirichlet(alpha, size=(3, 2))
-    desired = np.array([[[0.6332947001908874, 0.36670529980911254],
-                         [0.5376828907571894, 0.4623171092428107]],
-                        [[0.6835615930093024, 0.3164384069906976],
-                         [0.5452378139016114, 0.45476218609838875]],
-                        [[0.6498494402738553, 0.3501505597261446],
-                         [0.5622024400324822, 0.43779755996751785]]])
+    desired = np.array(
+        [
+            [
+                [0.6332947001908874, 0.36670529980911254],
+                [0.5376828907571894, 0.4623171092428107],
+            ],
+            [
+                [0.6835615930093024, 0.3164384069906976],
+                [0.5452378139016114, 0.45476218609838875],
+            ],
+            [
+                [0.6498494402738553, 0.3501505597261446],
+                [0.5622024400324822, 0.43779755996751785],
+            ],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=4e-10, rtol=4e-10)
 
 
@@ -595,43 +646,59 @@ def test_dirichlet_size():
 def test_randomdist_exponential(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.exponential(1.1234, size=(3, 2))
-    desired = np.array([[0.01826877748252199, 4.4439855151117005],
-                        [1.9468048583654507, 0.38528493864979607],
-                        [0.7377565464231758, 0.013779117663987912]])
+    desired = np.array(
+        [
+            [0.01826877748252199, 4.4439855151117005],
+            [1.9468048583654507, 0.38528493864979607],
+            [0.7377565464231758, 0.013779117663987912],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_f(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.f(12, 77, size=(3, 2))
-    desired = np.array([[1.325076177478387, 0.8670927327120197],
-                        [2.1190792007836827, 0.9095296301824258],
-                        [1.4953697422236187, 0.9547125618834837]])
+    desired = np.array(
+        [
+            [1.325076177478387, 0.8670927327120197],
+            [2.1190792007836827, 0.9095296301824258],
+            [1.4953697422236187, 0.9547125618834837],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-8, rtol=1e-9)
 
 
 def test_randomdist_gamma(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.gamma(5, 3, size=(3, 2))
-    desired = np.array([[15.073510060334929, 14.525495858042685],
-                        [22.73897210140115, 14.94044782480266],
-                        [16.327929995271095, 14.419692564592896]])
+    desired = np.array(
+        [
+            [15.073510060334929, 14.525495858042685],
+            [22.73897210140115, 14.94044782480266],
+            [16.327929995271095, 14.419692564592896],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdsit_geometric(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.geometric(.123456789, size=(3, 2))
+    actual = rnd.geometric(0.123456789, size=(3, 2))
     desired = np.array([[0, 30], [13, 2], [4, 0]])
     np.testing.assert_array_equal(actual, desired)
 
 
 def test_randomdist_gumbel(randomdist):
     rnd.seed(randomdist.seed, randomdist.brng)
-    actual = rnd.gumbel(loc=.123456789, scale=2.0, size=(3, 2))
-    desired = np.array([[-8.114386462751979, 2.873840411460178],
-                        [1.2231161758452016, -2.0168070493213532],
-                        [-0.7175455966332102, -8.678464904504784]])
+    actual = rnd.gumbel(loc=0.123456789, scale=2.0, size=(3, 2))
+    desired = np.array(
+        [
+            [-8.114386462751979, 2.873840411460178],
+            [1.2231161758452016, -2.0168070493213532],
+            [-0.7175455966332102, -8.678464904504784],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
@@ -662,60 +729,80 @@ def test_randomdist_hypergeometric(randomdist):
 
 def test_randomdist_laplace(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.laplace(loc=.123456789, scale=2.0, size=(3, 2))
-    desired = np.array([[0.15598087210935016, -3.3424589282252994],
-                        [-1.189978401356375, 3.0607925598732253],
-                        [0.0030946589024587745, 3.14795824463997]])
+    actual = rnd.laplace(loc=0.123456789, scale=2.0, size=(3, 2))
+    desired = np.array(
+        [
+            [0.15598087210935016, -3.3424589282252994],
+            [-1.189978401356375, 3.0607925598732253],
+            [0.0030946589024587745, 3.14795824463997],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_logistic(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.logistic(loc=.123456789, scale=2.0, size=(3, 2))
-    desired = np.array([[8.345015961402696, -7.749557532940552],
-                        [-2.9534419690278444, 1.910964962531448],
-                        [0.2719300361499433, 8.913100396613983]])
+    actual = rnd.logistic(loc=0.123456789, scale=2.0, size=(3, 2))
+    desired = np.array(
+        [
+            [8.345015961402696, -7.749557532940552],
+            [-2.9534419690278444, 1.910964962531448],
+            [0.2719300361499433, 8.913100396613983],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_lognormal(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.lognormal(mean=.123456789, sigma=2.0, size=(3, 2))
-    desired = np.array([[81.92291750917155, 0.01795087229603931],
-                        [0.1769118704670423, 3.415299544410577],
-                        [1.2417099625339398, 102.0631392685238]])
+    actual = rnd.lognormal(mean=0.123456789, sigma=2.0, size=(3, 2))
+    desired = np.array(
+        [
+            [81.92291750917155, 0.01795087229603931],
+            [0.1769118704670423, 3.415299544410577],
+            [1.2417099625339398, 102.0631392685238],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-6, rtol=1e-10)
     actual = rnd.lognormal(
-        mean=.123456789, sigma=2.0, size=(3, 2),  method='Box-Muller2'
+        mean=0.123456789, sigma=2.0, size=(3, 2), method="Box-Muller2"
+    )
+    desired = np.array(
+        [
+            [0.2585388231094821, 0.43734953048924663],
+            [26.050836228611697, 26.76266237820882],
+            [0.24216420175675096, 0.2481945765083541],
+        ]
     )
-    desired = np.array([[0.2585388231094821, 0.43734953048924663],
-                        [26.050836228611697, 26.76266237820882],
-                        [0.24216420175675096, 0.2481945765083541]])
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_logseries(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.logseries(p=.923456789, size=(3, 2))
+    actual = rnd.logseries(p=0.923456789, size=(3, 2))
     desired = np.array([[18, 1], [1, 1], [5, 19]])
     np.testing.assert_array_equal(actual, desired)
 
 
 def test_randomdist_multinomial(randomdist):
     rs = rnd.MKLRandomState(randomdist.seed, brng=randomdist.brng)
-    actual = rs.multinomial(20, [1/6.]*6, size=(3, 2))
+    actual = rs.multinomial(20, [1 / 6.0] * 6, size=(3, 2))
     desired = np.full((3, 2), 20, dtype=actual.dtype)
     np.testing.assert_array_equal(actual.sum(axis=-1), desired)
-    expected = np.array([
-        [[6, 2, 1, 3, 2, 6], [7, 5, 1, 2, 3, 2]],
-        [[5, 1, 8, 3, 2, 1], [4, 6, 0, 4, 4, 2]],
-        [[6, 3, 1, 4, 4, 2], [3, 2, 4, 2, 1, 8]]], actual.dtype)
+    expected = np.array(
+        [
+            [[6, 2, 1, 3, 2, 6], [7, 5, 1, 2, 3, 2]],
+            [[5, 1, 8, 3, 2, 1], [4, 6, 0, 4, 4, 2]],
+            [[6, 3, 1, 4, 4, 2], [3, 2, 4, 2, 1, 8]],
+        ],
+        actual.dtype,
+    )
     np.testing.assert_array_equal(actual, expected)
 
 
 def test_randomdist_multivariate_normal(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    mean = (.123456789, 10)
+    mean = (0.123456789, 10)
     # Hmm... not even symmetric.
     cov = [[1, 0], [1, 0]]
     size = (3, 2)
@@ -723,12 +810,13 @@ def test_randomdist_multivariate_normal(randomdist):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", RuntimeWarning)
         actual = rnd.multivariate_normal(mean, cov, size)
-    desired = np.array([[[-2.42282709811266, 10.0],
-                         [1.2267795840027274, 10.0]],
-                        [[0.06813924868067336, 10.0],
-                         [1.001190462507746, 10.0]],
-                        [[-1.74157261455869, 10.0],
-                         [1.0400952859037553, 10.0]]])
+    desired = np.array(
+        [
+            [[-2.42282709811266, 10.0], [1.2267795840027274, 10.0]],
+            [[0.06813924868067336, 10.0], [1.001190462507746, 10.0]],
+            [[-1.74157261455869, 10.0], [1.0400952859037553, 10.0]],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
     # Check for default size, was raising deprecation warning
@@ -746,9 +834,7 @@ def test_randomdist_multivariate_normal(randomdist):
     pytest.warns(RuntimeWarning, rnd.multivariate_normal, mean, cov)
 
     # and that it doesn't warn with RuntimeWarning check_valid='ignore'
-    assert_no_warnings(
-        rnd.multivariate_normal, mean, cov, check_valid="ignore"
-    )
+    assert_no_warnings(rnd.multivariate_normal, mean, cov, check_valid="ignore")
 
     # and that it raises with RuntimeWarning check_valid='raises'
     assert_raises(
@@ -765,32 +851,40 @@ def test_randomdist_multivariate_normal(randomdist):
     assert_raises(
         ValueError, rnd.multivariate_normal, mean, cov, check_valid="other"
     )
-    assert_raises(
-        ValueError, rnd.multivariate_normal, np.zeros((2, 1, 1)), cov
-    )
+    assert_raises(ValueError, rnd.multivariate_normal, np.zeros((2, 1, 1)), cov)
     assert_raises(ValueError, rnd.multivariate_normal, mu, np.empty((3, 2)))
     assert_raises(ValueError, rnd.multivariate_normal, mu, np.eye(3))
 
 
 def test_randomdist_multinormal_cholesky(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    mean = (.123456789, 10)
+    mean = (0.123456789, 10)
     # lower-triangular cholesky matrix
     chol_mat = [[1, 0], [-0.5, 1]]
     size = (3, 2)
-    actual = rnd.multinormal_cholesky(mean, chol_mat, size, method='ICDF')
-    desired = np.array([[[2.26461778189133, 6.857632824379853],
-                         [-0.8043233941855025, 11.01629429884193]],
-                        [[0.1699731103551746, 12.227809261928217],
-                         [-0.6146263106001378, 9.893801873973892]],
-                        [[1.691753328795276, 10.797627196240155],
-                         [-0.647341237129921, 9.626899489691816]]])
+    actual = rnd.multinormal_cholesky(mean, chol_mat, size, method="ICDF")
+    desired = np.array(
+        [
+            [
+                [2.26461778189133, 6.857632824379853],
+                [-0.8043233941855025, 11.01629429884193],
+            ],
+            [
+                [0.1699731103551746, 12.227809261928217],
+                [-0.6146263106001378, 9.893801873973892],
+            ],
+            [
+                [1.691753328795276, 10.797627196240155],
+                [-0.647341237129921, 9.626899489691816],
+            ],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_negative_binomial(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.negative_binomial(n=100, p=.12345, size=(3, 2))
+    actual = rnd.negative_binomial(n=100, p=0.12345, size=(3, 2))
     desired = np.array([[667, 679], [677, 676], [779, 648]])
     np.testing.assert_array_equal(actual, desired)
 
@@ -798,61 +892,88 @@ def test_randomdist_negative_binomial(randomdist):
 def test_randomdist_noncentral_chisquare(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.noncentral_chisquare(df=5, nonc=5, size=(3, 2))
-    desired = np.array([[5.871334619375055, 8.756238913383225],
-                        [17.29576535176833, 3.9028417087862177],
-                        [5.1315133729432505, 9.942717979531027]])
+    desired = np.array(
+        [
+            [5.871334619375055, 8.756238913383225],
+            [17.29576535176833, 3.9028417087862177],
+            [5.1315133729432505, 9.942717979531027],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
-    actual = rnd.noncentral_chisquare(df=.5, nonc=.2, size=(3, 2))
-    desired = np.array([[0.0008971007339949436, 0.08948578998156566],
-                        [0.6721835871997511, 2.8892645287699352],
-                        [5.0858149962761007e-05, 1.7315797643658821]])
+    actual = rnd.noncentral_chisquare(df=0.5, nonc=0.2, size=(3, 2))
+    desired = np.array(
+        [
+            [0.0008971007339949436, 0.08948578998156566],
+            [0.6721835871997511, 2.8892645287699352],
+            [5.0858149962761007e-05, 1.7315797643658821],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_noncentral_f(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.noncentral_f(dfnum=5, dfden=2, nonc=1, size=(3, 2))
-    desired = np.array([[0.2216297348371284, 0.7632696724492449],
-                        [98.67664232828238, 0.9500319825372799],
-                        [0.3489618249246971, 1.5035633972571092]])
+    desired = np.array(
+        [
+            [0.2216297348371284, 0.7632696724492449],
+            [98.67664232828238, 0.9500319825372799],
+            [0.3489618249246971, 1.5035633972571092],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_normal(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.normal(loc=.123456789, scale=2.0, size=(3, 2))
-    desired = np.array([[4.405778774782659, -4.020116569348963],
-                        [-1.732103577371005, 1.2282652034983546],
-                        [0.21648943171034918, 4.625591634211608]])
+    actual = rnd.normal(loc=0.123456789, scale=2.0, size=(3, 2))
+    desired = np.array(
+        [
+            [4.405778774782659, -4.020116569348963],
+            [-1.732103577371005, 1.2282652034983546],
+            [0.21648943171034918, 4.625591634211608],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.normal(
-        loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller"
+        loc=0.123456789, scale=2.0, size=(3, 2), method="BoxMuller"
+    )
+    desired = np.array(
+        [
+            [0.16673479781277187, -3.4809986872165952],
+            [-0.05193761082535492, 3.249201213154922],
+            [-0.11915582299214138, 3.555636100927892],
+        ]
     )
-    desired = np.array([[0.16673479781277187, -3.4809986872165952],
-                        [-0.05193761082535492, 3.249201213154922],
-                        [-0.11915582299214138, 3.555636100927892]])
     np.testing.assert_allclose(actual, desired, atol=1e-8, rtol=1e-8)
 
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.normal(
-        loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller2"
+        loc=0.123456789, scale=2.0, size=(3, 2), method="BoxMuller2"
+    )
+    desired = np.array(
+        [
+            [0.16673479781277187, 0.48153966449249175],
+            [-3.4809986872165952, -0.8101190082826486],
+            [-0.051937610825354905, 2.4088402362484342],
+        ]
     )
-    desired = np.array([[0.16673479781277187, 0.48153966449249175],
-                        [-3.4809986872165952, -0.8101190082826486],
-                        [-0.051937610825354905, 2.4088402362484342]])
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-7)
 
 
 def test_randomdist_pareto(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.pareto(a=.123456789, size=(3, 2))
+    actual = rnd.pareto(a=0.123456789, size=(3, 2))
     desired = np.array(
-        [[0.14079174875385214, 82372044085468.92],
-         [1247881.6368437486, 15.086855668610944],
-         [203.2638558933401, 0.10445383654349749]])
+        [
+            [0.14079174875385214, 82372044085468.92],
+            [1247881.6368437486, 15.086855668610944],
+            [203.2638558933401, 0.10445383654349749],
+        ]
+    )
     # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this
     # matrix differs by 24 nulps. Discussion:
     #   http://mail.scipy.org/pipermail/numpy-discussion/2012-September/063801.html
@@ -864,7 +985,7 @@ def test_randomdist_pareto(randomdist):
 
 def test_randomdist_poisson(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.poisson(lam=.123456789, size=(3, 2))
+    actual = rnd.poisson(lam=0.123456789, size=(3, 2))
     desired = np.array([[1, 0], [0, 0], [0, 1]])
     np.testing.assert_array_equal(actual, desired)
 
@@ -875,109 +996,149 @@ def test_randomdist_poisson(randomdist):
 
 
 def test_poisson_exceptions():
-    lambig = np.iinfo('l').max
+    lambig = np.iinfo("l").max
     lamneg = -1
     assert_raises(ValueError, rnd.poisson, lamneg)
-    assert_raises(ValueError, rnd.poisson, [lamneg]*10)
+    assert_raises(ValueError, rnd.poisson, [lamneg] * 10)
     assert_raises(ValueError, rnd.poisson, lambig)
-    assert_raises(ValueError, rnd.poisson, [lambig]*10)
+    assert_raises(ValueError, rnd.poisson, [lambig] * 10)
 
 
 def test_randomdist_power(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.power(a=.123456789, size=(3, 2))
-    desired = np.array([[0.8765841803224415, 1.2140041091640163e-14],
-                        [8.013574117268635e-07, 0.06216255187464781],
-                        [0.004895628723087296, 0.9054248959192386]])
+    actual = rnd.power(a=0.123456789, size=(3, 2))
+    desired = np.array(
+        [
+            [0.8765841803224415, 1.2140041091640163e-14],
+            [8.013574117268635e-07, 0.06216255187464781],
+            [0.004895628723087296, 0.9054248959192386],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_rayleigh(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.rayleigh(scale=10, size=(3, 2))
-    desired = np.array([[1.80344345931194, 28.127692489122378],
-                        [18.6169699930609, 8.282068232120208],
-                        [11.460520015934597, 1.5662406536967712]])
+    desired = np.array(
+        [
+            [1.80344345931194, 28.127692489122378],
+            [18.6169699930609, 8.282068232120208],
+            [11.460520015934597, 1.5662406536967712],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_standard_cauchy(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.standard_cauchy(size=(3, 2))
-    desired = np.array([[19.716487700629912, -16.608240276131227],
-                        [-1.6117703817332278, 0.7739915895826882],
-                        [0.058344614106131, 26.09825325697747]])
+    desired = np.array(
+        [
+            [19.716487700629912, -16.608240276131227],
+            [-1.6117703817332278, 0.7739915895826882],
+            [0.058344614106131, 26.09825325697747],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-9, rtol=1e-10)
 
 
 def test_randomdist_standard_exponential(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.standard_exponential(size=(3, 2))
-    desired = np.array([[0.016262041554675085, 3.955835423813157],
-                        [1.7329578586126497, 0.3429632710074738],
-                        [0.6567175951781875, 0.012265548926462446]])
+    desired = np.array(
+        [
+            [0.016262041554675085, 3.955835423813157],
+            [1.7329578586126497, 0.3429632710074738],
+            [0.6567175951781875, 0.012265548926462446],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_standard_gamma(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.standard_gamma(shape=3, size=(3, 2))
-    desired = np.array([[2.939330965027084, 2.799606052259993],
-                        [4.988193705918075, 2.905305108691164],
-                        [3.2630929395548147, 2.772756340265377]])
+    desired = np.array(
+        [
+            [2.939330965027084, 2.799606052259993],
+            [4.988193705918075, 2.905305108691164],
+            [3.2630929395548147, 2.772756340265377],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_standard_normal(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.standard_normal(size=(3, 2))
-    desired = np.array([[2.1411609928913298, -2.071786679174482],
-                        [-0.9277801831855025, 0.5524042072491773],
-                        [0.04651632135517459, 2.2510674226058036]])
+    desired = np.array(
+        [
+            [2.1411609928913298, -2.071786679174482],
+            [-0.9277801831855025, 0.5524042072491773],
+            [0.04651632135517459, 2.2510674226058036],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    actual = rnd.standard_normal(size=(3, 2), method='BoxMuller2')
-    desired = np.array([[0.021639004406385935, 0.17904143774624587],
-                        [-1.8022277381082976, -0.4667878986413243],
-                        [-0.08769719991267745, 1.1426917236242171]])
+    actual = rnd.standard_normal(size=(3, 2), method="BoxMuller2")
+    desired = np.array(
+        [
+            [0.021639004406385935, 0.17904143774624587],
+            [-1.8022277381082976, -0.4667878986413243],
+            [-0.08769719991267745, 1.1426917236242171],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10)
 
 
 def test_randomdist_standard_t(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.standard_t(df=10, size=(3, 2))
-    desired = np.array([[-0.783927044239963, 0.04762883516531178],
-                        [0.7624597987725193, -1.8045540288955506],
-                        [-1.2657694296239195, 0.307870906117017]])
+    desired = np.array(
+        [
+            [-0.783927044239963, 0.04762883516531178],
+            [0.7624597987725193, -1.8045540288955506],
+            [-1.2657694296239195, 0.307870906117017],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=5e-10, rtol=5e-10)
 
 
 def test_randomdist_triangular(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.triangular(left=5.12, mode=10.23, right=20.34, size=(3, 2))
-    desired = np.array([[18.764540652669638, 6.340166306695037],
-                        [8.827752689522429, 13.65605077739865],
-                        [11.732872979633328, 18.970392754850423]])
+    desired = np.array(
+        [
+            [18.764540652669638, 6.340166306695037],
+            [8.827752689522429, 13.65605077739865],
+            [11.732872979633328, 18.970392754850423],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_uniform(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.uniform(low=1.23, high=10.54, size=(3, 2))
-    desired = np.array([[10.38982478047721, 1.408218254214153],
-                        [2.8756430713785814, 7.836974412682466],
-                        [6.057706432128325, 10.426505200380925]])
+    desired = np.array(
+        [
+            [10.38982478047721, 1.408218254214153],
+            [2.8756430713785814, 7.836974412682466],
+            [6.057706432128325, 10.426505200380925],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_uniform_range_bounds():
-    fmin = np.finfo('float').min
-    fmax = np.finfo('float').max
+    fmin = np.finfo("float").min
+    fmax = np.finfo("float").max
 
     func = rnd.uniform
     np.testing.assert_raises(OverflowError, func, -np.inf, 0)
-    np.testing.assert_raises(OverflowError, func,  0,      np.inf)
+    np.testing.assert_raises(OverflowError, func, 0, np.inf)
     # this should not throw any error, since rng can be sampled as
     # fmin*u + fmax*(1-u) for 0<u<1 and it stays completely in range
     rnd.uniform(fmin, fmax)
@@ -989,16 +1150,20 @@ def test_uniform_range_bounds():
 def test_randomdist_vonmises(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.vonmises(mu=1.23, kappa=1.54, size=(3, 2))
-    desired = np.array([[1.1027657269593822, 1.2539311427727782],
-                        [2.0281801137277764, 1.3262040229028056],
-                        [0.9510301598100863, 2.0284972823322818]])
+    desired = np.array(
+        [
+            [1.1027657269593822, 1.2539311427727782],
+            [2.0281801137277764, 1.3262040229028056],
+            [0.9510301598100863, 2.0284972823322818],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
 
 def test_randomdist_vonmises_small(randomdist):
     # check infinite loop, gh-4720
     rnd.seed(randomdist.seed, brng=randomdist.brng)
-    r = rnd.vonmises(mu=0., kappa=1.1e-8, size=10**6)
+    r = rnd.vonmises(mu=0.0, kappa=1.1e-8, size=10**6)
     np.testing.assert_(np.isfinite(r).all())
 
 
@@ -1006,9 +1171,11 @@ def test_randomdist_wald(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.wald(mean=1.23, scale=1.54, size=(3, 2))
     desired = np.array(
-        [[0.22448558337033758, 0.23485255518098838],
-         [2.756850184899666, 2.005347850108636],
-         [1.179918636588408, 0.20928649815442452]]
+        [
+            [0.22448558337033758, 0.23485255518098838],
+            [2.756850184899666, 2.005347850108636],
+            [1.179918636588408, 0.20928649815442452],
+        ]
     )
     np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10)
 
@@ -1016,9 +1183,13 @@ def test_randomdist_wald(randomdist):
 def test_randomdist_weibull(randomdist):
     rnd.seed(randomdist.seed, brng=randomdist.brng)
     actual = rnd.weibull(a=1.23, size=(3, 2))
-    desired = np.array([[0.035129404330214734, 3.058859465984936],
-                        [1.5636393343788513, 0.4189406773709585],
-                        [0.710439924774508, 0.02793103204502023]])
+    desired = np.array(
+        [
+            [0.035129404330214734, 3.058859465984936],
+            [1.5636393343788513, 0.4189406773709585],
+            [0.710439924774508, 0.02793103204502023],
+        ]
+    )
     np.testing.assert_allclose(actual, desired, atol=1e-10)
 
 
@@ -1042,8 +1213,10 @@ def _check_function(seed_list, function, sz):
     out2 = np.empty((len(seed_list),) + sz)
 
     # threaded generation
-    t = [Thread(target=function, args=(rnd.MKLRandomState(s), o))
-         for s, o in zip(seed_list, out1)]
+    t = [
+        Thread(target=function, args=(rnd.MKLRandomState(s), o))
+        for s, o in zip(seed_list, out1)
+    ]
     [x.start() for x in t]
     [x.join() for x in t]
 
@@ -1052,7 +1225,7 @@ def _check_function(seed_list, function, sz):
         function(rnd.MKLRandomState(s), o)
 
     # these platforms change x87 fpu precision mode in threads
-    if (np.intp().dtype.itemsize == 4 and sys.platform == "win32"):
+    if np.intp().dtype.itemsize == 4 and sys.platform == "win32":
         np.testing.assert_allclose(out1, out2)
     else:
         np.testing.assert_array_equal(out1, out2)
@@ -1061,6 +1234,7 @@ def _check_function(seed_list, function, sz):
 def test_thread_normal(seed_vector):
     def gen_random(state, out):
         out[...] = state.normal(size=10000)
+
     _check_function(seed_vector, gen_random, sz=(10000,))
 
 
@@ -1068,11 +1242,13 @@ def test_thread_exp(seed_vector):
     # make sure each state produces the same sequence even in threads
     def gen_random(state, out):
         out[...] = state.exponential(scale=np.ones((100, 1000)))
+
     _check_function(seed_vector, gen_random, sz=(100, 1000))
 
 
 def test_multinomial(seed_vector):
     # make sure each state produces the same sequence even in threads
     def gen_random(state, out):
-        out[...] = state.multinomial(10, [1/6.]*6, size=10000)
+        out[...] = state.multinomial(10, [1 / 6.0] * 6, size=10000)
+
     _check_function(seed_vector, gen_random, sz=(10000, 6))
diff --git a/mkl_random/tests/test_regression.py b/mkl_random/tests/test_regression.py
index 817032f..66732f6 100644
--- a/mkl_random/tests/test_regression.py
+++ b/mkl_random/tests/test_regression.py
@@ -24,20 +24,18 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import sys
-from numpy.testing import (TestCase, assert_,
-                           assert_array_equal, assert_raises)
-import mkl_random as rnd
+import gc
+
 import numpy as np
+from numpy.testing import assert_, assert_array_equal, assert_raises
 
-import pytest
-import gc
+import mkl_random as rnd
 
 
 def test_VonMises_range():
     # Make sure generated random variables are in [-pi, pi].
     # Regression test for ticket #986.
-    for mu in np.linspace(-7., 7., 5):
+    for mu in np.linspace(-7.0, 7.0, 5):
         r = rnd.vonmises(mu, 1, 50)
         assert_(np.all(r > -np.pi) and np.all(r <= np.pi))
 
@@ -50,7 +48,7 @@ def test_hypergeometric_range():
     # Test for ticket #5623
     args = [
         (2**20 - 2, 2**20 - 2, 2**20 - 2),  # Check for 32-bit systems
-        (2 ** 30 - 1, 2 ** 30 - 2, 2 ** 30 - 1)
+        (2**30 - 1, 2**30 - 2, 2**30 - 1),
     ]
     for arg in args:
         assert_(rnd.hypergeometric(*arg) > 0)
@@ -59,7 +57,7 @@ def test_hypergeometric_range():
 def test_logseries_convergence():
     # Test for ticket #923
     N = 1000
-    rnd.seed(0, brng='MT19937')
+    rnd.seed(0, brng="MT19937")
     rvsn = rnd.logseries(0.8, size=N)
     # these two frequency counts should be close to theoretical
     # numbers with this large sample
@@ -74,9 +72,9 @@ def test_logseries_convergence():
 
 
 def test_permutation_longs():
-    rnd.seed(1234, brng='MT19937')
+    rnd.seed(1234, brng="MT19937")
     a = rnd.permutation(12)
-    rnd.seed(1234, brng='MT19937')
+    rnd.seed(1234, brng="MT19937")
     dt_long = np.dtype("long")
     twelve_long = dt_long.type(12)
     b = rnd.permutation(twelve_long)
@@ -85,21 +83,25 @@ def test_permutation_longs():
 
 def test_randint_range():
     # Test for ticket #1690
-    lmax = np.iinfo('l').max
-    lmin = np.iinfo('l').min
+    lmax = np.iinfo("l").max
+    lmin = np.iinfo("l").min
     try:
         rnd.randint(lmin, lmax)
-    except:
-        raise AssertionError
+    except Exception as e:
+        raise AssertionError(
+            "error raised with the following message:\n\n%s" % str(e)
+        ) from e
 
 
 def test_shuffle_mixed_dimension():
     # Test for trac ticket #2074
-    for t in [[1, 2, 3, None],
-                [(1, 1), (2, 2), (3, 3), None],
-                [1, (2, 2), (3, 3), None],
-                [(1, 1), 2, 3, None]]:
-        rnd.seed(12345, brng='MT2203')
+    for t in [
+        [1, 2, 3, None],
+        [(1, 1), (2, 2), (3, 3), None],
+        [1, (2, 2), (3, 3), None],
+        [(1, 1), 2, 3, None],
+    ]:
+        rnd.seed(12345, brng="MT2203")
         shuffled = np.array(list(t), dtype=object)
         rnd.shuffle(shuffled)
         expected = np.array([t[0], t[2], t[1], t[3]], dtype=object)
@@ -112,9 +114,9 @@ def test_call_within_randomstate():
     res = np.array([5, 7, 5, 4, 5, 5, 6, 9, 6, 1])
     for i in range(3):
         rnd.seed(i)
-        m.seed(4321, brng='SFMT19937')
+        m.seed(4321, brng="SFMT19937")
         # If m.state is not honored, the result will change
-        assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res)
+        assert_array_equal(m.choice(10, size=10, p=np.ones(10) / 10.0), res)
 
 
 def test_multivariate_normal_size_types():
@@ -129,32 +131,32 @@ def test_multivariate_normal_size_types():
 def test_beta_small_parameters():
     # Test that beta with small a and b parameters does not produce
     # NaNs due to roundoff errors causing 0 / 0, gh-5851
-    rnd.seed(1234567890, brng='MT19937')
+    rnd.seed(1234567890, brng="MT19937")
     x = rnd.beta(0.0001, 0.0001, size=100)
-    assert_(not np.any(np.isnan(x)), 'Nans in rnd.beta')
+    assert_(not np.any(np.isnan(x)), "Nans in rnd.beta")
 
 
 def test_choice_sum_of_probs_tolerance():
     # The sum of probs should be 1.0 with some tolerance.
     # For low precision dtypes the tolerance was too tight.
     # See numpy github issue 6123.
-    rnd.seed(1234, brng='MT19937')
+    rnd.seed(1234, brng="MT19937")
     a = [1, 2, 3]
     counts = [4, 4, 2]
     for dt in np.float16, np.float32, np.float64:
         probs = np.array(counts, dtype=dt) / sum(counts)
         c = rnd.choice(a, p=probs)
         assert_(c in a)
-        assert_raises(ValueError, rnd.choice, a, p=probs*0.9)
+        assert_raises(ValueError, rnd.choice, a, p=probs * 0.9)
 
 
 def test_shuffle_of_array_of_different_length_strings():
     # Test that permuting an array of different length strings
     # will not cause a segfault on garbage collection
     # Tests gh-7710
-    rnd.seed(1234, brng='MT19937')
+    rnd.seed(1234, brng="MT19937")
 
-    a = np.array(['a', 'a' * 1000])
+    a = np.array(["a", "a" * 1000])
 
     for _ in range(100):
         rnd.shuffle(a)
@@ -167,7 +169,7 @@ def test_shuffle_of_array_of_objects():
     # Test that permuting an array of objects will not cause
     # a segfault on garbage collection.
     # See gh-7719
-    rnd.seed(1234, brng='MT19937')
+    rnd.seed(1234, brng="MT19937")
     a = np.array([np.arange(4), np.arange(4)])
 
     for _ in range(1000):
@@ -178,5 +180,5 @@ def test_shuffle_of_array_of_objects():
 
 
 def test_non_central_chi_squared_df_one():
-    a = rnd.noncentral_chisquare(df = 1.0, nonc=2.3, size=10**4)
-    assert(a.min() > 0.0)
+    a = rnd.noncentral_chisquare(df=1.0, nonc=2.3, size=10**4)
+    assert a.min() > 0.0
diff --git a/pyproject.toml b/pyproject.toml
index 0152b7e..3352468 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,14 +3,14 @@
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
-#     * Redistributions of source code must retain the above copyright notice,
-#       this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of Intel Corporation nor the names of its contributors
-#       may be used to endorse or promote products derived from this software
-#       without specific prior written permission.
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -29,33 +29,43 @@ requires = ["setuptools>=77", "Cython", "numpy"]
 
 [project]
 authors = [
-    {name = "Intel Corporation", email = "scripting@intel.com"}
+  {name = "Intel Corporation", email = "scripting@intel.com"}
 ]
 classifiers = [
-    "Development Status :: 5 - Production/Stable",
-    "Intended Audience :: Science/Research",
-    "Intended Audience :: Developers",
-    "Programming Language :: C",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
-    "Programming Language :: Python :: 3.14",
-    "Programming Language :: Python :: Implementation :: CPython",
-    "Topic :: Software Development",
-    "Topic :: Scientific/Engineering",
-    "Operating System :: Microsoft :: Windows",
-    "Operating System :: POSIX",
-    "Operating System :: Unix"
+  "Development Status :: 5 - Production/Stable",
+  "Intended Audience :: Science/Research",
+  "Intended Audience :: Developers",
+  "Programming Language :: C",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: 3.14",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Topic :: Software Development",
+  "Topic :: Scientific/Engineering",
+  "Operating System :: Microsoft :: Windows",
+  "Operating System :: POSIX",
+  "Operating System :: Unix"
 ]
 dependencies = ["numpy >=1.26.4", "mkl"]
 description = "NumPy-based Python interface to Intel (R) MKL Random Number Generation functionality"
 dynamic = ["version"]
-keywords = ["MKL", "VSL", "true randomness", "pseudorandomness",
-            "Philox", "MT-19937", "SFMT-19937", "MT-2203", "ARS-5",
-            "R-250", "MCG-31"]
+keywords = [
+  "MKL",
+  "VSL",
+  "true randomness",
+  "pseudorandomness",
+  "Philox",
+  "MT-19937",
+  "SFMT-19937",
+  "MT-2203",
+  "ARS-5",
+  "R-250",
+  "MCG-31"
+]
 license = "BSD-3-Clause"
 name = "mkl_random"
 readme = {file = "README.md", content-type = "text/markdown"}
@@ -68,14 +78,29 @@ test = ["pytest"]
 Download = "http://github.com/IntelPython/mkl_random"
 Homepage = "http://github.com/IntelPython/mkl_random"
 
+[tool.black]
+line-length = 80
+
+[tool.cython-lint]
+ignore = ['E722']  # E722: do not use bare 'except'
+max-line-length = 80
+
+[tool.isort]
+ensure_newline_before_comments = true
+force_grid_wrap = 0
+include_trailing_comma = true
+line_length = 80
+multi_line_output = 3
+use_parentheses = true
+
 [tool.setuptools]
 include-package-data = true
 
-[tool.setuptools.packages.find]
-include = ["mkl_random", "mkl_random.interfaces"]
+[tool.setuptools.dynamic]
+version = {attr = "mkl_random._version.__version__"}
 
 [tool.setuptools.package-data]
 "mkl_random" = ["tests/*.py"]
 
-[tool.setuptools.dynamic]
-version = {attr = "mkl_random._version.__version__"}
+[tool.setuptools.packages.find]
+include = ["mkl_random", "mkl_random.interfaces"]
diff --git a/setup.py b/setup.py
index c47ebfb..bd64aaa 100644
--- a/setup.py
+++ b/setup.py
@@ -26,61 +26,72 @@
 import os
 import sys
 from os.path import join
+
 import Cython.Build
-from setuptools import setup, Extension
 import numpy as np
+from setuptools import Extension, setup
 
 
 def extensions():
-    mkl_root = os.environ.get('MKLROOT', None)
+    mkl_root = os.environ.get("MKLROOT", None)
     if mkl_root:
         mkl_info = {
-            'include_dirs': [join(mkl_root, 'include')],
-            'library_dirs': [join(mkl_root, 'lib'), join(mkl_root, 'lib', 'intel64')],
-            'libraries': ['mkl_rt']
+            "include_dirs": [join(mkl_root, "include")],
+            "library_dirs": [
+                join(mkl_root, "lib"),
+                join(mkl_root, "lib", "intel64"),
+            ],
+            "libraries": ["mkl_rt"],
         }
     else:
         raise ValueError("MKLROOT environment variable not set.")
 
-    mkl_include_dirs = mkl_info.get('include_dirs', [])
-    mkl_library_dirs = mkl_info.get('library_dirs', [])
-    mkl_libraries = mkl_info.get('libraries', ['mkl_rt'])
+    mkl_include_dirs = mkl_info.get("include_dirs", [])
+    mkl_library_dirs = mkl_info.get("library_dirs", [])
+    mkl_libraries = mkl_info.get("libraries", ["mkl_rt"])
 
     libs = mkl_libraries
     lib_dirs = mkl_library_dirs
 
-    if sys.platform == 'win32':
-        libs.append('Advapi32')
+    if sys.platform == "win32":
+        libs.append("Advapi32")
 
-    Q = '/Q' if sys.platform.startswith('win') or sys.platform == 'cygwin' else '-'
+    Q = (
+        "/Q"
+        if sys.platform.startswith("win") or sys.platform == "cygwin"
+        else "-"
+    )
     eca = [Q + "std=c++11"]
     if sys.platform == "linux":
         eca.extend(["-Wno-unused-but-set-variable", "-Wno-unused-function"])
 
-    defs = [('_FILE_OFFSET_BITS', '64'),
-            ('_LARGEFILE_SOURCE', '1'),
-            ('_LARGEFILE64_SOURCE', '1'),
-            ("PY_ARRAY_UNIQUE_SYMBOL", "mkl_random_ext")]
+    defs = [
+        ("_FILE_OFFSET_BITS", "64"),
+        ("_LARGEFILE_SOURCE", "1"),
+        ("_LARGEFILE64_SOURCE", "1"),
+        ("PY_ARRAY_UNIQUE_SYMBOL", "mkl_random_ext"),
+    ]
 
     exts = [
         Extension(
             "mkl_random.mklrand",
-            sources = [
+            sources=[
                 join("mkl_random", "mklrand.pyx"),
                 join("mkl_random", "src", "mkl_distributions.cpp"),
                 join("mkl_random", "src", "randomkit.cpp"),
             ],
-            depends = [
+            depends=[
                 join("mkl_random", "src", "mkl_distributions.hpp"),
                 join("mkl_random", "src", "randomkit.h"),
-                join("mkl_random", "src", "numpy_multiiter_workaround.h")
+                join("mkl_random", "src", "numpy_multiiter_workaround.h"),
             ],
-            include_dirs = [join("mkl_random", "src"), np.get_include()] + mkl_include_dirs,
-            libraries = libs,
-            library_dirs = lib_dirs,
-            extra_compile_args = eca,
+            include_dirs=[join("mkl_random", "src"), np.get_include()]
+            + mkl_include_dirs,
+            libraries=libs,
+            library_dirs=lib_dirs,
+            extra_compile_args=eca,
             define_macros=defs + [("NDEBUG", None)],
-            language="c++"
+            language="c++",
         )
     ]
 
@@ -88,7 +99,7 @@ def extensions():
 
 
 setup(
-    cmdclass={'build_ext': Cython.Build.build_ext},
+    cmdclass={"build_ext": Cython.Build.build_ext},
     ext_modules=extensions(),
     zip_safe=False,
 )