mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-23 00:03:02 +01:00
Update re2 to version 2021-04-01.
This commit is contained in:
commit
3085e2c689
2
extern/re2/kokoro/bazel.sh → extern/re2/.github/bazel.sh
vendored
Normal file → Executable file
2
extern/re2/kokoro/bazel.sh → extern/re2/.github/bazel.sh
vendored
Normal file → Executable file
@ -1,8 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -eux
|
set -eux
|
||||||
|
|
||||||
cd git/re2
|
|
||||||
|
|
||||||
bazel clean
|
bazel clean
|
||||||
bazel build --compilation_mode=dbg -- //:all
|
bazel build --compilation_mode=dbg -- //:all
|
||||||
bazel test --compilation_mode=dbg --test_output=errors -- //:all \
|
bazel test --compilation_mode=dbg --test_output=errors -- //:all \
|
12
extern/re2/.github/cmake.sh
vendored
Executable file
12
extern/re2/.github/cmake.sh
vendored
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -eux
|
||||||
|
|
||||||
|
cmake -D CMAKE_BUILD_TYPE=Debug
|
||||||
|
cmake --build . --config Debug --clean-first
|
||||||
|
ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
|
||||||
|
|
||||||
|
cmake -D CMAKE_BUILD_TYPE=Release
|
||||||
|
cmake --build . --config Release --clean-first
|
||||||
|
ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
|
||||||
|
|
||||||
|
exit 0
|
17
extern/re2/.github/workflows/ci-bazel.yml
vendored
Normal file
17
extern/re2/.github/workflows/ci-bazel.yml
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
name: CI (Bazel)
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master]
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [macos-latest, ubuntu-latest, windows-latest]
|
||||||
|
env:
|
||||||
|
BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- run: .github/bazel.sh
|
||||||
|
shell: bash
|
15
extern/re2/.github/workflows/ci-cmake.yml
vendored
Normal file
15
extern/re2/.github/workflows/ci-cmake.yml
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
name: CI (CMake)
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master]
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [macos-latest, ubuntu-latest, windows-latest]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- run: .github/cmake.sh
|
||||||
|
shell: bash
|
51
extern/re2/.github/workflows/ci.yml
vendored
Normal file
51
extern/re2/.github/workflows/ci.yml
vendored
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
name: CI
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master]
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [macos-latest, ubuntu-latest]
|
||||||
|
env:
|
||||||
|
CC: clang
|
||||||
|
CXX: clang++
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- run: make && make test
|
||||||
|
shell: bash
|
||||||
|
build-clang:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
tag: [9, 10, 11]
|
||||||
|
env:
|
||||||
|
CC: clang-${{ matrix.tag }}
|
||||||
|
CXX: clang++-${{ matrix.tag }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Install Clang ${{ matrix.tag }}
|
||||||
|
run: |
|
||||||
|
wget https://apt.llvm.org/llvm.sh
|
||||||
|
chmod +x ./llvm.sh
|
||||||
|
sudo ./llvm.sh ${{ matrix.tag }}
|
||||||
|
shell: bash
|
||||||
|
- run: make && make test
|
||||||
|
shell: bash
|
||||||
|
build-gcc:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container: gcc:${{ matrix.tag }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
tag: [4, 5, 6, 7, 8, 9, 10]
|
||||||
|
env:
|
||||||
|
CC: gcc
|
||||||
|
CXX: g++
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- run: make && make test
|
||||||
|
shell: bash
|
179
extern/re2/.travis.yml
vendored
179
extern/re2/.travis.yml
vendored
@ -1,179 +0,0 @@
|
|||||||
language: cpp
|
|
||||||
sudo: false
|
|
||||||
dist: trusty
|
|
||||||
script:
|
|
||||||
- make
|
|
||||||
- make test
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-4.8
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-4.8 CXX=g++-4.8"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-4.9
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-4.9 CXX=g++-4.9"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-5
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-5 CXX=g++-5"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-6
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-6 CXX=g++-6"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-7
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-7 CXX=g++-7"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-8
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-8 CXX=g++-8"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
packages:
|
|
||||||
- g++-9
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=gcc-9 CXX=g++-9"
|
|
||||||
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-precise-3.5
|
|
||||||
packages:
|
|
||||||
- clang-3.5
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-3.5 CXX=clang++-3.5"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-precise-3.6
|
|
||||||
packages:
|
|
||||||
- clang-3.6
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-3.6 CXX=clang++-3.6"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-precise-3.7
|
|
||||||
packages:
|
|
||||||
- clang-3.7
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-3.7 CXX=clang++-3.7"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-precise-3.8
|
|
||||||
packages:
|
|
||||||
- clang-3.8
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-3.8 CXX=clang++-3.8"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-precise-3.9
|
|
||||||
packages:
|
|
||||||
- clang-3.9
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-3.9 CXX=clang++-3.9"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-trusty-4.0
|
|
||||||
packages:
|
|
||||||
- clang-4.0
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-4.0 CXX=clang++-4.0"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- llvm-toolchain-trusty-5.0
|
|
||||||
packages:
|
|
||||||
- clang-5.0
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-5.0 CXX=clang++-5.0"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main'
|
|
||||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
|
||||||
packages:
|
|
||||||
- clang-6.0
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
|
|
||||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
|
||||||
packages:
|
|
||||||
- clang-7
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
|
|
||||||
- os: linux
|
|
||||||
addons:
|
|
||||||
apt:
|
|
||||||
sources:
|
|
||||||
- ubuntu-toolchain-r-test
|
|
||||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main'
|
|
||||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
|
||||||
packages:
|
|
||||||
- clang-8
|
|
||||||
env:
|
|
||||||
- MATRIX_EVAL="CC=clang-8 CXX=clang++-8"
|
|
||||||
|
|
||||||
before_install:
|
|
||||||
- eval "${MATRIX_EVAL}"
|
|
151
extern/re2/BUILD
vendored
151
extern/re2/BUILD
vendored
@ -9,19 +9,21 @@ licenses(["notice"])
|
|||||||
exports_files(["LICENSE"])
|
exports_files(["LICENSE"])
|
||||||
|
|
||||||
config_setting(
|
config_setting(
|
||||||
name = "darwin",
|
name = "macos",
|
||||||
values = {"cpu": "darwin"},
|
values = {"cpu": "darwin"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
config_setting(
|
||||||
|
name = "wasm",
|
||||||
|
values = {"cpu": "wasm32"},
|
||||||
|
)
|
||||||
|
|
||||||
config_setting(
|
config_setting(
|
||||||
name = "windows",
|
name = "windows",
|
||||||
values = {"cpu": "x64_windows"},
|
values = {"cpu": "x64_windows"},
|
||||||
)
|
)
|
||||||
|
|
||||||
config_setting(
|
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
|
||||||
name = "windows_msvc",
|
|
||||||
values = {"cpu": "x64_windows_msvc"},
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "re2",
|
name = "re2",
|
||||||
@ -36,6 +38,7 @@ cc_library(
|
|||||||
"re2/onepass.cc",
|
"re2/onepass.cc",
|
||||||
"re2/parse.cc",
|
"re2/parse.cc",
|
||||||
"re2/perl_groups.cc",
|
"re2/perl_groups.cc",
|
||||||
|
"re2/pod_array.h",
|
||||||
"re2/prefilter.cc",
|
"re2/prefilter.cc",
|
||||||
"re2/prefilter.h",
|
"re2/prefilter.h",
|
||||||
"re2/prefilter_tree.cc",
|
"re2/prefilter_tree.cc",
|
||||||
@ -47,6 +50,8 @@ cc_library(
|
|||||||
"re2/regexp.h",
|
"re2/regexp.h",
|
||||||
"re2/set.cc",
|
"re2/set.cc",
|
||||||
"re2/simplify.cc",
|
"re2/simplify.cc",
|
||||||
|
"re2/sparse_array.h",
|
||||||
|
"re2/sparse_set.h",
|
||||||
"re2/stringpiece.cc",
|
"re2/stringpiece.cc",
|
||||||
"re2/tostring.cc",
|
"re2/tostring.cc",
|
||||||
"re2/unicode_casefold.cc",
|
"re2/unicode_casefold.cc",
|
||||||
@ -54,14 +59,10 @@ cc_library(
|
|||||||
"re2/unicode_groups.cc",
|
"re2/unicode_groups.cc",
|
||||||
"re2/unicode_groups.h",
|
"re2/unicode_groups.h",
|
||||||
"re2/walker-inl.h",
|
"re2/walker-inl.h",
|
||||||
"util/flags.h",
|
|
||||||
"util/logging.h",
|
"util/logging.h",
|
||||||
"util/mix.h",
|
"util/mix.h",
|
||||||
"util/mutex.h",
|
"util/mutex.h",
|
||||||
"util/pod_array.h",
|
|
||||||
"util/rune.cc",
|
"util/rune.cc",
|
||||||
"util/sparse_array.h",
|
|
||||||
"util/sparse_set.h",
|
|
||||||
"util/strutil.cc",
|
"util/strutil.cc",
|
||||||
"util/strutil.h",
|
"util/strutil.h",
|
||||||
"util/utf.h",
|
"util/utf.h",
|
||||||
@ -74,17 +75,17 @@ cc_library(
|
|||||||
"re2/stringpiece.h",
|
"re2/stringpiece.h",
|
||||||
],
|
],
|
||||||
copts = select({
|
copts = select({
|
||||||
|
":wasm": [],
|
||||||
":windows": [],
|
":windows": [],
|
||||||
":windows_msvc": [],
|
|
||||||
"//conditions:default": ["-pthread"],
|
"//conditions:default": ["-pthread"],
|
||||||
}),
|
}),
|
||||||
linkopts = select({
|
linkopts = select({
|
||||||
# Darwin doesn't need `-pthread' when linking and it appears that
|
# macOS doesn't need `-pthread' when linking and it appears that
|
||||||
# older versions of Clang will warn about the unused command line
|
# older versions of Clang will warn about the unused command line
|
||||||
# argument, so just don't pass it.
|
# argument, so just don't pass it.
|
||||||
":darwin": [],
|
":macos": [],
|
||||||
|
":wasm": [],
|
||||||
":windows": [],
|
":windows": [],
|
||||||
":windows_msvc": [],
|
|
||||||
"//conditions:default": ["-pthread"],
|
"//conditions:default": ["-pthread"],
|
||||||
}),
|
}),
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
@ -109,6 +110,8 @@ cc_library(
|
|||||||
"re2/testing/string_generator.h",
|
"re2/testing/string_generator.h",
|
||||||
"re2/testing/tester.h",
|
"re2/testing/tester.h",
|
||||||
"util/benchmark.h",
|
"util/benchmark.h",
|
||||||
|
"util/flags.h",
|
||||||
|
"util/malloc_counter.h",
|
||||||
"util/pcre.h",
|
"util/pcre.h",
|
||||||
"util/test.h",
|
"util/test.h",
|
||||||
],
|
],
|
||||||
@ -122,106 +125,144 @@ cc_library(
|
|||||||
deps = [":testing"],
|
deps = [":testing"],
|
||||||
)
|
)
|
||||||
|
|
||||||
load(":re2_test.bzl", "re2_test")
|
cc_test(
|
||||||
|
name = "charclass_test",
|
||||||
re2_test(
|
|
||||||
"charclass_test",
|
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/charclass_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"compile_test",
|
name = "compile_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/compile_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"filtered_re2_test",
|
name = "filtered_re2_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/filtered_re2_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"mimics_pcre_test",
|
name = "mimics_pcre_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/mimics_pcre_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"parse_test",
|
name = "parse_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/parse_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"possible_match_test",
|
name = "possible_match_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/possible_match_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"re2_arg_test",
|
name = "re2_arg_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/re2_arg_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"re2_test",
|
name = "re2_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/re2_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"regexp_test",
|
name = "regexp_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/regexp_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"required_prefix_test",
|
name = "required_prefix_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/required_prefix_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"search_test",
|
name = "search_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/search_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"set_test",
|
name = "set_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/set_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"simplify_test",
|
name = "simplify_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/simplify_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"string_generator_test",
|
name = "string_generator_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
srcs = ["re2/testing/string_generator_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"dfa_test",
|
name = "dfa_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/dfa_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"exhaustive1_test",
|
name = "exhaustive1_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/exhaustive1_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"exhaustive2_test",
|
name = "exhaustive2_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/exhaustive2_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"exhaustive3_test",
|
name = "exhaustive3_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/exhaustive3_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"exhaustive_test",
|
name = "exhaustive_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/exhaustive_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
re2_test(
|
cc_test(
|
||||||
"random_test",
|
name = "random_test",
|
||||||
size = "large",
|
size = "large",
|
||||||
|
srcs = ["re2/testing/random_test.cc"],
|
||||||
|
deps = [":test"],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
|
41
extern/re2/CMakeLists.txt
vendored
41
extern/re2/CMakeLists.txt
vendored
@ -2,8 +2,8 @@
|
|||||||
# Use of this source code is governed by a BSD-style
|
# Use of this source code is governed by a BSD-style
|
||||||
# license that can be found in the LICENSE file.
|
# license that can be found in the LICENSE file.
|
||||||
|
|
||||||
# Old enough to support Ubuntu Trusty.
|
# Old enough to support Ubuntu Xenial.
|
||||||
cmake_minimum_required(VERSION 2.8.12)
|
cmake_minimum_required(VERSION 3.5.1)
|
||||||
|
|
||||||
if(POLICY CMP0048)
|
if(POLICY CMP0048)
|
||||||
cmake_policy(SET CMP0048 NEW)
|
cmake_policy(SET CMP0048 NEW)
|
||||||
@ -11,6 +11,12 @@ endif()
|
|||||||
|
|
||||||
project(RE2 CXX)
|
project(RE2 CXX)
|
||||||
include(CTest)
|
include(CTest)
|
||||||
|
include(GNUInstallDirs)
|
||||||
|
|
||||||
|
if(NOT CMAKE_CXX_STANDARD)
|
||||||
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
||||||
option(USEPCRE "use PCRE in tests and benchmarks" OFF)
|
option(USEPCRE "use PCRE in tests and benchmarks" OFF)
|
||||||
@ -19,6 +25,10 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF)
|
|||||||
# so we provide an option similar to BUILD_TESTING, but just for RE2.
|
# so we provide an option similar to BUILD_TESTING, but just for RE2.
|
||||||
option(RE2_BUILD_TESTING "enable testing for RE2" ON)
|
option(RE2_BUILD_TESTING "enable testing for RE2" ON)
|
||||||
|
|
||||||
|
# ABI version
|
||||||
|
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
||||||
|
set(SONAME 9)
|
||||||
|
|
||||||
set(EXTRA_TARGET_LINK_LIBRARIES)
|
set(EXTRA_TARGET_LINK_LIBRARIES)
|
||||||
|
|
||||||
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
||||||
@ -27,7 +37,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
|||||||
endif()
|
endif()
|
||||||
if(BUILD_SHARED_LIBS)
|
if(BUILD_SHARED_LIBS)
|
||||||
# See http://www.kitware.com/blog/home/post/939 for details.
|
# See http://www.kitware.com/blog/home/post/939 for details.
|
||||||
cmake_minimum_required(VERSION 3.4)
|
|
||||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||||
endif()
|
endif()
|
||||||
# CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
|
# CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
|
||||||
@ -38,11 +47,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
|||||||
add_compile_options(/utf-8)
|
add_compile_options(/utf-8)
|
||||||
# allow multi-processor compilation
|
# allow multi-processor compilation
|
||||||
add_compile_options(/MP)
|
add_compile_options(/MP)
|
||||||
elseif(CYGWIN OR MINGW)
|
|
||||||
# See https://stackoverflow.com/questions/38139631 for details.
|
|
||||||
add_compile_options(-std=gnu++11)
|
|
||||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
|
||||||
add_compile_options(-std=c++11)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
@ -58,8 +62,6 @@ if(USEPCRE)
|
|||||||
list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
|
list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
|
|
||||||
set(RE2_SOURCES
|
set(RE2_SOURCES
|
||||||
re2/bitstate.cc
|
re2/bitstate.cc
|
||||||
re2/compile.cc
|
re2/compile.cc
|
||||||
@ -86,6 +88,8 @@ set(RE2_SOURCES
|
|||||||
)
|
)
|
||||||
|
|
||||||
add_library(re2 ${RE2_SOURCES})
|
add_library(re2 ${RE2_SOURCES})
|
||||||
|
target_include_directories(re2 PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
|
||||||
|
set_target_properties(re2 PROPERTIES SOVERSION ${SONAME} VERSION ${SONAME}.0.0)
|
||||||
add_library(re2::re2 ALIAS re2)
|
add_library(re2::re2 ALIAS re2)
|
||||||
|
|
||||||
if(RE2_BUILD_TESTING)
|
if(RE2_BUILD_TESTING)
|
||||||
@ -101,6 +105,7 @@ if(RE2_BUILD_TESTING)
|
|||||||
)
|
)
|
||||||
|
|
||||||
add_library(testing STATIC ${TESTING_SOURCES})
|
add_library(testing STATIC ${TESTING_SOURCES})
|
||||||
|
target_link_libraries(testing PUBLIC re2)
|
||||||
|
|
||||||
set(TEST_TARGETS
|
set(TEST_TARGETS
|
||||||
charclass_test
|
charclass_test
|
||||||
@ -132,13 +137,13 @@ if(RE2_BUILD_TESTING)
|
|||||||
|
|
||||||
foreach(target ${TEST_TARGETS})
|
foreach(target ${TEST_TARGETS})
|
||||||
add_executable(${target} re2/testing/${target}.cc util/test.cc)
|
add_executable(${target} re2/testing/${target}.cc util/test.cc)
|
||||||
target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
|
target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||||
add_test(NAME ${target} COMMAND ${target})
|
add_test(NAME ${target} COMMAND ${target})
|
||||||
endforeach(target)
|
endforeach(target)
|
||||||
|
|
||||||
foreach(target ${BENCHMARK_TARGETS})
|
foreach(target ${BENCHMARK_TARGETS})
|
||||||
add_executable(${target} re2/testing/${target}.cc util/benchmark.cc)
|
add_executable(${target} re2/testing/${target}.cc util/benchmark.cc)
|
||||||
target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
|
target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||||
endforeach(target)
|
endforeach(target)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -149,6 +154,12 @@ set(RE2_HEADERS
|
|||||||
re2/stringpiece.h
|
re2/stringpiece.h
|
||||||
)
|
)
|
||||||
|
|
||||||
install(FILES ${RE2_HEADERS} DESTINATION include/re2)
|
install(FILES ${RE2_HEADERS}
|
||||||
install(TARGETS re2 EXPORT re2Config ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin INCLUDES DESTINATION include)
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2)
|
||||||
install(EXPORT re2Config DESTINATION lib/cmake/re2 NAMESPACE re2::)
|
install(TARGETS re2 EXPORT re2Config
|
||||||
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||||
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||||
|
install(EXPORT re2Config
|
||||||
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::)
|
||||||
|
88
extern/re2/Makefile
vendored
88
extern/re2/Makefile
vendored
@ -44,7 +44,7 @@ endif
|
|||||||
|
|
||||||
# ABI version
|
# ABI version
|
||||||
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
||||||
SONAME=0
|
SONAME=9
|
||||||
|
|
||||||
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
|
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
|
||||||
# access for Unicode data), uncomment the following line:
|
# access for Unicode data), uncomment the following line:
|
||||||
@ -55,7 +55,7 @@ ifeq ($(shell uname),Darwin)
|
|||||||
SOEXT=dylib
|
SOEXT=dylib
|
||||||
SOEXTVER=$(SONAME).$(SOEXT)
|
SOEXTVER=$(SONAME).$(SOEXT)
|
||||||
SOEXTVER00=$(SONAME).0.0.$(SOEXT)
|
SOEXTVER00=$(SONAME).0.0.$(SOEXT)
|
||||||
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
|
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
else ifeq ($(shell uname),SunOS)
|
else ifeq ($(shell uname),SunOS)
|
||||||
SOEXT=so
|
SOEXT=so
|
||||||
SOEXTVER=$(SOEXT).$(SONAME)
|
SOEXTVER=$(SOEXT).$(SONAME)
|
||||||
@ -68,6 +68,7 @@ SOEXTVER00=$(SOEXT).$(SONAME).0.0
|
|||||||
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
|
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: obj/libre2.a obj/so/libre2.$(SOEXT)
|
all: obj/libre2.a obj/so/libre2.$(SOEXT)
|
||||||
|
|
||||||
INSTALL_HFILES=\
|
INSTALL_HFILES=\
|
||||||
@ -80,24 +81,25 @@ HFILES=\
|
|||||||
util/benchmark.h\
|
util/benchmark.h\
|
||||||
util/flags.h\
|
util/flags.h\
|
||||||
util/logging.h\
|
util/logging.h\
|
||||||
|
util/malloc_counter.h\
|
||||||
util/mix.h\
|
util/mix.h\
|
||||||
util/mutex.h\
|
util/mutex.h\
|
||||||
util/pcre.h\
|
util/pcre.h\
|
||||||
util/pod_array.h\
|
|
||||||
util/sparse_array.h\
|
|
||||||
util/sparse_set.h\
|
|
||||||
util/strutil.h\
|
util/strutil.h\
|
||||||
util/test.h\
|
util/test.h\
|
||||||
util/utf.h\
|
util/utf.h\
|
||||||
util/util.h\
|
util/util.h\
|
||||||
re2/bitmap256.h\
|
re2/bitmap256.h\
|
||||||
re2/filtered_re2.h\
|
re2/filtered_re2.h\
|
||||||
|
re2/pod_array.h\
|
||||||
re2/prefilter.h\
|
re2/prefilter.h\
|
||||||
re2/prefilter_tree.h\
|
re2/prefilter_tree.h\
|
||||||
re2/prog.h\
|
re2/prog.h\
|
||||||
re2/re2.h\
|
re2/re2.h\
|
||||||
re2/regexp.h\
|
re2/regexp.h\
|
||||||
re2/set.h\
|
re2/set.h\
|
||||||
|
re2/sparse_array.h\
|
||||||
|
re2/sparse_set.h\
|
||||||
re2/stringpiece.h\
|
re2/stringpiece.h\
|
||||||
re2/testing/exhaustive_tester.h\
|
re2/testing/exhaustive_tester.h\
|
||||||
re2/testing/regexp_generator.h\
|
re2/testing/regexp_generator.h\
|
||||||
@ -175,117 +177,156 @@ DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
|
|||||||
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
|
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
|
||||||
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
|
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
|
||||||
|
|
||||||
|
.PRECIOUS: obj/%.o
|
||||||
obj/%.o: %.cc $(HFILES)
|
obj/%.o: %.cc $(HFILES)
|
||||||
@mkdir -p $$(dirname $@)
|
@mkdir -p $$(dirname $@)
|
||||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||||
|
|
||||||
|
.PRECIOUS: obj/dbg/%.o
|
||||||
obj/dbg/%.o: %.cc $(HFILES)
|
obj/dbg/%.o: %.cc $(HFILES)
|
||||||
@mkdir -p $$(dirname $@)
|
@mkdir -p $$(dirname $@)
|
||||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
|
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
|
||||||
|
|
||||||
|
.PRECIOUS: obj/so/%.o
|
||||||
obj/so/%.o: %.cc $(HFILES)
|
obj/so/%.o: %.cc $(HFILES)
|
||||||
@mkdir -p $$(dirname $@)
|
@mkdir -p $$(dirname $@)
|
||||||
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||||
|
|
||||||
|
.PRECIOUS: obj/libre2.a
|
||||||
obj/libre2.a: $(OFILES)
|
obj/libre2.a: $(OFILES)
|
||||||
@mkdir -p obj
|
@mkdir -p obj
|
||||||
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
|
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
|
||||||
|
|
||||||
|
.PRECIOUS: obj/dbg/libre2.a
|
||||||
obj/dbg/libre2.a: $(DOFILES)
|
obj/dbg/libre2.a: $(DOFILES)
|
||||||
@mkdir -p obj/dbg
|
@mkdir -p obj/dbg
|
||||||
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
|
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
|
||||||
|
|
||||||
obj/so/libre2.$(SOEXT): $(SOFILES)
|
.PRECIOUS: obj/so/libre2.$(SOEXT)
|
||||||
|
obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
|
||||||
@mkdir -p obj/so
|
@mkdir -p obj/so
|
||||||
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
|
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
|
||||||
ln -sf libre2.$(SOEXTVER) $@
|
ln -sf libre2.$(SOEXTVER) $@
|
||||||
|
|
||||||
|
.PRECIOUS: obj/dbg/test/%
|
||||||
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
|
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
|
||||||
@mkdir -p obj/dbg/test
|
@mkdir -p obj/dbg/test
|
||||||
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
.PRECIOUS: obj/test/%
|
||||||
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
||||||
@mkdir -p obj/test
|
@mkdir -p obj/test
|
||||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
# Test the shared lib, falling back to the static lib for private symbols
|
# Test the shared lib, falling back to the static lib for private symbols
|
||||||
|
.PRECIOUS: obj/so/test/%
|
||||||
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
||||||
@mkdir -p obj/so/test
|
@mkdir -p obj/so/test
|
||||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
# Filter out dump.o because testing::TempDir() isn't available for it.
|
||||||
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
|
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
|
||||||
@mkdir -p obj/test
|
@mkdir -p obj/test
|
||||||
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
|
# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
|
||||||
# is simply a way to check that the target builds and then to run it against a
|
# is simply a way to check that the target builds and then to run it against a
|
||||||
# fixed set of inputs. To perform real fuzzing, refer to the documentation for
|
# fixed set of inputs. To perform real fuzzing, refer to the documentation for
|
||||||
# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
|
# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
|
||||||
|
obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS)
|
||||||
obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
|
obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
|
||||||
@mkdir -p obj/test
|
@mkdir -p obj/test
|
||||||
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
ifdef REBUILD_TABLES
|
ifdef REBUILD_TABLES
|
||||||
|
.PRECIOUS: re2/perl_groups.cc
|
||||||
re2/perl_groups.cc: re2/make_perl_groups.pl
|
re2/perl_groups.cc: re2/make_perl_groups.pl
|
||||||
perl $< > $@
|
perl $< > $@
|
||||||
|
|
||||||
|
.PRECIOUS: re2/unicode_%.cc
|
||||||
re2/unicode_%.cc: re2/make_unicode_%.py
|
re2/unicode_%.cc: re2/make_unicode_%.py
|
||||||
python $< > $@
|
python $< > $@
|
||||||
|
|
||||||
.PRECIOUS: re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
.PHONY: distclean
|
||||||
distclean: clean
|
distclean: clean
|
||||||
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
rm -rf obj
|
rm -rf obj
|
||||||
rm -f re2/*.pyc
|
rm -f re2/*.pyc
|
||||||
|
|
||||||
|
.PHONY: testofiles
|
||||||
testofiles: $(TESTOFILES)
|
testofiles: $(TESTOFILES)
|
||||||
|
|
||||||
|
.PHONY: test
|
||||||
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
|
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
|
||||||
|
|
||||||
|
.PHONY: debug-test
|
||||||
debug-test: $(DTESTS)
|
debug-test: $(DTESTS)
|
||||||
@./runtests $(DTESTS)
|
@./runtests $(DTESTS)
|
||||||
|
|
||||||
|
.PHONY: static-test
|
||||||
static-test: $(TESTS)
|
static-test: $(TESTS)
|
||||||
@./runtests $(TESTS)
|
@./runtests $(TESTS)
|
||||||
|
|
||||||
|
.PHONY: shared-test
|
||||||
shared-test: $(STESTS)
|
shared-test: $(STESTS)
|
||||||
@./runtests -shared-library-path obj/so $(STESTS)
|
@./runtests -shared-library-path obj/so $(STESTS)
|
||||||
|
|
||||||
|
.PHONY: debug-bigtest
|
||||||
debug-bigtest: $(DTESTS) $(DBIGTESTS)
|
debug-bigtest: $(DTESTS) $(DBIGTESTS)
|
||||||
@./runtests $(DTESTS) $(DBIGTESTS)
|
@./runtests $(DTESTS) $(DBIGTESTS)
|
||||||
|
|
||||||
|
.PHONY: static-bigtest
|
||||||
static-bigtest: $(TESTS) $(BIGTESTS)
|
static-bigtest: $(TESTS) $(BIGTESTS)
|
||||||
@./runtests $(TESTS) $(BIGTESTS)
|
@./runtests $(TESTS) $(BIGTESTS)
|
||||||
|
|
||||||
|
.PHONY: shared-bigtest
|
||||||
shared-bigtest: $(STESTS) $(SBIGTESTS)
|
shared-bigtest: $(STESTS) $(SBIGTESTS)
|
||||||
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
|
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
|
||||||
|
|
||||||
|
.PHONY: benchmark
|
||||||
benchmark: obj/test/regexp_benchmark
|
benchmark: obj/test/regexp_benchmark
|
||||||
|
|
||||||
|
.PHONY: fuzz
|
||||||
fuzz: obj/test/re2_fuzzer
|
fuzz: obj/test/re2_fuzzer
|
||||||
|
|
||||||
install: obj/libre2.a obj/so/libre2.$(SOEXT)
|
.PHONY: install
|
||||||
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
|
install: static-install shared-install
|
||||||
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
|
|
||||||
|
.PHONY: static
|
||||||
|
static: obj/libre2.a
|
||||||
|
|
||||||
|
.PHONY: static-install
|
||||||
|
static-install: obj/libre2.a common-install
|
||||||
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
|
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
|
||||||
|
|
||||||
|
.PHONY: shared
|
||||||
|
shared: obj/so/libre2.$(SOEXT)
|
||||||
|
|
||||||
|
.PHONY: shared-install
|
||||||
|
shared-install: obj/so/libre2.$(SOEXT) common-install
|
||||||
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
|
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
|
||||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
|
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
|
||||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
|
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
|
||||||
$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
|
||||||
$(SED_INPLACE) -e "s#@prefix@#${prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
|
||||||
$(SED_INPLACE) -e "s#@exec_prefix@#${exec_prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
|
||||||
$(SED_INPLACE) -e "s#@includedir@#${includedir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
|
||||||
$(SED_INPLACE) -e "s#@libdir@#${libdir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
|
||||||
|
|
||||||
|
.PHONY: common-install
|
||||||
|
common-install:
|
||||||
|
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
|
||||||
|
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
|
||||||
|
$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||||
|
$(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||||
|
$(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||||
|
|
||||||
|
.PHONY: testinstall
|
||||||
testinstall: static-testinstall shared-testinstall
|
testinstall: static-testinstall shared-testinstall
|
||||||
@echo
|
@echo
|
||||||
@echo Install tests passed.
|
@echo Install tests passed.
|
||||||
@echo
|
@echo
|
||||||
|
|
||||||
|
.PHONY: static-testinstall
|
||||||
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
||||||
static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
|
static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
|
||||||
static-testinstall:
|
static-testinstall:
|
||||||
@ -300,6 +341,7 @@ else
|
|||||||
obj/testinstall
|
obj/testinstall
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
.PHONY: shared-testinstall
|
||||||
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
||||||
shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
|
shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
|
||||||
shared-testinstall:
|
shared-testinstall:
|
||||||
@ -312,19 +354,14 @@ else
|
|||||||
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
|
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
.PHONY: benchlog
|
||||||
benchlog: obj/test/regexp_benchmark
|
benchlog: obj/test/regexp_benchmark
|
||||||
(echo '==BENCHMARK==' `hostname` `date`; \
|
(echo '==BENCHMARK==' `hostname` `date`; \
|
||||||
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
|
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
|
||||||
echo; \
|
echo; \
|
||||||
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
|
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
|
||||||
|
|
||||||
# Keep gmake from deleting intermediate files it creates.
|
.PHONY: log
|
||||||
# This makes repeated builds faster and preserves debug info on OS X.
|
|
||||||
|
|
||||||
.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \
|
|
||||||
obj/dbg/libre2.a obj/so/libre2.a \
|
|
||||||
obj/test/% obj/so/test/% obj/dbg/test/%
|
|
||||||
|
|
||||||
log:
|
log:
|
||||||
$(MAKE) clean
|
$(MAKE) clean
|
||||||
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
|
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
|
||||||
@ -340,6 +377,3 @@ log:
|
|||||||
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
|
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
|
||||||
echo '#' $$(date) >>re2-search.txt
|
echo '#' $$(date) >>re2-search.txt
|
||||||
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
|
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
|
||||||
|
|
||||||
x: x.cc obj/libre2.a
|
|
||||||
g++ -I. -o x x.cc obj/libre2.a
|
|
||||||
|
5
extern/re2/README
vendored
5
extern/re2/README
vendored
@ -27,12 +27,15 @@ under the BSD-style license found in the LICENSE file.
|
|||||||
|
|
||||||
RE2's native language is C++.
|
RE2's native language is C++.
|
||||||
|
|
||||||
|
The Python wrapper is at https://github.com/google/re2/tree/abseil/python
|
||||||
|
and on PyPI (https://pypi.org/project/google-re2/).
|
||||||
|
|
||||||
A C wrapper is at https://github.com/marcomaggi/cre2/.
|
A C wrapper is at https://github.com/marcomaggi/cre2/.
|
||||||
An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
|
An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
|
||||||
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
|
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
|
||||||
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
|
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
|
||||||
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
|
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
|
||||||
A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
|
A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
|
||||||
A Python wrapper is at https://github.com/facebook/pyre2/ and on PyPI (pypi.org).
|
|
||||||
An R wrapper is at https://github.com/qinwf/re2r/ and on CRAN (cran.r-project.org).
|
An R wrapper is at https://github.com/qinwf/re2r/ and on CRAN (cran.r-project.org).
|
||||||
A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
|
A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
|
||||||
|
A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
|
||||||
|
9
extern/re2/WORKSPACE
vendored
9
extern/re2/WORKSPACE
vendored
@ -3,4 +3,13 @@
|
|||||||
# license that can be found in the LICENSE file.
|
# license that can be found in the LICENSE file.
|
||||||
|
|
||||||
# Bazel (http://bazel.io/) WORKSPACE file for RE2.
|
# Bazel (http://bazel.io/) WORKSPACE file for RE2.
|
||||||
|
|
||||||
workspace(name = "com_googlesource_code_re2")
|
workspace(name = "com_googlesource_code_re2")
|
||||||
|
|
||||||
|
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
||||||
|
|
||||||
|
http_archive(
|
||||||
|
name = "rules_cc",
|
||||||
|
strip_prefix = "rules_cc-master",
|
||||||
|
urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
|
||||||
|
)
|
||||||
|
0
extern/re2/benchlog/benchplot.py
vendored
Normal file → Executable file
0
extern/re2/benchlog/benchplot.py
vendored
Normal file → Executable file
0
extern/re2/benchlog/mktable
vendored
Normal file → Executable file
0
extern/re2/benchlog/mktable
vendored
Normal file → Executable file
3
extern/re2/doc/mksyntaxgo
vendored
Normal file → Executable file
3
extern/re2/doc/mksyntaxgo
vendored
Normal file → Executable file
@ -15,7 +15,7 @@ sam -d $out <<'!'
|
|||||||
,s/\n\n\n+/\n\n/g
|
,s/\n\n\n+/\n\n/g
|
||||||
,x/(^.* .*\n)+/ | awk -F' ' '{printf(" %-14s %s\n", $1, $2)}'
|
,x/(^.* .*\n)+/ | awk -F' ' '{printf(" %-14s %s\n", $1, $2)}'
|
||||||
1,2c
|
1,2c
|
||||||
// Copyright 2012 The Go Authors. All rights reserved.
|
// Copyright 2012 The Go Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
@ -33,6 +33,7 @@ Parts of the syntax can be disabled by passing alternate flags to Parse.
|
|||||||
|
|
||||||
.
|
.
|
||||||
$a
|
$a
|
||||||
|
Unicode character classes are those in unicode.Categories and unicode.Scripts.
|
||||||
*/
|
*/
|
||||||
package syntax
|
package syntax
|
||||||
.
|
.
|
||||||
|
0
extern/re2/doc/mksyntaxhtml
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxhtml
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxwiki
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxwiki
vendored
Normal file → Executable file
273
extern/re2/doc/syntax.html
vendored
273
extern/re2/doc/syntax.html
vendored
@ -47,6 +47,10 @@
|
|||||||
<tr><td><code><font color=#808080>x{-n}</font></code></td><td>(≡ <code>x{n}?</code>) <font size=-2>VIM</font></td></tr>
|
<tr><td><code><font color=#808080>x{-n}</font></code></td><td>(≡ <code>x{n}?</code>) <font size=-2>VIM</font></td></tr>
|
||||||
<tr><td><code><font color=#808080>x=</font></code></td><td>(≡ <code>x?</code>) <font size=-2>VIM</font></td></tr>
|
<tr><td><code><font color=#808080>x=</font></code></td><td>(≡ <code>x?</code>) <font size=-2>VIM</font></td></tr>
|
||||||
<tr><td></td></tr>
|
<tr><td></td></tr>
|
||||||
|
<tr><td colspan=2>Implementation restriction: The counting forms <code>x{n,m}</code>, <code>x{n,}</code>, and <code>x{n}</code></td></tr>
|
||||||
|
<tr><td colspan=2>reject forms that create a minimum or maximum repetition count above 1000.</td></tr>
|
||||||
|
<tr><td colspan=2>Unlimited repetitions are not subject to this restriction.</td></tr>
|
||||||
|
<tr><td></td></tr>
|
||||||
<tr><td colspan=2><b>Possessive repetitions:</b></td></tr>
|
<tr><td colspan=2><b>Possessive repetitions:</b></td></tr>
|
||||||
<tr><td><code><font color=#808080>x*+</font></code></td><td>zero or more <code>x</code>, possessive </td></tr>
|
<tr><td><code><font color=#808080>x*+</font></code></td><td>zero or more <code>x</code>, possessive </td></tr>
|
||||||
<tr><td><code><font color=#808080>x++</font></code></td><td>one or more <code>x</code>, possessive </td></tr>
|
<tr><td><code><font color=#808080>x++</font></code></td><td>one or more <code>x</code>, possessive </td></tr>
|
||||||
@ -56,10 +60,10 @@
|
|||||||
<tr><td><code><font color=#808080>x{n}+</font></code></td><td>exactly <code>n</code> <code>x</code>, possessive </td></tr>
|
<tr><td><code><font color=#808080>x{n}+</font></code></td><td>exactly <code>n</code> <code>x</code>, possessive </td></tr>
|
||||||
<tr><td></td></tr>
|
<tr><td></td></tr>
|
||||||
<tr><td colspan=2><b>Grouping:</b></td></tr>
|
<tr><td colspan=2><b>Grouping:</b></td></tr>
|
||||||
<tr><td><code>(re)</code></td><td>numbered capturing group</td></tr>
|
<tr><td><code>(re)</code></td><td>numbered capturing group (submatch)</td></tr>
|
||||||
<tr><td><code>(?P<name>re)</code></td><td>named & numbered capturing group</td></tr>
|
<tr><td><code>(?P<name>re)</code></td><td>named & numbered capturing group (submatch)</td></tr>
|
||||||
<tr><td><code><font color=#808080>(?<name>re)</font></code></td><td>named & numbered capturing group </td></tr>
|
<tr><td><code><font color=#808080>(?<name>re)</font></code></td><td>named & numbered capturing group (submatch) </td></tr>
|
||||||
<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named & numbered capturing group </td></tr>
|
<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named & numbered capturing group (submatch) </td></tr>
|
||||||
<tr><td><code>(?:re)</code></td><td>non-capturing group</td></tr>
|
<tr><td><code>(?:re)</code></td><td>non-capturing group</td></tr>
|
||||||
<tr><td><code>(?flags)</code></td><td>set flags within current group; non-capturing</td></tr>
|
<tr><td><code>(?flags)</code></td><td>set flags within current group; non-capturing</td></tr>
|
||||||
<tr><td><code>(?flags:re)</code></td><td>set flags during re; non-capturing</td></tr>
|
<tr><td><code>(?flags:re)</code></td><td>set flags during re; non-capturing</td></tr>
|
||||||
@ -80,8 +84,8 @@
|
|||||||
<tr><td><code>^</code></td><td>at beginning of text or line (<code>m</code>=true)</td></tr>
|
<tr><td><code>^</code></td><td>at beginning of text or line (<code>m</code>=true)</td></tr>
|
||||||
<tr><td><code>$</code></td><td>at end of text (like <code>\z</code> not <code>\Z</code>) or line (<code>m</code>=true)</td></tr>
|
<tr><td><code>$</code></td><td>at end of text (like <code>\z</code> not <code>\Z</code>) or line (<code>m</code>=true)</td></tr>
|
||||||
<tr><td><code>\A</code></td><td>at beginning of text</td></tr>
|
<tr><td><code>\A</code></td><td>at beginning of text</td></tr>
|
||||||
<tr><td><code>\b</code></td><td>at word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
|
<tr><td><code>\b</code></td><td>at ASCII word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
|
||||||
<tr><td><code>\B</code></td><td>not a word boundary</td></tr>
|
<tr><td><code>\B</code></td><td>not at ASCII word boundary</td></tr>
|
||||||
<tr><td><code><font color=#808080>\G</font></code></td><td>at beginning of subtext being searched <font size=-2>PCRE</font></td></tr>
|
<tr><td><code><font color=#808080>\G</font></code></td><td>at beginning of subtext being searched <font size=-2>PCRE</font></td></tr>
|
||||||
<tr><td><code><font color=#808080>\G</font></code></td><td>at end of last match <font size=-2>PERL</font></td></tr>
|
<tr><td><code><font color=#808080>\G</font></code></td><td>at end of last match <font size=-2>PERL</font></td></tr>
|
||||||
<tr><td><code><font color=#808080>\Z</font></code></td><td>at end of text, or before newline at end of text </td></tr>
|
<tr><td><code><font color=#808080>\Z</font></code></td><td>at end of text, or before newline at end of text </td></tr>
|
||||||
@ -166,7 +170,7 @@
|
|||||||
<tr><td><code>[\p{Name}]</code></td><td>named Unicode property inside character class (≡ <code>\p{Name}</code>)</td></tr>
|
<tr><td><code>[\p{Name}]</code></td><td>named Unicode property inside character class (≡ <code>\p{Name}</code>)</td></tr>
|
||||||
<tr><td><code>[^\p{Name}]</code></td><td>named Unicode property inside negated character class (≡ <code>\P{Name}</code>)</td></tr>
|
<tr><td><code>[^\p{Name}]</code></td><td>named Unicode property inside negated character class (≡ <code>\P{Name}</code>)</td></tr>
|
||||||
<tr><td></td></tr>
|
<tr><td></td></tr>
|
||||||
<tr><td colspan=2><b>Perl character classes:</b></td></tr>
|
<tr><td colspan=2><b>Perl character classes (all ASCII-only):</b></td></tr>
|
||||||
<tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>
|
<tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>
|
||||||
<tr><td><code>\D</code></td><td>not digits (≡ <code>[^0-9]</code>)</td></tr>
|
<tr><td><code>\D</code></td><td>not digits (≡ <code>[^0-9]</code>)</td></tr>
|
||||||
<tr><td><code>\s</code></td><td>whitespace (≡ <code>[\t\n\f\r ]</code>)</td></tr>
|
<tr><td><code>\s</code></td><td>whitespace (≡ <code>[\t\n\f\r ]</code>)</td></tr>
|
||||||
@ -237,105 +241,162 @@
|
|||||||
<tr><td><code>Zs</code></td><td>space separator</td></tr>
|
<tr><td><code>Zs</code></td><td>space separator</td></tr>
|
||||||
<tr><td></td></tr>
|
<tr><td></td></tr>
|
||||||
<tr><td colspan=2><b>Unicode character class names--scripts:</b></td></tr>
|
<tr><td colspan=2><b>Unicode character class names--scripts:</b></td></tr>
|
||||||
<tr><td><code>Arabic</code></td><td>Arabic</td></tr>
|
<tr><td colspan=2>Adlam</td></tr>
|
||||||
<tr><td><code>Armenian</code></td><td>Armenian</td></tr>
|
<tr><td colspan=2>Ahom</td></tr>
|
||||||
<tr><td><code>Balinese</code></td><td>Balinese</td></tr>
|
<tr><td colspan=2>Anatolian_Hieroglyphs</td></tr>
|
||||||
<tr><td><code>Bamum</code></td><td>Bamum</td></tr>
|
<tr><td colspan=2>Arabic</td></tr>
|
||||||
<tr><td><code>Batak</code></td><td>Batak</td></tr>
|
<tr><td colspan=2>Armenian</td></tr>
|
||||||
<tr><td><code>Bengali</code></td><td>Bengali</td></tr>
|
<tr><td colspan=2>Avestan</td></tr>
|
||||||
<tr><td><code>Bopomofo</code></td><td>Bopomofo</td></tr>
|
<tr><td colspan=2>Balinese</td></tr>
|
||||||
<tr><td><code>Brahmi</code></td><td>Brahmi</td></tr>
|
<tr><td colspan=2>Bamum</td></tr>
|
||||||
<tr><td><code>Braille</code></td><td>Braille</td></tr>
|
<tr><td colspan=2>Bassa_Vah</td></tr>
|
||||||
<tr><td><code>Buginese</code></td><td>Buginese</td></tr>
|
<tr><td colspan=2>Batak</td></tr>
|
||||||
<tr><td><code>Buhid</code></td><td>Buhid</td></tr>
|
<tr><td colspan=2>Bengali</td></tr>
|
||||||
<tr><td><code>Canadian_Aboriginal</code></td><td>Canadian Aboriginal</td></tr>
|
<tr><td colspan=2>Bhaiksuki</td></tr>
|
||||||
<tr><td><code>Carian</code></td><td>Carian</td></tr>
|
<tr><td colspan=2>Bopomofo</td></tr>
|
||||||
<tr><td><code>Chakma</code></td><td>Chakma</td></tr>
|
<tr><td colspan=2>Brahmi</td></tr>
|
||||||
<tr><td><code>Cham</code></td><td>Cham</td></tr>
|
<tr><td colspan=2>Braille</td></tr>
|
||||||
<tr><td><code>Cherokee</code></td><td>Cherokee</td></tr>
|
<tr><td colspan=2>Buginese</td></tr>
|
||||||
<tr><td><code>Common</code></td><td>characters not specific to one script</td></tr>
|
<tr><td colspan=2>Buhid</td></tr>
|
||||||
<tr><td><code>Coptic</code></td><td>Coptic</td></tr>
|
<tr><td colspan=2>Canadian_Aboriginal</td></tr>
|
||||||
<tr><td><code>Cuneiform</code></td><td>Cuneiform</td></tr>
|
<tr><td colspan=2>Carian</td></tr>
|
||||||
<tr><td><code>Cypriot</code></td><td>Cypriot</td></tr>
|
<tr><td colspan=2>Caucasian_Albanian</td></tr>
|
||||||
<tr><td><code>Cyrillic</code></td><td>Cyrillic</td></tr>
|
<tr><td colspan=2>Chakma</td></tr>
|
||||||
<tr><td><code>Deseret</code></td><td>Deseret</td></tr>
|
<tr><td colspan=2>Cham</td></tr>
|
||||||
<tr><td><code>Devanagari</code></td><td>Devanagari</td></tr>
|
<tr><td colspan=2>Cherokee</td></tr>
|
||||||
<tr><td><code>Egyptian_Hieroglyphs</code></td><td>Egyptian Hieroglyphs</td></tr>
|
<tr><td colspan=2>Chorasmian</td></tr>
|
||||||
<tr><td><code>Ethiopic</code></td><td>Ethiopic</td></tr>
|
<tr><td colspan=2>Common</td></tr>
|
||||||
<tr><td><code>Georgian</code></td><td>Georgian</td></tr>
|
<tr><td colspan=2>Coptic</td></tr>
|
||||||
<tr><td><code>Glagolitic</code></td><td>Glagolitic</td></tr>
|
<tr><td colspan=2>Cuneiform</td></tr>
|
||||||
<tr><td><code>Gothic</code></td><td>Gothic</td></tr>
|
<tr><td colspan=2>Cypriot</td></tr>
|
||||||
<tr><td><code>Greek</code></td><td>Greek</td></tr>
|
<tr><td colspan=2>Cyrillic</td></tr>
|
||||||
<tr><td><code>Gujarati</code></td><td>Gujarati</td></tr>
|
<tr><td colspan=2>Deseret</td></tr>
|
||||||
<tr><td><code>Gurmukhi</code></td><td>Gurmukhi</td></tr>
|
<tr><td colspan=2>Devanagari</td></tr>
|
||||||
<tr><td><code>Han</code></td><td>Han</td></tr>
|
<tr><td colspan=2>Dives_Akuru</td></tr>
|
||||||
<tr><td><code>Hangul</code></td><td>Hangul</td></tr>
|
<tr><td colspan=2>Dogra</td></tr>
|
||||||
<tr><td><code>Hanunoo</code></td><td>Hanunoo</td></tr>
|
<tr><td colspan=2>Duployan</td></tr>
|
||||||
<tr><td><code>Hebrew</code></td><td>Hebrew</td></tr>
|
<tr><td colspan=2>Egyptian_Hieroglyphs</td></tr>
|
||||||
<tr><td><code>Hiragana</code></td><td>Hiragana</td></tr>
|
<tr><td colspan=2>Elbasan</td></tr>
|
||||||
<tr><td><code>Imperial_Aramaic</code></td><td>Imperial Aramaic</td></tr>
|
<tr><td colspan=2>Elymaic</td></tr>
|
||||||
<tr><td><code>Inherited</code></td><td>inherit script from previous character</td></tr>
|
<tr><td colspan=2>Ethiopic</td></tr>
|
||||||
<tr><td><code>Inscriptional_Pahlavi</code></td><td>Inscriptional Pahlavi</td></tr>
|
<tr><td colspan=2>Georgian</td></tr>
|
||||||
<tr><td><code>Inscriptional_Parthian</code></td><td>Inscriptional Parthian</td></tr>
|
<tr><td colspan=2>Glagolitic</td></tr>
|
||||||
<tr><td><code>Javanese</code></td><td>Javanese</td></tr>
|
<tr><td colspan=2>Gothic</td></tr>
|
||||||
<tr><td><code>Kaithi</code></td><td>Kaithi</td></tr>
|
<tr><td colspan=2>Grantha</td></tr>
|
||||||
<tr><td><code>Kannada</code></td><td>Kannada</td></tr>
|
<tr><td colspan=2>Greek</td></tr>
|
||||||
<tr><td><code>Katakana</code></td><td>Katakana</td></tr>
|
<tr><td colspan=2>Gujarati</td></tr>
|
||||||
<tr><td><code>Kayah_Li</code></td><td>Kayah Li</td></tr>
|
<tr><td colspan=2>Gunjala_Gondi</td></tr>
|
||||||
<tr><td><code>Kharoshthi</code></td><td>Kharoshthi</td></tr>
|
<tr><td colspan=2>Gurmukhi</td></tr>
|
||||||
<tr><td><code>Khmer</code></td><td>Khmer</td></tr>
|
<tr><td colspan=2>Han</td></tr>
|
||||||
<tr><td><code>Lao</code></td><td>Lao</td></tr>
|
<tr><td colspan=2>Hangul</td></tr>
|
||||||
<tr><td><code>Latin</code></td><td>Latin</td></tr>
|
<tr><td colspan=2>Hanifi_Rohingya</td></tr>
|
||||||
<tr><td><code>Lepcha</code></td><td>Lepcha</td></tr>
|
<tr><td colspan=2>Hanunoo</td></tr>
|
||||||
<tr><td><code>Limbu</code></td><td>Limbu</td></tr>
|
<tr><td colspan=2>Hatran</td></tr>
|
||||||
<tr><td><code>Linear_B</code></td><td>Linear B</td></tr>
|
<tr><td colspan=2>Hebrew</td></tr>
|
||||||
<tr><td><code>Lycian</code></td><td>Lycian</td></tr>
|
<tr><td colspan=2>Hiragana</td></tr>
|
||||||
<tr><td><code>Lydian</code></td><td>Lydian</td></tr>
|
<tr><td colspan=2>Imperial_Aramaic</td></tr>
|
||||||
<tr><td><code>Malayalam</code></td><td>Malayalam</td></tr>
|
<tr><td colspan=2>Inherited</td></tr>
|
||||||
<tr><td><code>Mandaic</code></td><td>Mandaic</td></tr>
|
<tr><td colspan=2>Inscriptional_Pahlavi</td></tr>
|
||||||
<tr><td><code>Meetei_Mayek</code></td><td>Meetei Mayek</td></tr>
|
<tr><td colspan=2>Inscriptional_Parthian</td></tr>
|
||||||
<tr><td><code>Meroitic_Cursive</code></td><td>Meroitic Cursive</td></tr>
|
<tr><td colspan=2>Javanese</td></tr>
|
||||||
<tr><td><code>Meroitic_Hieroglyphs</code></td><td>Meroitic Hieroglyphs</td></tr>
|
<tr><td colspan=2>Kaithi</td></tr>
|
||||||
<tr><td><code>Miao</code></td><td>Miao</td></tr>
|
<tr><td colspan=2>Kannada</td></tr>
|
||||||
<tr><td><code>Mongolian</code></td><td>Mongolian</td></tr>
|
<tr><td colspan=2>Katakana</td></tr>
|
||||||
<tr><td><code>Myanmar</code></td><td>Myanmar</td></tr>
|
<tr><td colspan=2>Kayah_Li</td></tr>
|
||||||
<tr><td><code>New_Tai_Lue</code></td><td>New Tai Lue (aka Simplified Tai Lue)</td></tr>
|
<tr><td colspan=2>Kharoshthi</td></tr>
|
||||||
<tr><td><code>Nko</code></td><td>Nko</td></tr>
|
<tr><td colspan=2>Khitan_Small_Script</td></tr>
|
||||||
<tr><td><code>Ogham</code></td><td>Ogham</td></tr>
|
<tr><td colspan=2>Khmer</td></tr>
|
||||||
<tr><td><code>Ol_Chiki</code></td><td>Ol Chiki</td></tr>
|
<tr><td colspan=2>Khojki</td></tr>
|
||||||
<tr><td><code>Old_Italic</code></td><td>Old Italic</td></tr>
|
<tr><td colspan=2>Khudawadi</td></tr>
|
||||||
<tr><td><code>Old_Persian</code></td><td>Old Persian</td></tr>
|
<tr><td colspan=2>Lao</td></tr>
|
||||||
<tr><td><code>Old_South_Arabian</code></td><td>Old South Arabian</td></tr>
|
<tr><td colspan=2>Latin</td></tr>
|
||||||
<tr><td><code>Old_Turkic</code></td><td>Old Turkic</td></tr>
|
<tr><td colspan=2>Lepcha</td></tr>
|
||||||
<tr><td><code>Oriya</code></td><td>Oriya</td></tr>
|
<tr><td colspan=2>Limbu</td></tr>
|
||||||
<tr><td><code>Osmanya</code></td><td>Osmanya</td></tr>
|
<tr><td colspan=2>Linear_A</td></tr>
|
||||||
<tr><td><code>Phags_Pa</code></td><td>'Phags Pa</td></tr>
|
<tr><td colspan=2>Linear_B</td></tr>
|
||||||
<tr><td><code>Phoenician</code></td><td>Phoenician</td></tr>
|
<tr><td colspan=2>Lisu</td></tr>
|
||||||
<tr><td><code>Rejang</code></td><td>Rejang</td></tr>
|
<tr><td colspan=2>Lycian</td></tr>
|
||||||
<tr><td><code>Runic</code></td><td>Runic</td></tr>
|
<tr><td colspan=2>Lydian</td></tr>
|
||||||
<tr><td><code>Saurashtra</code></td><td>Saurashtra</td></tr>
|
<tr><td colspan=2>Mahajani</td></tr>
|
||||||
<tr><td><code>Sharada</code></td><td>Sharada</td></tr>
|
<tr><td colspan=2>Makasar</td></tr>
|
||||||
<tr><td><code>Shavian</code></td><td>Shavian</td></tr>
|
<tr><td colspan=2>Malayalam</td></tr>
|
||||||
<tr><td><code>Sinhala</code></td><td>Sinhala</td></tr>
|
<tr><td colspan=2>Mandaic</td></tr>
|
||||||
<tr><td><code>Sora_Sompeng</code></td><td>Sora Sompeng</td></tr>
|
<tr><td colspan=2>Manichaean</td></tr>
|
||||||
<tr><td><code>Sundanese</code></td><td>Sundanese</td></tr>
|
<tr><td colspan=2>Marchen</td></tr>
|
||||||
<tr><td><code>Syloti_Nagri</code></td><td>Syloti Nagri</td></tr>
|
<tr><td colspan=2>Masaram_Gondi</td></tr>
|
||||||
<tr><td><code>Syriac</code></td><td>Syriac</td></tr>
|
<tr><td colspan=2>Medefaidrin</td></tr>
|
||||||
<tr><td><code>Tagalog</code></td><td>Tagalog</td></tr>
|
<tr><td colspan=2>Meetei_Mayek</td></tr>
|
||||||
<tr><td><code>Tagbanwa</code></td><td>Tagbanwa</td></tr>
|
<tr><td colspan=2>Mende_Kikakui</td></tr>
|
||||||
<tr><td><code>Tai_Le</code></td><td>Tai Le</td></tr>
|
<tr><td colspan=2>Meroitic_Cursive</td></tr>
|
||||||
<tr><td><code>Tai_Tham</code></td><td>Tai Tham</td></tr>
|
<tr><td colspan=2>Meroitic_Hieroglyphs</td></tr>
|
||||||
<tr><td><code>Tai_Viet</code></td><td>Tai Viet</td></tr>
|
<tr><td colspan=2>Miao</td></tr>
|
||||||
<tr><td><code>Takri</code></td><td>Takri</td></tr>
|
<tr><td colspan=2>Modi</td></tr>
|
||||||
<tr><td><code>Tamil</code></td><td>Tamil</td></tr>
|
<tr><td colspan=2>Mongolian</td></tr>
|
||||||
<tr><td><code>Telugu</code></td><td>Telugu</td></tr>
|
<tr><td colspan=2>Mro</td></tr>
|
||||||
<tr><td><code>Thaana</code></td><td>Thaana</td></tr>
|
<tr><td colspan=2>Multani</td></tr>
|
||||||
<tr><td><code>Thai</code></td><td>Thai</td></tr>
|
<tr><td colspan=2>Myanmar</td></tr>
|
||||||
<tr><td><code>Tibetan</code></td><td>Tibetan</td></tr>
|
<tr><td colspan=2>Nabataean</td></tr>
|
||||||
<tr><td><code>Tifinagh</code></td><td>Tifinagh</td></tr>
|
<tr><td colspan=2>Nandinagari</td></tr>
|
||||||
<tr><td><code>Ugaritic</code></td><td>Ugaritic</td></tr>
|
<tr><td colspan=2>New_Tai_Lue</td></tr>
|
||||||
<tr><td><code>Vai</code></td><td>Vai</td></tr>
|
<tr><td colspan=2>Newa</td></tr>
|
||||||
<tr><td><code>Yi</code></td><td>Yi</td></tr>
|
<tr><td colspan=2>Nko</td></tr>
|
||||||
|
<tr><td colspan=2>Nushu</td></tr>
|
||||||
|
<tr><td colspan=2>Nyiakeng_Puachue_Hmong</td></tr>
|
||||||
|
<tr><td colspan=2>Ogham</td></tr>
|
||||||
|
<tr><td colspan=2>Ol_Chiki</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Hungarian</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Italic</td></tr>
|
||||||
|
<tr><td colspan=2>Old_North_Arabian</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Permic</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Persian</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Sogdian</td></tr>
|
||||||
|
<tr><td colspan=2>Old_South_Arabian</td></tr>
|
||||||
|
<tr><td colspan=2>Old_Turkic</td></tr>
|
||||||
|
<tr><td colspan=2>Oriya</td></tr>
|
||||||
|
<tr><td colspan=2>Osage</td></tr>
|
||||||
|
<tr><td colspan=2>Osmanya</td></tr>
|
||||||
|
<tr><td colspan=2>Pahawh_Hmong</td></tr>
|
||||||
|
<tr><td colspan=2>Palmyrene</td></tr>
|
||||||
|
<tr><td colspan=2>Pau_Cin_Hau</td></tr>
|
||||||
|
<tr><td colspan=2>Phags_Pa</td></tr>
|
||||||
|
<tr><td colspan=2>Phoenician</td></tr>
|
||||||
|
<tr><td colspan=2>Psalter_Pahlavi</td></tr>
|
||||||
|
<tr><td colspan=2>Rejang</td></tr>
|
||||||
|
<tr><td colspan=2>Runic</td></tr>
|
||||||
|
<tr><td colspan=2>Samaritan</td></tr>
|
||||||
|
<tr><td colspan=2>Saurashtra</td></tr>
|
||||||
|
<tr><td colspan=2>Sharada</td></tr>
|
||||||
|
<tr><td colspan=2>Shavian</td></tr>
|
||||||
|
<tr><td colspan=2>Siddham</td></tr>
|
||||||
|
<tr><td colspan=2>SignWriting</td></tr>
|
||||||
|
<tr><td colspan=2>Sinhala</td></tr>
|
||||||
|
<tr><td colspan=2>Sogdian</td></tr>
|
||||||
|
<tr><td colspan=2>Sora_Sompeng</td></tr>
|
||||||
|
<tr><td colspan=2>Soyombo</td></tr>
|
||||||
|
<tr><td colspan=2>Sundanese</td></tr>
|
||||||
|
<tr><td colspan=2>Syloti_Nagri</td></tr>
|
||||||
|
<tr><td colspan=2>Syriac</td></tr>
|
||||||
|
<tr><td colspan=2>Tagalog</td></tr>
|
||||||
|
<tr><td colspan=2>Tagbanwa</td></tr>
|
||||||
|
<tr><td colspan=2>Tai_Le</td></tr>
|
||||||
|
<tr><td colspan=2>Tai_Tham</td></tr>
|
||||||
|
<tr><td colspan=2>Tai_Viet</td></tr>
|
||||||
|
<tr><td colspan=2>Takri</td></tr>
|
||||||
|
<tr><td colspan=2>Tamil</td></tr>
|
||||||
|
<tr><td colspan=2>Tangut</td></tr>
|
||||||
|
<tr><td colspan=2>Telugu</td></tr>
|
||||||
|
<tr><td colspan=2>Thaana</td></tr>
|
||||||
|
<tr><td colspan=2>Thai</td></tr>
|
||||||
|
<tr><td colspan=2>Tibetan</td></tr>
|
||||||
|
<tr><td colspan=2>Tifinagh</td></tr>
|
||||||
|
<tr><td colspan=2>Tirhuta</td></tr>
|
||||||
|
<tr><td colspan=2>Ugaritic</td></tr>
|
||||||
|
<tr><td colspan=2>Vai</td></tr>
|
||||||
|
<tr><td colspan=2>Wancho</td></tr>
|
||||||
|
<tr><td colspan=2>Warang_Citi</td></tr>
|
||||||
|
<tr><td colspan=2>Yezidi</td></tr>
|
||||||
|
<tr><td colspan=2>Yi</td></tr>
|
||||||
|
<tr><td colspan=2>Zanabazar_Square</td></tr>
|
||||||
<tr><td></td></tr>
|
<tr><td></td></tr>
|
||||||
<tr><td colspan=2><b>Vim character classes:</b></td></tr>
|
<tr><td colspan=2><b>Vim character classes:</b></td></tr>
|
||||||
<tr><td><code><font color=#808080>\i</font></code></td><td>identifier character <font size=-2>VIM</font></td></tr>
|
<tr><td><code><font color=#808080>\i</font></code></td><td>identifier character <font size=-2>VIM</font></td></tr>
|
||||||
|
4
extern/re2/doc/syntax.txt
vendored
4
extern/re2/doc/syntax.txt
vendored
@ -253,6 +253,7 @@ Caucasian_Albanian
|
|||||||
Chakma
|
Chakma
|
||||||
Cham
|
Cham
|
||||||
Cherokee
|
Cherokee
|
||||||
|
Chorasmian
|
||||||
Common
|
Common
|
||||||
Coptic
|
Coptic
|
||||||
Cuneiform
|
Cuneiform
|
||||||
@ -260,6 +261,7 @@ Cypriot
|
|||||||
Cyrillic
|
Cyrillic
|
||||||
Deseret
|
Deseret
|
||||||
Devanagari
|
Devanagari
|
||||||
|
Dives_Akuru
|
||||||
Dogra
|
Dogra
|
||||||
Duployan
|
Duployan
|
||||||
Egyptian_Hieroglyphs
|
Egyptian_Hieroglyphs
|
||||||
@ -291,6 +293,7 @@ Kannada
|
|||||||
Katakana
|
Katakana
|
||||||
Kayah_Li
|
Kayah_Li
|
||||||
Kharoshthi
|
Kharoshthi
|
||||||
|
Khitan_Small_Script
|
||||||
Khmer
|
Khmer
|
||||||
Khojki
|
Khojki
|
||||||
Khudawadi
|
Khudawadi
|
||||||
@ -380,6 +383,7 @@ Ugaritic
|
|||||||
Vai
|
Vai
|
||||||
Wancho
|
Wancho
|
||||||
Warang_Citi
|
Warang_Citi
|
||||||
|
Yezidi
|
||||||
Yi
|
Yi
|
||||||
Zanabazar_Square
|
Zanabazar_Square
|
||||||
|
|
||||||
|
25
extern/re2/kokoro/cmake.sh
vendored
25
extern/re2/kokoro/cmake.sh
vendored
@ -1,25 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -eux
|
|
||||||
|
|
||||||
cd git/re2
|
|
||||||
|
|
||||||
case "${KOKORO_JOB_NAME}" in
|
|
||||||
*/windows-*)
|
|
||||||
CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64')
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
CMAKE_G_A_FLAGS=()
|
|
||||||
# Work around a bug in older versions of bash. :/
|
|
||||||
set +u
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" .
|
|
||||||
cmake --build . --config Debug --clean-first
|
|
||||||
ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
|
|
||||||
|
|
||||||
cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" .
|
|
||||||
cmake --build . --config Release --clean-first
|
|
||||||
ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
|
|
||||||
|
|
||||||
exit 0
|
|
1
extern/re2/kokoro/macos-bazel.cfg
vendored
1
extern/re2/kokoro/macos-bazel.cfg
vendored
@ -1 +0,0 @@
|
|||||||
build_file: "re2/kokoro/macos-bazel.sh"
|
|
4
extern/re2/kokoro/macos-bazel.sh
vendored
4
extern/re2/kokoro/macos-bazel.sh
vendored
@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -eux
|
|
||||||
bash git/re2/kokoro/bazel.sh
|
|
||||||
exit $?
|
|
1
extern/re2/kokoro/macos-cmake.cfg
vendored
1
extern/re2/kokoro/macos-cmake.cfg
vendored
@ -1 +0,0 @@
|
|||||||
build_file: "re2/kokoro/macos-cmake.sh"
|
|
4
extern/re2/kokoro/macos-cmake.sh
vendored
4
extern/re2/kokoro/macos-cmake.sh
vendored
@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -eux
|
|
||||||
bash git/re2/kokoro/cmake.sh
|
|
||||||
exit $?
|
|
1
extern/re2/kokoro/ubuntu-bazel.cfg
vendored
1
extern/re2/kokoro/ubuntu-bazel.cfg
vendored
@ -1 +0,0 @@
|
|||||||
build_file: "re2/kokoro/ubuntu-bazel.sh"
|
|
4
extern/re2/kokoro/ubuntu-bazel.sh
vendored
4
extern/re2/kokoro/ubuntu-bazel.sh
vendored
@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -eux
|
|
||||||
bash git/re2/kokoro/bazel.sh
|
|
||||||
exit $?
|
|
2
extern/re2/kokoro/windows-bazel.bat
vendored
2
extern/re2/kokoro/windows-bazel.bat
vendored
@ -1,2 +0,0 @@
|
|||||||
bash git/re2/kokoro/bazel.sh
|
|
||||||
EXIT /B %ERRORLEVEL%
|
|
1
extern/re2/kokoro/windows-bazel.cfg
vendored
1
extern/re2/kokoro/windows-bazel.cfg
vendored
@ -1 +0,0 @@
|
|||||||
build_file: "re2/kokoro/windows-bazel.bat"
|
|
2
extern/re2/kokoro/windows-cmake.bat
vendored
2
extern/re2/kokoro/windows-cmake.bat
vendored
@ -1,2 +0,0 @@
|
|||||||
bash git/re2/kokoro/cmake.sh
|
|
||||||
EXIT /B %ERRORLEVEL%
|
|
1
extern/re2/kokoro/windows-cmake.cfg
vendored
1
extern/re2/kokoro/windows-cmake.cfg
vendored
@ -1 +0,0 @@
|
|||||||
build_file: "re2/kokoro/windows-cmake.bat"
|
|
0
extern/re2/lib/git/commit-msg.hook
vendored
Normal file → Executable file
0
extern/re2/lib/git/commit-msg.hook
vendored
Normal file → Executable file
3
extern/re2/libre2.symbols
vendored
3
extern/re2/libre2.symbols
vendored
@ -11,6 +11,9 @@
|
|||||||
# re2::FilteredRE2*
|
# re2::FilteredRE2*
|
||||||
_ZN3re211FilteredRE2*;
|
_ZN3re211FilteredRE2*;
|
||||||
_ZNK3re211FilteredRE2*;
|
_ZNK3re211FilteredRE2*;
|
||||||
|
# re2::re2_internal*
|
||||||
|
_ZN3re212re2_internal*;
|
||||||
|
_ZNK3re212re2_internal*;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
};
|
};
|
||||||
|
3
extern/re2/libre2.symbols.darwin
vendored
3
extern/re2/libre2.symbols.darwin
vendored
@ -10,3 +10,6 @@ __ZN3re2ls*
|
|||||||
# re2::FilteredRE2*
|
# re2::FilteredRE2*
|
||||||
__ZN3re211FilteredRE2*
|
__ZN3re211FilteredRE2*
|
||||||
__ZNK3re211FilteredRE2*
|
__ZNK3re211FilteredRE2*
|
||||||
|
# re2::re2_internal*
|
||||||
|
__ZN3re212re2_internal*
|
||||||
|
__ZNK3re212re2_internal*
|
||||||
|
2
extern/re2/re2.pc
vendored
2
extern/re2/re2.pc
vendored
@ -1,5 +1,3 @@
|
|||||||
prefix=@prefix@
|
|
||||||
exec_prefix=@exec_prefix@
|
|
||||||
includedir=@includedir@
|
includedir=@includedir@
|
||||||
libdir=@libdir@
|
libdir=@libdir@
|
||||||
|
|
||||||
|
7
extern/re2/re2/bitmap256.h
vendored
7
extern/re2/re2/bitmap256.h
vendored
@ -32,7 +32,7 @@ class Bitmap256 {
|
|||||||
DCHECK_GE(c, 0);
|
DCHECK_GE(c, 0);
|
||||||
DCHECK_LE(c, 255);
|
DCHECK_LE(c, 255);
|
||||||
|
|
||||||
return (words_[c / 64] & (1ULL << (c % 64))) != 0;
|
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets the bit with index c.
|
// Sets the bit with index c.
|
||||||
@ -40,7 +40,7 @@ class Bitmap256 {
|
|||||||
DCHECK_GE(c, 0);
|
DCHECK_GE(c, 0);
|
||||||
DCHECK_LE(c, 255);
|
DCHECK_LE(c, 255);
|
||||||
|
|
||||||
words_[c / 64] |= (1ULL << (c % 64));
|
words_[c / 64] |= (uint64_t{1} << (c % 64));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finds the next non-zero bit with index >= c.
|
// Finds the next non-zero bit with index >= c.
|
||||||
@ -51,7 +51,6 @@ class Bitmap256 {
|
|||||||
// Finds the least significant non-zero bit in n.
|
// Finds the least significant non-zero bit in n.
|
||||||
static int FindLSBSet(uint64_t n) {
|
static int FindLSBSet(uint64_t n) {
|
||||||
DCHECK_NE(n, 0);
|
DCHECK_NE(n, 0);
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
return __builtin_ctzll(n);
|
return __builtin_ctzll(n);
|
||||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||||
@ -89,7 +88,7 @@ int Bitmap256::FindNextSetBit(int c) const {
|
|||||||
|
|
||||||
// Check the word that contains the bit. Mask out any lower bits.
|
// Check the word that contains the bit. Mask out any lower bits.
|
||||||
int i = c / 64;
|
int i = c / 64;
|
||||||
uint64_t word = words_[i] & (~0ULL << (c % 64));
|
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
|
||||||
if (word != 0)
|
if (word != 0)
|
||||||
return (i * 64) + FindLSBSet(word);
|
return (i * 64) + FindLSBSet(word);
|
||||||
|
|
||||||
|
47
extern/re2/re2/bitstate.cc
vendored
47
extern/re2/re2/bitstate.cc
vendored
@ -7,7 +7,7 @@
|
|||||||
// Prog::SearchBitState is a regular expression search with submatch
|
// Prog::SearchBitState is a regular expression search with submatch
|
||||||
// tracking for small regular expressions and texts. Similarly to
|
// tracking for small regular expressions and texts. Similarly to
|
||||||
// testing/backtrack.cc, it allocates a bitmap with (count of
|
// testing/backtrack.cc, it allocates a bitmap with (count of
|
||||||
// lists) * (length of prog) bits to make sure it never explores the
|
// lists) * (length of text) bits to make sure it never explores the
|
||||||
// same (instruction list, character position) multiple times. This
|
// same (instruction list, character position) multiple times. This
|
||||||
// limits the search to run in time linear in the length of the text.
|
// limits the search to run in time linear in the length of the text.
|
||||||
//
|
//
|
||||||
@ -24,7 +24,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
|
||||||
@ -63,11 +63,14 @@ class BitState {
|
|||||||
int nsubmatch_; // # of submatches to fill in
|
int nsubmatch_; // # of submatches to fill in
|
||||||
|
|
||||||
// Search state
|
// Search state
|
||||||
static const int VisitedBits = 32;
|
static constexpr int kVisitedBits = 64;
|
||||||
PODArray<uint32_t> visited_; // bitmap: (list ID, char*) pairs visited
|
PODArray<uint64_t> visited_; // bitmap: (list ID, char*) pairs visited
|
||||||
PODArray<const char*> cap_; // capture registers
|
PODArray<const char*> cap_; // capture registers
|
||||||
PODArray<Job> job_; // stack of text positions to explore
|
PODArray<Job> job_; // stack of text positions to explore
|
||||||
int njob_; // stack size
|
int njob_; // stack size
|
||||||
|
|
||||||
|
BitState(const BitState&) = delete;
|
||||||
|
BitState& operator=(const BitState&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
BitState::BitState(Prog* prog)
|
BitState::BitState(Prog* prog)
|
||||||
@ -86,10 +89,10 @@ BitState::BitState(Prog* prog)
|
|||||||
// we don't repeat the visit.
|
// we don't repeat the visit.
|
||||||
bool BitState::ShouldVisit(int id, const char* p) {
|
bool BitState::ShouldVisit(int id, const char* p) {
|
||||||
int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
|
int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
|
||||||
static_cast<int>(p-text_.begin());
|
static_cast<int>(p-text_.data());
|
||||||
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
|
if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1))))
|
||||||
return false;
|
return false;
|
||||||
visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
|
visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -134,7 +137,7 @@ void BitState::Push(int id, const char* p) {
|
|||||||
// Return whether it succeeded.
|
// Return whether it succeeded.
|
||||||
bool BitState::TrySearch(int id0, const char* p0) {
|
bool BitState::TrySearch(int id0, const char* p0) {
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
const char* end = text_.end();
|
const char* end = text_.data() + text_.size();
|
||||||
njob_ = 0;
|
njob_ = 0;
|
||||||
// Push() no longer checks ShouldVisit(),
|
// Push() no longer checks ShouldVisit(),
|
||||||
// so we must perform the check ourselves.
|
// so we must perform the check ourselves.
|
||||||
@ -251,7 +254,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
|
|||||||
matched = true;
|
matched = true;
|
||||||
cap_[1] = p;
|
cap_[1] = p;
|
||||||
if (submatch_[0].data() == NULL ||
|
if (submatch_[0].data() == NULL ||
|
||||||
(longest_ && p > submatch_[0].end())) {
|
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
|
||||||
for (int i = 0; i < nsubmatch_; i++)
|
for (int i = 0; i < nsubmatch_; i++)
|
||||||
submatch_[i] =
|
submatch_[i] =
|
||||||
StringPiece(cap_[2 * i],
|
StringPiece(cap_[2 * i],
|
||||||
@ -288,7 +291,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
// Search parameters.
|
// Search parameters.
|
||||||
text_ = text;
|
text_ = text;
|
||||||
context_ = context;
|
context_ = context;
|
||||||
if (context_.begin() == NULL)
|
if (context_.data() == NULL)
|
||||||
context_ = text;
|
context_ = text;
|
||||||
if (prog_->anchor_start() && context_.begin() != text.begin())
|
if (prog_->anchor_start() && context_.begin() != text.begin())
|
||||||
return false;
|
return false;
|
||||||
@ -304,8 +307,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
|
|
||||||
// Allocate scratch space.
|
// Allocate scratch space.
|
||||||
int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
|
int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
|
||||||
nvisited = (nvisited + VisitedBits-1) / VisitedBits;
|
nvisited = (nvisited + kVisitedBits-1) / kVisitedBits;
|
||||||
visited_ = PODArray<uint32_t>(nvisited);
|
visited_ = PODArray<uint64_t>(nvisited);
|
||||||
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
|
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
|
||||||
|
|
||||||
int ncap = 2*nsubmatch;
|
int ncap = 2*nsubmatch;
|
||||||
@ -319,8 +322,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
|
|
||||||
// Anchored search must start at text.begin().
|
// Anchored search must start at text.begin().
|
||||||
if (anchored_) {
|
if (anchored_) {
|
||||||
cap_[0] = text.begin();
|
cap_[0] = text.data();
|
||||||
return TrySearch(prog_->start(), text.begin());
|
return TrySearch(prog_->start(), text.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unanchored search, starting from each possible text position.
|
// Unanchored search, starting from each possible text position.
|
||||||
@ -329,18 +332,22 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
// This looks like it's quadratic in the size of the text,
|
// This looks like it's quadratic in the size of the text,
|
||||||
// but we are not clearing visited_ between calls to TrySearch,
|
// but we are not clearing visited_ between calls to TrySearch,
|
||||||
// so no work is duplicated and it ends up still being linear.
|
// so no work is duplicated and it ends up still being linear.
|
||||||
for (const char* p = text.begin(); p <= text.end(); p++) {
|
const char* etext = text.data() + text.size();
|
||||||
// Try to use memchr to find the first byte quickly.
|
for (const char* p = text.data(); p <= etext; p++) {
|
||||||
int fb = prog_->first_byte();
|
// Try to use prefix accel (e.g. memchr) to skip ahead.
|
||||||
if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
|
if (p < etext && prog_->can_prefix_accel()) {
|
||||||
p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
|
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
|
||||||
if (p == NULL)
|
if (p == NULL)
|
||||||
p = text.end();
|
p = etext;
|
||||||
}
|
}
|
||||||
|
|
||||||
cap_[0] = p;
|
cap_[0] = p;
|
||||||
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
|
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
|
||||||
return true;
|
return true;
|
||||||
|
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||||
|
// by simply not continuing the loop.
|
||||||
|
if (p == NULL)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
234
extern/re2/re2/compile.cc
vendored
234
extern/re2/re2/compile.cc
vendored
@ -14,8 +14,8 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
@ -30,91 +30,57 @@ namespace re2 {
|
|||||||
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
|
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
|
||||||
//
|
//
|
||||||
// Because the out and out1 fields in Inst are no longer pointers,
|
// Because the out and out1 fields in Inst are no longer pointers,
|
||||||
// we can't use pointers directly here either. Instead, p refers
|
// we can't use pointers directly here either. Instead, head refers
|
||||||
// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
|
// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
|
||||||
// p == 0 represents the NULL list. This is okay because instruction #0
|
// head == 0 represents the NULL list. This is okay because instruction #0
|
||||||
// is always the fail instruction, which never appears on a list.
|
// is always the fail instruction, which never appears on a list.
|
||||||
|
|
||||||
struct PatchList {
|
struct PatchList {
|
||||||
uint32_t p;
|
|
||||||
|
|
||||||
// Returns patch list containing just p.
|
// Returns patch list containing just p.
|
||||||
static PatchList Mk(uint32_t p);
|
static PatchList Mk(uint32_t p) {
|
||||||
|
return {p, p};
|
||||||
|
}
|
||||||
|
|
||||||
// Patches all the entries on l to have value v.
|
// Patches all the entries on l to have value p.
|
||||||
// Caller must not ever use patch list again.
|
// Caller must not ever use patch list again.
|
||||||
static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
|
static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
|
||||||
|
while (l.head != 0) {
|
||||||
// Deref returns the next pointer pointed at by p.
|
Prog::Inst* ip = &inst0[l.head>>1];
|
||||||
static PatchList Deref(Prog::Inst *inst0, PatchList l);
|
if (l.head&1) {
|
||||||
|
l.head = ip->out1();
|
||||||
// Appends two patch lists and returns result.
|
ip->out1_ = p;
|
||||||
static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
|
} else {
|
||||||
};
|
l.head = ip->out();
|
||||||
|
ip->set_out(p);
|
||||||
static PatchList nullPatchList = { 0 };
|
}
|
||||||
|
|
||||||
// Returns patch list containing just p.
|
|
||||||
PatchList PatchList::Mk(uint32_t p) {
|
|
||||||
PatchList l;
|
|
||||||
l.p = p;
|
|
||||||
return l;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the next pointer pointed at by l.
|
|
||||||
PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
|
|
||||||
Prog::Inst* ip = &inst0[l.p>>1];
|
|
||||||
if (l.p&1)
|
|
||||||
l.p = ip->out1();
|
|
||||||
else
|
|
||||||
l.p = ip->out();
|
|
||||||
return l;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Patches all the entries on l to have value v.
|
|
||||||
void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) {
|
|
||||||
while (l.p != 0) {
|
|
||||||
Prog::Inst* ip = &inst0[l.p>>1];
|
|
||||||
if (l.p&1) {
|
|
||||||
l.p = ip->out1();
|
|
||||||
ip->out1_ = val;
|
|
||||||
} else {
|
|
||||||
l.p = ip->out();
|
|
||||||
ip->set_out(val);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Appends two patch lists and returns result.
|
// Appends two patch lists and returns result.
|
||||||
PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
|
static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
|
||||||
if (l1.p == 0)
|
if (l1.head == 0)
|
||||||
return l2;
|
return l2;
|
||||||
if (l2.p == 0)
|
if (l2.head == 0)
|
||||||
return l1;
|
return l1;
|
||||||
|
Prog::Inst* ip = &inst0[l1.tail>>1];
|
||||||
PatchList l = l1;
|
if (l1.tail&1)
|
||||||
for (;;) {
|
ip->out1_ = l2.head;
|
||||||
PatchList next = PatchList::Deref(inst0, l);
|
else
|
||||||
if (next.p == 0)
|
ip->set_out(l2.head);
|
||||||
break;
|
return {l1.head, l2.tail};
|
||||||
l = next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Prog::Inst* ip = &inst0[l.p>>1];
|
uint32_t head;
|
||||||
if (l.p&1)
|
uint32_t tail; // for constant-time append
|
||||||
ip->out1_ = l2.p;
|
};
|
||||||
else
|
|
||||||
ip->set_out(l2.p);
|
|
||||||
|
|
||||||
return l1;
|
static const PatchList kNullPatchList = {0, 0};
|
||||||
}
|
|
||||||
|
|
||||||
// Compiled program fragment.
|
// Compiled program fragment.
|
||||||
struct Frag {
|
struct Frag {
|
||||||
uint32_t begin;
|
uint32_t begin;
|
||||||
PatchList end;
|
PatchList end;
|
||||||
|
|
||||||
Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
|
Frag() : begin(0) { end.head = 0; } // needed so Frag can go in vector
|
||||||
Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
|
Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -212,8 +178,8 @@ class Compiler : public Regexp::Walker<Frag> {
|
|||||||
int AddSuffixRecursive(int root, int id);
|
int AddSuffixRecursive(int root, int id);
|
||||||
|
|
||||||
// Finds the trie node for the given suffix. Returns a Frag in order to
|
// Finds the trie node for the given suffix. Returns a Frag in order to
|
||||||
// distinguish between pointing at the root node directly (end.p == 0)
|
// distinguish between pointing at the root node directly (end.head == 0)
|
||||||
// and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
|
// and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
|
||||||
Frag FindByteRange(int root, int id);
|
Frag FindByteRange(int root, int id);
|
||||||
|
|
||||||
// Compares two ByteRanges and returns true iff they are equal.
|
// Compares two ByteRanges and returns true iff they are equal.
|
||||||
@ -225,8 +191,8 @@ class Compiler : public Regexp::Walker<Frag> {
|
|||||||
// Single rune.
|
// Single rune.
|
||||||
Frag Literal(Rune r, bool foldcase);
|
Frag Literal(Rune r, bool foldcase);
|
||||||
|
|
||||||
void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor);
|
void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
|
||||||
Prog* Finish();
|
Prog* Finish(Regexp* re);
|
||||||
|
|
||||||
// Returns .* where dot = any byte
|
// Returns .* where dot = any byte
|
||||||
Frag DotStar();
|
Frag DotStar();
|
||||||
@ -298,7 +264,7 @@ int Compiler::AllocInst(int n) {
|
|||||||
|
|
||||||
// Returns an unmatchable fragment.
|
// Returns an unmatchable fragment.
|
||||||
Frag Compiler::NoMatch() {
|
Frag Compiler::NoMatch() {
|
||||||
return Frag(0, nullPatchList);
|
return Frag(0, kNullPatchList);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is a an unmatchable fragment?
|
// Is a an unmatchable fragment?
|
||||||
@ -314,7 +280,7 @@ Frag Compiler::Cat(Frag a, Frag b) {
|
|||||||
// Elide no-op.
|
// Elide no-op.
|
||||||
Prog::Inst* begin = &inst_[a.begin];
|
Prog::Inst* begin = &inst_[a.begin];
|
||||||
if (begin->opcode() == kInstNop &&
|
if (begin->opcode() == kInstNop &&
|
||||||
a.end.p == (a.begin << 1) &&
|
a.end.head == (a.begin << 1) &&
|
||||||
begin->out() == 0) {
|
begin->out() == 0) {
|
||||||
// in case refs to a somewhere
|
// in case refs to a somewhere
|
||||||
PatchList::Patch(inst_.data(), a.end, b.begin);
|
PatchList::Patch(inst_.data(), a.end, b.begin);
|
||||||
@ -419,7 +385,7 @@ Frag Compiler::Match(int32_t match_id) {
|
|||||||
if (id < 0)
|
if (id < 0)
|
||||||
return NoMatch();
|
return NoMatch();
|
||||||
inst_[id].InitMatch(match_id);
|
inst_[id].InitMatch(match_id);
|
||||||
return Frag(id, nullPatchList);
|
return Frag(id, kNullPatchList);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a fragment matching a particular empty-width op (like ^ or $)
|
// Returns a fragment matching a particular empty-width op (like ^ or $)
|
||||||
@ -467,7 +433,7 @@ static int MaxRune(int len) {
|
|||||||
void Compiler::BeginRange() {
|
void Compiler::BeginRange() {
|
||||||
rune_cache_.clear();
|
rune_cache_.clear();
|
||||||
rune_range_.begin = 0;
|
rune_range_.begin = 0;
|
||||||
rune_range_.end = nullPatchList;
|
rune_range_.end = kNullPatchList;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
|
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
|
||||||
@ -548,9 +514,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int br;
|
int br;
|
||||||
if (f.end.p == 0)
|
if (f.end.head == 0)
|
||||||
br = root;
|
br = root;
|
||||||
else if (f.end.p&1)
|
else if (f.end.head&1)
|
||||||
br = inst_[f.begin].out1();
|
br = inst_[f.begin].out1();
|
||||||
else
|
else
|
||||||
br = inst_[f.begin].out();
|
br = inst_[f.begin].out();
|
||||||
@ -566,9 +532,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
|
|||||||
// Ensure that the parent points to the clone, not to the original.
|
// Ensure that the parent points to the clone, not to the original.
|
||||||
// Note that this could leave the head unreachable except via the cache.
|
// Note that this could leave the head unreachable except via the cache.
|
||||||
br = byterange;
|
br = byterange;
|
||||||
if (f.end.p == 0)
|
if (f.end.head == 0)
|
||||||
root = br;
|
root = br;
|
||||||
else if (f.end.p&1)
|
else if (f.end.head&1)
|
||||||
inst_[f.begin].out1_ = br;
|
inst_[f.begin].out1_ = br;
|
||||||
else
|
else
|
||||||
inst_[f.begin].set_out(br);
|
inst_[f.begin].set_out(br);
|
||||||
@ -601,7 +567,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
|
|||||||
Frag Compiler::FindByteRange(int root, int id) {
|
Frag Compiler::FindByteRange(int root, int id) {
|
||||||
if (inst_[root].opcode() == kInstByteRange) {
|
if (inst_[root].opcode() == kInstByteRange) {
|
||||||
if (ByteRangeEqual(root, id))
|
if (ByteRangeEqual(root, id))
|
||||||
return Frag(root, nullPatchList);
|
return Frag(root, kNullPatchList);
|
||||||
else
|
else
|
||||||
return NoMatch();
|
return NoMatch();
|
||||||
}
|
}
|
||||||
@ -662,48 +628,43 @@ void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
|
|||||||
static_cast<uint8_t>(hi), foldcase, 0));
|
static_cast<uint8_t>(hi), foldcase, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Table describing how to make a UTF-8 matching machine
|
|
||||||
// for the rune range 80-10FFFF (Runeself-Runemax).
|
|
||||||
// This range happens frequently enough (for example /./ and /[^a-z]/)
|
|
||||||
// and the rune_cache_ map is slow enough that this is worth
|
|
||||||
// special handling. Makes compilation of a small expression
|
|
||||||
// with a dot in it about 10% faster.
|
|
||||||
// The * in the comments below mark whole sequences.
|
|
||||||
static struct ByteRangeProg {
|
|
||||||
int next;
|
|
||||||
int lo;
|
|
||||||
int hi;
|
|
||||||
} prog_80_10ffff[] = {
|
|
||||||
// Two-byte
|
|
||||||
{ -1, 0x80, 0xBF, }, // 0: 80-BF
|
|
||||||
{ 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF*
|
|
||||||
|
|
||||||
// Three-byte
|
|
||||||
{ 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF
|
|
||||||
{ 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF*
|
|
||||||
{ 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF
|
|
||||||
{ 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF*
|
|
||||||
|
|
||||||
// Four-byte
|
|
||||||
{ 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF
|
|
||||||
{ 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF*
|
|
||||||
{ 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF
|
|
||||||
{ 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF*
|
|
||||||
{ 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF
|
|
||||||
{ 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF*
|
|
||||||
};
|
|
||||||
|
|
||||||
void Compiler::Add_80_10ffff() {
|
void Compiler::Add_80_10ffff() {
|
||||||
int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning
|
// The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
|
||||||
for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) {
|
// (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
|
||||||
const ByteRangeProg& p = prog_80_10ffff[i];
|
// permitting overlong encodings in E0 and F0 sequences and code points
|
||||||
int next = 0;
|
// over 10FFFF in F4 sequences, the size of the bytecode and the number
|
||||||
if (p.next >= 0)
|
// of equivalence classes are reduced significantly.
|
||||||
next = inst[p.next];
|
int id;
|
||||||
inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo),
|
if (reversed_) {
|
||||||
static_cast<uint8_t>(p.hi), false, next);
|
// Prefix factoring matters, but we don't have to handle it here
|
||||||
if ((p.lo & 0xC0) != 0x80)
|
// because the rune range trie logic takes care of that already.
|
||||||
AddSuffix(inst[i]);
|
id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
AddSuffix(id);
|
||||||
|
|
||||||
|
id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
AddSuffix(id);
|
||||||
|
|
||||||
|
id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||||
|
AddSuffix(id);
|
||||||
|
} else {
|
||||||
|
// Suffix factoring matters - and we do have to handle it here.
|
||||||
|
int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
|
||||||
|
id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
|
||||||
|
AddSuffix(id);
|
||||||
|
|
||||||
|
int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
|
||||||
|
id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
|
||||||
|
AddSuffix(id);
|
||||||
|
|
||||||
|
int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
|
||||||
|
id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
|
||||||
|
AddSuffix(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -711,9 +672,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
|
|||||||
if (lo > hi)
|
if (lo > hi)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Pick off 80-10FFFF as a common special case
|
// Pick off 80-10FFFF as a common special case.
|
||||||
// that can bypass the slow rune_cache_.
|
if (lo == 0x80 && hi == 0x10ffff) {
|
||||||
if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
|
|
||||||
Add_80_10ffff();
|
Add_80_10ffff();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1095,8 +1055,6 @@ static bool IsAnchorEnd(Regexp** pre, int depth) {
|
|||||||
|
|
||||||
void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
|
void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
|
||||||
RE2::Anchor anchor) {
|
RE2::Anchor anchor) {
|
||||||
prog_->set_flags(flags);
|
|
||||||
|
|
||||||
if (flags & Regexp::Latin1)
|
if (flags & Regexp::Latin1)
|
||||||
encoding_ = kEncodingLatin1;
|
encoding_ = kEncodingLatin1;
|
||||||
max_mem_ = max_mem;
|
max_mem_ = max_mem;
|
||||||
@ -1117,14 +1075,11 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
|
|||||||
// on the program.)
|
// on the program.)
|
||||||
if (m >= 1<<24)
|
if (m >= 1<<24)
|
||||||
m = 1<<24;
|
m = 1<<24;
|
||||||
|
|
||||||
// Inst imposes its own limit (currently bigger than 2^24 but be safe).
|
// Inst imposes its own limit (currently bigger than 2^24 but be safe).
|
||||||
if (m > Prog::Inst::kMaxInst)
|
if (m > Prog::Inst::kMaxInst)
|
||||||
m = Prog::Inst::kMaxInst;
|
m = Prog::Inst::kMaxInst;
|
||||||
|
|
||||||
max_ninst_ = static_cast<int>(m);
|
max_ninst_ = static_cast<int>(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
anchor_ = anchor;
|
anchor_ = anchor;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1178,10 +1133,10 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
|
|||||||
c.prog_->set_start_unanchored(all.begin);
|
c.prog_->set_start_unanchored(all.begin);
|
||||||
|
|
||||||
// Hand ownership of prog_ to caller.
|
// Hand ownership of prog_ to caller.
|
||||||
return c.Finish();
|
return c.Finish(re);
|
||||||
}
|
}
|
||||||
|
|
||||||
Prog* Compiler::Finish() {
|
Prog* Compiler::Finish(Regexp* re) {
|
||||||
if (failed_)
|
if (failed_)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -1198,6 +1153,17 @@ Prog* Compiler::Finish() {
|
|||||||
prog_->Flatten();
|
prog_->Flatten();
|
||||||
prog_->ComputeByteMap();
|
prog_->ComputeByteMap();
|
||||||
|
|
||||||
|
if (!prog_->reversed()) {
|
||||||
|
std::string prefix;
|
||||||
|
bool prefix_foldcase;
|
||||||
|
if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase) &&
|
||||||
|
!prefix_foldcase) {
|
||||||
|
prog_->prefix_size_ = prefix.size();
|
||||||
|
prog_->prefix_front_ = prefix.front();
|
||||||
|
prog_->prefix_back_ = prefix.back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Record remaining memory for DFA.
|
// Record remaining memory for DFA.
|
||||||
if (max_mem_ <= 0) {
|
if (max_mem_ <= 0) {
|
||||||
prog_->set_dfa_mem(1<<20);
|
prog_->set_dfa_mem(1<<20);
|
||||||
@ -1254,7 +1220,7 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
|
|||||||
c.prog_->set_start(all.begin);
|
c.prog_->set_start(all.begin);
|
||||||
c.prog_->set_start_unanchored(all.begin);
|
c.prog_->set_start_unanchored(all.begin);
|
||||||
|
|
||||||
Prog* prog = c.Finish();
|
Prog* prog = c.Finish(re);
|
||||||
if (prog == NULL)
|
if (prog == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
281
extern/re2/re2/dfa.cc
vendored
281
extern/re2/re2/dfa.cc
vendored
@ -39,10 +39,11 @@
|
|||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/mix.h"
|
#include "util/mix.h"
|
||||||
#include "util/mutex.h"
|
#include "util/mutex.h"
|
||||||
#include "util/pod_array.h"
|
|
||||||
#include "util/sparse_set.h"
|
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
|
#include "re2/re2.h"
|
||||||
|
#include "re2/sparse_set.h"
|
||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
|
|
||||||
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
|
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
|
||||||
@ -52,17 +53,6 @@
|
|||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
#if !defined(__linux__) /* only Linux seems to have memrchr */
|
|
||||||
static void* memrchr(const void* s, int c, size_t n) {
|
|
||||||
const unsigned char* p = (const unsigned char*)s;
|
|
||||||
for (p += n; n > 0; n--)
|
|
||||||
if (*--p == c)
|
|
||||||
return (void*)p;
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Controls whether the DFA should bail out early if the NFA would be faster.
|
// Controls whether the DFA should bail out early if the NFA would be faster.
|
||||||
static bool dfa_should_bail_when_slow = true;
|
static bool dfa_should_bail_when_slow = true;
|
||||||
|
|
||||||
@ -177,11 +167,8 @@ class DFA {
|
|||||||
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
|
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
|
// Make it easier to swap in a scalable reader-writer mutex.
|
||||||
enum {
|
using CacheMutex = Mutex;
|
||||||
kFbUnknown = -1, // No analysis has been performed.
|
|
||||||
kFbNone = -2, // The first-byte trick cannot be used.
|
|
||||||
};
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
// Indices into start_ for unanchored searches.
|
// Indices into start_ for unanchored searches.
|
||||||
@ -249,25 +236,26 @@ class DFA {
|
|||||||
struct SearchParams {
|
struct SearchParams {
|
||||||
SearchParams(const StringPiece& text, const StringPiece& context,
|
SearchParams(const StringPiece& text, const StringPiece& context,
|
||||||
RWLocker* cache_lock)
|
RWLocker* cache_lock)
|
||||||
: text(text), context(context),
|
: text(text),
|
||||||
|
context(context),
|
||||||
anchored(false),
|
anchored(false),
|
||||||
|
can_prefix_accel(false),
|
||||||
want_earliest_match(false),
|
want_earliest_match(false),
|
||||||
run_forward(false),
|
run_forward(false),
|
||||||
start(NULL),
|
start(NULL),
|
||||||
first_byte(kFbUnknown),
|
|
||||||
cache_lock(cache_lock),
|
cache_lock(cache_lock),
|
||||||
failed(false),
|
failed(false),
|
||||||
ep(NULL),
|
ep(NULL),
|
||||||
matches(NULL) { }
|
matches(NULL) {}
|
||||||
|
|
||||||
StringPiece text;
|
StringPiece text;
|
||||||
StringPiece context;
|
StringPiece context;
|
||||||
bool anchored;
|
bool anchored;
|
||||||
|
bool can_prefix_accel;
|
||||||
bool want_earliest_match;
|
bool want_earliest_match;
|
||||||
bool run_forward;
|
bool run_forward;
|
||||||
State* start;
|
State* start;
|
||||||
int first_byte;
|
RWLocker* cache_lock;
|
||||||
RWLocker *cache_lock;
|
|
||||||
bool failed; // "out" parameter: whether search gave up
|
bool failed; // "out" parameter: whether search gave up
|
||||||
const char* ep; // "out" parameter: end pointer for match
|
const char* ep; // "out" parameter: end pointer for match
|
||||||
SparseSet* matches;
|
SparseSet* matches;
|
||||||
@ -278,15 +266,13 @@ class DFA {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Before each search, the parameters to Search are analyzed by
|
// Before each search, the parameters to Search are analyzed by
|
||||||
// AnalyzeSearch to determine the state in which to start and the
|
// AnalyzeSearch to determine the state in which to start.
|
||||||
// "first_byte" for that state, if any.
|
|
||||||
struct StartInfo {
|
struct StartInfo {
|
||||||
StartInfo() : start(NULL), first_byte(kFbUnknown) {}
|
StartInfo() : start(NULL) {}
|
||||||
State* start;
|
std::atomic<State*> start;
|
||||||
std::atomic<int> first_byte;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Fills in params->start and params->first_byte using
|
// Fills in params->start and params->can_prefix_accel using
|
||||||
// the other search parameters. Returns true on success,
|
// the other search parameters. Returns true on success,
|
||||||
// false on failure.
|
// false on failure.
|
||||||
// cache_mutex_.r <= L < mutex_
|
// cache_mutex_.r <= L < mutex_
|
||||||
@ -297,10 +283,10 @@ class DFA {
|
|||||||
// The generic search loop, inlined to create specialized versions.
|
// The generic search loop, inlined to create specialized versions.
|
||||||
// cache_mutex_.r <= L < mutex_
|
// cache_mutex_.r <= L < mutex_
|
||||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
||||||
inline bool InlinedSearchLoop(SearchParams* params,
|
template <bool can_prefix_accel,
|
||||||
bool have_first_byte,
|
bool want_earliest_match,
|
||||||
bool want_earliest_match,
|
bool run_forward>
|
||||||
bool run_forward);
|
inline bool InlinedSearchLoop(SearchParams* params);
|
||||||
|
|
||||||
// The specialized versions of InlinedSearchLoop. The three letters
|
// The specialized versions of InlinedSearchLoop. The three letters
|
||||||
// at the ends of the name denote the true/false values used as the
|
// at the ends of the name denote the true/false values used as the
|
||||||
@ -322,13 +308,6 @@ class DFA {
|
|||||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
||||||
bool FastSearchLoop(SearchParams* params);
|
bool FastSearchLoop(SearchParams* params);
|
||||||
|
|
||||||
// For debugging, a slow search loop that calls InlinedSearchLoop
|
|
||||||
// directly -- because the booleans passed are not constants, the
|
|
||||||
// loop is not specialized like the SearchFFF etc. versions, so it
|
|
||||||
// runs much more slowly. Useful only for debugging.
|
|
||||||
// cache_mutex_.r <= L < mutex_
|
|
||||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
|
||||||
bool SlowSearchLoop(SearchParams* params);
|
|
||||||
|
|
||||||
// Looks up bytes in bytemap_ but handles case c == kByteEndText too.
|
// Looks up bytes in bytemap_ but handles case c == kByteEndText too.
|
||||||
int ByteMap(int c) {
|
int ByteMap(int c) {
|
||||||
@ -355,11 +334,14 @@ class DFA {
|
|||||||
// while holding cache_mutex_ for writing, to avoid interrupting other
|
// while holding cache_mutex_ for writing, to avoid interrupting other
|
||||||
// readers. Any State* pointers are only valid while cache_mutex_
|
// readers. Any State* pointers are only valid while cache_mutex_
|
||||||
// is held.
|
// is held.
|
||||||
Mutex cache_mutex_;
|
CacheMutex cache_mutex_;
|
||||||
int64_t mem_budget_; // Total memory budget for all States.
|
int64_t mem_budget_; // Total memory budget for all States.
|
||||||
int64_t state_budget_; // Amount of memory remaining for new States.
|
int64_t state_budget_; // Amount of memory remaining for new States.
|
||||||
StateSet state_cache_; // All States computed so far.
|
StateSet state_cache_; // All States computed so far.
|
||||||
StartInfo start_[kMaxStart];
|
StartInfo start_[kMaxStart];
|
||||||
|
|
||||||
|
DFA(const DFA&) = delete;
|
||||||
|
DFA& operator=(const DFA&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shorthand for casting to uint8_t*.
|
// Shorthand for casting to uint8_t*.
|
||||||
@ -442,7 +424,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
|
|||||||
q1_(NULL),
|
q1_(NULL),
|
||||||
mem_budget_(max_mem) {
|
mem_budget_(max_mem) {
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
|
fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
|
||||||
int nmark = 0;
|
int nmark = 0;
|
||||||
if (kind_ == Prog::kLongestMatch)
|
if (kind_ == Prog::kLongestMatch)
|
||||||
nmark = prog_->size();
|
nmark = prog_->size();
|
||||||
@ -613,7 +595,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
|
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
|
||||||
// those are the only operators with any effect in
|
// those are the only operators with any effect in
|
||||||
// RunWorkqOnEmptyString or RunWorkqOnByte.
|
// RunWorkqOnEmptyString or RunWorkqOnByte.
|
||||||
int* inst = new int[q->size()];
|
PODArray<int> inst(q->size());
|
||||||
int n = 0;
|
int n = 0;
|
||||||
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
|
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
|
||||||
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
|
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
|
||||||
@ -643,7 +625,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
(it == q->begin() && ip->greedy(prog_))) &&
|
(it == q->begin() && ip->greedy(prog_))) &&
|
||||||
(kind_ != Prog::kLongestMatch || !sawmark) &&
|
(kind_ != Prog::kLongestMatch || !sawmark) &&
|
||||||
(flag & kFlagMatch)) {
|
(flag & kFlagMatch)) {
|
||||||
delete[] inst;
|
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, " -> FullMatchState\n");
|
fprintf(stderr, " -> FullMatchState\n");
|
||||||
return FullMatchState;
|
return FullMatchState;
|
||||||
@ -690,7 +671,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
// the execution loop can stop early. This is only okay
|
// the execution loop can stop early. This is only okay
|
||||||
// if the state is *not* a matching state.
|
// if the state is *not* a matching state.
|
||||||
if (n == 0 && flag == 0) {
|
if (n == 0 && flag == 0) {
|
||||||
delete[] inst;
|
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, " -> DeadState\n");
|
fprintf(stderr, " -> DeadState\n");
|
||||||
return DeadState;
|
return DeadState;
|
||||||
@ -700,7 +680,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
// unordered state sets separated by Marks. Sort each set
|
// unordered state sets separated by Marks. Sort each set
|
||||||
// to canonicalize, to reduce the number of distinct sets stored.
|
// to canonicalize, to reduce the number of distinct sets stored.
|
||||||
if (kind_ == Prog::kLongestMatch) {
|
if (kind_ == Prog::kLongestMatch) {
|
||||||
int* ip = inst;
|
int* ip = inst.data();
|
||||||
int* ep = ip + n;
|
int* ep = ip + n;
|
||||||
while (ip < ep) {
|
while (ip < ep) {
|
||||||
int* markp = ip;
|
int* markp = ip;
|
||||||
@ -717,7 +697,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
// we have an unordered set of states (i.e. we don't have Marks)
|
// we have an unordered set of states (i.e. we don't have Marks)
|
||||||
// and sorting will reduce the number of distinct sets stored.
|
// and sorting will reduce the number of distinct sets stored.
|
||||||
if (kind_ == Prog::kManyMatch) {
|
if (kind_ == Prog::kManyMatch) {
|
||||||
int* ip = inst;
|
int* ip = inst.data();
|
||||||
int* ep = ip + n;
|
int* ep = ip + n;
|
||||||
std::sort(ip, ep);
|
std::sort(ip, ep);
|
||||||
}
|
}
|
||||||
@ -736,8 +716,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
|||||||
// Save the needed empty-width flags in the top bits for use later.
|
// Save the needed empty-width flags in the top bits for use later.
|
||||||
flag |= needflags << kFlagNeedShift;
|
flag |= needflags << kFlagNeedShift;
|
||||||
|
|
||||||
State* state = CachedState(inst, n, flag);
|
State* state = CachedState(inst.data(), n, flag);
|
||||||
delete[] inst;
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -971,8 +950,21 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case kInstByteRange: // can follow if c is in range
|
case kInstByteRange: // can follow if c is in range
|
||||||
if (ip->Matches(c))
|
if (!ip->Matches(c))
|
||||||
AddToQueue(newq, ip->out(), flag);
|
break;
|
||||||
|
AddToQueue(newq, ip->out(), flag);
|
||||||
|
if (ip->hint() != 0) {
|
||||||
|
// We have a hint, but we must cancel out the
|
||||||
|
// increment that will occur after the break.
|
||||||
|
i += ip->hint() - 1;
|
||||||
|
} else {
|
||||||
|
// We have no hint, so we must find the end
|
||||||
|
// of the current list and then skip to it.
|
||||||
|
Prog::Inst* ip0 = ip;
|
||||||
|
while (!ip->last())
|
||||||
|
++ip;
|
||||||
|
i += ip - ip0;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case kInstMatch:
|
case kInstMatch:
|
||||||
@ -989,8 +981,8 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
|
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
|
||||||
c, flag, DumpWorkq(newq).c_str(), *ismatch);
|
DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Processes input byte c in state, returning new state.
|
// Processes input byte c in state, returning new state.
|
||||||
@ -1117,7 +1109,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
|||||||
|
|
||||||
class DFA::RWLocker {
|
class DFA::RWLocker {
|
||||||
public:
|
public:
|
||||||
explicit RWLocker(Mutex* mu);
|
explicit RWLocker(CacheMutex* mu);
|
||||||
~RWLocker();
|
~RWLocker();
|
||||||
|
|
||||||
// If the lock is only held for reading right now,
|
// If the lock is only held for reading right now,
|
||||||
@ -1127,19 +1119,19 @@ class DFA::RWLocker {
|
|||||||
void LockForWriting();
|
void LockForWriting();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Mutex* mu_;
|
CacheMutex* mu_;
|
||||||
bool writing_;
|
bool writing_;
|
||||||
|
|
||||||
RWLocker(const RWLocker&) = delete;
|
RWLocker(const RWLocker&) = delete;
|
||||||
RWLocker& operator=(const RWLocker&) = delete;
|
RWLocker& operator=(const RWLocker&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
|
DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
|
||||||
mu_->ReaderLock();
|
mu_->ReaderLock();
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
|
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
|
||||||
// does not support lock upgrade.
|
// the annotations don't support lock upgrade.
|
||||||
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
|
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
|
||||||
if (!writing_) {
|
if (!writing_) {
|
||||||
mu_->ReaderUnlock();
|
mu_->ReaderUnlock();
|
||||||
@ -1171,11 +1163,14 @@ void DFA::ResetCache(RWLocker* cache_lock) {
|
|||||||
// Re-acquire the cache_mutex_ for writing (exclusive use).
|
// Re-acquire the cache_mutex_ for writing (exclusive use).
|
||||||
cache_lock->LockForWriting();
|
cache_lock->LockForWriting();
|
||||||
|
|
||||||
|
hooks::GetDFAStateCacheResetHook()({
|
||||||
|
state_budget_,
|
||||||
|
state_cache_.size(),
|
||||||
|
});
|
||||||
|
|
||||||
// Clear the cache, reset the memory budget.
|
// Clear the cache, reset the memory budget.
|
||||||
for (int i = 0; i < kMaxStart; i++) {
|
for (int i = 0; i < kMaxStart; i++)
|
||||||
start_[i].start = NULL;
|
start_[i].start.store(NULL, std::memory_order_relaxed);
|
||||||
start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
|
|
||||||
}
|
|
||||||
ClearCache();
|
ClearCache();
|
||||||
mem_budget_ = state_budget_;
|
mem_budget_ = state_budget_;
|
||||||
}
|
}
|
||||||
@ -1290,8 +1285,7 @@ DFA::State* DFA::StateSaver::Restore() {
|
|||||||
// situation, the DFA can do better than executing the simple loop.
|
// situation, the DFA can do better than executing the simple loop.
|
||||||
// Instead, it can call memchr to search very quickly for the byte c.
|
// Instead, it can call memchr to search very quickly for the byte c.
|
||||||
// Whether the start state has this property is determined during a
|
// Whether the start state has this property is determined during a
|
||||||
// pre-compilation pass, and if so, the byte b is passed to the search
|
// pre-compilation pass and the "can_prefix_accel" argument is set.
|
||||||
// loop as the "first_byte" argument, along with a boolean "have_first_byte".
|
|
||||||
//
|
//
|
||||||
// Fourth, the desired behavior is to search for the leftmost-best match
|
// Fourth, the desired behavior is to search for the leftmost-best match
|
||||||
// (approximately, the same one that Perl would find), which is not
|
// (approximately, the same one that Perl would find), which is not
|
||||||
@ -1323,15 +1317,16 @@ DFA::State* DFA::StateSaver::Restore() {
|
|||||||
// The bools are equal to the same-named variables in params, but
|
// The bools are equal to the same-named variables in params, but
|
||||||
// making them function arguments lets the inliner specialize
|
// making them function arguments lets the inliner specialize
|
||||||
// this function to each combination (see two paragraphs above).
|
// this function to each combination (see two paragraphs above).
|
||||||
inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
template <bool can_prefix_accel,
|
||||||
bool have_first_byte,
|
bool want_earliest_match,
|
||||||
bool want_earliest_match,
|
bool run_forward>
|
||||||
bool run_forward) {
|
inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||||
State* start = params->start;
|
State* start = params->start;
|
||||||
const uint8_t* bp = BytePtr(params->text.begin()); // start of text
|
const uint8_t* bp = BytePtr(params->text.data()); // start of text
|
||||||
const uint8_t* p = bp; // text scanning point
|
const uint8_t* p = bp; // text scanning point
|
||||||
const uint8_t* ep = BytePtr(params->text.end()); // end of text
|
const uint8_t* ep = BytePtr(params->text.data() +
|
||||||
const uint8_t* resetp = NULL; // p at last cache reset
|
params->text.size()); // end of text
|
||||||
|
const uint8_t* resetp = NULL; // p at last cache reset
|
||||||
if (!run_forward) {
|
if (!run_forward) {
|
||||||
using std::swap;
|
using std::swap;
|
||||||
swap(p, ep);
|
swap(p, ep);
|
||||||
@ -1366,25 +1361,16 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
|||||||
|
|
||||||
while (p != ep) {
|
while (p != ep) {
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "@%td: %s\n",
|
fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
|
||||||
p - bp, DumpState(s).c_str());
|
|
||||||
|
|
||||||
if (have_first_byte && s == start) {
|
if (can_prefix_accel && s == start) {
|
||||||
// In start state, only way out is to find first_byte,
|
// In start state, only way out is to find the prefix,
|
||||||
// so use optimized assembly in memchr to skip ahead.
|
// so we use prefix accel (e.g. memchr) to skip ahead.
|
||||||
// If first_byte isn't found, we can skip to the end
|
// If not found, we can skip to the end of the string.
|
||||||
// of the string.
|
p = BytePtr(prog_->PrefixAccel(p, ep - p));
|
||||||
if (run_forward) {
|
if (p == NULL) {
|
||||||
if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
|
p = ep;
|
||||||
p = ep;
|
break;
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if ((p = BytePtr(memrchr(ep, params->first_byte, p - ep))) == NULL) {
|
|
||||||
p = ep;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1475,8 +1461,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
|||||||
else
|
else
|
||||||
lastmatch = p + 1;
|
lastmatch = p + 1;
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "match @%td! [%s]\n",
|
fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
|
||||||
lastmatch - bp, DumpState(s).c_str());
|
|
||||||
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
||||||
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
||||||
int id = s->inst_[i];
|
int id = s->inst_[i];
|
||||||
@ -1560,36 +1545,28 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
|||||||
|
|
||||||
// Inline specializations of the general loop.
|
// Inline specializations of the general loop.
|
||||||
bool DFA::SearchFFF(SearchParams* params) {
|
bool DFA::SearchFFF(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 0, 0, 0);
|
return InlinedSearchLoop<false, false, false>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchFFT(SearchParams* params) {
|
bool DFA::SearchFFT(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 0, 0, 1);
|
return InlinedSearchLoop<false, false, true>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchFTF(SearchParams* params) {
|
bool DFA::SearchFTF(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 0, 1, 0);
|
return InlinedSearchLoop<false, true, false>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchFTT(SearchParams* params) {
|
bool DFA::SearchFTT(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 0, 1, 1);
|
return InlinedSearchLoop<false, true, true>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchTFF(SearchParams* params) {
|
bool DFA::SearchTFF(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 1, 0, 0);
|
return InlinedSearchLoop<true, false, false>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchTFT(SearchParams* params) {
|
bool DFA::SearchTFT(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 1, 0, 1);
|
return InlinedSearchLoop<true, false, true>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchTTF(SearchParams* params) {
|
bool DFA::SearchTTF(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 1, 1, 0);
|
return InlinedSearchLoop<true, true, false>(params);
|
||||||
}
|
}
|
||||||
bool DFA::SearchTTT(SearchParams* params) {
|
bool DFA::SearchTTT(SearchParams* params) {
|
||||||
return InlinedSearchLoop(params, 1, 1, 1);
|
return InlinedSearchLoop<true, true, true>(params);
|
||||||
}
|
|
||||||
|
|
||||||
// For debugging, calls the general code directly.
|
|
||||||
bool DFA::SlowSearchLoop(SearchParams* params) {
|
|
||||||
return InlinedSearchLoop(params,
|
|
||||||
params->first_byte >= 0,
|
|
||||||
params->want_earliest_match,
|
|
||||||
params->run_forward);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// For performance, calls the appropriate specialized version
|
// For performance, calls the appropriate specialized version
|
||||||
@ -1608,8 +1585,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
|
|||||||
&DFA::SearchTTT,
|
&DFA::SearchTTT,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool have_first_byte = params->first_byte >= 0;
|
int index = 4 * params->can_prefix_accel +
|
||||||
int index = 4 * have_first_byte +
|
|
||||||
2 * params->want_earliest_match +
|
2 * params->want_earliest_match +
|
||||||
1 * params->run_forward;
|
1 * params->run_forward;
|
||||||
return (this->*Searches[index])(params);
|
return (this->*Searches[index])(params);
|
||||||
@ -1701,13 +1677,22 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ExtraDebug)
|
params->start = info->start.load(std::memory_order_acquire);
|
||||||
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
|
|
||||||
params->anchored, params->run_forward, flags,
|
|
||||||
DumpState(info->start).c_str(), info->first_byte.load());
|
|
||||||
|
|
||||||
params->start = info->start;
|
// Even if we could prefix accel, we cannot do so when anchored and,
|
||||||
params->first_byte = info->first_byte.load(std::memory_order_acquire);
|
// less obviously, we cannot do so when we are going to need flags.
|
||||||
|
// This trick works only when there is a single byte that leads to a
|
||||||
|
// different state!
|
||||||
|
if (prog_->can_prefix_accel() &&
|
||||||
|
!params->anchored &&
|
||||||
|
params->start > SpecialStateMax &&
|
||||||
|
params->start->flag_ >> kFlagNeedShift == 0)
|
||||||
|
params->can_prefix_accel = true;
|
||||||
|
|
||||||
|
if (ExtraDebug)
|
||||||
|
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
|
||||||
|
params->anchored, params->run_forward, flags,
|
||||||
|
DumpState(params->start).c_str(), params->can_prefix_accel);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1716,47 +1701,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
|||||||
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
||||||
uint32_t flags) {
|
uint32_t flags) {
|
||||||
// Quick check.
|
// Quick check.
|
||||||
int fb = info->first_byte.load(std::memory_order_acquire);
|
State* start = info->start.load(std::memory_order_acquire);
|
||||||
if (fb != kFbUnknown)
|
if (start != NULL)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
fb = info->first_byte.load(std::memory_order_relaxed);
|
start = info->start.load(std::memory_order_relaxed);
|
||||||
if (fb != kFbUnknown)
|
if (start != NULL)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
q0_->clear();
|
q0_->clear();
|
||||||
AddToQueue(q0_,
|
AddToQueue(q0_,
|
||||||
params->anchored ? prog_->start() : prog_->start_unanchored(),
|
params->anchored ? prog_->start() : prog_->start_unanchored(),
|
||||||
flags);
|
flags);
|
||||||
info->start = WorkqToCachedState(q0_, NULL, flags);
|
start = WorkqToCachedState(q0_, NULL, flags);
|
||||||
if (info->start == NULL)
|
if (start == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (info->start == DeadState) {
|
|
||||||
// Synchronize with "quick check" above.
|
|
||||||
info->first_byte.store(kFbNone, std::memory_order_release);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info->start == FullMatchState) {
|
|
||||||
// Synchronize with "quick check" above.
|
|
||||||
info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Even if we have a first_byte, we cannot use it when anchored and,
|
|
||||||
// less obviously, we cannot use it when we are going to need flags.
|
|
||||||
// This trick works only when there is a single byte that leads to a
|
|
||||||
// different state!
|
|
||||||
int first_byte = prog_->first_byte();
|
|
||||||
if (first_byte == -1 ||
|
|
||||||
params->anchored ||
|
|
||||||
info->start->flag_ >> kFlagNeedShift != 0)
|
|
||||||
first_byte = kFbNone;
|
|
||||||
|
|
||||||
// Synchronize with "quick check" above.
|
// Synchronize with "quick check" above.
|
||||||
info->first_byte.store(first_byte, std::memory_order_release);
|
info->start.store(start, std::memory_order_release);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1779,8 +1742,7 @@ bool DFA::Search(const StringPiece& text,
|
|||||||
if (ExtraDebug) {
|
if (ExtraDebug) {
|
||||||
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
|
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
|
||||||
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
|
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
|
||||||
std::string(text).c_str(), anchored, want_earliest_match,
|
std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
|
||||||
run_forward, kind_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RWLocker l(&cache_mutex_);
|
RWLocker l(&cache_mutex_);
|
||||||
@ -1798,9 +1760,9 @@ bool DFA::Search(const StringPiece& text,
|
|||||||
return false;
|
return false;
|
||||||
if (params.start == FullMatchState) {
|
if (params.start == FullMatchState) {
|
||||||
if (run_forward == want_earliest_match)
|
if (run_forward == want_earliest_match)
|
||||||
*epp = text.begin();
|
*epp = text.data();
|
||||||
else
|
else
|
||||||
*epp = text.end();
|
*epp = text.data() + text.size();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
@ -1863,15 +1825,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
|||||||
*failed = false;
|
*failed = false;
|
||||||
|
|
||||||
StringPiece context = const_context;
|
StringPiece context = const_context;
|
||||||
if (context.begin() == NULL)
|
if (context.data() == NULL)
|
||||||
context = text;
|
context = text;
|
||||||
bool carat = anchor_start();
|
bool caret = anchor_start();
|
||||||
bool dollar = anchor_end();
|
bool dollar = anchor_end();
|
||||||
if (reversed_) {
|
if (reversed_) {
|
||||||
using std::swap;
|
using std::swap;
|
||||||
swap(carat, dollar);
|
swap(caret, dollar);
|
||||||
}
|
}
|
||||||
if (carat && context.begin() != text.begin())
|
if (caret && context.begin() != text.begin())
|
||||||
return false;
|
return false;
|
||||||
if (dollar && context.end() != text.end())
|
if (dollar && context.end() != text.end())
|
||||||
return false;
|
return false;
|
||||||
@ -1906,11 +1868,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
|||||||
bool matched = dfa->Search(text, context, anchored,
|
bool matched = dfa->Search(text, context, anchored,
|
||||||
want_earliest_match, !reversed_,
|
want_earliest_match, !reversed_,
|
||||||
failed, &ep, matches);
|
failed, &ep, matches);
|
||||||
if (*failed)
|
if (*failed) {
|
||||||
|
hooks::GetDFASearchFailureHook()({
|
||||||
|
// Nothing yet...
|
||||||
|
});
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
if (!matched)
|
if (!matched)
|
||||||
return false;
|
return false;
|
||||||
if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
|
if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// If caller cares, record the boundary of the match.
|
// If caller cares, record the boundary of the match.
|
||||||
@ -1918,10 +1884,11 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
|||||||
// as the beginning.
|
// as the beginning.
|
||||||
if (match0) {
|
if (match0) {
|
||||||
if (reversed_)
|
if (reversed_)
|
||||||
*match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep));
|
*match0 =
|
||||||
|
StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
|
||||||
else
|
else
|
||||||
*match0 =
|
*match0 =
|
||||||
StringPiece(text.begin(), static_cast<size_t>(ep - text.begin()));
|
StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
20
extern/re2/re2/filtered_re2.cc
vendored
20
extern/re2/re2/filtered_re2.cc
vendored
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
@ -27,7 +28,22 @@ FilteredRE2::FilteredRE2(int min_atom_len)
|
|||||||
FilteredRE2::~FilteredRE2() {
|
FilteredRE2::~FilteredRE2() {
|
||||||
for (size_t i = 0; i < re2_vec_.size(); i++)
|
for (size_t i = 0; i < re2_vec_.size(); i++)
|
||||||
delete re2_vec_[i];
|
delete re2_vec_[i];
|
||||||
delete prefilter_tree_;
|
}
|
||||||
|
|
||||||
|
FilteredRE2::FilteredRE2(FilteredRE2&& other)
|
||||||
|
: re2_vec_(std::move(other.re2_vec_)),
|
||||||
|
compiled_(other.compiled_),
|
||||||
|
prefilter_tree_(std::move(other.prefilter_tree_)) {
|
||||||
|
other.re2_vec_.clear();
|
||||||
|
other.re2_vec_.shrink_to_fit();
|
||||||
|
other.compiled_ = false;
|
||||||
|
other.prefilter_tree_.reset(new PrefilterTree());
|
||||||
|
}
|
||||||
|
|
||||||
|
FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
|
||||||
|
this->~FilteredRE2();
|
||||||
|
(void) new (this) FilteredRE2(std::move(other));
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
|
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
|
||||||
@ -38,7 +54,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
|
|||||||
if (!re->ok()) {
|
if (!re->ok()) {
|
||||||
if (options.log_errors()) {
|
if (options.log_errors()) {
|
||||||
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
|
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
|
||||||
<< re << " due to error " << re->error();
|
<< pattern << " due to error " << re->error();
|
||||||
}
|
}
|
||||||
delete re;
|
delete re;
|
||||||
} else {
|
} else {
|
||||||
|
39
extern/re2/re2/filtered_re2.h
vendored
39
extern/re2/re2/filtered_re2.h
vendored
@ -10,17 +10,18 @@
|
|||||||
// number of regexps that need to be actually searched.
|
// number of regexps that need to be actually searched.
|
||||||
//
|
//
|
||||||
// By design, it does not include a string matching engine. This is to
|
// By design, it does not include a string matching engine. This is to
|
||||||
// allow the user of the class to use their favorite string match
|
// allow the user of the class to use their favorite string matching
|
||||||
// engine. The overall flow is: Add all the regexps using Add, then
|
// engine. The overall flow is: Add all the regexps using Add, then
|
||||||
// Compile the FilteredRE2. The compile returns strings that need to
|
// Compile the FilteredRE2. Compile returns strings that need to be
|
||||||
// be matched. Note that all returned strings are lowercase. For
|
// matched. Note that the returned strings are lowercased and distinct.
|
||||||
// applying regexps to a search text, the caller does the string
|
// For applying regexps to a search text, the caller does the string
|
||||||
// matching using the strings returned. When doing the string match,
|
// matching using the returned strings. When doing the string match,
|
||||||
// note that the caller has to do that on lower cased version of the
|
// note that the caller has to do that in a case-insensitive way or
|
||||||
// search text. Then call FirstMatch or AllMatches with a vector of
|
// on a lowercased version of the search text. Then call FirstMatch
|
||||||
// indices of strings that were found in the text to get the actual
|
// or AllMatches with a vector of indices of strings that were found
|
||||||
// regexp matches.
|
// in the text to get the actual regexp matches.
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -36,18 +37,25 @@ class FilteredRE2 {
|
|||||||
explicit FilteredRE2(int min_atom_len);
|
explicit FilteredRE2(int min_atom_len);
|
||||||
~FilteredRE2();
|
~FilteredRE2();
|
||||||
|
|
||||||
|
// Not copyable.
|
||||||
|
FilteredRE2(const FilteredRE2&) = delete;
|
||||||
|
FilteredRE2& operator=(const FilteredRE2&) = delete;
|
||||||
|
// Movable.
|
||||||
|
FilteredRE2(FilteredRE2&& other);
|
||||||
|
FilteredRE2& operator=(FilteredRE2&& other);
|
||||||
|
|
||||||
// Uses RE2 constructor to create a RE2 object (re). Returns
|
// Uses RE2 constructor to create a RE2 object (re). Returns
|
||||||
// re->error_code(). If error_code is other than NoError, then re is
|
// re->error_code(). If error_code is other than NoError, then re is
|
||||||
// deleted and not added to re2_vec_.
|
// deleted and not added to re2_vec_.
|
||||||
RE2::ErrorCode Add(const StringPiece& pattern,
|
RE2::ErrorCode Add(const StringPiece& pattern,
|
||||||
const RE2::Options& options,
|
const RE2::Options& options,
|
||||||
int *id);
|
int* id);
|
||||||
|
|
||||||
// Prepares the regexps added by Add for filtering. Returns a set
|
// Prepares the regexps added by Add for filtering. Returns a set
|
||||||
// of strings that the caller should check for in candidate texts.
|
// of strings that the caller should check for in candidate texts.
|
||||||
// The returned strings are lowercased. When doing string matching,
|
// The returned strings are lowercased and distinct. When doing
|
||||||
// the search text should be lowercased first to find matching
|
// string matching, it should be performed in a case-insensitive
|
||||||
// strings from the set of strings returned by Compile. Call after
|
// way or the search text should be lowercased first. Call after
|
||||||
// all Add calls are done.
|
// all Add calls are done.
|
||||||
void Compile(std::vector<std::string>* strings_to_match);
|
void Compile(std::vector<std::string>* strings_to_match);
|
||||||
|
|
||||||
@ -98,10 +106,7 @@ class FilteredRE2 {
|
|||||||
bool compiled_;
|
bool compiled_;
|
||||||
|
|
||||||
// An AND-OR tree of string atoms used for filtering regexps.
|
// An AND-OR tree of string atoms used for filtering regexps.
|
||||||
PrefilterTree* prefilter_tree_;
|
std::unique_ptr<PrefilterTree> prefilter_tree_;
|
||||||
|
|
||||||
FilteredRE2(const FilteredRE2&) = delete;
|
|
||||||
FilteredRE2& operator=(const FilteredRE2&) = delete;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
219
extern/re2/re2/fuzzing/compiler-rt/LICENSE
vendored
Normal file
219
extern/re2/re2/fuzzing/compiler-rt/LICENSE
vendored
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
--- LLVM Exceptions to the Apache 2.0 License ----
|
||||||
|
|
||||||
|
As an exception, if, as a result of your compiling your source code, portions
|
||||||
|
of this Software are embedded into an Object form of such source code, you
|
||||||
|
may redistribute such embedded portions in such Object form without complying
|
||||||
|
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
|
||||||
|
|
||||||
|
In addition, if you combine or link compiled forms of this Software with
|
||||||
|
software that is licensed under the GPLv2 ("Combined Software") and if a
|
||||||
|
court of competent jurisdiction determines that the patent provision (Section
|
||||||
|
3), the indemnity provision (Section 9) or other Section of the License
|
||||||
|
conflicts with the conditions of the GPLv2, you may retroactively and
|
||||||
|
prospectively choose to deem waived or otherwise exclude such Section(s) of
|
||||||
|
the License, but only in their entirety and only with respect to the Combined
|
||||||
|
Software.
|
||||||
|
|
305
extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
vendored
Normal file
305
extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
vendored
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// A single header library providing a utility class to break up an array of
|
||||||
|
// bytes. Whenever run on the same input, provides the same output, as long as
|
||||||
|
// its methods are called in the same order, with the same arguments.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
||||||
|
#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <initializer_list>
|
||||||
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// In addition to the comments below, the API is also briefly documented at
|
||||||
|
// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
|
||||||
|
class FuzzedDataProvider {
|
||||||
|
public:
|
||||||
|
// |data| is an array of length |size| that the FuzzedDataProvider wraps to
|
||||||
|
// provide more granular access. |data| must outlive the FuzzedDataProvider.
|
||||||
|
FuzzedDataProvider(const uint8_t *data, size_t size)
|
||||||
|
: data_ptr_(data), remaining_bytes_(size) {}
|
||||||
|
~FuzzedDataProvider() = default;
|
||||||
|
|
||||||
|
// Returns a std::vector containing |num_bytes| of input data. If fewer than
|
||||||
|
// |num_bytes| of data remain, returns a shorter std::vector containing all
|
||||||
|
// of the data that's left. Can be used with any byte sized type, such as
|
||||||
|
// char, unsigned char, uint8_t, etc.
|
||||||
|
template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
|
||||||
|
num_bytes = std::min(num_bytes, remaining_bytes_);
|
||||||
|
return ConsumeBytes<T>(num_bytes, num_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Similar to |ConsumeBytes|, but also appends the terminator value at the end
|
||||||
|
// of the resulting vector. Useful, when a mutable null-terminated C-string is
|
||||||
|
// needed, for example. But that is a rare case. Better avoid it, if possible,
|
||||||
|
// and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
|
||||||
|
template <typename T>
|
||||||
|
std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
|
||||||
|
T terminator = 0) {
|
||||||
|
num_bytes = std::min(num_bytes, remaining_bytes_);
|
||||||
|
std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
|
||||||
|
result.back() = terminator;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a std::string containing |num_bytes| of input data. Using this and
|
||||||
|
// |.c_str()| on the resulting string is the best way to get an immutable
|
||||||
|
// null-terminated C string. If fewer than |num_bytes| of data remain, returns
|
||||||
|
// a shorter std::string containing all of the data that's left.
|
||||||
|
std::string ConsumeBytesAsString(size_t num_bytes) {
|
||||||
|
static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
|
||||||
|
"ConsumeBytesAsString cannot convert the data to a string.");
|
||||||
|
|
||||||
|
num_bytes = std::min(num_bytes, remaining_bytes_);
|
||||||
|
std::string result(
|
||||||
|
reinterpret_cast<const std::string::value_type *>(data_ptr_),
|
||||||
|
num_bytes);
|
||||||
|
Advance(num_bytes);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a number in the range [min, max] by consuming bytes from the
|
||||||
|
// input data. The value might not be uniformly distributed in the given
|
||||||
|
// range. If there's no input data left, always returns |min|. |min| must
|
||||||
|
// be less than or equal to |max|.
|
||||||
|
template <typename T> T ConsumeIntegralInRange(T min, T max) {
|
||||||
|
static_assert(std::is_integral<T>::value, "An integral type is required.");
|
||||||
|
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
|
||||||
|
|
||||||
|
if (min > max)
|
||||||
|
abort();
|
||||||
|
|
||||||
|
// Use the biggest type possible to hold the range and the result.
|
||||||
|
uint64_t range = static_cast<uint64_t>(max) - min;
|
||||||
|
uint64_t result = 0;
|
||||||
|
size_t offset = 0;
|
||||||
|
|
||||||
|
while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
|
||||||
|
remaining_bytes_ != 0) {
|
||||||
|
// Pull bytes off the end of the seed data. Experimentally, this seems to
|
||||||
|
// allow the fuzzer to more easily explore the input space. This makes
|
||||||
|
// sense, since it works by modifying inputs that caused new code to run,
|
||||||
|
// and this data is often used to encode length of data read by
|
||||||
|
// |ConsumeBytes|. Separating out read lengths makes it easier to modify the
|
||||||
|
// contents of the data that is actually read.
|
||||||
|
--remaining_bytes_;
|
||||||
|
result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
|
||||||
|
offset += CHAR_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid division by 0, in case |range + 1| results in overflow.
|
||||||
|
if (range != std::numeric_limits<decltype(range)>::max())
|
||||||
|
result = result % (range + 1);
|
||||||
|
|
||||||
|
return static_cast<T>(min + result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a std::string of length from 0 to |max_length|. When it runs out of
|
||||||
|
// input data, returns what remains of the input. Designed to be more stable
|
||||||
|
// with respect to a fuzzer inserting characters than just picking a random
|
||||||
|
// length and then consuming that many bytes with |ConsumeBytes|.
|
||||||
|
std::string ConsumeRandomLengthString(size_t max_length) {
|
||||||
|
// Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
|
||||||
|
// followed by anything else to the end of the string. As a result of this
|
||||||
|
// logic, a fuzzer can insert characters into the string, and the string
|
||||||
|
// will be lengthened to include those new characters, resulting in a more
|
||||||
|
// stable fuzzer than picking the length of a string independently from
|
||||||
|
// picking its contents.
|
||||||
|
std::string result;
|
||||||
|
|
||||||
|
// Reserve the anticipated capacity to prevent several reallocations.
|
||||||
|
result.reserve(std::min(max_length, remaining_bytes_));
|
||||||
|
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
|
||||||
|
char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
|
||||||
|
Advance(1);
|
||||||
|
if (next == '\\' && remaining_bytes_ != 0) {
|
||||||
|
next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
|
||||||
|
Advance(1);
|
||||||
|
if (next != '\\')
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
result += next;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.shrink_to_fit();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a std::vector containing all remaining bytes of the input data.
|
||||||
|
template <typename T> std::vector<T> ConsumeRemainingBytes() {
|
||||||
|
return ConsumeBytes<T>(remaining_bytes_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a std::string containing all remaining bytes of the input data.
|
||||||
|
// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
|
||||||
|
// object.
|
||||||
|
std::string ConsumeRemainingBytesAsString() {
|
||||||
|
return ConsumeBytesAsString(remaining_bytes_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a number in the range [Type's min, Type's max]. The value might
|
||||||
|
// not be uniformly distributed in the given range. If there's no input data
|
||||||
|
// left, always returns |min|.
|
||||||
|
template <typename T> T ConsumeIntegral() {
|
||||||
|
return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
|
||||||
|
std::numeric_limits<T>::max());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads one byte and returns a bool, or false when no data remains.
|
||||||
|
bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
|
||||||
|
|
||||||
|
// Returns a copy of the value selected from the given fixed-size |array|.
|
||||||
|
template <typename T, size_t size>
|
||||||
|
T PickValueInArray(const T (&array)[size]) {
|
||||||
|
static_assert(size > 0, "The array must be non empty.");
|
||||||
|
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T PickValueInArray(std::initializer_list<const T> list) {
|
||||||
|
// TODO(Dor1s): switch to static_assert once C++14 is allowed.
|
||||||
|
if (!list.size())
|
||||||
|
abort();
|
||||||
|
|
||||||
|
return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns an enum value. The enum must start at 0 and be contiguous. It must
|
||||||
|
// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
|
||||||
|
// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
|
||||||
|
template <typename T> T ConsumeEnum() {
|
||||||
|
static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
|
||||||
|
return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
|
||||||
|
0, static_cast<uint32_t>(T::kMaxValue)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a floating point number in the range [0.0, 1.0]. If there's no
|
||||||
|
// input data left, always returns 0.
|
||||||
|
template <typename T> T ConsumeProbability() {
|
||||||
|
static_assert(std::is_floating_point<T>::value,
|
||||||
|
"A floating point type is required.");
|
||||||
|
|
||||||
|
// Use different integral types for different floating point types in order
|
||||||
|
// to provide better density of the resulting values.
|
||||||
|
using IntegralType =
|
||||||
|
typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
|
||||||
|
uint64_t>::type;
|
||||||
|
|
||||||
|
T result = static_cast<T>(ConsumeIntegral<IntegralType>());
|
||||||
|
result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a floating point value in the range [Type's lowest, Type's max] by
|
||||||
|
// consuming bytes from the input data. If there's no input data left, always
|
||||||
|
// returns approximately 0.
|
||||||
|
template <typename T> T ConsumeFloatingPoint() {
|
||||||
|
return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
|
||||||
|
std::numeric_limits<T>::max());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a floating point value in the given range by consuming bytes from
|
||||||
|
// the input data. If there's no input data left, returns |min|. Note that
|
||||||
|
// |min| must be less than or equal to |max|.
|
||||||
|
template <typename T> T ConsumeFloatingPointInRange(T min, T max) {
|
||||||
|
if (min > max)
|
||||||
|
abort();
|
||||||
|
|
||||||
|
T range = .0;
|
||||||
|
T result = min;
|
||||||
|
constexpr T zero(.0);
|
||||||
|
if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
|
||||||
|
// The diff |max - min| would overflow the given floating point type. Use
|
||||||
|
// the half of the diff as the range and consume a bool to decide whether
|
||||||
|
// the result is in the first of the second part of the diff.
|
||||||
|
range = (max / 2.0) - (min / 2.0);
|
||||||
|
if (ConsumeBool()) {
|
||||||
|
result += range;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
range = max - min;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result + range * ConsumeProbability<T>();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports the remaining bytes available for fuzzed input.
|
||||||
|
size_t remaining_bytes() { return remaining_bytes_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
FuzzedDataProvider(const FuzzedDataProvider &) = delete;
|
||||||
|
FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
|
||||||
|
|
||||||
|
// Skips |num_bytes| bytes of input by moving the data pointer forward and
// reducing the remaining byte count accordingly. Aborts when asked to skip
// more bytes than remain.
void Advance(size_t num_bytes) {
  const bool fits = num_bytes <= remaining_bytes_;
  if (!fits)
    abort();

  remaining_bytes_ -= num_bytes;
  data_ptr_ += num_bytes;
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
|
||||||
|
static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
|
||||||
|
|
||||||
|
// The point of using the size-based constructor below is to increase the
|
||||||
|
// odds of having a vector object with capacity being equal to the length.
|
||||||
|
// That part is always implementation specific, but at least both libc++ and
|
||||||
|
// libstdc++ allocate the requested number of bytes in that constructor,
|
||||||
|
// which seems to be a natural choice for other implementations as well.
|
||||||
|
// To increase the odds even more, we also call |shrink_to_fit| below.
|
||||||
|
std::vector<T> result(size);
|
||||||
|
if (size == 0) {
|
||||||
|
if (num_bytes_to_consume != 0)
|
||||||
|
abort();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
|
||||||
|
Advance(num_bytes_to_consume);
|
||||||
|
|
||||||
|
// Even though |shrink_to_fit| is also implementation specific, we expect it
|
||||||
|
// to provide an additional assurance in case vector's constructor allocated
|
||||||
|
// a buffer which is larger than the actual amount of data we put inside it.
|
||||||
|
result.shrink_to_fit();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts the unsigned |value| into the signed type TS of the same width,
// producing the value that has the same two's complement bit pattern
// (e.g. 0xFFFFFFFF -> -1 for 32-bit types).
//
// TS: destination signed type; must have the same size as TU.
// TU: source type; must be unsigned.
template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
  static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
  static_assert(!std::numeric_limits<TU>::is_signed,
                "Source type must be unsigned.");

  // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
  if (std::numeric_limits<TS>::is_modulo)
    return static_cast<TS>(value);

  // Avoid using implementation-defined unsigned to signed conversions.
  // To learn more, see https://stackoverflow.com/questions/13150449.
  constexpr TS TS_max = std::numeric_limits<TS>::max();
  if (value <= static_cast<TU>(TS_max)) {
    // The value is representable in TS as is.
    return static_cast<TS>(value);
  } else {
    constexpr TS TS_min = std::numeric_limits<TS>::min();
    // Compute the distance from TS_min in the unsigned domain, where the
    // arithmetic is well defined; the distance always fits into TS. It must
    // be narrowed to TS (not to char), otherwise it gets truncated to 8 bits
    // for every type wider than char.
    const TS offset = static_cast<TS>(value - static_cast<TU>(TS_min));
    return static_cast<TS>(TS_min + offset);
  }
}
|
||||||
|
|
||||||
|
const uint8_t *data_ptr_;
|
||||||
|
size_t remaining_bytes_;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
122
extern/re2/re2/fuzzing/re2_fuzzer.cc
vendored
122
extern/re2/re2/fuzzing/re2_fuzzer.cc
vendored
@ -2,12 +2,13 @@
|
|||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include <fuzzer/FuzzedDataProvider.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "re2/prefilter.h"
|
#include "re2/prefilter.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
@ -17,7 +18,38 @@ using re2::StringPiece;
|
|||||||
// NOT static, NOT signed.
|
// NOT static, NOT signed.
|
||||||
uint8_t dummy = 0;
|
uint8_t dummy = 0;
|
||||||
|
|
||||||
void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
void TestOneInput(StringPiece pattern, const RE2::Options& options,
|
||||||
|
StringPiece text) {
|
||||||
|
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
|
||||||
|
// Otherwise, we will waste time on inputs that have long runs of various
|
||||||
|
// character classes. The fuzzer has shown itself to be easily capable of
|
||||||
|
// generating such patterns that fall within the other limits, but result
|
||||||
|
// in timeouts nonetheless. The marginal cost is high - even more so when
|
||||||
|
// counted repetition is involved - whereas the marginal benefit is zero.
|
||||||
|
// TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
|
||||||
|
int char_class = 0;
|
||||||
|
int backslash_p = 0; // very expensive, so handle specially
|
||||||
|
for (size_t i = 0; i < pattern.size(); i++) {
|
||||||
|
if (pattern[i] == '.')
|
||||||
|
char_class++;
|
||||||
|
if (pattern[i] != '\\')
|
||||||
|
continue;
|
||||||
|
i++;
|
||||||
|
if (i >= pattern.size())
|
||||||
|
break;
|
||||||
|
if (pattern[i] == 'p' || pattern[i] == 'P' ||
|
||||||
|
pattern[i] == 'd' || pattern[i] == 'D' ||
|
||||||
|
pattern[i] == 's' || pattern[i] == 'S' ||
|
||||||
|
pattern[i] == 'w' || pattern[i] == 'W')
|
||||||
|
char_class++;
|
||||||
|
if (pattern[i] == 'p' || pattern[i] == 'P')
|
||||||
|
backslash_p++;
|
||||||
|
}
|
||||||
|
if (char_class > 9)
|
||||||
|
return;
|
||||||
|
if (backslash_p > 1)
|
||||||
|
return;
|
||||||
|
|
||||||
RE2 re(pattern, options);
|
RE2 re(pattern, options);
|
||||||
if (!re.ok())
|
if (!re.ok())
|
||||||
return;
|
return;
|
||||||
@ -55,7 +87,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
|||||||
|
|
||||||
// Don't waste time fuzzing high-fanout programs.
|
// Don't waste time fuzzing high-fanout programs.
|
||||||
// They can cause bug reports due to fuzzer timeouts.
|
// They can cause bug reports due to fuzzer timeouts.
|
||||||
std::map<int, int> histogram;
|
std::vector<int> histogram;
|
||||||
int fanout = re.ProgramFanout(&histogram);
|
int fanout = re.ProgramFanout(&histogram);
|
||||||
if (fanout > 9)
|
if (fanout > 9)
|
||||||
return;
|
return;
|
||||||
@ -102,72 +134,38 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
|||||||
|
|
||||||
// Entry point for libFuzzer.
|
// Entry point for libFuzzer.
|
||||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||||
if (size == 0 || size > 999)
|
// An input larger than 4 KiB probably isn't interesting. (This limit
|
||||||
|
// allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
|
||||||
|
if (size == 0 || size > 4096)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
|
FuzzedDataProvider fdp(data, size);
|
||||||
// Otherwise, we will waste time on inputs that have long runs of various
|
|
||||||
// character classes. The fuzzer has shown itself to be easily capable of
|
|
||||||
// generating such patterns that fall within the other limits, but result
|
|
||||||
// in timeouts nonetheless. The marginal cost is high - even more so when
|
|
||||||
// counted repetition is involved - whereas the marginal benefit is zero.
|
|
||||||
// TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
|
|
||||||
int char_class = 0;
|
|
||||||
int backslash_p = 0; // very expensive, so handle specially
|
|
||||||
for (size_t i = 0; i < size; i++) {
|
|
||||||
if (data[i] == '.')
|
|
||||||
char_class++;
|
|
||||||
if (data[i] != '\\')
|
|
||||||
continue;
|
|
||||||
i++;
|
|
||||||
if (i >= size)
|
|
||||||
break;
|
|
||||||
if (data[i] == 'p' || data[i] == 'P' ||
|
|
||||||
data[i] == 'd' || data[i] == 'D' ||
|
|
||||||
data[i] == 's' || data[i] == 'S' ||
|
|
||||||
data[i] == 'w' || data[i] == 'W')
|
|
||||||
char_class++;
|
|
||||||
if (data[i] == 'p' || data[i] == 'P')
|
|
||||||
backslash_p++;
|
|
||||||
}
|
|
||||||
if (char_class > 9)
|
|
||||||
return 0;
|
|
||||||
if (backslash_p > 1)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// The one-at-a-time hash by Bob Jenkins.
|
|
||||||
uint32_t hash = 0;
|
|
||||||
for (size_t i = 0; i < size; i++) {
|
|
||||||
hash += data[i];
|
|
||||||
hash += (hash << 10);
|
|
||||||
hash ^= (hash >> 6);
|
|
||||||
}
|
|
||||||
hash += (hash << 3);
|
|
||||||
hash ^= (hash >> 11);
|
|
||||||
hash += (hash << 15);
|
|
||||||
|
|
||||||
|
// The convention here is that fdp.ConsumeBool() returning false sets
|
||||||
|
// the default value whereas returning true sets the alternate value:
|
||||||
|
// most options default to false and so can be set directly; encoding
|
||||||
|
// defaults to UTF-8; case_sensitive defaults to true. We do NOT want
|
||||||
|
// to log errors. max_mem is 64 MiB because we can afford to use more
|
||||||
|
// RAM in exchange for (hopefully) faster fuzzing.
|
||||||
RE2::Options options;
|
RE2::Options options;
|
||||||
|
options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
|
||||||
|
: RE2::Options::EncodingUTF8);
|
||||||
|
options.set_posix_syntax(fdp.ConsumeBool());
|
||||||
|
options.set_longest_match(fdp.ConsumeBool());
|
||||||
options.set_log_errors(false);
|
options.set_log_errors(false);
|
||||||
options.set_max_mem(64 << 20);
|
options.set_max_mem(64 << 20);
|
||||||
options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
|
options.set_literal(fdp.ConsumeBool());
|
||||||
: RE2::Options::EncodingUTF8);
|
options.set_never_nl(fdp.ConsumeBool());
|
||||||
options.set_posix_syntax(hash & 2);
|
options.set_dot_nl(fdp.ConsumeBool());
|
||||||
options.set_longest_match(hash & 4);
|
options.set_never_capture(fdp.ConsumeBool());
|
||||||
options.set_literal(hash & 8);
|
options.set_case_sensitive(!fdp.ConsumeBool());
|
||||||
options.set_never_nl(hash & 16);
|
options.set_perl_classes(fdp.ConsumeBool());
|
||||||
options.set_dot_nl(hash & 32);
|
options.set_word_boundary(fdp.ConsumeBool());
|
||||||
options.set_never_capture(hash & 64);
|
options.set_one_line(fdp.ConsumeBool());
|
||||||
options.set_case_sensitive(hash & 128);
|
|
||||||
options.set_perl_classes(hash & 256);
|
|
||||||
options.set_word_boundary(hash & 512);
|
|
||||||
options.set_one_line(hash & 1024);
|
|
||||||
|
|
||||||
const char* ptr = reinterpret_cast<const char*>(data);
|
std::string pattern = fdp.ConsumeRandomLengthString(999);
|
||||||
int len = static_cast<int>(size);
|
std::string text = fdp.ConsumeRandomLengthString(999);
|
||||||
|
|
||||||
StringPiece pattern(ptr, len);
|
|
||||||
StringPiece text(ptr, len);
|
|
||||||
Test(pattern, options, text);
|
|
||||||
|
|
||||||
|
TestOneInput(pattern, options, text);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
2
extern/re2/re2/make_perl_groups.pl
vendored
Normal file → Executable file
2
extern/re2/re2/make_perl_groups.pl
vendored
Normal file → Executable file
@ -76,7 +76,7 @@ sub PrintClass($$@) {
|
|||||||
} else {
|
} else {
|
||||||
$negname =~ y/a-z/A-Z/;
|
$negname =~ y/a-z/A-Z/;
|
||||||
}
|
}
|
||||||
return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
|
return "{ \"$escname\", +1, code$cnum, $n, 0, 0 }", "{ \"$negname\", -1, code$cnum, $n, 0, 0 }";
|
||||||
}
|
}
|
||||||
|
|
||||||
my $cnum = 0;
|
my $cnum = 0;
|
||||||
|
0
extern/re2/re2/make_unicode_casefold.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_casefold.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_groups.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_groups.py
vendored
Normal file → Executable file
30
extern/re2/re2/mimics_pcre.cc
vendored
30
extern/re2/re2/mimics_pcre.cc
vendored
@ -38,14 +38,21 @@ static bool CanBeEmptyString(Regexp *re);
|
|||||||
class PCREWalker : public Regexp::Walker<bool> {
|
class PCREWalker : public Regexp::Walker<bool> {
|
||||||
public:
|
public:
|
||||||
PCREWalker() {}
|
PCREWalker() {}
|
||||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
|
|
||||||
int nchild_args);
|
|
||||||
|
|
||||||
bool ShortVisit(Regexp* re, bool a) {
|
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
bool* child_args, int nchild_args);
|
||||||
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
|
||||||
|
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||||
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
|
LOG(DFATAL) << "PCREWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
PCREWalker(const PCREWalker&) = delete;
|
||||||
|
PCREWalker& operator=(const PCREWalker&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Called after visiting each of re's children and accumulating
|
// Called after visiting each of re's children and accumulating
|
||||||
@ -114,13 +121,16 @@ bool Regexp::MimicsPCRE() {
|
|||||||
|
|
||||||
class EmptyStringWalker : public Regexp::Walker<bool> {
|
class EmptyStringWalker : public Regexp::Walker<bool> {
|
||||||
public:
|
public:
|
||||||
EmptyStringWalker() { }
|
EmptyStringWalker() {}
|
||||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
|
||||||
bool* child_args, int nchild_args);
|
|
||||||
|
|
||||||
bool ShortVisit(Regexp* re, bool a) {
|
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
bool* child_args, int nchild_args);
|
||||||
|
|
||||||
|
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||||
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
220
extern/re2/re2/nfa.cc
vendored
220
extern/re2/re2/nfa.cc
vendored
@ -27,17 +27,18 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <deque>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/strutil.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
#include "util/logging.h"
|
#include "re2/sparse_array.h"
|
||||||
#include "util/pod_array.h"
|
#include "re2/sparse_set.h"
|
||||||
#include "util/sparse_array.h"
|
|
||||||
#include "util/sparse_set.h"
|
|
||||||
#include "util/strutil.h"
|
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -107,18 +108,21 @@ class NFA {
|
|||||||
// Returns text version of capture information, for debugging.
|
// Returns text version of capture information, for debugging.
|
||||||
std::string FormatCapture(const char** capture);
|
std::string FormatCapture(const char** capture);
|
||||||
|
|
||||||
inline void CopyCapture(const char** dst, const char** src);
|
void CopyCapture(const char** dst, const char** src) {
|
||||||
|
memmove(dst, src, ncapture_*sizeof src[0]);
|
||||||
|
}
|
||||||
|
|
||||||
Prog* prog_; // underlying program
|
Prog* prog_; // underlying program
|
||||||
int start_; // start instruction in program
|
int start_; // start instruction in program
|
||||||
int ncapture_; // number of submatches to track
|
int ncapture_; // number of submatches to track
|
||||||
bool longest_; // whether searching for longest match
|
bool longest_; // whether searching for longest match
|
||||||
bool endmatch_; // whether match must end at text.end()
|
bool endmatch_; // whether match must end at text.end()
|
||||||
const char* btext_; // beginning of text being matched (for FormatSubmatch)
|
const char* btext_; // beginning of text (for FormatSubmatch)
|
||||||
const char* etext_; // end of text being matched (for endmatch_)
|
const char* etext_; // end of text (for endmatch_)
|
||||||
Threadq q0_, q1_; // pre-allocated for Search.
|
Threadq q0_, q1_; // pre-allocated for Search.
|
||||||
PODArray<AddState> stack_; // pre-allocated for AddToThreadq
|
PODArray<AddState> stack_; // pre-allocated for AddToThreadq
|
||||||
Thread* free_threads_; // free list
|
std::deque<Thread> arena_; // thread arena
|
||||||
|
Thread* freelist_; // thread freelist
|
||||||
const char** match_; // best match so far
|
const char** match_; // best match so far
|
||||||
bool matched_; // any match so far?
|
bool matched_; // any match so far?
|
||||||
|
|
||||||
@ -141,31 +145,30 @@ NFA::NFA(Prog* prog) {
|
|||||||
prog_->inst_count(kInstEmptyWidth) +
|
prog_->inst_count(kInstEmptyWidth) +
|
||||||
prog_->inst_count(kInstNop) + 1; // + 1 for start inst
|
prog_->inst_count(kInstNop) + 1; // + 1 for start inst
|
||||||
stack_ = PODArray<AddState>(nstack);
|
stack_ = PODArray<AddState>(nstack);
|
||||||
free_threads_ = NULL;
|
freelist_ = NULL;
|
||||||
match_ = NULL;
|
match_ = NULL;
|
||||||
matched_ = false;
|
matched_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
NFA::~NFA() {
|
NFA::~NFA() {
|
||||||
delete[] match_;
|
delete[] match_;
|
||||||
Thread* next;
|
for (const Thread& t : arena_)
|
||||||
for (Thread* t = free_threads_; t; t = next) {
|
delete[] t.capture;
|
||||||
next = t->next;
|
|
||||||
delete[] t->capture;
|
|
||||||
delete t;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NFA::Thread* NFA::AllocThread() {
|
NFA::Thread* NFA::AllocThread() {
|
||||||
Thread* t = free_threads_;
|
Thread* t = freelist_;
|
||||||
if (t == NULL) {
|
if (t != NULL) {
|
||||||
t = new Thread;
|
freelist_ = t->next;
|
||||||
t->ref = 1;
|
t->ref = 1;
|
||||||
t->capture = new const char*[ncapture_];
|
// We don't need to touch t->capture because
|
||||||
|
// the caller will immediately overwrite it.
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
free_threads_ = t->next;
|
arena_.emplace_back();
|
||||||
|
t = &arena_.back();
|
||||||
t->ref = 1;
|
t->ref = 1;
|
||||||
|
t->capture = new const char*[ncapture_];
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,21 +179,13 @@ NFA::Thread* NFA::Incref(Thread* t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void NFA::Decref(Thread* t) {
|
void NFA::Decref(Thread* t) {
|
||||||
if (t == NULL)
|
DCHECK(t != NULL);
|
||||||
return;
|
|
||||||
t->ref--;
|
t->ref--;
|
||||||
if (t->ref > 0)
|
if (t->ref > 0)
|
||||||
return;
|
return;
|
||||||
DCHECK_EQ(t->ref, 0);
|
DCHECK_EQ(t->ref, 0);
|
||||||
t->next = free_threads_;
|
t->next = freelist_;
|
||||||
free_threads_ = t;
|
freelist_ = t;
|
||||||
}
|
|
||||||
|
|
||||||
void NFA::CopyCapture(const char** dst, const char** src) {
|
|
||||||
for (int i = 0; i < ncapture_; i+=2) {
|
|
||||||
dst[i] = src[i];
|
|
||||||
dst[i+1] = src[i+1];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Follows all empty arrows from id0 and enqueues all the states reached.
|
// Follows all empty arrows from id0 and enqueues all the states reached.
|
||||||
@ -372,8 +367,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
|||||||
matched_ = true;
|
matched_ = true;
|
||||||
|
|
||||||
Decref(t);
|
Decref(t);
|
||||||
for (++i; i != runq->end(); ++i)
|
for (++i; i != runq->end(); ++i) {
|
||||||
Decref(i->value());
|
if (i->value() != NULL)
|
||||||
|
Decref(i->value());
|
||||||
|
}
|
||||||
runq->clear();
|
runq->clear();
|
||||||
if (ip->greedy(prog_))
|
if (ip->greedy(prog_))
|
||||||
return ip->out1();
|
return ip->out1();
|
||||||
@ -382,10 +379,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case kInstMatch: {
|
case kInstMatch: {
|
||||||
// Avoid invoking undefined behavior when p happens
|
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||||
// to be null - and p-1 would be meaningless anyway.
|
// by storing p instead of p-1. (What would the latter even mean?!)
|
||||||
if (p == NULL)
|
// This complements the special case in NFA::Search().
|
||||||
|
if (p == NULL) {
|
||||||
|
CopyCapture(match_, t->capture);
|
||||||
|
match_[1] = p;
|
||||||
|
matched_ = true;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (endmatch_ && p-1 != etext_)
|
if (endmatch_ && p-1 != etext_)
|
||||||
break;
|
break;
|
||||||
@ -411,8 +413,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
|||||||
// worse than the one we just found: don't run the
|
// worse than the one we just found: don't run the
|
||||||
// rest of the current Threadq.
|
// rest of the current Threadq.
|
||||||
Decref(t);
|
Decref(t);
|
||||||
for (++i; i != runq->end(); ++i)
|
for (++i; i != runq->end(); ++i) {
|
||||||
Decref(i->value());
|
if (i->value() != NULL)
|
||||||
|
Decref(i->value());
|
||||||
|
}
|
||||||
runq->clear();
|
runq->clear();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -431,12 +435,12 @@ std::string NFA::FormatCapture(const char** capture) {
|
|||||||
if (capture[i] == NULL)
|
if (capture[i] == NULL)
|
||||||
s += "(?,?)";
|
s += "(?,?)";
|
||||||
else if (capture[i+1] == NULL)
|
else if (capture[i+1] == NULL)
|
||||||
s += StringPrintf("(%d,?)",
|
s += StringPrintf("(%td,?)",
|
||||||
(int)(capture[i] - btext_));
|
capture[i] - btext_);
|
||||||
else
|
else
|
||||||
s += StringPrintf("(%d,%d)",
|
s += StringPrintf("(%td,%td)",
|
||||||
(int)(capture[i] - btext_),
|
capture[i] - btext_,
|
||||||
(int)(capture[i+1] - btext_));
|
capture[i+1] - btext_);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -448,7 +452,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
StringPiece context = const_context;
|
StringPiece context = const_context;
|
||||||
if (context.begin() == NULL)
|
if (context.data() == NULL)
|
||||||
context = text;
|
context = text;
|
||||||
|
|
||||||
// Sanity check: make sure that text lies within context.
|
// Sanity check: make sure that text lies within context.
|
||||||
@ -465,7 +469,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
if (prog_->anchor_end()) {
|
if (prog_->anchor_end()) {
|
||||||
longest = true;
|
longest = true;
|
||||||
endmatch_ = true;
|
endmatch_ = true;
|
||||||
etext_ = text.end();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nsubmatch < 0) {
|
if (nsubmatch < 0) {
|
||||||
@ -485,32 +488,33 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
match_ = new const char*[ncapture_];
|
match_ = new const char*[ncapture_];
|
||||||
|
memset(match_, 0, ncapture_*sizeof match_[0]);
|
||||||
matched_ = false;
|
matched_ = false;
|
||||||
|
|
||||||
// For debugging prints.
|
// For debugging prints.
|
||||||
btext_ = context.begin();
|
btext_ = context.data();
|
||||||
|
// For convenience.
|
||||||
|
etext_ = text.data() + text.size();
|
||||||
|
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
|
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
|
||||||
std::string(text).c_str(), std::string(context).c_str(), anchored,
|
std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
|
||||||
longest);
|
|
||||||
|
|
||||||
// Set up search.
|
// Set up search.
|
||||||
Threadq* runq = &q0_;
|
Threadq* runq = &q0_;
|
||||||
Threadq* nextq = &q1_;
|
Threadq* nextq = &q1_;
|
||||||
runq->clear();
|
runq->clear();
|
||||||
nextq->clear();
|
nextq->clear();
|
||||||
memset(&match_[0], 0, ncapture_*sizeof match_[0]);
|
|
||||||
|
|
||||||
// Loop over the text, stepping the machine.
|
// Loop over the text, stepping the machine.
|
||||||
for (const char* p = text.begin();; p++) {
|
for (const char* p = text.data();; p++) {
|
||||||
if (ExtraDebug) {
|
if (ExtraDebug) {
|
||||||
int c = 0;
|
int c = 0;
|
||||||
if (p == context.begin())
|
if (p == btext_)
|
||||||
c = '^';
|
c = '^';
|
||||||
else if (p > text.end())
|
else if (p > etext_)
|
||||||
c = '$';
|
c = '$';
|
||||||
else if (p < text.end())
|
else if (p < etext_)
|
||||||
c = p[0] & 0xFF;
|
c = p[0] & 0xFF;
|
||||||
|
|
||||||
fprintf(stderr, "%c:", c);
|
fprintf(stderr, "%c:", c);
|
||||||
@ -524,14 +528,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This is a no-op the first time around the loop because runq is empty.
|
// This is a no-op the first time around the loop because runq is empty.
|
||||||
int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
|
int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
|
||||||
DCHECK_EQ(runq->size(), 0);
|
DCHECK_EQ(runq->size(), 0);
|
||||||
using std::swap;
|
using std::swap;
|
||||||
swap(nextq, runq);
|
swap(nextq, runq);
|
||||||
nextq->clear();
|
nextq->clear();
|
||||||
if (id != 0) {
|
if (id != 0) {
|
||||||
// We're done: full match ahead.
|
// We're done: full match ahead.
|
||||||
p = text.end();
|
p = etext_;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
Prog::Inst* ip = prog_->inst(id);
|
Prog::Inst* ip = prog_->inst(id);
|
||||||
switch (ip->opcode()) {
|
switch (ip->opcode()) {
|
||||||
@ -559,30 +563,28 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p > text.end())
|
if (p > etext_)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Start a new thread if there have not been any matches.
|
// Start a new thread if there have not been any matches.
|
||||||
// (No point in starting a new thread if there have been
|
// (No point in starting a new thread if there have been
|
||||||
// matches, since it would be to the right of the match
|
// matches, since it would be to the right of the match
|
||||||
// we already found.)
|
// we already found.)
|
||||||
if (!matched_ && (!anchored || p == text.begin())) {
|
if (!matched_ && (!anchored || p == text.data())) {
|
||||||
// If there's a required first byte for an unanchored search
|
// Try to use prefix accel (e.g. memchr) to skip ahead.
|
||||||
// and we're not in the middle of any possible matches,
|
// The search must be unanchored and there must be zero
|
||||||
// use memchr to search for the byte quickly.
|
// possible matches already.
|
||||||
int fb = prog_->first_byte();
|
|
||||||
if (!anchored && runq->size() == 0 &&
|
if (!anchored && runq->size() == 0 &&
|
||||||
fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
|
p < etext_ && prog_->can_prefix_accel()) {
|
||||||
p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
|
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
|
||||||
if (p == NULL) {
|
if (p == NULL)
|
||||||
p = text.end();
|
p = etext_;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Thread* t = AllocThread();
|
Thread* t = AllocThread();
|
||||||
CopyCapture(t->capture, match_);
|
CopyCapture(t->capture, match_);
|
||||||
t->capture[0] = p;
|
t->capture[0] = p;
|
||||||
AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
|
AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
|
||||||
t);
|
t);
|
||||||
Decref(t);
|
Decref(t);
|
||||||
}
|
}
|
||||||
@ -593,10 +595,24 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
fprintf(stderr, "dead\n");
|
fprintf(stderr, "dead\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||||
|
// by simply not continuing the loop.
|
||||||
|
// This complements the special case in NFA::Step().
|
||||||
|
if (p == NULL) {
|
||||||
|
(void) Step(runq, nextq, -1, context, p);
|
||||||
|
DCHECK_EQ(runq->size(), 0);
|
||||||
|
using std::swap;
|
||||||
|
swap(nextq, runq);
|
||||||
|
nextq->clear();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
|
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
|
||||||
Decref(i->value());
|
if (i->value() != NULL)
|
||||||
|
Decref(i->value());
|
||||||
|
}
|
||||||
|
|
||||||
if (matched_) {
|
if (matched_) {
|
||||||
for (int i = 0; i < nsubmatch; i++)
|
for (int i = 0; i < nsubmatch; i++)
|
||||||
@ -605,73 +621,13 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
|||||||
static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
|
static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
fprintf(stderr, "match (%td,%td)\n",
|
fprintf(stderr, "match (%td,%td)\n",
|
||||||
match_[0] - btext_, match_[1] - btext_);
|
match_[0] - btext_,
|
||||||
|
match_[1] - btext_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes whether all successful matches have a common first byte,
|
|
||||||
// and if so, returns that byte. If not, returns -1.
|
|
||||||
int Prog::ComputeFirstByte() {
|
|
||||||
int b = -1;
|
|
||||||
SparseSet q(size());
|
|
||||||
q.insert(start());
|
|
||||||
for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
|
|
||||||
int id = *it;
|
|
||||||
Prog::Inst* ip = inst(id);
|
|
||||||
switch (ip->opcode()) {
|
|
||||||
default:
|
|
||||||
LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kInstMatch:
|
|
||||||
// The empty string matches: no first byte.
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
case kInstByteRange:
|
|
||||||
if (!ip->last())
|
|
||||||
q.insert(id+1);
|
|
||||||
|
|
||||||
// Must match only a single byte
|
|
||||||
if (ip->lo() != ip->hi())
|
|
||||||
return -1;
|
|
||||||
if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
|
|
||||||
return -1;
|
|
||||||
// If we haven't seen any bytes yet, record it;
|
|
||||||
// otherwise must match the one we saw before.
|
|
||||||
if (b == -1)
|
|
||||||
b = ip->lo();
|
|
||||||
else if (b != ip->lo())
|
|
||||||
return -1;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kInstNop:
|
|
||||||
case kInstCapture:
|
|
||||||
case kInstEmptyWidth:
|
|
||||||
if (!ip->last())
|
|
||||||
q.insert(id+1);
|
|
||||||
|
|
||||||
// Continue on.
|
|
||||||
// Ignore ip->empty() flags for kInstEmptyWidth
|
|
||||||
// in order to be as conservative as possible
|
|
||||||
// (assume all possible empty-width flags are true).
|
|
||||||
if (ip->out())
|
|
||||||
q.insert(ip->out());
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kInstAltMatch:
|
|
||||||
DCHECK(!ip->last());
|
|
||||||
q.insert(id+1);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kInstFail:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
|
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
|
||||||
Anchor anchor, MatchKind kind,
|
Anchor anchor, MatchKind kind,
|
||||||
|
14
extern/re2/re2/onepass.cc
vendored
14
extern/re2/re2/onepass.cc
vendored
@ -59,11 +59,11 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
|
||||||
#include "util/sparse_set.h"
|
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
|
#include "re2/sparse_set.h"
|
||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
|
|
||||||
// Silence "zero-sized array in struct/union" warning for OneState::action.
|
// Silence "zero-sized array in struct/union" warning for OneState::action.
|
||||||
@ -235,7 +235,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
|
|||||||
matchcap[i] = NULL;
|
matchcap[i] = NULL;
|
||||||
|
|
||||||
StringPiece context = const_context;
|
StringPiece context = const_context;
|
||||||
if (context.begin() == NULL)
|
if (context.data() == NULL)
|
||||||
context = text;
|
context = text;
|
||||||
if (anchor_start() && context.begin() != text.begin())
|
if (anchor_start() && context.begin() != text.begin())
|
||||||
return false;
|
return false;
|
||||||
@ -249,8 +249,8 @@ bool Prog::SearchOnePass(const StringPiece& text,
|
|||||||
// start() is always mapped to the zeroth OneState.
|
// start() is always mapped to the zeroth OneState.
|
||||||
OneState* state = IndexToNode(nodes, statesize, 0);
|
OneState* state = IndexToNode(nodes, statesize, 0);
|
||||||
uint8_t* bytemap = bytemap_;
|
uint8_t* bytemap = bytemap_;
|
||||||
const char* bp = text.begin();
|
const char* bp = text.data();
|
||||||
const char* ep = text.end();
|
const char* ep = text.data() + text.size();
|
||||||
const char* p;
|
const char* p;
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
matchcap[0] = bp;
|
matchcap[0] = bp;
|
||||||
@ -550,7 +550,7 @@ bool Prog::IsOnePass() {
|
|||||||
if (!AddQ(&workq, ip->out())) {
|
if (!AddQ(&workq, ip->out())) {
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
LOG(ERROR) << StringPrintf(
|
LOG(ERROR) << StringPrintf(
|
||||||
"Not OnePass: multiple paths %d -> %d\n", *it, ip->out());
|
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
id = ip->out();
|
id = ip->out();
|
||||||
@ -561,7 +561,7 @@ bool Prog::IsOnePass() {
|
|||||||
// (3) is violated
|
// (3) is violated
|
||||||
if (ExtraDebug)
|
if (ExtraDebug)
|
||||||
LOG(ERROR) << StringPrintf(
|
LOG(ERROR) << StringPrintf(
|
||||||
"Not OnePass: multiple matches from %d\n", *it);
|
"Not OnePass: multiple matches from %d", *it);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
matched = true;
|
matched = true;
|
||||||
|
113
extern/re2/re2/parse.cc
vendored
113
extern/re2/re2/parse.cc
vendored
@ -27,9 +27,9 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
#include "re2/unicode_casefold.h"
|
#include "re2/unicode_casefold.h"
|
||||||
@ -93,7 +93,7 @@ class Regexp::ParseState {
|
|||||||
bool PushSimpleOp(RegexpOp op);
|
bool PushSimpleOp(RegexpOp op);
|
||||||
|
|
||||||
// Pushes a ^ onto the stack.
|
// Pushes a ^ onto the stack.
|
||||||
bool PushCarat();
|
bool PushCaret();
|
||||||
|
|
||||||
// Pushes a \b (word == true) or \B (word == false) onto the stack.
|
// Pushes a \b (word == true) or \B (word == false) onto the stack.
|
||||||
bool PushWordBoundary(bool word);
|
bool PushWordBoundary(bool word);
|
||||||
@ -423,7 +423,7 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pushes a ^ onto the stack.
|
// Pushes a ^ onto the stack.
|
||||||
bool Regexp::ParseState::PushCarat() {
|
bool Regexp::ParseState::PushCaret() {
|
||||||
if (flags_ & OneLine) {
|
if (flags_ & OneLine) {
|
||||||
return PushSimpleOp(kRegexpBeginText);
|
return PushSimpleOp(kRegexpBeginText);
|
||||||
}
|
}
|
||||||
@ -556,9 +556,10 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
|
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
|
||||||
// This should never be called, since we use Walk and not
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
// WalkExponential.
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
|
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -684,7 +685,7 @@ bool Regexp::ParseState::DoRightParen() {
|
|||||||
if ((r1 = stacktop_) == NULL ||
|
if ((r1 = stacktop_) == NULL ||
|
||||||
(r2 = r1->down_) == NULL ||
|
(r2 = r1->down_) == NULL ||
|
||||||
r2->op() != kLeftParen) {
|
r2->op() != kLeftParen) {
|
||||||
status_->set_code(kRegexpMissingParen);
|
status_->set_code(kRegexpUnexpectedParen);
|
||||||
status_->set_error_arg(whole_regexp_);
|
status_->set_error_arg(whole_regexp_);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1323,14 +1324,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
|
|||||||
// Parses a decimal integer, storing it in *np.
|
// Parses a decimal integer, storing it in *np.
|
||||||
// Sets *s to span the remainder of the string.
|
// Sets *s to span the remainder of the string.
|
||||||
static bool ParseInteger(StringPiece* s, int* np) {
|
static bool ParseInteger(StringPiece* s, int* np) {
|
||||||
if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
|
if (s->empty() || !isdigit((*s)[0] & 0xFF))
|
||||||
return false;
|
return false;
|
||||||
// Disallow leading zeros.
|
// Disallow leading zeros.
|
||||||
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
|
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
|
||||||
return false;
|
return false;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int c;
|
int c;
|
||||||
while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
|
while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
|
||||||
// Avoid overflow.
|
// Avoid overflow.
|
||||||
if (n >= 100000000)
|
if (n >= 100000000)
|
||||||
return false;
|
return false;
|
||||||
@ -1352,16 +1353,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
|
|||||||
// s must NOT be edited unless MaybeParseRepetition returns true.
|
// s must NOT be edited unless MaybeParseRepetition returns true.
|
||||||
static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
|
static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
|
||||||
StringPiece s = *sp;
|
StringPiece s = *sp;
|
||||||
if (s.size() == 0 || s[0] != '{')
|
if (s.empty() || s[0] != '{')
|
||||||
return false;
|
return false;
|
||||||
s.remove_prefix(1); // '{'
|
s.remove_prefix(1); // '{'
|
||||||
if (!ParseInteger(&s, lo))
|
if (!ParseInteger(&s, lo))
|
||||||
return false;
|
return false;
|
||||||
if (s.size() == 0)
|
if (s.empty())
|
||||||
return false;
|
return false;
|
||||||
if (s[0] == ',') {
|
if (s[0] == ',') {
|
||||||
s.remove_prefix(1); // ','
|
s.remove_prefix(1); // ','
|
||||||
if (s.size() == 0)
|
if (s.empty())
|
||||||
return false;
|
return false;
|
||||||
if (s[0] == '}') {
|
if (s[0] == '}') {
|
||||||
// {2,} means at least 2
|
// {2,} means at least 2
|
||||||
@ -1375,7 +1376,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
|
|||||||
// {2} means exactly two
|
// {2} means exactly two
|
||||||
*hi = *lo;
|
*hi = *lo;
|
||||||
}
|
}
|
||||||
if (s.size() == 0 || s[0] != '}')
|
if (s.empty() || s[0] != '}')
|
||||||
return false;
|
return false;
|
||||||
s.remove_prefix(1); // '}'
|
s.remove_prefix(1); // '}'
|
||||||
*sp = s;
|
*sp = s;
|
||||||
@ -1416,7 +1417,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
|
|||||||
static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
|
static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
|
||||||
StringPiece t = s;
|
StringPiece t = s;
|
||||||
Rune r;
|
Rune r;
|
||||||
while (t.size() > 0) {
|
while (!t.empty()) {
|
||||||
if (StringPieceToRune(&r, &t, status) < 0)
|
if (StringPieceToRune(&r, &t, status) < 0)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1447,14 +1448,14 @@ static int UnHex(int c) {
|
|||||||
// Sets *rp to the named character.
|
// Sets *rp to the named character.
|
||||||
static bool ParseEscape(StringPiece* s, Rune* rp,
|
static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||||
RegexpStatus* status, int rune_max) {
|
RegexpStatus* status, int rune_max) {
|
||||||
const char* begin = s->begin();
|
const char* begin = s->data();
|
||||||
if (s->size() < 1 || (*s)[0] != '\\') {
|
if (s->empty() || (*s)[0] != '\\') {
|
||||||
// Should not happen - caller always checks.
|
// Should not happen - caller always checks.
|
||||||
status->set_code(kRegexpInternalError);
|
status->set_code(kRegexpInternalError);
|
||||||
status->set_error_arg(StringPiece());
|
status->set_error_arg(StringPiece());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (s->size() < 2) {
|
if (s->size() == 1) {
|
||||||
status->set_code(kRegexpTrailingBackslash);
|
status->set_code(kRegexpTrailingBackslash);
|
||||||
status->set_error_arg(StringPiece());
|
status->set_error_arg(StringPiece());
|
||||||
return false;
|
return false;
|
||||||
@ -1485,16 +1486,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
|||||||
case '6':
|
case '6':
|
||||||
case '7':
|
case '7':
|
||||||
// Single non-zero octal digit is a backreference; not supported.
|
// Single non-zero octal digit is a backreference; not supported.
|
||||||
if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
|
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
|
||||||
goto BadEscape;
|
goto BadEscape;
|
||||||
FALLTHROUGH_INTENDED;
|
FALLTHROUGH_INTENDED;
|
||||||
case '0':
|
case '0':
|
||||||
// consume up to three octal digits; already have one.
|
// consume up to three octal digits; already have one.
|
||||||
code = c - '0';
|
code = c - '0';
|
||||||
if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
|
if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
|
||||||
code = code * 8 + c - '0';
|
code = code * 8 + c - '0';
|
||||||
s->remove_prefix(1); // digit
|
s->remove_prefix(1); // digit
|
||||||
if (s->size() > 0) {
|
if (!s->empty()) {
|
||||||
c = (*s)[0];
|
c = (*s)[0];
|
||||||
if ('0' <= c && c <= '7') {
|
if ('0' <= c && c <= '7') {
|
||||||
code = code * 8 + c - '0';
|
code = code * 8 + c - '0';
|
||||||
@ -1509,7 +1510,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
|||||||
|
|
||||||
// Hexadecimal escapes
|
// Hexadecimal escapes
|
||||||
case 'x':
|
case 'x':
|
||||||
if (s->size() == 0)
|
if (s->empty())
|
||||||
goto BadEscape;
|
goto BadEscape;
|
||||||
if (StringPieceToRune(&c, s, status) < 0)
|
if (StringPieceToRune(&c, s, status) < 0)
|
||||||
return false;
|
return false;
|
||||||
@ -1529,7 +1530,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
|||||||
code = code * 16 + UnHex(c);
|
code = code * 16 + UnHex(c);
|
||||||
if (code > rune_max)
|
if (code > rune_max)
|
||||||
goto BadEscape;
|
goto BadEscape;
|
||||||
if (s->size() == 0)
|
if (s->empty())
|
||||||
goto BadEscape;
|
goto BadEscape;
|
||||||
if (StringPieceToRune(&c, s, status) < 0)
|
if (StringPieceToRune(&c, s, status) < 0)
|
||||||
return false;
|
return false;
|
||||||
@ -1540,7 +1541,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// Easy case: two hex digits.
|
// Easy case: two hex digits.
|
||||||
if (s->size() == 0)
|
if (s->empty())
|
||||||
goto BadEscape;
|
goto BadEscape;
|
||||||
if (StringPieceToRune(&c1, s, status) < 0)
|
if (StringPieceToRune(&c1, s, status) < 0)
|
||||||
return false;
|
return false;
|
||||||
@ -1590,7 +1591,7 @@ BadEscape:
|
|||||||
// Unrecognized escape sequence.
|
// Unrecognized escape sequence.
|
||||||
status->set_code(kRegexpBadEscape);
|
status->set_code(kRegexpBadEscape);
|
||||||
status->set_error_arg(
|
status->set_error_arg(
|
||||||
StringPiece(begin, static_cast<size_t>(s->begin() - begin)));
|
StringPiece(begin, static_cast<size_t>(s->data() - begin)));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1710,7 +1711,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
|
|||||||
return NULL;
|
return NULL;
|
||||||
// Could use StringPieceToRune, but there aren't
|
// Could use StringPieceToRune, but there aren't
|
||||||
// any non-ASCII Perl group names.
|
// any non-ASCII Perl group names.
|
||||||
StringPiece name(s->begin(), 2);
|
StringPiece name(s->data(), 2);
|
||||||
const UGroup *g = LookupPerlGroup(name);
|
const UGroup *g = LookupPerlGroup(name);
|
||||||
if (g == NULL)
|
if (g == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1750,8 +1751,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
|||||||
return kParseError;
|
return kParseError;
|
||||||
if (c != '{') {
|
if (c != '{') {
|
||||||
// Name is the bit of string we just skipped over for c.
|
// Name is the bit of string we just skipped over for c.
|
||||||
const char* p = seq.begin() + 2;
|
const char* p = seq.data() + 2;
|
||||||
name = StringPiece(p, static_cast<size_t>(s->begin() - p));
|
name = StringPiece(p, static_cast<size_t>(s->data() - p));
|
||||||
} else {
|
} else {
|
||||||
// Name is in braces. Look for closing }
|
// Name is in braces. Look for closing }
|
||||||
size_t end = s->find('}', 0);
|
size_t end = s->find('}', 0);
|
||||||
@ -1762,16 +1763,16 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
|||||||
status->set_error_arg(seq);
|
status->set_error_arg(seq);
|
||||||
return kParseError;
|
return kParseError;
|
||||||
}
|
}
|
||||||
name = StringPiece(s->begin(), end); // without '}'
|
name = StringPiece(s->data(), end); // without '}'
|
||||||
s->remove_prefix(end + 1); // with '}'
|
s->remove_prefix(end + 1); // with '}'
|
||||||
if (!IsValidUTF8(name, status))
|
if (!IsValidUTF8(name, status))
|
||||||
return kParseError;
|
return kParseError;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chop seq where s now begins.
|
// Chop seq where s now begins.
|
||||||
seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin()));
|
seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
|
||||||
|
|
||||||
if (name.size() > 0 && name[0] == '^') {
|
if (!name.empty() && name[0] == '^') {
|
||||||
sign = -sign;
|
sign = -sign;
|
||||||
name.remove_prefix(1); // '^'
|
name.remove_prefix(1); // '^'
|
||||||
}
|
}
|
||||||
@ -1801,14 +1802,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
|||||||
|
|
||||||
// Convert the UnicodeSet to a URange32 and UGroup that we can add.
|
// Convert the UnicodeSet to a URange32 and UGroup that we can add.
|
||||||
int nr = uset.getRangeCount();
|
int nr = uset.getRangeCount();
|
||||||
URange32* r = new URange32[nr];
|
PODArray<URange32> r(nr);
|
||||||
for (int i = 0; i < nr; i++) {
|
for (int i = 0; i < nr; i++) {
|
||||||
r[i].lo = uset.getRangeStart(i);
|
r[i].lo = uset.getRangeStart(i);
|
||||||
r[i].hi = uset.getRangeEnd(i);
|
r[i].hi = uset.getRangeEnd(i);
|
||||||
}
|
}
|
||||||
UGroup g = {"", +1, 0, 0, r, nr};
|
UGroup g = {"", +1, 0, 0, r.data(), nr};
|
||||||
AddUGroup(cc, &g, sign, parse_flags);
|
AddUGroup(cc, &g, sign, parse_flags);
|
||||||
delete[] r;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return kParseOk;
|
return kParseOk;
|
||||||
@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
|
|||||||
bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
|
bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
|
||||||
const StringPiece& whole_class,
|
const StringPiece& whole_class,
|
||||||
RegexpStatus* status) {
|
RegexpStatus* status) {
|
||||||
if (s->size() == 0) {
|
if (s->empty()) {
|
||||||
status->set_code(kRegexpMissingBracket);
|
status->set_code(kRegexpMissingBracket);
|
||||||
status->set_error_arg(whole_class);
|
status->set_error_arg(whole_class);
|
||||||
return false;
|
return false;
|
||||||
@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
|
|||||||
|
|
||||||
// Allow regular escape sequences even though
|
// Allow regular escape sequences even though
|
||||||
// many need not be escaped in this context.
|
// many need not be escaped in this context.
|
||||||
if (s->size() >= 1 && (*s)[0] == '\\')
|
if ((*s)[0] == '\\')
|
||||||
return ParseEscape(s, rp, status, rune_max_);
|
return ParseEscape(s, rp, status, rune_max_);
|
||||||
|
|
||||||
// Otherwise take the next rune.
|
// Otherwise take the next rune.
|
||||||
@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
|||||||
Regexp** out_re,
|
Regexp** out_re,
|
||||||
RegexpStatus* status) {
|
RegexpStatus* status) {
|
||||||
StringPiece whole_class = *s;
|
StringPiece whole_class = *s;
|
||||||
if (s->size() == 0 || (*s)[0] != '[') {
|
if (s->empty() || (*s)[0] != '[') {
|
||||||
// Caller checked this.
|
// Caller checked this.
|
||||||
status->set_code(kRegexpInternalError);
|
status->set_code(kRegexpInternalError);
|
||||||
status->set_error_arg(StringPiece());
|
status->set_error_arg(StringPiece());
|
||||||
@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
|||||||
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
||||||
re->ccb_ = new CharClassBuilder;
|
re->ccb_ = new CharClassBuilder;
|
||||||
s->remove_prefix(1); // '['
|
s->remove_prefix(1); // '['
|
||||||
if (s->size() > 0 && (*s)[0] == '^') {
|
if (!s->empty() && (*s)[0] == '^') {
|
||||||
s->remove_prefix(1); // '^'
|
s->remove_prefix(1); // '^'
|
||||||
negated = true;
|
negated = true;
|
||||||
if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
|
if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
|
||||||
@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool first = true; // ] is okay as first char in class
|
bool first = true; // ] is okay as first char in class
|
||||||
while (s->size() > 0 && ((*s)[0] != ']' || first)) {
|
while (!s->empty() && ((*s)[0] != ']' || first)) {
|
||||||
// - is only okay unescaped as first or last in class.
|
// - is only okay unescaped as first or last in class.
|
||||||
// Except that Perl allows - anywhere.
|
// Except that Perl allows - anywhere.
|
||||||
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
|
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
|
||||||
@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
|||||||
// in the flags.
|
// in the flags.
|
||||||
re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
|
re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
|
||||||
}
|
}
|
||||||
if (s->size() == 0) {
|
if (s->empty()) {
|
||||||
status->set_code(kRegexpMissingBracket);
|
status->set_code(kRegexpMissingBracket);
|
||||||
status->set_error_arg(whole_class);
|
status->set_error_arg(whole_class);
|
||||||
re->Decref();
|
re->Decref();
|
||||||
@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
|||||||
// Python rejects names starting with digits.
|
// Python rejects names starting with digits.
|
||||||
// We don't enforce either of those.
|
// We don't enforce either of those.
|
||||||
static bool IsValidCaptureName(const StringPiece& name) {
|
static bool IsValidCaptureName(const StringPiece& name) {
|
||||||
if (name.size() == 0)
|
if (name.empty())
|
||||||
return false;
|
return false;
|
||||||
for (size_t i = 0; i < name.size(); i++) {
|
for (size_t i = 0; i < name.size(); i++) {
|
||||||
int c = name[i];
|
int c = name[i];
|
||||||
@ -2074,8 +2074,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// t is "P<name>...", t[end] == '>'
|
// t is "P<name>...", t[end] == '>'
|
||||||
StringPiece capture(t.begin()-2, end+3); // "(?P<name>"
|
StringPiece capture(t.data()-2, end+3); // "(?P<name>"
|
||||||
StringPiece name(t.begin()+2, end-2); // "name"
|
StringPiece name(t.data()+2, end-2); // "name"
|
||||||
if (!IsValidUTF8(name, status_))
|
if (!IsValidUTF8(name, status_))
|
||||||
return false;
|
return false;
|
||||||
if (!IsValidCaptureName(name)) {
|
if (!IsValidCaptureName(name)) {
|
||||||
@ -2089,7 +2089,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->remove_prefix(static_cast<size_t>(capture.end() - s->begin()));
|
s->remove_prefix(
|
||||||
|
static_cast<size_t>(capture.data() + capture.size() - s->data()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2098,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
|||||||
int nflags = flags_;
|
int nflags = flags_;
|
||||||
Rune c;
|
Rune c;
|
||||||
for (bool done = false; !done; ) {
|
for (bool done = false; !done; ) {
|
||||||
if (t.size() == 0)
|
if (t.empty())
|
||||||
goto BadPerlOp;
|
goto BadPerlOp;
|
||||||
if (StringPieceToRune(&c, &t, status_) < 0)
|
if (StringPieceToRune(&c, &t, status_) < 0)
|
||||||
return false;
|
return false;
|
||||||
@ -2173,7 +2174,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
|||||||
BadPerlOp:
|
BadPerlOp:
|
||||||
status_->set_code(kRegexpBadPerlOp);
|
status_->set_code(kRegexpBadPerlOp);
|
||||||
status_->set_error_arg(
|
status_->set_error_arg(
|
||||||
StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin())));
|
StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2216,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
|
|
||||||
if (global_flags & Literal) {
|
if (global_flags & Literal) {
|
||||||
// Special parse loop for literal string.
|
// Special parse loop for literal string.
|
||||||
while (t.size() > 0) {
|
while (!t.empty()) {
|
||||||
Rune r;
|
Rune r;
|
||||||
if (StringPieceToRune(&r, &t, status) < 0)
|
if (StringPieceToRune(&r, &t, status) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -2227,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
}
|
}
|
||||||
|
|
||||||
StringPiece lastunary = StringPiece();
|
StringPiece lastunary = StringPiece();
|
||||||
while (t.size() > 0) {
|
while (!t.empty()) {
|
||||||
StringPiece isunary = StringPiece();
|
StringPiece isunary = StringPiece();
|
||||||
switch (t[0]) {
|
switch (t[0]) {
|
||||||
default: {
|
default: {
|
||||||
@ -2270,7 +2271,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '^': // Beginning of line.
|
case '^': // Beginning of line.
|
||||||
if (!ps.PushCarat())
|
if (!ps.PushCaret())
|
||||||
return NULL;
|
return NULL;
|
||||||
t.remove_prefix(1); // '^'
|
t.remove_prefix(1); // '^'
|
||||||
break;
|
break;
|
||||||
@ -2311,18 +2312,18 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
bool nongreedy = false;
|
bool nongreedy = false;
|
||||||
t.remove_prefix(1); // '*' or '+' or '?'
|
t.remove_prefix(1); // '*' or '+' or '?'
|
||||||
if (ps.flags() & PerlX) {
|
if (ps.flags() & PerlX) {
|
||||||
if (t.size() > 0 && t[0] == '?') {
|
if (!t.empty() && t[0] == '?') {
|
||||||
nongreedy = true;
|
nongreedy = true;
|
||||||
t.remove_prefix(1); // '?'
|
t.remove_prefix(1); // '?'
|
||||||
}
|
}
|
||||||
if (lastunary.size() > 0) {
|
if (!lastunary.empty()) {
|
||||||
// In Perl it is not allowed to stack repetition operators:
|
// In Perl it is not allowed to stack repetition operators:
|
||||||
// a** is a syntax error, not a double-star.
|
// a** is a syntax error, not a double-star.
|
||||||
// (and a++ means something else entirely, which we don't support!)
|
// (and a++ means something else entirely, which we don't support!)
|
||||||
status->set_code(kRegexpRepeatOp);
|
status->set_code(kRegexpRepeatOp);
|
||||||
status->set_error_arg(StringPiece(
|
status->set_error_arg(StringPiece(
|
||||||
lastunary.begin(),
|
lastunary.data(),
|
||||||
static_cast<size_t>(t.begin() - lastunary.begin())));
|
static_cast<size_t>(t.data() - lastunary.data())));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2346,16 +2347,16 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
}
|
}
|
||||||
bool nongreedy = false;
|
bool nongreedy = false;
|
||||||
if (ps.flags() & PerlX) {
|
if (ps.flags() & PerlX) {
|
||||||
if (t.size() > 0 && t[0] == '?') {
|
if (!t.empty() && t[0] == '?') {
|
||||||
nongreedy = true;
|
nongreedy = true;
|
||||||
t.remove_prefix(1); // '?'
|
t.remove_prefix(1); // '?'
|
||||||
}
|
}
|
||||||
if (lastunary.size() > 0) {
|
if (!lastunary.empty()) {
|
||||||
// Not allowed to stack repetition operators.
|
// Not allowed to stack repetition operators.
|
||||||
status->set_code(kRegexpRepeatOp);
|
status->set_code(kRegexpRepeatOp);
|
||||||
status->set_error_arg(StringPiece(
|
status->set_error_arg(StringPiece(
|
||||||
lastunary.begin(),
|
lastunary.data(),
|
||||||
static_cast<size_t>(t.begin() - lastunary.begin())));
|
static_cast<size_t>(t.data() - lastunary.data())));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2404,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
|||||||
|
|
||||||
if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
|
if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
|
||||||
t.remove_prefix(2); // '\\', 'Q'
|
t.remove_prefix(2); // '\\', 'Q'
|
||||||
while (t.size() > 0) {
|
while (!t.empty()) {
|
||||||
if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
|
if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
|
||||||
t.remove_prefix(2); // '\\', 'E'
|
t.remove_prefix(2); // '\\', 'E'
|
||||||
break;
|
break;
|
||||||
|
68
extern/re2/re2/perl_groups.cc
vendored
68
extern/re2/re2/perl_groups.cc
vendored
@ -20,12 +20,12 @@ static const URange16 code3[] = { /* \w */
|
|||||||
{ 0x61, 0x7a },
|
{ 0x61, 0x7a },
|
||||||
};
|
};
|
||||||
const UGroup perl_groups[] = {
|
const UGroup perl_groups[] = {
|
||||||
{ "\\d", +1, code1, 1 },
|
{ "\\d", +1, code1, 1, 0, 0 },
|
||||||
{ "\\D", -1, code1, 1 },
|
{ "\\D", -1, code1, 1, 0, 0 },
|
||||||
{ "\\s", +1, code2, 3 },
|
{ "\\s", +1, code2, 3, 0, 0 },
|
||||||
{ "\\S", -1, code2, 3 },
|
{ "\\S", -1, code2, 3, 0, 0 },
|
||||||
{ "\\w", +1, code3, 4 },
|
{ "\\w", +1, code3, 4, 0, 0 },
|
||||||
{ "\\W", -1, code3, 4 },
|
{ "\\W", -1, code3, 4, 0, 0 },
|
||||||
};
|
};
|
||||||
const int num_perl_groups = 6;
|
const int num_perl_groups = 6;
|
||||||
static const URange16 code4[] = { /* [:alnum:] */
|
static const URange16 code4[] = { /* [:alnum:] */
|
||||||
@ -85,34 +85,34 @@ static const URange16 code17[] = { /* [:xdigit:] */
|
|||||||
{ 0x61, 0x66 },
|
{ 0x61, 0x66 },
|
||||||
};
|
};
|
||||||
const UGroup posix_groups[] = {
|
const UGroup posix_groups[] = {
|
||||||
{ "[:alnum:]", +1, code4, 3 },
|
{ "[:alnum:]", +1, code4, 3, 0, 0 },
|
||||||
{ "[:^alnum:]", -1, code4, 3 },
|
{ "[:^alnum:]", -1, code4, 3, 0, 0 },
|
||||||
{ "[:alpha:]", +1, code5, 2 },
|
{ "[:alpha:]", +1, code5, 2, 0, 0 },
|
||||||
{ "[:^alpha:]", -1, code5, 2 },
|
{ "[:^alpha:]", -1, code5, 2, 0, 0 },
|
||||||
{ "[:ascii:]", +1, code6, 1 },
|
{ "[:ascii:]", +1, code6, 1, 0, 0 },
|
||||||
{ "[:^ascii:]", -1, code6, 1 },
|
{ "[:^ascii:]", -1, code6, 1, 0, 0 },
|
||||||
{ "[:blank:]", +1, code7, 2 },
|
{ "[:blank:]", +1, code7, 2, 0, 0 },
|
||||||
{ "[:^blank:]", -1, code7, 2 },
|
{ "[:^blank:]", -1, code7, 2, 0, 0 },
|
||||||
{ "[:cntrl:]", +1, code8, 2 },
|
{ "[:cntrl:]", +1, code8, 2, 0, 0 },
|
||||||
{ "[:^cntrl:]", -1, code8, 2 },
|
{ "[:^cntrl:]", -1, code8, 2, 0, 0 },
|
||||||
{ "[:digit:]", +1, code9, 1 },
|
{ "[:digit:]", +1, code9, 1, 0, 0 },
|
||||||
{ "[:^digit:]", -1, code9, 1 },
|
{ "[:^digit:]", -1, code9, 1, 0, 0 },
|
||||||
{ "[:graph:]", +1, code10, 1 },
|
{ "[:graph:]", +1, code10, 1, 0, 0 },
|
||||||
{ "[:^graph:]", -1, code10, 1 },
|
{ "[:^graph:]", -1, code10, 1, 0, 0 },
|
||||||
{ "[:lower:]", +1, code11, 1 },
|
{ "[:lower:]", +1, code11, 1, 0, 0 },
|
||||||
{ "[:^lower:]", -1, code11, 1 },
|
{ "[:^lower:]", -1, code11, 1, 0, 0 },
|
||||||
{ "[:print:]", +1, code12, 1 },
|
{ "[:print:]", +1, code12, 1, 0, 0 },
|
||||||
{ "[:^print:]", -1, code12, 1 },
|
{ "[:^print:]", -1, code12, 1, 0, 0 },
|
||||||
{ "[:punct:]", +1, code13, 4 },
|
{ "[:punct:]", +1, code13, 4, 0, 0 },
|
||||||
{ "[:^punct:]", -1, code13, 4 },
|
{ "[:^punct:]", -1, code13, 4, 0, 0 },
|
||||||
{ "[:space:]", +1, code14, 2 },
|
{ "[:space:]", +1, code14, 2, 0, 0 },
|
||||||
{ "[:^space:]", -1, code14, 2 },
|
{ "[:^space:]", -1, code14, 2, 0, 0 },
|
||||||
{ "[:upper:]", +1, code15, 1 },
|
{ "[:upper:]", +1, code15, 1, 0, 0 },
|
||||||
{ "[:^upper:]", -1, code15, 1 },
|
{ "[:^upper:]", -1, code15, 1, 0, 0 },
|
||||||
{ "[:word:]", +1, code16, 4 },
|
{ "[:word:]", +1, code16, 4, 0, 0 },
|
||||||
{ "[:^word:]", -1, code16, 4 },
|
{ "[:^word:]", -1, code16, 4, 0, 0 },
|
||||||
{ "[:xdigit:]", +1, code17, 3 },
|
{ "[:xdigit:]", +1, code17, 3, 0, 0 },
|
||||||
{ "[:^xdigit:]", -1, code17, 3 },
|
{ "[:^xdigit:]", -1, code17, 3, 0, 0 },
|
||||||
};
|
};
|
||||||
const int num_posix_groups = 28;
|
const int num_posix_groups = 28;
|
||||||
|
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#ifndef UTIL_POD_ARRAY_H_
|
#ifndef RE2_POD_ARRAY_H_
|
||||||
#define UTIL_POD_ARRAY_H_
|
#define RE2_POD_ARRAY_H_
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
@ -13,7 +13,7 @@ namespace re2 {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
class PODArray {
|
class PODArray {
|
||||||
public:
|
public:
|
||||||
static_assert(std::is_pod<T>::value,
|
static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
|
||||||
"T must be POD");
|
"T must be POD");
|
||||||
|
|
||||||
PODArray()
|
PODArray()
|
||||||
@ -52,4 +52,4 @@ class PODArray {
|
|||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
#endif // UTIL_POD_ARRAY_H_
|
#endif // RE2_POD_ARRAY_H_
|
5
extern/re2/re2/prefilter.cc
vendored
5
extern/re2/re2/prefilter.cc
vendored
@ -648,14 +648,15 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
Regexp* simple = re->Simplify();
|
Regexp* simple = re->Simplify();
|
||||||
Prefilter::Info *info = BuildInfo(simple);
|
if (simple == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
Prefilter::Info* info = BuildInfo(simple);
|
||||||
simple->Decref();
|
simple->Decref();
|
||||||
if (info == NULL)
|
if (info == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
Prefilter* m = info->TakeMatch();
|
Prefilter* m = info->TakeMatch();
|
||||||
|
|
||||||
delete info;
|
delete info;
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
4
extern/re2/re2/prefilter_tree.cc
vendored
4
extern/re2/re2/prefilter_tree.cc
vendored
@ -107,7 +107,7 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
|
|||||||
|
|
||||||
Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
|
Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
|
||||||
std::string node_string = NodeString(node);
|
std::string node_string = NodeString(node);
|
||||||
std::map<std::string, Prefilter*>::iterator iter = nodes->find(node_string);
|
NodeMap::iterator iter = nodes->find(node_string);
|
||||||
if (iter == nodes->end())
|
if (iter == nodes->end())
|
||||||
return NULL;
|
return NULL;
|
||||||
return (*iter).second;
|
return (*iter).second;
|
||||||
@ -377,7 +377,7 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
|
|||||||
LOG(ERROR) << it->first;
|
LOG(ERROR) << it->first;
|
||||||
}
|
}
|
||||||
LOG(ERROR) << "Map:";
|
LOG(ERROR) << "Map:";
|
||||||
for (std::map<std::string, Prefilter*>::const_iterator iter = nodes->begin();
|
for (NodeMap::const_iterator iter = nodes->begin();
|
||||||
iter != nodes->end(); ++iter)
|
iter != nodes->end(); ++iter)
|
||||||
LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
|
LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
|
||||||
<< " Str: " << (*iter).first;
|
<< " Str: " << (*iter).first;
|
||||||
|
2
extern/re2/re2/prefilter_tree.h
vendored
2
extern/re2/re2/prefilter_tree.h
vendored
@ -21,8 +21,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/sparse_array.h"
|
|
||||||
#include "re2/prefilter.h"
|
#include "re2/prefilter.h"
|
||||||
|
#include "re2/sparse_array.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
|
149
extern/re2/re2/prog.cc
vendored
149
extern/re2/re2/prog.cc
vendored
@ -7,6 +7,12 @@
|
|||||||
|
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
#include <immintrin.h>
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -109,8 +115,9 @@ Prog::Prog()
|
|||||||
start_unanchored_(0),
|
start_unanchored_(0),
|
||||||
size_(0),
|
size_(0),
|
||||||
bytemap_range_(0),
|
bytemap_range_(0),
|
||||||
first_byte_(-1),
|
prefix_size_(0),
|
||||||
flags_(0),
|
prefix_front_(-1),
|
||||||
|
prefix_back_(-1),
|
||||||
list_count_(0),
|
list_count_(0),
|
||||||
dfa_mem_(0),
|
dfa_mem_(0),
|
||||||
dfa_first_(NULL),
|
dfa_first_(NULL),
|
||||||
@ -185,14 +192,31 @@ std::string Prog::DumpByteMap() {
|
|||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Prog::first_byte() {
|
// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
||||||
std::call_once(first_byte_once_, [](Prog* prog) {
|
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
||||||
prog->first_byte_ = prog->ComputeFirstByte();
|
for (;;) {
|
||||||
}, this);
|
switch (ip->opcode()) {
|
||||||
return first_byte_;
|
default:
|
||||||
}
|
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
||||||
|
return false;
|
||||||
|
|
||||||
static bool IsMatch(Prog*, Prog::Inst*);
|
case kInstAlt:
|
||||||
|
case kInstAltMatch:
|
||||||
|
case kInstByteRange:
|
||||||
|
case kInstFail:
|
||||||
|
case kInstEmptyWidth:
|
||||||
|
return false;
|
||||||
|
|
||||||
|
case kInstCapture:
|
||||||
|
case kInstNop:
|
||||||
|
ip = prog->inst(ip->out());
|
||||||
|
break;
|
||||||
|
|
||||||
|
case kInstMatch:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Peep-hole optimizer.
|
// Peep-hole optimizer.
|
||||||
void Prog::Optimize() {
|
void Prog::Optimize() {
|
||||||
@ -258,54 +282,28 @@ void Prog::Optimize() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
|
||||||
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
|
||||||
for (;;) {
|
|
||||||
switch (ip->opcode()) {
|
|
||||||
default:
|
|
||||||
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
|
||||||
return false;
|
|
||||||
|
|
||||||
case kInstAlt:
|
|
||||||
case kInstAltMatch:
|
|
||||||
case kInstByteRange:
|
|
||||||
case kInstFail:
|
|
||||||
case kInstEmptyWidth:
|
|
||||||
return false;
|
|
||||||
|
|
||||||
case kInstCapture:
|
|
||||||
case kInstNop:
|
|
||||||
ip = prog->inst(ip->out());
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kInstMatch:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
|
uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
|
|
||||||
// ^ and \A
|
// ^ and \A
|
||||||
if (p == text.begin())
|
if (p == text.data())
|
||||||
flags |= kEmptyBeginText | kEmptyBeginLine;
|
flags |= kEmptyBeginText | kEmptyBeginLine;
|
||||||
else if (p[-1] == '\n')
|
else if (p[-1] == '\n')
|
||||||
flags |= kEmptyBeginLine;
|
flags |= kEmptyBeginLine;
|
||||||
|
|
||||||
// $ and \z
|
// $ and \z
|
||||||
if (p == text.end())
|
if (p == text.data() + text.size())
|
||||||
flags |= kEmptyEndText | kEmptyEndLine;
|
flags |= kEmptyEndText | kEmptyEndLine;
|
||||||
else if (p < text.end() && p[0] == '\n')
|
else if (p < text.data() + text.size() && p[0] == '\n')
|
||||||
flags |= kEmptyEndLine;
|
flags |= kEmptyEndLine;
|
||||||
|
|
||||||
// \b and \B
|
// \b and \B
|
||||||
if (p == text.begin() && p == text.end()) {
|
if (p == text.data() && p == text.data() + text.size()) {
|
||||||
// no word boundary here
|
// no word boundary here
|
||||||
} else if (p == text.begin()) {
|
} else if (p == text.data()) {
|
||||||
if (IsWordChar(p[0]))
|
if (IsWordChar(p[0]))
|
||||||
flags |= kEmptyWordBoundary;
|
flags |= kEmptyWordBoundary;
|
||||||
} else if (p == text.end()) {
|
} else if (p == text.data() + text.size()) {
|
||||||
if (IsWordChar(p[-1]))
|
if (IsWordChar(p[-1]))
|
||||||
flags |= kEmptyWordBoundary;
|
flags |= kEmptyWordBoundary;
|
||||||
} else {
|
} else {
|
||||||
@ -918,4 +916,73 @@ void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
// Finds the least significant non-zero bit in n.
|
||||||
|
static int FindLSBSet(uint32_t n) {
|
||||||
|
DCHECK_NE(n, 0);
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
return __builtin_ctz(n);
|
||||||
|
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||||
|
unsigned long c;
|
||||||
|
_BitScanForward(&c, n);
|
||||||
|
return static_cast<int>(c);
|
||||||
|
#else
|
||||||
|
int c = 31;
|
||||||
|
for (int shift = 1 << 4; shift != 0; shift >>= 1) {
|
||||||
|
uint32_t word = n << shift;
|
||||||
|
if (word != 0) {
|
||||||
|
n = word;
|
||||||
|
c -= shift;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
|
||||||
|
DCHECK_GE(prefix_size_, 2);
|
||||||
|
if (size < prefix_size_)
|
||||||
|
return NULL;
|
||||||
|
// Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
|
||||||
|
// This also means that probing for prefix_back_ doesn't go out of bounds.
|
||||||
|
size -= prefix_size_-1;
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
// Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
|
||||||
|
if (size >= sizeof(__m256i)) {
|
||||||
|
const __m256i* fp = reinterpret_cast<const __m256i*>(
|
||||||
|
reinterpret_cast<const char*>(data));
|
||||||
|
const __m256i* bp = reinterpret_cast<const __m256i*>(
|
||||||
|
reinterpret_cast<const char*>(data) + prefix_size_-1);
|
||||||
|
const __m256i* endfp = fp + size/sizeof(__m256i);
|
||||||
|
const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
|
||||||
|
const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
|
||||||
|
while (fp != endfp) {
|
||||||
|
const __m256i f_loadu = _mm256_loadu_si256(fp++);
|
||||||
|
const __m256i b_loadu = _mm256_loadu_si256(bp++);
|
||||||
|
const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
|
||||||
|
const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
|
||||||
|
const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
|
||||||
|
if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
|
||||||
|
const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
|
||||||
|
const int fb_movemask = _mm256_movemask_epi8(fb_and);
|
||||||
|
const int fb_ctz = FindLSBSet(fb_movemask);
|
||||||
|
return reinterpret_cast<const char*>(fp-1) + fb_ctz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data = fp;
|
||||||
|
size = size%sizeof(__m256i);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const char* p0 = reinterpret_cast<const char*>(data);
|
||||||
|
for (const char* p = p0;; p++) {
|
||||||
|
DCHECK_GE(size, static_cast<size_t>(p-p0));
|
||||||
|
p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
|
||||||
|
if (p == NULL || p[prefix_size_-1] == prefix_back_)
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
36
extern/re2/re2/prog.h
vendored
36
extern/re2/re2/prog.h
vendored
@ -18,10 +18,10 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
#include "re2/pod_array.h"
|
||||||
#include "util/sparse_array.h"
|
|
||||||
#include "util/sparse_set.h"
|
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
|
#include "re2/sparse_array.h"
|
||||||
|
#include "re2/sparse_set.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -198,8 +198,8 @@ class Prog {
|
|||||||
|
|
||||||
Inst *inst(int id) { return &inst_[id]; }
|
Inst *inst(int id) { return &inst_[id]; }
|
||||||
int start() { return start_; }
|
int start() { return start_; }
|
||||||
int start_unanchored() { return start_unanchored_; }
|
|
||||||
void set_start(int start) { start_ = start; }
|
void set_start(int start) { start_ = start; }
|
||||||
|
int start_unanchored() { return start_unanchored_; }
|
||||||
void set_start_unanchored(int start) { start_unanchored_ = start; }
|
void set_start_unanchored(int start) { start_unanchored_ = start; }
|
||||||
int size() { return size_; }
|
int size() { return size_; }
|
||||||
bool reversed() { return reversed_; }
|
bool reversed() { return reversed_; }
|
||||||
@ -207,19 +207,27 @@ class Prog {
|
|||||||
int list_count() { return list_count_; }
|
int list_count() { return list_count_; }
|
||||||
int inst_count(InstOp op) { return inst_count_[op]; }
|
int inst_count(InstOp op) { return inst_count_[op]; }
|
||||||
uint16_t* list_heads() { return list_heads_.data(); }
|
uint16_t* list_heads() { return list_heads_.data(); }
|
||||||
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
|
|
||||||
int64_t dfa_mem() { return dfa_mem_; }
|
int64_t dfa_mem() { return dfa_mem_; }
|
||||||
int flags() { return flags_; }
|
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
|
||||||
void set_flags(int flags) { flags_ = flags; }
|
|
||||||
bool anchor_start() { return anchor_start_; }
|
bool anchor_start() { return anchor_start_; }
|
||||||
void set_anchor_start(bool b) { anchor_start_ = b; }
|
void set_anchor_start(bool b) { anchor_start_ = b; }
|
||||||
bool anchor_end() { return anchor_end_; }
|
bool anchor_end() { return anchor_end_; }
|
||||||
void set_anchor_end(bool b) { anchor_end_ = b; }
|
void set_anchor_end(bool b) { anchor_end_ = b; }
|
||||||
int bytemap_range() { return bytemap_range_; }
|
int bytemap_range() { return bytemap_range_; }
|
||||||
const uint8_t* bytemap() { return bytemap_; }
|
const uint8_t* bytemap() { return bytemap_; }
|
||||||
|
bool can_prefix_accel() { return prefix_size_ != 0; }
|
||||||
|
|
||||||
// Lazily computed.
|
// Accelerates to the first likely occurrence of the prefix.
|
||||||
int first_byte();
|
// Returns a pointer to the first byte or NULL if not found.
|
||||||
|
const void* PrefixAccel(const void* data, size_t size) {
|
||||||
|
DCHECK_GE(prefix_size_, 1);
|
||||||
|
return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
|
||||||
|
: PrefixAccel_FrontAndBack(data, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// An implementation of prefix accel that looks for prefix_front_ and
|
||||||
|
// prefix_back_ to return fewer false positives than memchr(3) alone.
|
||||||
|
const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
|
||||||
|
|
||||||
// Returns string representation of program for debugging.
|
// Returns string representation of program for debugging.
|
||||||
std::string Dump();
|
std::string Dump();
|
||||||
@ -297,10 +305,6 @@ class Prog {
|
|||||||
// Compute bytemap.
|
// Compute bytemap.
|
||||||
void ComputeByteMap();
|
void ComputeByteMap();
|
||||||
|
|
||||||
// Computes whether all matches must begin with the same first
|
|
||||||
// byte, and if so, returns that byte. If not, returns -1.
|
|
||||||
int ComputeFirstByte();
|
|
||||||
|
|
||||||
// Run peep-hole optimizer on program.
|
// Run peep-hole optimizer on program.
|
||||||
void Optimize();
|
void Optimize();
|
||||||
|
|
||||||
@ -402,8 +406,9 @@ class Prog {
|
|||||||
int start_unanchored_; // unanchored entry point for program
|
int start_unanchored_; // unanchored entry point for program
|
||||||
int size_; // number of instructions
|
int size_; // number of instructions
|
||||||
int bytemap_range_; // bytemap_[x] < bytemap_range_
|
int bytemap_range_; // bytemap_[x] < bytemap_range_
|
||||||
int first_byte_; // required first byte for match, or -1 if none
|
size_t prefix_size_; // size of prefix (0 if no prefix)
|
||||||
int flags_; // regexp parse flags
|
int prefix_front_; // first byte of prefix (-1 if no prefix)
|
||||||
|
int prefix_back_; // last byte of prefix (-1 if no prefix)
|
||||||
|
|
||||||
int list_count_; // count of lists (see above)
|
int list_count_; // count of lists (see above)
|
||||||
int inst_count_[kNumInst]; // count of instructions by opcode
|
int inst_count_[kNumInst]; // count of instructions by opcode
|
||||||
@ -419,7 +424,6 @@ class Prog {
|
|||||||
|
|
||||||
uint8_t bytemap_[256]; // map from input bytes to byte classes
|
uint8_t bytemap_[256]; // map from input bytes to byte classes
|
||||||
|
|
||||||
std::once_flag first_byte_once_;
|
|
||||||
std::once_flag dfa_first_once_;
|
std::once_flag dfa_first_once_;
|
||||||
std::once_flag dfa_longest_once_;
|
std::once_flag dfa_longest_once_;
|
||||||
|
|
||||||
|
409
extern/re2/re2/re2.cc
vendored
409
extern/re2/re2/re2.cc
vendored
@ -12,10 +12,14 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <atomic>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -24,11 +28,11 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/sparse_array.h"
|
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
#include "re2/sparse_array.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -79,6 +83,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
|
|||||||
return RE2::ErrorMissingBracket;
|
return RE2::ErrorMissingBracket;
|
||||||
case re2::kRegexpMissingParen:
|
case re2::kRegexpMissingParen:
|
||||||
return RE2::ErrorMissingParen;
|
return RE2::ErrorMissingParen;
|
||||||
|
case re2::kRegexpUnexpectedParen:
|
||||||
|
return RE2::ErrorUnexpectedParen;
|
||||||
case re2::kRegexpTrailingBackslash:
|
case re2::kRegexpTrailingBackslash:
|
||||||
return RE2::ErrorTrailingBackslash;
|
return RE2::ErrorTrailingBackslash;
|
||||||
case re2::kRegexpRepeatArgument:
|
case re2::kRegexpRepeatArgument:
|
||||||
@ -172,15 +178,20 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
|
|||||||
empty_group_names = new std::map<int, std::string>;
|
empty_group_names = new std::map<int, std::string>;
|
||||||
});
|
});
|
||||||
|
|
||||||
pattern_ = std::string(pattern);
|
pattern_.assign(pattern.data(), pattern.size());
|
||||||
options_.Copy(options);
|
options_.Copy(options);
|
||||||
entire_regexp_ = NULL;
|
entire_regexp_ = NULL;
|
||||||
|
error_ = empty_string;
|
||||||
|
error_code_ = NoError;
|
||||||
|
error_arg_.clear();
|
||||||
|
prefix_.clear();
|
||||||
|
prefix_foldcase_ = false;
|
||||||
suffix_regexp_ = NULL;
|
suffix_regexp_ = NULL;
|
||||||
prog_ = NULL;
|
prog_ = NULL;
|
||||||
num_captures_ = -1;
|
num_captures_ = -1;
|
||||||
|
is_one_pass_ = false;
|
||||||
|
|
||||||
rprog_ = NULL;
|
rprog_ = NULL;
|
||||||
error_ = empty_string;
|
|
||||||
error_code_ = NoError;
|
|
||||||
named_groups_ = NULL;
|
named_groups_ = NULL;
|
||||||
group_names_ = NULL;
|
group_names_ = NULL;
|
||||||
|
|
||||||
@ -239,9 +250,11 @@ re2::Prog* RE2::ReverseProg() const {
|
|||||||
if (re->rprog_ == NULL) {
|
if (re->rprog_ == NULL) {
|
||||||
if (re->options_.log_errors())
|
if (re->options_.log_errors())
|
||||||
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
|
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
|
||||||
re->error_ =
|
// We no longer touch error_ and error_code_ because failing to compile
|
||||||
new std::string("pattern too large - reverse compile failed");
|
// the reverse Prog is not a showstopper: falling back to NFA execution
|
||||||
re->error_code_ = RE2::ErrorPatternTooLarge;
|
// is fine. More importantly, an RE2 object is supposed to be logically
|
||||||
|
// immutable: whatever ok() would have returned after Init() completed,
|
||||||
|
// it should continue to return that no matter what ReverseProg() does.
|
||||||
}
|
}
|
||||||
}, this);
|
}, this);
|
||||||
return rprog_;
|
return rprog_;
|
||||||
@ -277,28 +290,54 @@ int RE2::ReverseProgramSize() const {
|
|||||||
return prog->size();
|
return prog->size();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Fanout(Prog* prog, std::map<int, int>* histogram) {
|
// Finds the most significant non-zero bit in n.
|
||||||
SparseArray<int> fanout(prog->size());
|
static int FindMSBSet(uint32_t n) {
|
||||||
prog->Fanout(&fanout);
|
DCHECK_NE(n, 0);
|
||||||
histogram->clear();
|
#if defined(__GNUC__)
|
||||||
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
|
return 31 ^ __builtin_clz(n);
|
||||||
// TODO(junyer): Optimise this?
|
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||||
int bucket = 0;
|
unsigned long c;
|
||||||
while (1 << bucket < i->value()) {
|
_BitScanReverse(&c, n);
|
||||||
bucket++;
|
return static_cast<int>(c);
|
||||||
|
#else
|
||||||
|
int c = 0;
|
||||||
|
for (int shift = 1 << 4; shift != 0; shift >>= 1) {
|
||||||
|
uint32_t word = n >> shift;
|
||||||
|
if (word != 0) {
|
||||||
|
n = word;
|
||||||
|
c += shift;
|
||||||
}
|
}
|
||||||
(*histogram)[bucket]++;
|
|
||||||
}
|
}
|
||||||
return histogram->rbegin()->first;
|
return c;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int RE2::ProgramFanout(std::map<int, int>* histogram) const {
|
static int Fanout(Prog* prog, std::vector<int>* histogram) {
|
||||||
|
SparseArray<int> fanout(prog->size());
|
||||||
|
prog->Fanout(&fanout);
|
||||||
|
int data[32] = {};
|
||||||
|
int size = 0;
|
||||||
|
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
|
||||||
|
if (i->value() == 0)
|
||||||
|
continue;
|
||||||
|
uint32_t value = i->value();
|
||||||
|
int bucket = FindMSBSet(value);
|
||||||
|
bucket += value & (value-1) ? 1 : 0;
|
||||||
|
++data[bucket];
|
||||||
|
size = std::max(size, bucket+1);
|
||||||
|
}
|
||||||
|
if (histogram != NULL)
|
||||||
|
histogram->assign(data, data+size);
|
||||||
|
return size-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int RE2::ProgramFanout(std::vector<int>* histogram) const {
|
||||||
if (prog_ == NULL)
|
if (prog_ == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
return Fanout(prog_, histogram);
|
return Fanout(prog_, histogram);
|
||||||
}
|
}
|
||||||
|
|
||||||
int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
|
int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
|
||||||
if (prog_ == NULL)
|
if (prog_ == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
Prog* prog = ReverseProg();
|
Prog* prog = ReverseProg();
|
||||||
@ -368,6 +407,8 @@ bool RE2::Replace(std::string* str,
|
|||||||
const StringPiece& rewrite) {
|
const StringPiece& rewrite) {
|
||||||
StringPiece vec[kVecSize];
|
StringPiece vec[kVecSize];
|
||||||
int nvec = 1 + MaxSubmatch(rewrite);
|
int nvec = 1 + MaxSubmatch(rewrite);
|
||||||
|
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||||
|
return false;
|
||||||
if (nvec > static_cast<int>(arraysize(vec)))
|
if (nvec > static_cast<int>(arraysize(vec)))
|
||||||
return false;
|
return false;
|
||||||
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
|
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
|
||||||
@ -377,8 +418,8 @@ bool RE2::Replace(std::string* str,
|
|||||||
if (!re.Rewrite(&s, rewrite, vec, nvec))
|
if (!re.Rewrite(&s, rewrite, vec, nvec))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
assert(vec[0].begin() >= str->data());
|
assert(vec[0].data() >= str->data());
|
||||||
assert(vec[0].end() <= str->data()+str->size());
|
assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
|
||||||
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
|
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -388,6 +429,8 @@ int RE2::GlobalReplace(std::string* str,
|
|||||||
const StringPiece& rewrite) {
|
const StringPiece& rewrite) {
|
||||||
StringPiece vec[kVecSize];
|
StringPiece vec[kVecSize];
|
||||||
int nvec = 1 + MaxSubmatch(rewrite);
|
int nvec = 1 + MaxSubmatch(rewrite);
|
||||||
|
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||||
|
return false;
|
||||||
if (nvec > static_cast<int>(arraysize(vec)))
|
if (nvec > static_cast<int>(arraysize(vec)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -406,9 +449,9 @@ int RE2::GlobalReplace(std::string* str,
|
|||||||
if (!re.Match(*str, static_cast<size_t>(p - str->data()),
|
if (!re.Match(*str, static_cast<size_t>(p - str->data()),
|
||||||
str->size(), UNANCHORED, vec, nvec))
|
str->size(), UNANCHORED, vec, nvec))
|
||||||
break;
|
break;
|
||||||
if (p < vec[0].begin())
|
if (p < vec[0].data())
|
||||||
out.append(p, vec[0].begin() - p);
|
out.append(p, vec[0].data() - p);
|
||||||
if (vec[0].begin() == lastend && vec[0].size() == 0) {
|
if (vec[0].data() == lastend && vec[0].empty()) {
|
||||||
// Disallow empty match at end of last match: skip ahead.
|
// Disallow empty match at end of last match: skip ahead.
|
||||||
//
|
//
|
||||||
// fullrune() takes int, not ptrdiff_t. However, it just looks
|
// fullrune() takes int, not ptrdiff_t. However, it just looks
|
||||||
@ -439,7 +482,7 @@ int RE2::GlobalReplace(std::string* str,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
re.Rewrite(&out, rewrite, vec, nvec);
|
re.Rewrite(&out, rewrite, vec, nvec);
|
||||||
p = vec[0].end();
|
p = vec[0].data() + vec[0].size();
|
||||||
lastend = p;
|
lastend = p;
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
@ -460,9 +503,10 @@ bool RE2::Extract(const StringPiece& text,
|
|||||||
std::string* out) {
|
std::string* out) {
|
||||||
StringPiece vec[kVecSize];
|
StringPiece vec[kVecSize];
|
||||||
int nvec = 1 + MaxSubmatch(rewrite);
|
int nvec = 1 + MaxSubmatch(rewrite);
|
||||||
|
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||||
|
return false;
|
||||||
if (nvec > static_cast<int>(arraysize(vec)))
|
if (nvec > static_cast<int>(arraysize(vec)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
|
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -610,6 +654,8 @@ bool RE2::Match(const StringPiece& text,
|
|||||||
// If the regexp is anchored explicitly, must not be in middle of text.
|
// If the regexp is anchored explicitly, must not be in middle of text.
|
||||||
if (prog_->anchor_start() && startpos != 0)
|
if (prog_->anchor_start() && startpos != 0)
|
||||||
return false;
|
return false;
|
||||||
|
if (prog_->anchor_end() && endpos != text.size())
|
||||||
|
return false;
|
||||||
|
|
||||||
// If the regexp is anchored explicitly, update re_anchor
|
// If the regexp is anchored explicitly, update re_anchor
|
||||||
// so that we can potentially fall into a faster case below.
|
// so that we can potentially fall into a faster case below.
|
||||||
@ -643,7 +689,6 @@ bool RE2::Match(const StringPiece& text,
|
|||||||
Prog::MatchKind kind = Prog::kFirstMatch;
|
Prog::MatchKind kind = Prog::kFirstMatch;
|
||||||
if (options_.longest_match())
|
if (options_.longest_match())
|
||||||
kind = Prog::kLongestMatch;
|
kind = Prog::kLongestMatch;
|
||||||
bool skipped_test = false;
|
|
||||||
|
|
||||||
bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
|
bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
|
||||||
|
|
||||||
@ -655,38 +700,82 @@ bool RE2::Match(const StringPiece& text,
|
|||||||
bool can_bit_state = prog_->CanBitState();
|
bool can_bit_state = prog_->CanBitState();
|
||||||
size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
|
size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
|
||||||
|
|
||||||
|
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||||
|
hooks::context = this;
|
||||||
|
#endif
|
||||||
bool dfa_failed = false;
|
bool dfa_failed = false;
|
||||||
|
bool skipped_test = false;
|
||||||
switch (re_anchor) {
|
switch (re_anchor) {
|
||||||
default:
|
default:
|
||||||
|
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
|
||||||
|
return false;
|
||||||
|
|
||||||
case UNANCHORED: {
|
case UNANCHORED: {
|
||||||
|
if (prog_->anchor_end()) {
|
||||||
|
// This is a very special case: we don't need the forward DFA because
|
||||||
|
// we already know where the match must end! Instead, the reverse DFA
|
||||||
|
// can say whether there is a match and (optionally) where it starts.
|
||||||
|
Prog* prog = ReverseProg();
|
||||||
|
if (prog == NULL) {
|
||||||
|
// Fall back to NFA below.
|
||||||
|
skipped_test = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
|
||||||
|
Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
|
||||||
|
if (dfa_failed) {
|
||||||
|
if (options_.log_errors())
|
||||||
|
LOG(ERROR) << "DFA out of memory: "
|
||||||
|
<< "pattern length " << pattern_.size() << ", "
|
||||||
|
<< "program size " << prog->size() << ", "
|
||||||
|
<< "list count " << prog->list_count() << ", "
|
||||||
|
<< "bytemap range " << prog->bytemap_range();
|
||||||
|
// Fall back to NFA below.
|
||||||
|
skipped_test = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (matchp == NULL) // Matched. Don't care where.
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!prog_->SearchDFA(subtext, text, anchor, kind,
|
if (!prog_->SearchDFA(subtext, text, anchor, kind,
|
||||||
matchp, &dfa_failed, NULL)) {
|
matchp, &dfa_failed, NULL)) {
|
||||||
if (dfa_failed) {
|
if (dfa_failed) {
|
||||||
if (options_.log_errors())
|
if (options_.log_errors())
|
||||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
LOG(ERROR) << "DFA out of memory: "
|
||||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
<< "pattern length " << pattern_.size() << ", "
|
||||||
<< "list count " << prog_->list_count();
|
<< "program size " << prog_->size() << ", "
|
||||||
|
<< "list count " << prog_->list_count() << ", "
|
||||||
|
<< "bytemap range " << prog_->bytemap_range();
|
||||||
// Fall back to NFA below.
|
// Fall back to NFA below.
|
||||||
skipped_test = true;
|
skipped_test = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (matchp == NULL) // Matched. Don't care where
|
if (matchp == NULL) // Matched. Don't care where.
|
||||||
return true;
|
return true;
|
||||||
// SearchDFA set match[0].end() but didn't know where the
|
// SearchDFA set match.end() but didn't know where the
|
||||||
// match started. Run the regexp backward from match[0].end()
|
// match started. Run the regexp backward from match.end()
|
||||||
// to find the longest possible match -- that's where it started.
|
// to find the longest possible match -- that's where it started.
|
||||||
Prog* prog = ReverseProg();
|
Prog* prog = ReverseProg();
|
||||||
if (prog == NULL)
|
if (prog == NULL) {
|
||||||
return false;
|
// Fall back to NFA below.
|
||||||
|
skipped_test = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (!prog->SearchDFA(match, text, Prog::kAnchored,
|
if (!prog->SearchDFA(match, text, Prog::kAnchored,
|
||||||
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
|
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
|
||||||
if (dfa_failed) {
|
if (dfa_failed) {
|
||||||
if (options_.log_errors())
|
if (options_.log_errors())
|
||||||
LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", "
|
LOG(ERROR) << "DFA out of memory: "
|
||||||
<< "bytemap range " << prog->bytemap_range() << ", "
|
<< "pattern length " << pattern_.size() << ", "
|
||||||
<< "list count " << prog->list_count();
|
<< "program size " << prog->size() << ", "
|
||||||
|
<< "list count " << prog->list_count() << ", "
|
||||||
|
<< "bytemap range " << prog->bytemap_range();
|
||||||
// Fall back to NFA below.
|
// Fall back to NFA below.
|
||||||
skipped_test = true;
|
skipped_test = true;
|
||||||
break;
|
break;
|
||||||
@ -724,9 +813,11 @@ bool RE2::Match(const StringPiece& text,
|
|||||||
&match, &dfa_failed, NULL)) {
|
&match, &dfa_failed, NULL)) {
|
||||||
if (dfa_failed) {
|
if (dfa_failed) {
|
||||||
if (options_.log_errors())
|
if (options_.log_errors())
|
||||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
LOG(ERROR) << "DFA out of memory: "
|
||||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
<< "pattern length " << pattern_.size() << ", "
|
||||||
<< "list count " << prog_->list_count();
|
<< "program size " << prog_->size() << ", "
|
||||||
|
<< "list count " << prog_->list_count() << ", "
|
||||||
|
<< "bytemap range " << prog_->bytemap_range();
|
||||||
// Fall back to NFA below.
|
// Fall back to NFA below.
|
||||||
skipped_test = true;
|
skipped_test = true;
|
||||||
break;
|
break;
|
||||||
@ -928,13 +1019,13 @@ bool RE2::Rewrite(std::string* out,
|
|||||||
int n = (c - '0');
|
int n = (c - '0');
|
||||||
if (n >= veclen) {
|
if (n >= veclen) {
|
||||||
if (options_.log_errors()) {
|
if (options_.log_errors()) {
|
||||||
LOG(ERROR) << "requested group " << n
|
LOG(ERROR) << "invalid substitution \\" << n
|
||||||
<< " in regexp " << rewrite.data();
|
<< " from " << veclen << " groups";
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
StringPiece snip = vec[n];
|
StringPiece snip = vec[n];
|
||||||
if (snip.size() > 0)
|
if (!snip.empty())
|
||||||
out->append(snip.data(), snip.size());
|
out->append(snip.data(), snip.size());
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
out->push_back('\\');
|
out->push_back('\\');
|
||||||
@ -949,41 +1040,49 @@ bool RE2::Rewrite(std::string* out,
|
|||||||
|
|
||||||
/***** Parsers for various types *****/
|
/***** Parsers for various types *****/
|
||||||
|
|
||||||
bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
|
namespace re2_internal {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, void* dest) {
|
||||||
// We fail if somebody asked us to store into a non-NULL void* pointer
|
// We fail if somebody asked us to store into a non-NULL void* pointer
|
||||||
return (dest == NULL);
|
return (dest == NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, std::string* dest) {
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
reinterpret_cast<std::string*>(dest)->assign(str, n);
|
dest->assign(str, n);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, StringPiece* dest) {
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
|
*dest = StringPiece(str, n);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, char* dest) {
|
||||||
if (n != 1) return false;
|
if (n != 1) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<char*>(dest)) = str[0];
|
*dest = str[0];
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, signed char* dest) {
|
||||||
if (n != 1) return false;
|
if (n != 1) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<signed char*>(dest)) = str[0];
|
*dest = str[0];
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, unsigned char* dest) {
|
||||||
if (n != 1) return false;
|
if (n != 1) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
|
*dest = str[0];
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1047,10 +1146,40 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_long_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, float* dest) {
|
||||||
void* dest,
|
if (n == 0) return false;
|
||||||
int radix) {
|
static const int kMaxLength = 200;
|
||||||
|
char buf[kMaxLength+1];
|
||||||
|
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
||||||
|
char* end;
|
||||||
|
errno = 0;
|
||||||
|
float r = strtof(str, &end);
|
||||||
|
if (end != str + n) return false; // Leftover junk
|
||||||
|
if (errno) return false;
|
||||||
|
if (dest == NULL) return true;
|
||||||
|
*dest = r;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, double* dest) {
|
||||||
|
if (n == 0) return false;
|
||||||
|
static const int kMaxLength = 200;
|
||||||
|
char buf[kMaxLength+1];
|
||||||
|
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
||||||
|
char* end;
|
||||||
|
errno = 0;
|
||||||
|
double r = strtod(str, &end);
|
||||||
|
if (end != str + n) return false; // Leftover junk
|
||||||
|
if (errno) return false;
|
||||||
|
if (dest == NULL) return true;
|
||||||
|
*dest = r;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool Parse(const char* str, size_t n, long* dest, int radix) {
|
||||||
if (n == 0) return false;
|
if (n == 0) return false;
|
||||||
char buf[kMaxNumberLength+1];
|
char buf[kMaxNumberLength+1];
|
||||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||||
@ -1060,14 +1189,12 @@ bool RE2::Arg::parse_long_radix(const char* str,
|
|||||||
if (end != str + n) return false; // Leftover junk
|
if (end != str + n) return false; // Leftover junk
|
||||||
if (errno) return false;
|
if (errno) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<long*>(dest)) = r;
|
*dest = r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_ulong_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
if (n == 0) return false;
|
if (n == 0) return false;
|
||||||
char buf[kMaxNumberLength+1];
|
char buf[kMaxNumberLength+1];
|
||||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||||
@ -1083,62 +1210,52 @@ bool RE2::Arg::parse_ulong_radix(const char* str,
|
|||||||
if (end != str + n) return false; // Leftover junk
|
if (end != str + n) return false; // Leftover junk
|
||||||
if (errno) return false;
|
if (errno) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<unsigned long*>(dest)) = r;
|
*dest = r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_short_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, short* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
long r;
|
long r;
|
||||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||||
if ((short)r != r) return false; // Out of range
|
if ((short)r != r) return false; // Out of range
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<short*>(dest)) = (short)r;
|
*dest = (short)r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_ushort_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
unsigned long r;
|
unsigned long r;
|
||||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||||
if ((unsigned short)r != r) return false; // Out of range
|
if ((unsigned short)r != r) return false; // Out of range
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
|
*dest = (unsigned short)r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_int_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, int* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
long r;
|
long r;
|
||||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||||
if ((int)r != r) return false; // Out of range
|
if ((int)r != r) return false; // Out of range
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<int*>(dest)) = (int)r;
|
*dest = (int)r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_uint_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
unsigned long r;
|
unsigned long r;
|
||||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||||
if ((unsigned int)r != r) return false; // Out of range
|
if ((unsigned int)r != r) return false; // Out of range
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
|
*dest = (unsigned int)r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_longlong_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, long long* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
if (n == 0) return false;
|
if (n == 0) return false;
|
||||||
char buf[kMaxNumberLength+1];
|
char buf[kMaxNumberLength+1];
|
||||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||||
@ -1148,14 +1265,12 @@ bool RE2::Arg::parse_longlong_radix(const char* str,
|
|||||||
if (end != str + n) return false; // Leftover junk
|
if (end != str + n) return false; // Leftover junk
|
||||||
if (errno) return false;
|
if (errno) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<long long*>(dest)) = r;
|
*dest = r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
template <>
|
||||||
size_t n,
|
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
|
||||||
void* dest,
|
|
||||||
int radix) {
|
|
||||||
if (n == 0) return false;
|
if (n == 0) return false;
|
||||||
char buf[kMaxNumberLength+1];
|
char buf[kMaxNumberLength+1];
|
||||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||||
@ -1170,67 +1285,47 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
|||||||
if (end != str + n) return false; // Leftover junk
|
if (end != str + n) return false; // Leftover junk
|
||||||
if (errno) return false;
|
if (errno) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
*(reinterpret_cast<unsigned long long*>(dest)) = r;
|
*dest = r;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool parse_double_float(const char* str, size_t n, bool isfloat,
|
} // namespace re2_internal
|
||||||
void* dest) {
|
|
||||||
if (n == 0) return false;
|
|
||||||
static const int kMaxLength = 200;
|
|
||||||
char buf[kMaxLength+1];
|
|
||||||
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
|
||||||
char* end;
|
|
||||||
errno = 0;
|
|
||||||
double r;
|
|
||||||
if (isfloat) {
|
|
||||||
r = strtof(str, &end);
|
|
||||||
} else {
|
|
||||||
r = strtod(str, &end);
|
|
||||||
}
|
|
||||||
if (end != str + n) return false; // Leftover junk
|
|
||||||
if (errno) return false;
|
|
||||||
if (dest == NULL) return true;
|
|
||||||
if (isfloat) {
|
|
||||||
*(reinterpret_cast<float*>(dest)) = (float)r;
|
|
||||||
} else {
|
|
||||||
*(reinterpret_cast<double*>(dest)) = r;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
|
namespace hooks {
|
||||||
return parse_double_float(str, n, false, dest);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
|
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||||
return parse_double_float(str, n, true, dest);
|
thread_local const RE2* context = NULL;
|
||||||
}
|
#endif
|
||||||
|
|
||||||
#define DEFINE_INTEGER_PARSER(name) \
|
template <typename T>
|
||||||
bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) { \
|
union Hook {
|
||||||
return parse_##name##_radix(str, n, dest, 10); \
|
void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
|
||||||
} \
|
T* Load() const { return cb_.load(std::memory_order_acquire); }
|
||||||
bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
|
|
||||||
return parse_##name##_radix(str, n, dest, 16); \
|
|
||||||
} \
|
|
||||||
bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
|
|
||||||
return parse_##name##_radix(str, n, dest, 8); \
|
|
||||||
} \
|
|
||||||
bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n, \
|
|
||||||
void* dest) { \
|
|
||||||
return parse_##name##_radix(str, n, dest, 0); \
|
|
||||||
}
|
|
||||||
|
|
||||||
DEFINE_INTEGER_PARSER(short);
|
#if !defined(__clang__) && defined(_MSC_VER)
|
||||||
DEFINE_INTEGER_PARSER(ushort);
|
// Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
|
||||||
DEFINE_INTEGER_PARSER(int);
|
// this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
|
||||||
DEFINE_INTEGER_PARSER(uint);
|
static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
|
||||||
DEFINE_INTEGER_PARSER(long);
|
"std::atomic<T*> must be always lock-free");
|
||||||
DEFINE_INTEGER_PARSER(ulong);
|
T* cb_for_constinit_;
|
||||||
DEFINE_INTEGER_PARSER(longlong);
|
#endif
|
||||||
DEFINE_INTEGER_PARSER(ulonglong);
|
|
||||||
|
|
||||||
#undef DEFINE_INTEGER_PARSER
|
std::atomic<T*> cb_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void DoNothing(const T&) {}
|
||||||
|
|
||||||
|
#define DEFINE_HOOK(type, name) \
|
||||||
|
static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
|
||||||
|
void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
|
||||||
|
type##Callback* Get##type##Hook() { return name##_hook.Load(); }
|
||||||
|
|
||||||
|
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
|
||||||
|
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
|
||||||
|
|
||||||
|
#undef DEFINE_HOOK
|
||||||
|
|
||||||
|
} // namespace hooks
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
421
extern/re2/re2/re2.h
vendored
421
extern/re2/re2/re2.h
vendored
@ -30,6 +30,19 @@
|
|||||||
// "(?i)hello" -- (?i) turns on case-insensitive matching
|
// "(?i)hello" -- (?i) turns on case-insensitive matching
|
||||||
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
|
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
|
||||||
//
|
//
|
||||||
|
// The double backslashes are needed when writing C++ string literals.
|
||||||
|
// However, they should NOT be used when writing C++11 raw string literals:
|
||||||
|
//
|
||||||
|
// R"(hello (\w+) world)" -- \w matches a "word" character
|
||||||
|
// R"(version (\d+))" -- \d matches a digit
|
||||||
|
// R"(hello\s+world)" -- \s matches any whitespace character
|
||||||
|
// R"(\b(\w+)\b)" -- \b matches non-empty string at word boundary
|
||||||
|
// R"((?i)hello)" -- (?i) turns on case-insensitive matching
|
||||||
|
// R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible
|
||||||
|
//
|
||||||
|
// When using UTF-8 encoding, case-insensitive matching will perform
|
||||||
|
// simple case folding, not full case folding.
|
||||||
|
//
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// MATCHING INTERFACE:
|
// MATCHING INTERFACE:
|
||||||
//
|
//
|
||||||
@ -195,6 +208,12 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
#include <TargetConditionals.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
|
|
||||||
@ -229,6 +248,7 @@ class RE2 {
|
|||||||
ErrorBadCharRange, // bad character class range
|
ErrorBadCharRange, // bad character class range
|
||||||
ErrorMissingBracket, // missing closing ]
|
ErrorMissingBracket, // missing closing ]
|
||||||
ErrorMissingParen, // missing closing )
|
ErrorMissingParen, // missing closing )
|
||||||
|
ErrorUnexpectedParen, // unexpected closing )
|
||||||
ErrorTrailingBackslash, // trailing \ at end of regexp
|
ErrorTrailingBackslash, // trailing \ at end of regexp
|
||||||
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
|
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
|
||||||
ErrorRepeatSize, // bad repetition argument
|
ErrorRepeatSize, // bad repetition argument
|
||||||
@ -287,11 +307,11 @@ class RE2 {
|
|||||||
int ProgramSize() const;
|
int ProgramSize() const;
|
||||||
int ReverseProgramSize() const;
|
int ReverseProgramSize() const;
|
||||||
|
|
||||||
// EXPERIMENTAL! SUBJECT TO CHANGE!
|
// If histogram is not null, outputs the program fanout
|
||||||
// Outputs the program fanout as a histogram bucketed by powers of 2.
|
// as a histogram bucketed by powers of 2.
|
||||||
// Returns the number of the largest non-empty bucket.
|
// Returns the number of the largest non-empty bucket.
|
||||||
int ProgramFanout(std::map<int, int>* histogram) const;
|
int ProgramFanout(std::vector<int>* histogram) const;
|
||||||
int ReverseProgramFanout(std::map<int, int>* histogram) const;
|
int ReverseProgramFanout(std::vector<int>* histogram) const;
|
||||||
|
|
||||||
// Returns the underlying Regexp; not for general use.
|
// Returns the underlying Regexp; not for general use.
|
||||||
// Returns entire_regexp_ so that callers don't need
|
// Returns entire_regexp_ so that callers don't need
|
||||||
@ -349,12 +369,12 @@ class RE2 {
|
|||||||
// (void*)NULL (the corresponding matched sub-pattern is not copied)
|
// (void*)NULL (the corresponding matched sub-pattern is not copied)
|
||||||
//
|
//
|
||||||
// Returns true iff all of the following conditions are satisfied:
|
// Returns true iff all of the following conditions are satisfied:
|
||||||
// a. "text" matches "re" exactly
|
// a. "text" matches "re" fully - from the beginning to the end of "text".
|
||||||
// b. The number of matched sub-patterns is >= number of supplied pointers
|
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||||
// c. The "i"th argument has a suitable type for holding the
|
// c. The "i"th argument has a suitable type for holding the
|
||||||
// string captured as the "i"th sub-pattern. If you pass in
|
// string captured as the "i"th sub-pattern. If you pass in
|
||||||
// NULL for the "i"th argument, or pass fewer arguments than
|
// NULL for the "i"th argument, or pass fewer arguments than
|
||||||
// number of sub-patterns, "i"th captured sub-pattern is
|
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||||
// ignored.
|
// ignored.
|
||||||
//
|
//
|
||||||
// CAVEAT: An optional sub-pattern that does not exist in the
|
// CAVEAT: An optional sub-pattern that does not exist in the
|
||||||
@ -368,8 +388,17 @@ class RE2 {
|
|||||||
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
|
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exactly like FullMatch(), except that "re" is allowed to match
|
// Like FullMatch(), except that "re" is allowed to match a substring
|
||||||
// a substring of "text".
|
// of "text".
|
||||||
|
//
|
||||||
|
// Returns true iff all of the following conditions are satisfied:
|
||||||
|
// a. "text" matches "re" partially - for some substring of "text".
|
||||||
|
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||||
|
// c. The "i"th argument has a suitable type for holding the
|
||||||
|
// string captured as the "i"th sub-pattern. If you pass in
|
||||||
|
// NULL for the "i"th argument, or pass fewer arguments than
|
||||||
|
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||||
|
// ignored.
|
||||||
template <typename... A>
|
template <typename... A>
|
||||||
static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
|
static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
|
||||||
return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
|
return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||||
@ -378,7 +407,16 @@ class RE2 {
|
|||||||
// Like FullMatch() and PartialMatch(), except that "re" has to match
|
// Like FullMatch() and PartialMatch(), except that "re" has to match
|
||||||
// a prefix of the text, and "input" is advanced past the matched
|
// a prefix of the text, and "input" is advanced past the matched
|
||||||
// text. Note: "input" is modified iff this routine returns true
|
// text. Note: "input" is modified iff this routine returns true
|
||||||
// and "re" matched a non-empty substring of "text".
|
// and "re" matched a non-empty substring of "input".
|
||||||
|
//
|
||||||
|
// Returns true iff all of the following conditions are satisfied:
|
||||||
|
// a. "input" matches "re" partially - for some prefix of "input".
|
||||||
|
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||||
|
// c. The "i"th argument has a suitable type for holding the
|
||||||
|
// string captured as the "i"th sub-pattern. If you pass in
|
||||||
|
// NULL for the "i"th argument, or pass fewer arguments than
|
||||||
|
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||||
|
// ignored.
|
||||||
template <typename... A>
|
template <typename... A>
|
||||||
static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
|
static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
|
||||||
return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
|
return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
|
||||||
@ -388,6 +426,15 @@ class RE2 {
|
|||||||
// the text. That is, "re" need not start its match at the beginning
|
// the text. That is, "re" need not start its match at the beginning
|
||||||
// of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
|
// of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
|
||||||
// the next word in "s" and stores it in "word".
|
// the next word in "s" and stores it in "word".
|
||||||
|
//
|
||||||
|
// Returns true iff all of the following conditions are satisfied:
|
||||||
|
// a. "input" matches "re" partially - for some substring of "input".
|
||||||
|
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||||
|
// c. The "i"th argument has a suitable type for holding the
|
||||||
|
// string captured as the "i"th sub-pattern. If you pass in
|
||||||
|
// NULL for the "i"th argument, or pass fewer arguments than
|
||||||
|
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||||
|
// ignored.
|
||||||
template <typename... A>
|
template <typename... A>
|
||||||
static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
|
static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
|
||||||
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
|
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
|
||||||
@ -443,7 +490,7 @@ class RE2 {
|
|||||||
|
|
||||||
// Escapes all potentially meaningful regexp characters in
|
// Escapes all potentially meaningful regexp characters in
|
||||||
// 'unquoted'. The returned string, used as a regular expression,
|
// 'unquoted'. The returned string, used as a regular expression,
|
||||||
// will exactly match the original string. For example,
|
// will match exactly the original string. For example,
|
||||||
// 1.5-2.0?
|
// 1.5-2.0?
|
||||||
// may become:
|
// may become:
|
||||||
// 1\.5\-2\.0\?
|
// 1\.5\-2\.0\?
|
||||||
@ -626,17 +673,6 @@ class RE2 {
|
|||||||
Encoding encoding() const { return encoding_; }
|
Encoding encoding() const { return encoding_; }
|
||||||
void set_encoding(Encoding encoding) { encoding_ = encoding; }
|
void set_encoding(Encoding encoding) { encoding_ = encoding; }
|
||||||
|
|
||||||
// Legacy interface to encoding.
|
|
||||||
// TODO(rsc): Remove once clients have been converted.
|
|
||||||
bool utf8() const { return encoding_ == EncodingUTF8; }
|
|
||||||
void set_utf8(bool b) {
|
|
||||||
if (b) {
|
|
||||||
encoding_ = EncodingUTF8;
|
|
||||||
} else {
|
|
||||||
encoding_ = EncodingLatin1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool posix_syntax() const { return posix_syntax_; }
|
bool posix_syntax() const { return posix_syntax_; }
|
||||||
void set_posix_syntax(bool b) { posix_syntax_ = b; }
|
void set_posix_syntax(bool b) { posix_syntax_ = b; }
|
||||||
|
|
||||||
@ -699,32 +735,12 @@ class RE2 {
|
|||||||
const Options& options() const { return options_; }
|
const Options& options() const { return options_; }
|
||||||
|
|
||||||
// Argument converters; see below.
|
// Argument converters; see below.
|
||||||
static inline Arg CRadix(short* x);
|
template <typename T>
|
||||||
static inline Arg CRadix(unsigned short* x);
|
static Arg CRadix(T* ptr);
|
||||||
static inline Arg CRadix(int* x);
|
template <typename T>
|
||||||
static inline Arg CRadix(unsigned int* x);
|
static Arg Hex(T* ptr);
|
||||||
static inline Arg CRadix(long* x);
|
template <typename T>
|
||||||
static inline Arg CRadix(unsigned long* x);
|
static Arg Octal(T* ptr);
|
||||||
static inline Arg CRadix(long long* x);
|
|
||||||
static inline Arg CRadix(unsigned long long* x);
|
|
||||||
|
|
||||||
static inline Arg Hex(short* x);
|
|
||||||
static inline Arg Hex(unsigned short* x);
|
|
||||||
static inline Arg Hex(int* x);
|
|
||||||
static inline Arg Hex(unsigned int* x);
|
|
||||||
static inline Arg Hex(long* x);
|
|
||||||
static inline Arg Hex(unsigned long* x);
|
|
||||||
static inline Arg Hex(long long* x);
|
|
||||||
static inline Arg Hex(unsigned long long* x);
|
|
||||||
|
|
||||||
static inline Arg Octal(short* x);
|
|
||||||
static inline Arg Octal(unsigned short* x);
|
|
||||||
static inline Arg Octal(int* x);
|
|
||||||
static inline Arg Octal(unsigned int* x);
|
|
||||||
static inline Arg Octal(long* x);
|
|
||||||
static inline Arg Octal(unsigned long* x);
|
|
||||||
static inline Arg Octal(long long* x);
|
|
||||||
static inline Arg Octal(unsigned long long* x);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Init(const StringPiece& pattern, const Options& options);
|
void Init(const StringPiece& pattern, const Options& options);
|
||||||
@ -737,29 +753,26 @@ class RE2 {
|
|||||||
|
|
||||||
re2::Prog* ReverseProg() const;
|
re2::Prog* ReverseProg() const;
|
||||||
|
|
||||||
std::string pattern_; // string regular expression
|
std::string pattern_; // string regular expression
|
||||||
Options options_; // option flags
|
Options options_; // option flags
|
||||||
std::string prefix_; // required prefix (before regexp_)
|
re2::Regexp* entire_regexp_; // parsed regular expression
|
||||||
bool prefix_foldcase_; // prefix is ASCII case-insensitive
|
const std::string* error_; // error indicator (or points to empty string)
|
||||||
re2::Regexp* entire_regexp_; // parsed regular expression
|
ErrorCode error_code_; // error code
|
||||||
re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed
|
std::string error_arg_; // fragment of regexp showing error
|
||||||
re2::Prog* prog_; // compiled program for regexp
|
std::string prefix_; // required prefix (before suffix_regexp_)
|
||||||
int num_captures_; // Number of capturing groups
|
bool prefix_foldcase_; // prefix_ is ASCII case-insensitive
|
||||||
bool is_one_pass_; // can use prog_->SearchOnePass?
|
re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
|
||||||
|
re2::Prog* prog_; // compiled program for regexp
|
||||||
mutable re2::Prog* rprog_; // reverse program for regexp
|
int num_captures_; // number of capturing groups
|
||||||
mutable const std::string* error_; // Error indicator
|
bool is_one_pass_; // can use prog_->SearchOnePass?
|
||||||
// (or points to empty string)
|
|
||||||
mutable ErrorCode error_code_; // Error code
|
|
||||||
mutable std::string error_arg_; // Fragment of regexp showing error
|
|
||||||
|
|
||||||
|
// Reverse Prog for DFA execution only
|
||||||
|
mutable re2::Prog* rprog_;
|
||||||
// Map from capture names to indices
|
// Map from capture names to indices
|
||||||
mutable const std::map<std::string, int>* named_groups_;
|
mutable const std::map<std::string, int>* named_groups_;
|
||||||
|
|
||||||
// Map from capture indices to names
|
// Map from capture indices to names
|
||||||
mutable const std::map<int, std::string>* group_names_;
|
mutable const std::map<int, std::string>* group_names_;
|
||||||
|
|
||||||
// Onces for lazy computations.
|
|
||||||
mutable std::once_flag rprog_once_;
|
mutable std::once_flag rprog_once_;
|
||||||
mutable std::once_flag named_groups_once_;
|
mutable std::once_flag named_groups_once_;
|
||||||
mutable std::once_flag group_names_once_;
|
mutable std::once_flag group_names_once_;
|
||||||
@ -770,137 +783,134 @@ class RE2 {
|
|||||||
|
|
||||||
/***** Implementation details *****/
|
/***** Implementation details *****/
|
||||||
|
|
||||||
// Hex/Octal/Binary?
|
namespace re2_internal {
|
||||||
|
|
||||||
// Special class for parsing into objects that define a ParseFrom() method
|
// Types for which the 3-ary Parse() function template has specializations.
|
||||||
template <class T>
|
template <typename T> struct Parse3ary : public std::false_type {};
|
||||||
class _RE2_MatchObject {
|
template <> struct Parse3ary<void> : public std::true_type {};
|
||||||
public:
|
template <> struct Parse3ary<std::string> : public std::true_type {};
|
||||||
static inline bool Parse(const char* str, size_t n, void* dest) {
|
template <> struct Parse3ary<StringPiece> : public std::true_type {};
|
||||||
if (dest == NULL) return true;
|
template <> struct Parse3ary<char> : public std::true_type {};
|
||||||
T* object = reinterpret_cast<T*>(dest);
|
template <> struct Parse3ary<signed char> : public std::true_type {};
|
||||||
return object->ParseFrom(str, n);
|
template <> struct Parse3ary<unsigned char> : public std::true_type {};
|
||||||
}
|
template <> struct Parse3ary<float> : public std::true_type {};
|
||||||
};
|
template <> struct Parse3ary<double> : public std::true_type {};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool Parse(const char* str, size_t n, T* dest);
|
||||||
|
|
||||||
|
// Types for which the 4-ary Parse() function template has specializations.
|
||||||
|
template <typename T> struct Parse4ary : public std::false_type {};
|
||||||
|
template <> struct Parse4ary<long> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<unsigned long> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<short> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<unsigned short> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<int> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<unsigned int> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<long long> : public std::true_type {};
|
||||||
|
template <> struct Parse4ary<unsigned long long> : public std::true_type {};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool Parse(const char* str, size_t n, T* dest, int radix);
|
||||||
|
|
||||||
|
} // namespace re2_internal
|
||||||
|
|
||||||
class RE2::Arg {
|
class RE2::Arg {
|
||||||
public:
|
private:
|
||||||
// Empty constructor so we can declare arrays of RE2::Arg
|
template <typename T>
|
||||||
Arg();
|
using CanParse3ary = typename std::enable_if<
|
||||||
|
re2_internal::Parse3ary<T>::value,
|
||||||
|
int>::type;
|
||||||
|
|
||||||
// Constructor specially designed for NULL arguments
|
template <typename T>
|
||||||
Arg(void*);
|
using CanParse4ary = typename std::enable_if<
|
||||||
Arg(std::nullptr_t);
|
re2_internal::Parse4ary<T>::value,
|
||||||
|
int>::type;
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
template <typename T>
|
||||||
|
using CanParseFrom = typename std::enable_if<
|
||||||
|
std::is_member_function_pointer<
|
||||||
|
decltype(static_cast<bool (T::*)(const char*, size_t)>(
|
||||||
|
&T::ParseFrom))>::value,
|
||||||
|
int>::type;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
public:
|
||||||
|
Arg() : Arg(nullptr) {}
|
||||||
|
Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
|
||||||
|
|
||||||
|
template <typename T, CanParse3ary<T> = 0>
|
||||||
|
Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
|
||||||
|
|
||||||
|
template <typename T, CanParse4ary<T> = 0>
|
||||||
|
Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
template <typename T, CanParseFrom<T> = 0>
|
||||||
|
Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef bool (*Parser)(const char* str, size_t n, void* dest);
|
typedef bool (*Parser)(const char* str, size_t n, void* dest);
|
||||||
|
|
||||||
// Type-specific parsers
|
template <typename T>
|
||||||
#define MAKE_PARSER(type, name) \
|
Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}
|
||||||
Arg(type* p) : arg_(p), parser_(name) {} \
|
|
||||||
Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
|
|
||||||
|
|
||||||
MAKE_PARSER(char, parse_char)
|
bool Parse(const char* str, size_t n) const {
|
||||||
MAKE_PARSER(signed char, parse_schar)
|
return (*parser_)(str, n, arg_);
|
||||||
MAKE_PARSER(unsigned char, parse_uchar)
|
|
||||||
MAKE_PARSER(float, parse_float)
|
|
||||||
MAKE_PARSER(double, parse_double)
|
|
||||||
MAKE_PARSER(std::string, parse_string)
|
|
||||||
MAKE_PARSER(StringPiece, parse_stringpiece)
|
|
||||||
|
|
||||||
MAKE_PARSER(short, parse_short)
|
|
||||||
MAKE_PARSER(unsigned short, parse_ushort)
|
|
||||||
MAKE_PARSER(int, parse_int)
|
|
||||||
MAKE_PARSER(unsigned int, parse_uint)
|
|
||||||
MAKE_PARSER(long, parse_long)
|
|
||||||
MAKE_PARSER(unsigned long, parse_ulong)
|
|
||||||
MAKE_PARSER(long long, parse_longlong)
|
|
||||||
MAKE_PARSER(unsigned long long, parse_ulonglong)
|
|
||||||
|
|
||||||
#undef MAKE_PARSER
|
|
||||||
|
|
||||||
// Generic constructor templates
|
|
||||||
template <class T> Arg(T* p)
|
|
||||||
: arg_(p), parser_(_RE2_MatchObject<T>::Parse) { }
|
|
||||||
template <class T> Arg(T* p, Parser parser)
|
|
||||||
: arg_(p), parser_(parser) { }
|
|
||||||
|
|
||||||
// Parse the data
|
|
||||||
bool Parse(const char* str, size_t n) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void* arg_;
|
|
||||||
Parser parser_;
|
|
||||||
|
|
||||||
static bool parse_null (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_char (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_schar (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_uchar (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_float (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_double (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_string (const char* str, size_t n, void* dest);
|
|
||||||
static bool parse_stringpiece (const char* str, size_t n, void* dest);
|
|
||||||
|
|
||||||
#define DECLARE_INTEGER_PARSER(name) \
|
|
||||||
private: \
|
|
||||||
static bool parse_##name(const char* str, size_t n, void* dest); \
|
|
||||||
static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
|
|
||||||
int radix); \
|
|
||||||
\
|
|
||||||
public: \
|
|
||||||
static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
|
|
||||||
static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
|
|
||||||
static bool parse_##name##_cradix(const char* str, size_t n, void* dest);
|
|
||||||
|
|
||||||
DECLARE_INTEGER_PARSER(short)
|
|
||||||
DECLARE_INTEGER_PARSER(ushort)
|
|
||||||
DECLARE_INTEGER_PARSER(int)
|
|
||||||
DECLARE_INTEGER_PARSER(uint)
|
|
||||||
DECLARE_INTEGER_PARSER(long)
|
|
||||||
DECLARE_INTEGER_PARSER(ulong)
|
|
||||||
DECLARE_INTEGER_PARSER(longlong)
|
|
||||||
DECLARE_INTEGER_PARSER(ulonglong)
|
|
||||||
|
|
||||||
#undef DECLARE_INTEGER_PARSER
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
|
|
||||||
inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
|
|
||||||
inline RE2::Arg::Arg(std::nullptr_t p) : arg_(p), parser_(parse_null) { }
|
|
||||||
|
|
||||||
inline bool RE2::Arg::Parse(const char* str, size_t n) const {
|
|
||||||
return (*parser_)(str, n, arg_);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This part of the parser, appropriate only for ints, deals with bases
|
|
||||||
#define MAKE_INTEGER_PARSER(type, name) \
|
|
||||||
inline RE2::Arg RE2::Hex(type* ptr) { \
|
|
||||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_hex); \
|
|
||||||
} \
|
|
||||||
inline RE2::Arg RE2::Octal(type* ptr) { \
|
|
||||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_octal); \
|
|
||||||
} \
|
|
||||||
inline RE2::Arg RE2::CRadix(type* ptr) { \
|
|
||||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_cradix); \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_INTEGER_PARSER(short, short)
|
private:
|
||||||
MAKE_INTEGER_PARSER(unsigned short, ushort)
|
static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
|
||||||
MAKE_INTEGER_PARSER(int, int)
|
return true;
|
||||||
MAKE_INTEGER_PARSER(unsigned int, uint)
|
}
|
||||||
MAKE_INTEGER_PARSER(long, long)
|
|
||||||
MAKE_INTEGER_PARSER(unsigned long, ulong)
|
|
||||||
MAKE_INTEGER_PARSER(long long, longlong)
|
|
||||||
MAKE_INTEGER_PARSER(unsigned long long, ulonglong)
|
|
||||||
|
|
||||||
#undef MAKE_INTEGER_PARSER
|
template <typename T>
|
||||||
|
static bool DoParse3ary(const char* str, size_t n, void* dest) {
|
||||||
|
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static bool DoParse4ary(const char* str, size_t n, void* dest) {
|
||||||
|
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
template <typename T>
|
||||||
|
static bool DoParseFrom(const char* str, size_t n, void* dest) {
|
||||||
|
if (dest == NULL) return true;
|
||||||
|
return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void* arg_;
|
||||||
|
Parser parser_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline RE2::Arg RE2::CRadix(T* ptr) {
|
||||||
|
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||||
|
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline RE2::Arg RE2::Hex(T* ptr) {
|
||||||
|
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||||
|
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline RE2::Arg RE2::Octal(T* ptr) {
|
||||||
|
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||||
|
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef SWIG
|
#ifndef SWIG
|
||||||
|
|
||||||
// Silence warnings about missing initializers for members of LazyRE2.
|
// Silence warnings about missing initializers for members of LazyRE2.
|
||||||
// Note that we test for Clang first because it defines __GNUC__ as well.
|
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
|
||||||
#if defined(__clang__)
|
|
||||||
#elif defined(__GNUC__) && __GNUC__ >= 6
|
|
||||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -949,7 +959,52 @@ class LazyRE2 {
|
|||||||
|
|
||||||
void operator=(const LazyRE2&); // disallowed
|
void operator=(const LazyRE2&); // disallowed
|
||||||
};
|
};
|
||||||
#endif // SWIG
|
#endif
|
||||||
|
|
||||||
|
namespace hooks {
|
||||||
|
|
||||||
|
// Most platforms support thread_local. Older versions of iOS don't support
|
||||||
|
// thread_local, but for the sake of brevity, we lump together all versions
|
||||||
|
// of Apple platforms that aren't macOS. If an iOS application really needs
|
||||||
|
// the context pointee someday, we can get more specific then...
|
||||||
|
#define RE2_HAVE_THREAD_LOCAL
|
||||||
|
#if defined(__APPLE__) && !TARGET_OS_OSX
|
||||||
|
#undef RE2_HAVE_THREAD_LOCAL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// A hook must not make any assumptions regarding the lifetime of the context
|
||||||
|
// pointee beyond the current invocation of the hook. Pointers and references
|
||||||
|
// obtained via the context pointee should be considered invalidated when the
|
||||||
|
// hook returns. Hence, any data about the context pointee (e.g. its pattern)
|
||||||
|
// would have to be copied in order for it to be kept for an indefinite time.
|
||||||
|
//
|
||||||
|
// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
|
||||||
|
// could result in infinite mutual recursion. To discourage that possibility,
|
||||||
|
// RE2 will not maintain the context pointer correctly when used in that way.
|
||||||
|
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||||
|
extern thread_local const RE2* context;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct DFAStateCacheReset {
|
||||||
|
int64_t state_budget;
|
||||||
|
size_t state_cache_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DFASearchFailure {
|
||||||
|
// Nothing yet...
|
||||||
|
};
|
||||||
|
|
||||||
|
#define DECLARE_HOOK(type) \
|
||||||
|
using type##Callback = void(const type&); \
|
||||||
|
void Set##type##Hook(type##Callback* cb); \
|
||||||
|
type##Callback* Get##type##Hook();
|
||||||
|
|
||||||
|
DECLARE_HOOK(DFAStateCacheReset)
|
||||||
|
DECLARE_HOOK(DFASearchFailure)
|
||||||
|
|
||||||
|
#undef DECLARE_HOOK
|
||||||
|
|
||||||
|
} // namespace hooks
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
|
150
extern/re2/re2/regexp.cc
vendored
150
extern/re2/re2/regexp.cc
vendored
@ -20,6 +20,7 @@
|
|||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/mutex.h"
|
#include "util/mutex.h"
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
#include "re2/walker-inl.h"
|
#include "re2/walker-inl.h"
|
||||||
|
|
||||||
@ -243,16 +244,15 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
|||||||
return new Regexp(kRegexpEmptyMatch, flags);
|
return new Regexp(kRegexpEmptyMatch, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
Regexp** subcopy = NULL;
|
PODArray<Regexp*> subcopy;
|
||||||
if (op == kRegexpAlternate && can_factor) {
|
if (op == kRegexpAlternate && can_factor) {
|
||||||
// Going to edit sub; make a copy so we don't step on caller.
|
// Going to edit sub; make a copy so we don't step on caller.
|
||||||
subcopy = new Regexp*[nsub];
|
subcopy = PODArray<Regexp*>(nsub);
|
||||||
memmove(subcopy, sub, nsub * sizeof sub[0]);
|
memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
|
||||||
sub = subcopy;
|
sub = subcopy.data();
|
||||||
nsub = FactorAlternation(sub, nsub, flags);
|
nsub = FactorAlternation(sub, nsub, flags);
|
||||||
if (nsub == 1) {
|
if (nsub == 1) {
|
||||||
Regexp* re = sub[0];
|
Regexp* re = sub[0];
|
||||||
delete[] subcopy;
|
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -269,7 +269,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
|||||||
subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
|
subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
|
||||||
nsub - (nbigsub-1)*kMaxNsub, flags,
|
nsub - (nbigsub-1)*kMaxNsub, flags,
|
||||||
false);
|
false);
|
||||||
delete[] subcopy;
|
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -278,8 +277,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
|||||||
Regexp** subs = re->sub();
|
Regexp** subs = re->sub();
|
||||||
for (int i = 0; i < nsub; i++)
|
for (int i = 0; i < nsub; i++)
|
||||||
subs[i] = sub[i];
|
subs[i] = sub[i];
|
||||||
|
|
||||||
delete[] subcopy;
|
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -501,6 +498,7 @@ static const char *kErrorStrings[] = {
|
|||||||
"invalid character class range",
|
"invalid character class range",
|
||||||
"missing ]",
|
"missing ]",
|
||||||
"missing )",
|
"missing )",
|
||||||
|
"unexpected )",
|
||||||
"trailing \\",
|
"trailing \\",
|
||||||
"no argument for repetition operator",
|
"no argument for repetition operator",
|
||||||
"invalid repetition size",
|
"invalid repetition size",
|
||||||
@ -544,9 +542,12 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
|
|||||||
ncapture_++;
|
ncapture_++;
|
||||||
return ignored;
|
return ignored;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
|
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return ignored;
|
return ignored;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -575,7 +576,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||||
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
||||||
// Allocate map once we find a name.
|
// Allocate map once we find a name.
|
||||||
if (map_ == NULL)
|
if (map_ == NULL)
|
||||||
@ -591,8 +592,10 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
|
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return ignored;
|
return ignored;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -621,7 +624,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||||
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
||||||
// Allocate map once we find a name.
|
// Allocate map once we find a name.
|
||||||
if (map_ == NULL)
|
if (map_ == NULL)
|
||||||
@ -633,8 +636,10 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
|
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return ignored;
|
return ignored;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -651,78 +656,89 @@ std::map<int, std::string>* Regexp::CaptureNames() {
|
|||||||
return w.TakeMap();
|
return w.TakeMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
|
||||||
|
std::string* bytes) {
|
||||||
|
if (latin1) {
|
||||||
|
bytes->resize(nrunes);
|
||||||
|
for (int i = 0; i < nrunes; i++)
|
||||||
|
(*bytes)[i] = static_cast<char>(runes[i]);
|
||||||
|
} else {
|
||||||
|
bytes->resize(nrunes * UTFmax); // worst case
|
||||||
|
char* p = &(*bytes)[0];
|
||||||
|
for (int i = 0; i < nrunes; i++)
|
||||||
|
p += runetochar(p, &runes[i]);
|
||||||
|
bytes->resize(p - &(*bytes)[0]);
|
||||||
|
bytes->shrink_to_fit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Determines whether regexp matches must be anchored
|
// Determines whether regexp matches must be anchored
|
||||||
// with a fixed string prefix. If so, returns the prefix and
|
// with a fixed string prefix. If so, returns the prefix and
|
||||||
// the regexp that remains after the prefix. The prefix might
|
// the regexp that remains after the prefix. The prefix might
|
||||||
// be ASCII case-insensitive.
|
// be ASCII case-insensitive.
|
||||||
bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
|
bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
|
||||||
Regexp** suffix) {
|
Regexp** suffix) {
|
||||||
|
prefix->clear();
|
||||||
|
*foldcase = false;
|
||||||
|
*suffix = NULL;
|
||||||
|
|
||||||
// No need for a walker: the regexp must be of the form
|
// No need for a walker: the regexp must be of the form
|
||||||
// 1. some number of ^ anchors
|
// 1. some number of ^ anchors
|
||||||
// 2. a literal char or string
|
// 2. a literal char or string
|
||||||
// 3. the rest
|
// 3. the rest
|
||||||
prefix->clear();
|
|
||||||
*foldcase = false;
|
|
||||||
*suffix = NULL;
|
|
||||||
if (op_ != kRegexpConcat)
|
if (op_ != kRegexpConcat)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Some number of anchors, then a literal or concatenation.
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
Regexp** sub = this->sub();
|
while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
|
||||||
while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
|
|
||||||
i++;
|
i++;
|
||||||
if (i == 0 || i >= nsub_)
|
if (i == 0 || i >= nsub_)
|
||||||
return false;
|
return false;
|
||||||
|
Regexp* re = sub()[i];
|
||||||
Regexp* re = sub[i];
|
if (re->op_ != kRegexpLiteral &&
|
||||||
switch (re->op_) {
|
re->op_ != kRegexpLiteralString)
|
||||||
default:
|
return false;
|
||||||
return false;
|
|
||||||
|
|
||||||
case kRegexpLiteralString:
|
|
||||||
// Convert to string in proper encoding.
|
|
||||||
if (re->parse_flags() & Latin1) {
|
|
||||||
prefix->resize(re->nrunes_);
|
|
||||||
for (int j = 0; j < re->nrunes_; j++)
|
|
||||||
(*prefix)[j] = static_cast<char>(re->runes_[j]);
|
|
||||||
} else {
|
|
||||||
// Convert to UTF-8 in place.
|
|
||||||
// Assume worst-case space and then trim.
|
|
||||||
prefix->resize(re->nrunes_ * UTFmax);
|
|
||||||
char *p = &(*prefix)[0];
|
|
||||||
for (int j = 0; j < re->nrunes_; j++) {
|
|
||||||
Rune r = re->runes_[j];
|
|
||||||
if (r < Runeself)
|
|
||||||
*p++ = static_cast<char>(r);
|
|
||||||
else
|
|
||||||
p += runetochar(p, &r);
|
|
||||||
}
|
|
||||||
prefix->resize(p - &(*prefix)[0]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case kRegexpLiteral:
|
|
||||||
if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
|
|
||||||
prefix->append(1, static_cast<char>(re->rune_));
|
|
||||||
} else {
|
|
||||||
char buf[UTFmax];
|
|
||||||
prefix->append(buf, runetochar(buf, &re->rune_));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*foldcase = (sub[i]->parse_flags() & FoldCase) != 0;
|
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
// The rest.
|
|
||||||
if (i < nsub_) {
|
if (i < nsub_) {
|
||||||
for (int j = i; j < nsub_; j++)
|
for (int j = i; j < nsub_; j++)
|
||||||
sub[j]->Incref();
|
sub()[j]->Incref();
|
||||||
re = Concat(sub + i, nsub_ - i, parse_flags());
|
*suffix = Concat(sub() + i, nsub_ - i, parse_flags());
|
||||||
} else {
|
} else {
|
||||||
re = new Regexp(kRegexpEmptyMatch, parse_flags());
|
*suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
|
||||||
}
|
}
|
||||||
*suffix = re;
|
|
||||||
|
bool latin1 = (re->parse_flags() & Latin1) != 0;
|
||||||
|
Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
|
||||||
|
int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
|
||||||
|
ConvertRunesToBytes(latin1, runes, nrunes, prefix);
|
||||||
|
*foldcase = (re->parse_flags() & FoldCase) != 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determines whether regexp matches must be unanchored
|
||||||
|
// with a fixed string prefix. If so, returns the prefix.
|
||||||
|
// The prefix might be ASCII case-insensitive.
|
||||||
|
bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
|
||||||
|
prefix->clear();
|
||||||
|
*foldcase = false;
|
||||||
|
|
||||||
|
// No need for a walker: the regexp must either begin with or be
|
||||||
|
// a literal char or string. We "see through" capturing groups,
|
||||||
|
// but make no effort to glue multiple prefix fragments together.
|
||||||
|
Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
|
||||||
|
while (re->op_ == kRegexpCapture) {
|
||||||
|
re = re->sub()[0];
|
||||||
|
if (re->op_ == kRegexpConcat && re->nsub_ > 0)
|
||||||
|
re = re->sub()[0];
|
||||||
|
}
|
||||||
|
if (re->op_ != kRegexpLiteral &&
|
||||||
|
re->op_ != kRegexpLiteralString)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
bool latin1 = (re->parse_flags() & Latin1) != 0;
|
||||||
|
Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
|
||||||
|
int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
|
||||||
|
ConvertRunesToBytes(latin1, runes, nrunes, prefix);
|
||||||
|
*foldcase = (re->parse_flags() & FoldCase) != 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -903,7 +919,7 @@ void CharClassBuilder::Negate() {
|
|||||||
// The ranges are allocated in the same block as the header,
|
// The ranges are allocated in the same block as the header,
|
||||||
// necessitating a special allocator and Delete method.
|
// necessitating a special allocator and Delete method.
|
||||||
|
|
||||||
CharClass* CharClass::New(int maxranges) {
|
CharClass* CharClass::New(size_t maxranges) {
|
||||||
CharClass* cc;
|
CharClass* cc;
|
||||||
uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
|
uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
|
||||||
cc = reinterpret_cast<CharClass*>(data);
|
cc = reinterpret_cast<CharClass*>(data);
|
||||||
@ -920,7 +936,7 @@ void CharClass::Delete() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CharClass* CharClass::Negate() {
|
CharClass* CharClass::Negate() {
|
||||||
CharClass* cc = CharClass::New(nranges_+1);
|
CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1));
|
||||||
cc->folds_ascii_ = folds_ascii_;
|
cc->folds_ascii_ = folds_ascii_;
|
||||||
cc->nrunes_ = Runemax + 1 - nrunes_;
|
cc->nrunes_ = Runemax + 1 - nrunes_;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
@ -957,7 +973,7 @@ bool CharClass::Contains(Rune r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CharClass* CharClassBuilder::GetCharClass() {
|
CharClass* CharClassBuilder::GetCharClass() {
|
||||||
CharClass* cc = CharClass::New(static_cast<int>(ranges_.size()));
|
CharClass* cc = CharClass::New(ranges_.size());
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (iterator it = begin(); it != end(); ++it)
|
for (iterator it = begin(); it != end(); ++it)
|
||||||
cc->ranges_[n++] = *it;
|
cc->ranges_[n++] = *it;
|
||||||
|
11
extern/re2/re2/regexp.h
vendored
11
extern/re2/re2/regexp.h
vendored
@ -86,6 +86,7 @@
|
|||||||
// form accessible to clients, so that client code can analyze the
|
// form accessible to clients, so that client code can analyze the
|
||||||
// parsed regular expressions.
|
// parsed regular expressions.
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
@ -177,6 +178,7 @@ enum RegexpStatusCode {
|
|||||||
kRegexpBadCharRange, // bad character class range
|
kRegexpBadCharRange, // bad character class range
|
||||||
kRegexpMissingBracket, // missing closing ]
|
kRegexpMissingBracket, // missing closing ]
|
||||||
kRegexpMissingParen, // missing closing )
|
kRegexpMissingParen, // missing closing )
|
||||||
|
kRegexpUnexpectedParen, // unexpected closing )
|
||||||
kRegexpTrailingBackslash, // at end of regexp
|
kRegexpTrailingBackslash, // at end of regexp
|
||||||
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
|
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
|
||||||
kRegexpRepeatSize, // bad repetition argument
|
kRegexpRepeatSize, // bad repetition argument
|
||||||
@ -258,7 +260,7 @@ class CharClass {
|
|||||||
private:
|
private:
|
||||||
CharClass(); // not implemented
|
CharClass(); // not implemented
|
||||||
~CharClass(); // not implemented
|
~CharClass(); // not implemented
|
||||||
static CharClass* New(int maxranges);
|
static CharClass* New(size_t maxranges);
|
||||||
|
|
||||||
friend class CharClassBuilder;
|
friend class CharClassBuilder;
|
||||||
|
|
||||||
@ -440,6 +442,13 @@ class Regexp {
|
|||||||
bool RequiredPrefix(std::string* prefix, bool* foldcase,
|
bool RequiredPrefix(std::string* prefix, bool* foldcase,
|
||||||
Regexp** suffix);
|
Regexp** suffix);
|
||||||
|
|
||||||
|
// Whether every match of this regexp must be unanchored and
|
||||||
|
// begin with a non-empty fixed string (perhaps after ASCII
|
||||||
|
// case-folding). If so, returns the prefix.
|
||||||
|
// Callers should expect *prefix and *foldcase to be "zeroed"
|
||||||
|
// regardless of the return value.
|
||||||
|
bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Constructor allocates vectors as appropriate for operator.
|
// Constructor allocates vectors as appropriate for operator.
|
||||||
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
|
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
|
||||||
|
48
extern/re2/re2/set.cc
vendored
48
extern/re2/re2/set.cc
vendored
@ -7,30 +7,49 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
#include "re2/pod_array.h"
|
||||||
#include "re2/stringpiece.h"
|
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
#include "re2/stringpiece.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
|
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
|
||||||
options_.Copy(options);
|
: options_(options),
|
||||||
|
anchor_(anchor),
|
||||||
|
compiled_(false),
|
||||||
|
size_(0) {
|
||||||
options_.set_never_capture(true); // might unblock some optimisations
|
options_.set_never_capture(true); // might unblock some optimisations
|
||||||
anchor_ = anchor;
|
|
||||||
prog_ = NULL;
|
|
||||||
compiled_ = false;
|
|
||||||
size_ = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RE2::Set::~Set() {
|
RE2::Set::~Set() {
|
||||||
for (size_t i = 0; i < elem_.size(); i++)
|
for (size_t i = 0; i < elem_.size(); i++)
|
||||||
elem_[i].second->Decref();
|
elem_[i].second->Decref();
|
||||||
delete prog_;
|
}
|
||||||
|
|
||||||
|
RE2::Set::Set(Set&& other)
|
||||||
|
: options_(other.options_),
|
||||||
|
anchor_(other.anchor_),
|
||||||
|
elem_(std::move(other.elem_)),
|
||||||
|
compiled_(other.compiled_),
|
||||||
|
size_(other.size_),
|
||||||
|
prog_(std::move(other.prog_)) {
|
||||||
|
other.elem_.clear();
|
||||||
|
other.elem_.shrink_to_fit();
|
||||||
|
other.compiled_ = false;
|
||||||
|
other.size_ = 0;
|
||||||
|
other.prog_.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
RE2::Set& RE2::Set::operator=(Set&& other) {
|
||||||
|
this->~Set();
|
||||||
|
(void) new (this) Set(std::move(other));
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
|
int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
|
||||||
@ -97,9 +116,9 @@ bool RE2::Set::Compile() {
|
|||||||
options_.ParseFlags());
|
options_.ParseFlags());
|
||||||
re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
|
re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
|
||||||
|
|
||||||
prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
|
prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
|
||||||
re->Decref();
|
re->Decref();
|
||||||
return prog_ != NULL;
|
return prog_ != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
|
bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
|
||||||
@ -124,9 +143,10 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
|
|||||||
NULL, &dfa_failed, matches.get());
|
NULL, &dfa_failed, matches.get());
|
||||||
if (dfa_failed) {
|
if (dfa_failed) {
|
||||||
if (options_.log_errors())
|
if (options_.log_errors())
|
||||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
LOG(ERROR) << "DFA out of memory: "
|
||||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
<< "program size " << prog_->size() << ", "
|
||||||
<< "list count " << prog_->list_count();
|
<< "list count " << prog_->list_count() << ", "
|
||||||
|
<< "bytemap range " << prog_->bytemap_range();
|
||||||
if (error_info != NULL)
|
if (error_info != NULL)
|
||||||
error_info->kind = kOutOfMemory;
|
error_info->kind = kOutOfMemory;
|
||||||
return false;
|
return false;
|
||||||
|
13
extern/re2/re2/set.h
vendored
13
extern/re2/re2/set.h
vendored
@ -5,6 +5,7 @@
|
|||||||
#ifndef RE2_SET_H_
|
#ifndef RE2_SET_H_
|
||||||
#define RE2_SET_H_
|
#define RE2_SET_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -36,6 +37,13 @@ class RE2::Set {
|
|||||||
Set(const RE2::Options& options, RE2::Anchor anchor);
|
Set(const RE2::Options& options, RE2::Anchor anchor);
|
||||||
~Set();
|
~Set();
|
||||||
|
|
||||||
|
// Not copyable.
|
||||||
|
Set(const Set&) = delete;
|
||||||
|
Set& operator=(const Set&) = delete;
|
||||||
|
// Movable.
|
||||||
|
Set(Set&& other);
|
||||||
|
Set& operator=(Set&& other);
|
||||||
|
|
||||||
// Adds pattern to the set using the options passed to the constructor.
|
// Adds pattern to the set using the options passed to the constructor.
|
||||||
// Returns the index that will identify the regexp in the output of Match(),
|
// Returns the index that will identify the regexp in the output of Match(),
|
||||||
// or -1 if the regexp cannot be parsed.
|
// or -1 if the regexp cannot be parsed.
|
||||||
@ -67,12 +75,9 @@ class RE2::Set {
|
|||||||
RE2::Options options_;
|
RE2::Options options_;
|
||||||
RE2::Anchor anchor_;
|
RE2::Anchor anchor_;
|
||||||
std::vector<Elem> elem_;
|
std::vector<Elem> elem_;
|
||||||
re2::Prog* prog_;
|
|
||||||
bool compiled_;
|
bool compiled_;
|
||||||
int size_;
|
int size_;
|
||||||
|
std::unique_ptr<re2::Prog> prog_;
|
||||||
Set(const Set&) = delete;
|
|
||||||
Set& operator=(const Set&) = delete;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
26
extern/re2/re2/simplify.cc
vendored
26
extern/re2/re2/simplify.cc
vendored
@ -10,8 +10,8 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/pod_array.h"
|
|
||||||
#include "util/utf.h"
|
#include "util/utf.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
#include "re2/walker-inl.h"
|
#include "re2/walker-inl.h"
|
||||||
|
|
||||||
@ -28,8 +28,6 @@ bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
|
|||||||
Regexp* sre = re->Simplify();
|
Regexp* sre = re->Simplify();
|
||||||
re->Decref();
|
re->Decref();
|
||||||
if (sre == NULL) {
|
if (sre == NULL) {
|
||||||
// Should not happen, since Simplify never fails.
|
|
||||||
LOG(ERROR) << "Simplify failed on " << src;
|
|
||||||
if (status) {
|
if (status) {
|
||||||
status->set_code(kRegexpInternalError);
|
status->set_code(kRegexpInternalError);
|
||||||
status->set_error_arg(src);
|
status->set_error_arg(src);
|
||||||
@ -180,10 +178,20 @@ Regexp* Regexp::Simplify() {
|
|||||||
CoalesceWalker cw;
|
CoalesceWalker cw;
|
||||||
Regexp* cre = cw.Walk(this, NULL);
|
Regexp* cre = cw.Walk(this, NULL);
|
||||||
if (cre == NULL)
|
if (cre == NULL)
|
||||||
return cre;
|
return NULL;
|
||||||
|
if (cw.stopped_early()) {
|
||||||
|
cre->Decref();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
SimplifyWalker sw;
|
SimplifyWalker sw;
|
||||||
Regexp* sre = sw.Walk(cre, NULL);
|
Regexp* sre = sw.Walk(cre, NULL);
|
||||||
cre->Decref();
|
cre->Decref();
|
||||||
|
if (sre == NULL)
|
||||||
|
return NULL;
|
||||||
|
if (sw.stopped_early()) {
|
||||||
|
sre->Decref();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
return sre;
|
return sre;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -212,9 +220,10 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||||
// This should never be called, since we use Walk and not
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
// WalkExponential.
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
|
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return re->Incref();
|
return re->Incref();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -437,9 +446,10 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||||
// This should never be called, since we use Walk and not
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
// WalkExponential.
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
|
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return re->Incref();
|
return re->Incref();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#ifndef UTIL_SPARSE_ARRAY_H_
|
#ifndef RE2_SPARSE_ARRAY_H_
|
||||||
#define UTIL_SPARSE_ARRAY_H_
|
#define RE2_SPARSE_ARRAY_H_
|
||||||
|
|
||||||
// DESCRIPTION
|
// DESCRIPTION
|
||||||
//
|
//
|
||||||
@ -102,7 +102,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "util/pod_array.h"
|
#include "re2/pod_array.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -389,4 +389,4 @@ template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
|
|||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
#endif // UTIL_SPARSE_ARRAY_H_
|
#endif // RE2_SPARSE_ARRAY_H_
|
@ -2,8 +2,8 @@
|
|||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#ifndef UTIL_SPARSE_SET_H_
|
#ifndef RE2_SPARSE_SET_H_
|
||||||
#define UTIL_SPARSE_SET_H_
|
#define RE2_SPARSE_SET_H_
|
||||||
|
|
||||||
// DESCRIPTION
|
// DESCRIPTION
|
||||||
//
|
//
|
||||||
@ -61,7 +61,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "util/pod_array.h"
|
#include "re2/pod_array.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -261,4 +261,4 @@ typedef SparseSetT<void> SparseSet;
|
|||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
#endif // UTIL_SPARSE_SET_H_
|
#endif // RE2_SPARSE_SET_H_
|
54
extern/re2/re2/testing/backtrack.cc
vendored
54
extern/re2/re2/testing/backtrack.cc
vendored
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
|
#include "re2/pod_array.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
|
||||||
@ -53,7 +54,6 @@ namespace re2 {
|
|||||||
class Backtracker {
|
class Backtracker {
|
||||||
public:
|
public:
|
||||||
explicit Backtracker(Prog* prog);
|
explicit Backtracker(Prog* prog);
|
||||||
~Backtracker();
|
|
||||||
|
|
||||||
bool Search(const StringPiece& text, const StringPiece& context,
|
bool Search(const StringPiece& text, const StringPiece& context,
|
||||||
bool anchored, bool longest,
|
bool anchored, bool longest,
|
||||||
@ -79,9 +79,11 @@ class Backtracker {
|
|||||||
int nsubmatch_; // # of submatches to fill in
|
int nsubmatch_; // # of submatches to fill in
|
||||||
|
|
||||||
// Search state
|
// Search state
|
||||||
const char* cap_[64]; // capture registers
|
const char* cap_[64]; // capture registers
|
||||||
uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked
|
PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
|
||||||
size_t nvisited_; // # of words in bitmap
|
|
||||||
|
Backtracker(const Backtracker&) = delete;
|
||||||
|
Backtracker& operator=(const Backtracker&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
Backtracker::Backtracker(Prog* prog)
|
Backtracker::Backtracker(Prog* prog)
|
||||||
@ -90,13 +92,7 @@ Backtracker::Backtracker(Prog* prog)
|
|||||||
longest_(false),
|
longest_(false),
|
||||||
endmatch_(false),
|
endmatch_(false),
|
||||||
submatch_(NULL),
|
submatch_(NULL),
|
||||||
nsubmatch_(0),
|
nsubmatch_(0) {
|
||||||
visited_(NULL),
|
|
||||||
nvisited_(0) {
|
|
||||||
}
|
|
||||||
|
|
||||||
Backtracker::~Backtracker() {
|
|
||||||
delete[] visited_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs a backtracking search.
|
// Runs a backtracking search.
|
||||||
@ -105,7 +101,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
StringPiece* submatch, int nsubmatch) {
|
StringPiece* submatch, int nsubmatch) {
|
||||||
text_ = text;
|
text_ = text;
|
||||||
context_ = context;
|
context_ = context;
|
||||||
if (context_.begin() == NULL)
|
if (context_.data() == NULL)
|
||||||
context_ = text;
|
context_ = text;
|
||||||
if (prog_->anchor_start() && text.begin() > context_.begin())
|
if (prog_->anchor_start() && text.begin() > context_.begin())
|
||||||
return false;
|
return false;
|
||||||
@ -130,24 +126,28 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
|
|||||||
|
|
||||||
// Allocate new visited_ bitmap -- size is proportional
|
// Allocate new visited_ bitmap -- size is proportional
|
||||||
// to text, so have to reallocate on each call to Search.
|
// to text, so have to reallocate on each call to Search.
|
||||||
delete[] visited_;
|
int nvisited = prog_->size() * static_cast<int>(text.size()+1);
|
||||||
nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
|
nvisited = (nvisited + 31) / 32;
|
||||||
visited_ = new uint32_t[nvisited_];
|
visited_ = PODArray<uint32_t>(nvisited);
|
||||||
memset(visited_, 0, nvisited_*sizeof visited_[0]);
|
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
|
||||||
|
|
||||||
// Anchored search must start at text.begin().
|
// Anchored search must start at text.begin().
|
||||||
if (anchored_) {
|
if (anchored_) {
|
||||||
cap_[0] = text.begin();
|
cap_[0] = text.data();
|
||||||
return Visit(prog_->start(), text.begin());
|
return Visit(prog_->start(), text.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unanchored search, starting from each possible text position.
|
// Unanchored search, starting from each possible text position.
|
||||||
// Notice that we have to try the empty string at the end of
|
// Notice that we have to try the empty string at the end of
|
||||||
// the text, so the loop condition is p <= text.end(), not p < text.end().
|
// the text, so the loop condition is p <= text.end(), not p < text.end().
|
||||||
for (const char* p = text.begin(); p <= text.end(); p++) {
|
for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
|
||||||
cap_[0] = p;
|
cap_[0] = p;
|
||||||
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
|
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
|
||||||
return true;
|
return true;
|
||||||
|
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||||
|
// by simply not continuing the loop.
|
||||||
|
if (p == NULL)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -158,9 +158,10 @@ bool Backtracker::Visit(int id, const char* p) {
|
|||||||
// Check bitmap. If we've already explored from here,
|
// Check bitmap. If we've already explored from here,
|
||||||
// either it didn't match or it did but we're hoping for a better match.
|
// either it didn't match or it did but we're hoping for a better match.
|
||||||
// Either way, don't go down that road again.
|
// Either way, don't go down that road again.
|
||||||
CHECK(p <= text_.end());
|
CHECK(p <= text_.data() + text_.size());
|
||||||
size_t n = id*(text_.size()+1) + (p - text_.begin());
|
int n = id * static_cast<int>(text_.size()+1) +
|
||||||
CHECK_LT(n/32, nvisited_);
|
static_cast<int>(p-text_.data());
|
||||||
|
CHECK_LT(n/32, visited_.size());
|
||||||
if (visited_[n/32] & (1 << (n&31)))
|
if (visited_[n/32] & (1 << (n&31)))
|
||||||
return false;
|
return false;
|
||||||
visited_[n/32] |= 1 << (n&31);
|
visited_[n/32] |= 1 << (n&31);
|
||||||
@ -182,7 +183,7 @@ bool Backtracker::Try(int id, const char* p) {
|
|||||||
// Pick out byte at current position. If at end of string,
|
// Pick out byte at current position. If at end of string,
|
||||||
// have to explore in hope of finishing a match. Use impossible byte -1.
|
// have to explore in hope of finishing a match. Use impossible byte -1.
|
||||||
int c = -1;
|
int c = -1;
|
||||||
if (p < text_.end())
|
if (p < text_.data() + text_.size())
|
||||||
c = *p & 0xFF;
|
c = *p & 0xFF;
|
||||||
|
|
||||||
Prog::Inst* ip = prog_->inst(id);
|
Prog::Inst* ip = prog_->inst(id);
|
||||||
@ -224,11 +225,12 @@ bool Backtracker::Try(int id, const char* p) {
|
|||||||
case kInstMatch:
|
case kInstMatch:
|
||||||
// We found a match. If it's the best so far, record the
|
// We found a match. If it's the best so far, record the
|
||||||
// parameters in the caller's submatch_ array.
|
// parameters in the caller's submatch_ array.
|
||||||
if (endmatch_ && p != context_.end())
|
if (endmatch_ && p != context_.data() + context_.size())
|
||||||
return false;
|
return false;
|
||||||
cap_[1] = p;
|
cap_[1] = p;
|
||||||
if (submatch_[0].data() == NULL || // First match so far ...
|
if (submatch_[0].data() == NULL ||
|
||||||
(longest_ && p > submatch_[0].end())) { // ... or better match
|
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
|
||||||
|
// First match so far - or better match.
|
||||||
for (int i = 0; i < nsubmatch_; i++)
|
for (int i = 0; i < nsubmatch_; i++)
|
||||||
submatch_[i] = StringPiece(
|
submatch_[i] = StringPiece(
|
||||||
cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
|
cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
|
||||||
|
4
extern/re2/re2/testing/charclass_test.cc
vendored
4
extern/re2/re2/testing/charclass_test.cc
vendored
@ -85,7 +85,7 @@ static CCTest tests[] = {
|
|||||||
{ {-1} } },
|
{ {-1} } },
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class CharClass>
|
template <typename CharClass>
|
||||||
static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
|
static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
|
||||||
if (t == NULL) {
|
if (t == NULL) {
|
||||||
printf("\t%s:", desc);
|
printf("\t%s:", desc);
|
||||||
@ -136,7 +136,7 @@ void Delete(CharClassBuilder* cc) {
|
|||||||
delete cc;
|
delete cc;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class CharClass>
|
template <typename CharClass>
|
||||||
bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
|
bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
|
||||||
typename CharClass::iterator it = cc->begin();
|
typename CharClass::iterator it = cc->begin();
|
||||||
int size = 0;
|
int size = 0;
|
||||||
|
60
extern/re2/re2/testing/compile_test.cc
vendored
60
extern/re2/re2/testing/compile_test.cc
vendored
@ -147,10 +147,19 @@ static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
|
|||||||
Regexp* re = Regexp::Parse(pattern, flags, NULL);
|
Regexp* re = Regexp::Parse(pattern, flags, NULL);
|
||||||
EXPECT_TRUE(re != NULL);
|
EXPECT_TRUE(re != NULL);
|
||||||
|
|
||||||
Prog* prog = re->CompileToProg(0);
|
{
|
||||||
EXPECT_TRUE(prog != NULL);
|
Prog* prog = re->CompileToProg(0);
|
||||||
*bytemap = prog->DumpByteMap();
|
EXPECT_TRUE(prog != NULL);
|
||||||
delete prog;
|
*bytemap = prog->DumpByteMap();
|
||||||
|
delete prog;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Prog* prog = re->CompileToReverseProg(0);
|
||||||
|
EXPECT_TRUE(prog != NULL);
|
||||||
|
EXPECT_EQ(*bytemap, prog->DumpByteMap());
|
||||||
|
delete prog;
|
||||||
|
}
|
||||||
|
|
||||||
re->Decref();
|
re->Decref();
|
||||||
}
|
}
|
||||||
@ -213,16 +222,11 @@ TEST(TestCompile, UTF8Ranges) {
|
|||||||
EXPECT_EQ("[00-09] -> 0\n"
|
EXPECT_EQ("[00-09] -> 0\n"
|
||||||
"[0a-0a] -> 1\n"
|
"[0a-0a] -> 1\n"
|
||||||
"[0b-7f] -> 0\n"
|
"[0b-7f] -> 0\n"
|
||||||
"[80-8f] -> 2\n"
|
"[80-bf] -> 2\n"
|
||||||
"[90-9f] -> 3\n"
|
|
||||||
"[a0-bf] -> 4\n"
|
|
||||||
"[c0-c1] -> 1\n"
|
"[c0-c1] -> 1\n"
|
||||||
"[c2-df] -> 5\n"
|
"[c2-df] -> 3\n"
|
||||||
"[e0-e0] -> 6\n"
|
"[e0-ef] -> 4\n"
|
||||||
"[e1-ef] -> 7\n"
|
"[f0-f4] -> 5\n"
|
||||||
"[f0-f0] -> 8\n"
|
|
||||||
"[f1-f3] -> 9\n"
|
|
||||||
"[f4-f4] -> 10\n"
|
|
||||||
"[f5-ff] -> 1\n",
|
"[f5-ff] -> 1\n",
|
||||||
bytemap);
|
bytemap);
|
||||||
}
|
}
|
||||||
@ -232,7 +236,7 @@ TEST(TestCompile, InsufficientMemory) {
|
|||||||
"^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
|
"^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
|
||||||
Regexp::LikePerl, NULL);
|
Regexp::LikePerl, NULL);
|
||||||
EXPECT_TRUE(re != NULL);
|
EXPECT_TRUE(re != NULL);
|
||||||
Prog* prog = re->CompileToProg(920);
|
Prog* prog = re->CompileToProg(850);
|
||||||
// If the memory budget has been exhausted, compilation should fail
|
// If the memory budget has been exhausted, compilation should fail
|
||||||
// and return NULL instead of trying to do anything with NoMatch().
|
// and return NULL instead of trying to do anything with NoMatch().
|
||||||
EXPECT_TRUE(prog == NULL);
|
EXPECT_TRUE(prog == NULL);
|
||||||
@ -299,20 +303,22 @@ TEST(TestCompile, Bug26705922) {
|
|||||||
"8. byte [f0-f0] 0 -> 7\n",
|
"8. byte [f0-f0] 0 -> 7\n",
|
||||||
reverse);
|
reverse);
|
||||||
|
|
||||||
Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
|
Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
|
||||||
EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
|
EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
|
||||||
"4+ byte [c2-df] 0 -> 7\n"
|
"4+ byte [e0-ef] 0 -> 8\n"
|
||||||
"5+ byte [a0-bf] 1 -> 8\n"
|
"5. byte [f0-f4] 0 -> 9\n"
|
||||||
"6. byte [80-bf] 0 -> 9\n"
|
"6. byte [80-bf] 0 -> 7\n"
|
||||||
"7. match! 0\n"
|
"7. match! 0\n"
|
||||||
"8. byte [e0-e0] 0 -> 7\n"
|
"8. byte [80-bf] 0 -> 6\n"
|
||||||
"9+ byte [e1-ef] 0 -> 7\n"
|
"9. byte [80-bf] 0 -> 8\n",
|
||||||
"10+ byte [90-bf] 1 -> 13\n"
|
forward);
|
||||||
"11+ byte [80-bf] 1 -> 14\n"
|
EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
|
||||||
"12. byte [80-8f] 0 -> 15\n"
|
"4+ byte [c2-df] 0 -> 6\n"
|
||||||
"13. byte [f0-f0] 0 -> 7\n"
|
"5. byte [80-bf] 0 -> 7\n"
|
||||||
"14. byte [f1-f3] 0 -> 7\n"
|
"6. match! 0\n"
|
||||||
"15. byte [f4-f4] 0 -> 7\n",
|
"7+ byte [e0-ef] 0 -> 6\n"
|
||||||
|
"8. byte [80-bf] 0 -> 9\n"
|
||||||
|
"9. byte [f0-f4] 0 -> 6\n",
|
||||||
reverse);
|
reverse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
82
extern/re2/re2/testing/dfa_test.cc
vendored
82
extern/re2/re2/testing/dfa_test.cc
vendored
@ -8,7 +8,9 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
|
#include "util/flags.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
|
#include "util/malloc_counter.h"
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
#include "re2/prog.h"
|
#include "re2/prog.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
@ -18,12 +20,26 @@
|
|||||||
|
|
||||||
static const bool UsingMallocCounter = false;
|
static const bool UsingMallocCounter = false;
|
||||||
|
|
||||||
DEFINE_int32(size, 8, "log2(number of DFA nodes)");
|
DEFINE_FLAG(int, size, 8, "log2(number of DFA nodes)");
|
||||||
DEFINE_int32(repeat, 2, "Repetition count.");
|
DEFINE_FLAG(int, repeat, 2, "Repetition count.");
|
||||||
DEFINE_int32(threads, 4, "number of threads");
|
DEFINE_FLAG(int, threads, 4, "number of threads");
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
|
static int state_cache_resets = 0;
|
||||||
|
static int search_failures = 0;
|
||||||
|
|
||||||
|
struct SetHooks {
|
||||||
|
SetHooks() {
|
||||||
|
hooks::SetDFAStateCacheResetHook([](const hooks::DFAStateCacheReset&) {
|
||||||
|
++state_cache_resets;
|
||||||
|
});
|
||||||
|
hooks::SetDFASearchFailureHook([](const hooks::DFASearchFailure&) {
|
||||||
|
++search_failures;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} set_hooks;
|
||||||
|
|
||||||
// Check that multithreaded access to DFA class works.
|
// Check that multithreaded access to DFA class works.
|
||||||
|
|
||||||
// Helper function: builds entire DFA for prog.
|
// Helper function: builds entire DFA for prog.
|
||||||
@ -34,7 +50,7 @@ static void DoBuild(Prog* prog) {
|
|||||||
TEST(Multithreaded, BuildEntireDFA) {
|
TEST(Multithreaded, BuildEntireDFA) {
|
||||||
// Create regexp with 2^FLAGS_size states in DFA.
|
// Create regexp with 2^FLAGS_size states in DFA.
|
||||||
std::string s = "a";
|
std::string s = "a";
|
||||||
for (int i = 0; i < FLAGS_size; i++)
|
for (int i = 0; i < GetFlag(FLAGS_size); i++)
|
||||||
s += "[ab]";
|
s += "[ab]";
|
||||||
s += "b";
|
s += "b";
|
||||||
Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
|
Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
|
||||||
@ -52,14 +68,14 @@ TEST(Multithreaded, BuildEntireDFA) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Build the DFA simultaneously in a bunch of threads.
|
// Build the DFA simultaneously in a bunch of threads.
|
||||||
for (int i = 0; i < FLAGS_repeat; i++) {
|
for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
|
||||||
Prog* prog = re->CompileToProg(0);
|
Prog* prog = re->CompileToProg(0);
|
||||||
ASSERT_TRUE(prog != NULL);
|
ASSERT_TRUE(prog != NULL);
|
||||||
|
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (int j = 0; j < FLAGS_threads; j++)
|
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||||
threads.emplace_back(DoBuild, prog);
|
threads.emplace_back(DoBuild, prog);
|
||||||
for (int j = 0; j < FLAGS_threads; j++)
|
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||||
threads[j].join();
|
threads[j].join();
|
||||||
|
|
||||||
// One more compile, to make sure everything is okay.
|
// One more compile, to make sure everything is okay.
|
||||||
@ -106,44 +122,6 @@ TEST(SingleThreaded, BuildEntireDFA) {
|
|||||||
re->Decref();
|
re->Decref();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generates and returns a string over binary alphabet {0,1} that contains
|
|
||||||
// all possible binary sequences of length n as subsequences. The obvious
|
|
||||||
// brute force method would generate a string of length n * 2^n, but this
|
|
||||||
// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
|
|
||||||
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
|
|
||||||
// Such a string is useful for testing a DFA. If you have a DFA
|
|
||||||
// where distinct last n bytes implies distinct states, then running on a
|
|
||||||
// DeBruijn string causes the DFA to need to create a new state at every
|
|
||||||
// position in the input, never reusing any states until it gets to the
|
|
||||||
// end of the string. This is the worst possible case for DFA execution.
|
|
||||||
static std::string DeBruijnString(int n) {
|
|
||||||
CHECK_LT(n, static_cast<int>(8*sizeof(int)));
|
|
||||||
CHECK_GT(n, 0);
|
|
||||||
|
|
||||||
std::vector<bool> did(size_t{1}<<n);
|
|
||||||
for (int i = 0; i < 1<<n; i++)
|
|
||||||
did[i] = false;
|
|
||||||
|
|
||||||
std::string s;
|
|
||||||
for (int i = 0; i < n-1; i++)
|
|
||||||
s.append("0");
|
|
||||||
int bits = 0;
|
|
||||||
int mask = (1<<n) - 1;
|
|
||||||
for (int i = 0; i < (1<<n); i++) {
|
|
||||||
bits <<= 1;
|
|
||||||
bits &= mask;
|
|
||||||
if (!did[bits|1]) {
|
|
||||||
bits |= 1;
|
|
||||||
s.append("1");
|
|
||||||
} else {
|
|
||||||
s.append("0");
|
|
||||||
}
|
|
||||||
CHECK(!did[bits]);
|
|
||||||
did[bits] = true;
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test that the DFA gets the right result even if it runs
|
// Test that the DFA gets the right result even if it runs
|
||||||
// out of memory during a search. The regular expression
|
// out of memory during a search. The regular expression
|
||||||
// 0[01]{n}$ matches a binary string of 0s and 1s only if
|
// 0[01]{n}$ matches a binary string of 0s and 1s only if
|
||||||
@ -166,6 +144,8 @@ TEST(SingleThreaded, SearchDFA) {
|
|||||||
// if it can't get a good cache hit rate.)
|
// if it can't get a good cache hit rate.)
|
||||||
// Tell the DFA to trudge along instead.
|
// Tell the DFA to trudge along instead.
|
||||||
Prog::TEST_dfa_should_bail_when_slow(false);
|
Prog::TEST_dfa_should_bail_when_slow(false);
|
||||||
|
state_cache_resets = 0;
|
||||||
|
search_failures = 0;
|
||||||
|
|
||||||
// Choice of n is mostly arbitrary, except that:
|
// Choice of n is mostly arbitrary, except that:
|
||||||
// * making n too big makes the test run for too long.
|
// * making n too big makes the test run for too long.
|
||||||
@ -215,6 +195,8 @@ TEST(SingleThreaded, SearchDFA) {
|
|||||||
|
|
||||||
// Reset to original behaviour.
|
// Reset to original behaviour.
|
||||||
Prog::TEST_dfa_should_bail_when_slow(true);
|
Prog::TEST_dfa_should_bail_when_slow(true);
|
||||||
|
ASSERT_GT(state_cache_resets, 0);
|
||||||
|
ASSERT_EQ(search_failures, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function: searches for match, which should match,
|
// Helper function: searches for match, which should match,
|
||||||
@ -237,6 +219,8 @@ static void DoSearch(Prog* prog, const StringPiece& match,
|
|||||||
|
|
||||||
TEST(Multithreaded, SearchDFA) {
|
TEST(Multithreaded, SearchDFA) {
|
||||||
Prog::TEST_dfa_should_bail_when_slow(false);
|
Prog::TEST_dfa_should_bail_when_slow(false);
|
||||||
|
state_cache_resets = 0;
|
||||||
|
search_failures = 0;
|
||||||
|
|
||||||
// Same as single-threaded test above.
|
// Same as single-threaded test above.
|
||||||
const int n = 18;
|
const int n = 18;
|
||||||
@ -259,14 +243,14 @@ TEST(Multithreaded, SearchDFA) {
|
|||||||
|
|
||||||
// Run the search simultaneously in a bunch of threads.
|
// Run the search simultaneously in a bunch of threads.
|
||||||
// Reuse same flags for Multithreaded.BuildDFA above.
|
// Reuse same flags for Multithreaded.BuildDFA above.
|
||||||
for (int i = 0; i < FLAGS_repeat; i++) {
|
for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
|
||||||
Prog* prog = re->CompileToProg(1<<n);
|
Prog* prog = re->CompileToProg(1<<n);
|
||||||
ASSERT_TRUE(prog != NULL);
|
ASSERT_TRUE(prog != NULL);
|
||||||
|
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
for (int j = 0; j < FLAGS_threads; j++)
|
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||||
threads.emplace_back(DoSearch, prog, match, no_match);
|
threads.emplace_back(DoSearch, prog, match, no_match);
|
||||||
for (int j = 0; j < FLAGS_threads; j++)
|
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||||
threads[j].join();
|
threads[j].join();
|
||||||
|
|
||||||
delete prog;
|
delete prog;
|
||||||
@ -276,6 +260,8 @@ TEST(Multithreaded, SearchDFA) {
|
|||||||
|
|
||||||
// Reset to original behaviour.
|
// Reset to original behaviour.
|
||||||
Prog::TEST_dfa_should_bail_when_slow(true);
|
Prog::TEST_dfa_should_bail_when_slow(true);
|
||||||
|
ASSERT_GT(state_cache_resets, 0);
|
||||||
|
ASSERT_EQ(search_failures, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ReverseTest {
|
struct ReverseTest {
|
||||||
|
14
extern/re2/re2/testing/dump.cc
vendored
14
extern/re2/re2/testing/dump.cc
vendored
@ -25,9 +25,6 @@
|
|||||||
#include "re2/stringpiece.h"
|
#include "re2/stringpiece.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
|
||||||
// Cause a link error if this file is used outside of testing.
|
|
||||||
DECLARE_string(test_tmpdir);
|
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
static const char* kOpcodeNames[] = {
|
static const char* kOpcodeNames[] = {
|
||||||
@ -154,14 +151,11 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string Regexp::Dump() {
|
std::string Regexp::Dump() {
|
||||||
|
// Make sure that we are being called from a unit test.
|
||||||
|
// Should cause a link error if used outside of testing.
|
||||||
|
CHECK(!::testing::TempDir().empty());
|
||||||
|
|
||||||
std::string s;
|
std::string s;
|
||||||
|
|
||||||
// Make sure being called from a unit test.
|
|
||||||
if (FLAGS_test_tmpdir.empty()) {
|
|
||||||
LOG(ERROR) << "Cannot use except for testing.";
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
DumpRegexpAppending(this, &s);
|
DumpRegexpAppending(this, &s);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
9
extern/re2/re2/testing/exhaustive1_test.cc
vendored
9
extern/re2/re2/testing/exhaustive1_test.cc
vendored
@ -10,8 +10,6 @@
|
|||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
#include "re2/testing/exhaustive_tester.h"
|
#include "re2/testing/exhaustive_tester.h"
|
||||||
|
|
||||||
DECLARE_string(regexp_engines);
|
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
// Test simple repetition operators
|
// Test simple repetition operators
|
||||||
@ -34,11 +32,8 @@ TEST(Repetition, Capturing) {
|
|||||||
"%s* %s+ %s? %s*? %s+? %s??");
|
"%s* %s+ %s? %s*? %s+? %s??");
|
||||||
ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
|
ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
|
||||||
7, Explode("ab"), "(?:%s)", "");
|
7, Explode("ab"), "(?:%s)", "");
|
||||||
|
ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
|
||||||
// This would be a great test, but it runs forever when PCRE is enabled.
|
50, Explode("a"), "(?:%s)", "");
|
||||||
if (FLAGS_regexp_engines.find("PCRE") == std::string::npos)
|
|
||||||
ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
|
|
||||||
50, Explode("a"), "(?:%s)", "");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
1
extern/re2/re2/testing/exhaustive2_test.cc
vendored
1
extern/re2/re2/testing/exhaustive2_test.cc
vendored
@ -10,7 +10,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
#include "re2/re2.h"
|
|
||||||
#include "re2/testing/exhaustive_tester.h"
|
#include "re2/testing/exhaustive_tester.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
21
extern/re2/re2/testing/exhaustive_tester.cc
vendored
21
extern/re2/re2/testing/exhaustive_tester.cc
vendored
@ -14,6 +14,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
|
#include "util/flags.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
#include "re2/testing/exhaustive_tester.h"
|
#include "re2/testing/exhaustive_tester.h"
|
||||||
@ -24,11 +25,11 @@
|
|||||||
#define LOGGING 0
|
#define LOGGING 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DEFINE_bool(show_regexps, false, "show regexps during testing");
|
DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
|
||||||
|
|
||||||
DEFINE_int32(max_bad_regexp_inputs, 1,
|
DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
|
||||||
"Stop testing a regular expression after finding this many "
|
"Stop testing a regular expression after finding this many "
|
||||||
"strings that break it.");
|
"strings that break it.");
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -62,11 +63,12 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
|
|||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
printf(" ");
|
printf(" ");
|
||||||
if (m[i].begin() == NULL)
|
if (m[i].data() == NULL)
|
||||||
printf("-");
|
printf("-");
|
||||||
else
|
else
|
||||||
printf("%td-%td",
|
printf("%td-%td",
|
||||||
m[i].begin() - input.begin(), m[i].end() - input.begin());
|
m[i].begin() - input.begin(),
|
||||||
|
m[i].end() - input.begin());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,10 +78,11 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
|
|||||||
void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
||||||
regexps_++;
|
regexps_++;
|
||||||
std::string regexp = const_regexp;
|
std::string regexp = const_regexp;
|
||||||
if (!topwrapper_.empty())
|
if (!topwrapper_.empty()) {
|
||||||
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
|
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
if (FLAGS_show_regexps) {
|
if (GetFlag(FLAGS_show_regexps)) {
|
||||||
printf("\r%s", regexp.c_str());
|
printf("\r%s", regexp.c_str());
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
@ -134,7 +137,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
|||||||
tests_++;
|
tests_++;
|
||||||
if (!tester.TestInput(strgen_.Next())) {
|
if (!tester.TestInput(strgen_.Next())) {
|
||||||
failures_++;
|
failures_++;
|
||||||
if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
|
if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
46
extern/re2/re2/testing/filtered_re2_test.cc
vendored
46
extern/re2/re2/testing/filtered_re2_test.cc
vendored
@ -7,6 +7,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
@ -291,4 +292,49 @@ TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
|
|||||||
"EmptyStringInStringSetBug", &v));
|
"EmptyStringInStringSetBug", &v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(FilteredRE2Test, MoveSemantics) {
|
||||||
|
FilterTestVars v1;
|
||||||
|
int id;
|
||||||
|
v1.f.Add("foo\\d+", v1.opts, &id);
|
||||||
|
EXPECT_EQ(0, id);
|
||||||
|
v1.f.Compile(&v1.atoms);
|
||||||
|
EXPECT_EQ(1, v1.atoms.size());
|
||||||
|
EXPECT_EQ("foo", v1.atoms[0]);
|
||||||
|
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(1, v1.matches.size());
|
||||||
|
EXPECT_EQ(0, v1.matches[0]);
|
||||||
|
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(0, v1.matches.size());
|
||||||
|
|
||||||
|
// The moved-to object should do what the moved-from object did.
|
||||||
|
FilterTestVars v2;
|
||||||
|
v2.f = std::move(v1.f);
|
||||||
|
v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
|
||||||
|
EXPECT_EQ(1, v2.matches.size());
|
||||||
|
EXPECT_EQ(0, v2.matches[0]);
|
||||||
|
v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
|
||||||
|
EXPECT_EQ(0, v2.matches.size());
|
||||||
|
|
||||||
|
// The moved-from object should have been reset and be reusable.
|
||||||
|
v1.f.Add("bar\\d+", v1.opts, &id);
|
||||||
|
EXPECT_EQ(0, id);
|
||||||
|
v1.f.Compile(&v1.atoms);
|
||||||
|
EXPECT_EQ(1, v1.atoms.size());
|
||||||
|
EXPECT_EQ("bar", v1.atoms[0]);
|
||||||
|
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(0, v1.matches.size());
|
||||||
|
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(1, v1.matches.size());
|
||||||
|
EXPECT_EQ(0, v1.matches[0]);
|
||||||
|
|
||||||
|
// Verify that "overwriting" works and also doesn't leak memory.
|
||||||
|
// (The latter will need a leak detector such as LeakSanitizer.)
|
||||||
|
v1.f = std::move(v2.f);
|
||||||
|
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(1, v1.matches.size());
|
||||||
|
EXPECT_EQ(0, v1.matches[0]);
|
||||||
|
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||||
|
EXPECT_EQ(0, v1.matches.size());
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
13
extern/re2/re2/testing/null_walker.cc
vendored
13
extern/re2/re2/testing/null_walker.cc
vendored
@ -13,13 +13,16 @@ namespace re2 {
|
|||||||
|
|
||||||
class NullWalker : public Regexp::Walker<bool> {
|
class NullWalker : public Regexp::Walker<bool> {
|
||||||
public:
|
public:
|
||||||
NullWalker() { }
|
NullWalker() {}
|
||||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
|
||||||
bool* child_args, int nchild_args);
|
|
||||||
|
|
||||||
bool ShortVisit(Regexp* re, bool a) {
|
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||||
// Should never be called: we use Walk not WalkExponential.
|
bool* child_args, int nchild_args);
|
||||||
|
|
||||||
|
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||||
|
// Should never be called: we use Walk(), not WalkExponential().
|
||||||
|
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||||
LOG(DFATAL) << "NullWalker::ShortVisit called";
|
LOG(DFATAL) << "NullWalker::ShortVisit called";
|
||||||
|
#endif
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
16
extern/re2/re2/testing/random_test.cc
vendored
16
extern/re2/re2/testing/random_test.cc
vendored
@ -9,12 +9,13 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
|
#include "util/flags.h"
|
||||||
#include "re2/testing/exhaustive_tester.h"
|
#include "re2/testing/exhaustive_tester.h"
|
||||||
|
|
||||||
DEFINE_int32(regexpseed, 404, "Random regexp seed.");
|
DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed.");
|
||||||
DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
|
DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
|
||||||
DEFINE_int32(stringseed, 200, "Random string seed.");
|
DEFINE_FLAG(int, stringseed, 200, "Random string seed.");
|
||||||
DEFINE_int32(stringcount, 100, "How many random strings to generate.");
|
DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate.");
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -37,8 +38,10 @@ static void RandomTest(int maxatoms, int maxops,
|
|||||||
|
|
||||||
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
|
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
|
||||||
maxstrlen, stralphabet, wrapper, "");
|
maxstrlen, stralphabet, wrapper, "");
|
||||||
t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
|
t.RandomStrings(GetFlag(FLAGS_stringseed),
|
||||||
t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
|
GetFlag(FLAGS_stringcount));
|
||||||
|
t.GenerateRandom(GetFlag(FLAGS_regexpseed),
|
||||||
|
GetFlag(FLAGS_regexpcount));
|
||||||
printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
|
printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
|
||||||
t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
|
t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
|
||||||
EXPECT_EQ(0, t.failures());
|
EXPECT_EQ(0, t.failures());
|
||||||
@ -96,4 +99,3 @@ TEST(Random, Complicated) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
|
25
extern/re2/re2/testing/re2_arg_test.cc
vendored
25
extern/re2/re2/testing/re2_arg_test.cc
vendored
@ -11,6 +11,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
|
#include "util/logging.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
@ -132,4 +133,28 @@ TEST(RE2ArgTest, Uint64Test) {
|
|||||||
PARSE_FOR_TYPE(uint64_t, 5);
|
PARSE_FOR_TYPE(uint64_t, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(RE2ArgTest, ParseFromTest) {
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
struct {
|
||||||
|
bool ParseFrom(const char* str, size_t n) {
|
||||||
|
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} obj1;
|
||||||
|
RE2::Arg arg1(&obj1);
|
||||||
|
EXPECT_TRUE(arg1.Parse("one", 3));
|
||||||
|
|
||||||
|
struct {
|
||||||
|
bool ParseFrom(const char* str, size_t n) {
|
||||||
|
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Ensure that RE2::Arg works even with overloaded ParseFrom().
|
||||||
|
void ParseFrom(const char* str) {}
|
||||||
|
} obj2;
|
||||||
|
RE2::Arg arg2(&obj2);
|
||||||
|
EXPECT_FALSE(arg2.Parse("two", 3));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
79
extern/re2/re2/testing/re2_test.cc
vendored
79
extern/re2/re2/testing/re2_test.cc
vendored
@ -12,6 +12,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
|
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <unistd.h> /* for sysconf */
|
#include <unistd.h> /* for sysconf */
|
||||||
@ -223,6 +224,15 @@ TEST(RE2, Extract) {
|
|||||||
ASSERT_EQ(s, "'foo'");
|
ASSERT_EQ(s, "'foo'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(RE2, MaxSubmatchTooLarge) {
|
||||||
|
std::string s;
|
||||||
|
ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
|
||||||
|
s = "foo";
|
||||||
|
ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
|
||||||
|
s = "foo";
|
||||||
|
ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(RE2, Consume) {
|
TEST(RE2, Consume) {
|
||||||
RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
|
RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
|
||||||
std::string word;
|
std::string word;
|
||||||
@ -473,28 +483,27 @@ TEST(ProgramFanout, BigProgram) {
|
|||||||
RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
|
RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
|
||||||
RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
|
RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
|
||||||
|
|
||||||
std::map<int, int> histogram;
|
std::vector<int> histogram;
|
||||||
|
|
||||||
// 3 is the largest non-empty bucket and has 1 element.
|
// 3 is the largest non-empty bucket and has 1 element.
|
||||||
ASSERT_EQ(3, re1.ProgramFanout(&histogram));
|
ASSERT_EQ(3, re1.ProgramFanout(&histogram));
|
||||||
ASSERT_EQ(1, histogram[3]);
|
ASSERT_EQ(1, histogram[3]);
|
||||||
|
|
||||||
// 7 is the largest non-empty bucket and has 10 elements.
|
// 6 is the largest non-empty bucket and has 10 elements.
|
||||||
ASSERT_EQ(7, re10.ProgramFanout(&histogram));
|
ASSERT_EQ(6, re10.ProgramFanout(&histogram));
|
||||||
ASSERT_EQ(10, histogram[7]);
|
ASSERT_EQ(10, histogram[6]);
|
||||||
|
|
||||||
// 10 is the largest non-empty bucket and has 100 elements.
|
// 9 is the largest non-empty bucket and has 100 elements.
|
||||||
ASSERT_EQ(10, re100.ProgramFanout(&histogram));
|
ASSERT_EQ(9, re100.ProgramFanout(&histogram));
|
||||||
ASSERT_EQ(100, histogram[10]);
|
ASSERT_EQ(100, histogram[9]);
|
||||||
|
|
||||||
// 13 is the largest non-empty bucket and has 1000 elements.
|
// 13 is the largest non-empty bucket and has 1000 elements.
|
||||||
ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
|
ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
|
||||||
ASSERT_EQ(1000, histogram[13]);
|
ASSERT_EQ(1000, histogram[13]);
|
||||||
|
|
||||||
// 2 is the largest non-empty bucket and has 3 elements.
|
// 2 is the largest non-empty bucket and has 1 element.
|
||||||
// This differs from the others due to how reverse `.' works.
|
|
||||||
ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
|
ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
|
||||||
ASSERT_EQ(3, histogram[2]);
|
ASSERT_EQ(1, histogram[2]);
|
||||||
|
|
||||||
// 5 is the largest non-empty bucket and has 10 elements.
|
// 5 is the largest non-empty bucket and has 10 elements.
|
||||||
ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
|
ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
|
||||||
@ -1232,11 +1241,10 @@ TEST(RE2, DeepRecursion) {
|
|||||||
// Suggested by Josh Hyman. Failed when SearchOnePass was
|
// Suggested by Josh Hyman. Failed when SearchOnePass was
|
||||||
// not implementing case-folding.
|
// not implementing case-folding.
|
||||||
TEST(CaseInsensitive, MatchAndConsume) {
|
TEST(CaseInsensitive, MatchAndConsume) {
|
||||||
std::string result;
|
|
||||||
std::string text = "A fish named *Wanda*";
|
std::string text = "A fish named *Wanda*";
|
||||||
StringPiece sp(text);
|
StringPiece sp(text);
|
||||||
|
StringPiece result;
|
||||||
EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
|
EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
|
||||||
EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
|
EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1269,38 +1277,43 @@ TEST(RE2, CL8622304) {
|
|||||||
EXPECT_EQ(val, "1,0x2F,030,4,5");
|
EXPECT_EQ(val, "1,0x2F,030,4,5");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Check that RE2 returns correct regexp pieces on error.
|
// Check that RE2 returns correct regexp pieces on error.
|
||||||
// In particular, make sure it returns whole runes
|
// In particular, make sure it returns whole runes
|
||||||
// and that it always reports invalid UTF-8.
|
// and that it always reports invalid UTF-8.
|
||||||
// Also check that Perl error flag piece is big enough.
|
// Also check that Perl error flag piece is big enough.
|
||||||
static struct ErrorTest {
|
static struct ErrorTest {
|
||||||
const char *regexp;
|
const char *regexp;
|
||||||
const char *error;
|
RE2::ErrorCode error_code;
|
||||||
|
const char *error_arg;
|
||||||
} error_tests[] = {
|
} error_tests[] = {
|
||||||
{ "ab\\αcd", "\\α" },
|
{ "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
|
||||||
{ "ef\\x☺01", "\\x☺0" },
|
{ "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
|
||||||
{ "gh\\x1☺01", "\\x1☺" },
|
{ "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
|
||||||
{ "ij\\x1", "\\x1" },
|
{ "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
|
||||||
{ "kl\\x", "\\x" },
|
{ "kl\\x", RE2::ErrorBadEscape, "\\x" },
|
||||||
{ "uv\\x{0000☺}", "\\x{0000☺" },
|
{ "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
|
||||||
{ "wx\\p{ABC", "\\p{ABC" },
|
{ "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
|
||||||
{ "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
|
// used to return (?s but the error is X
|
||||||
{ "aa(?sm☺i", "(?sm☺" },
|
{ "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
|
||||||
{ "bb[abc", "[abc" },
|
{ "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
|
||||||
|
{ "bb[abc", RE2::ErrorMissingBracket, "[abc" },
|
||||||
|
{ "abc(def", RE2::ErrorMissingParen, "abc(def" },
|
||||||
|
{ "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
|
||||||
|
|
||||||
{ "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
|
// no argument string returned for invalid UTF-8
|
||||||
{ "op\377qr", "" },
|
{ "mn\\x1\377", RE2::ErrorBadUTF8, "" },
|
||||||
{ "st\\x{00000\377", "" },
|
{ "op\377qr", RE2::ErrorBadUTF8, "" },
|
||||||
{ "zz\\p{\377}", "" },
|
{ "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
|
||||||
{ "zz\\x{00\377}", "" },
|
{ "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
|
||||||
{ "zz(?P<name\377>abc)", "" },
|
{ "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
|
||||||
|
{ "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
|
||||||
};
|
};
|
||||||
TEST(RE2, ErrorArgs) {
|
TEST(RE2, ErrorCodeAndArg) {
|
||||||
for (size_t i = 0; i < arraysize(error_tests); i++) {
|
for (size_t i = 0; i < arraysize(error_tests); i++) {
|
||||||
RE2 re(error_tests[i].regexp, RE2::Quiet);
|
RE2 re(error_tests[i].regexp, RE2::Quiet);
|
||||||
EXPECT_FALSE(re.ok());
|
EXPECT_FALSE(re.ok());
|
||||||
EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
|
EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
|
||||||
|
EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
1063
extern/re2/re2/testing/regexp_benchmark.cc
vendored
1063
extern/re2/re2/testing/regexp_benchmark.cc
vendored
File diff suppressed because it is too large
Load Diff
12
extern/re2/re2/testing/regexp_generator.cc
vendored
12
extern/re2/re2/testing/regexp_generator.cc
vendored
@ -241,7 +241,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
|
|||||||
std::vector<std::string> Explode(const StringPiece& s) {
|
std::vector<std::string> Explode(const StringPiece& s) {
|
||||||
std::vector<std::string> v;
|
std::vector<std::string> v;
|
||||||
|
|
||||||
for (const char *q = s.begin(); q < s.end(); ) {
|
for (const char *q = s.data(); q < s.data() + s.size(); ) {
|
||||||
const char* p = q;
|
const char* p = q;
|
||||||
Rune r;
|
Rune r;
|
||||||
q += chartorune(&r, q);
|
q += chartorune(&r, q);
|
||||||
@ -256,11 +256,11 @@ std::vector<std::string> Explode(const StringPiece& s) {
|
|||||||
std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
|
std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
|
||||||
std::vector<std::string> v;
|
std::vector<std::string> v;
|
||||||
|
|
||||||
if (sep.size() == 0)
|
if (sep.empty())
|
||||||
return Explode(s);
|
return Explode(s);
|
||||||
|
|
||||||
const char *p = s.begin();
|
const char *p = s.data();
|
||||||
for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
|
for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
|
||||||
if (StringPiece(q, sep.size()) == sep) {
|
if (StringPiece(q, sep.size()) == sep) {
|
||||||
v.push_back(std::string(p, q - p));
|
v.push_back(std::string(p, q - p));
|
||||||
p = q + sep.size();
|
p = q + sep.size();
|
||||||
@ -268,8 +268,8 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (p < s.end())
|
if (p < s.data() + s.size())
|
||||||
v.push_back(std::string(p, s.end() - p));
|
v.push_back(std::string(p, s.data() + s.size() - p));
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
95
extern/re2/re2/testing/required_prefix_test.cc
vendored
95
extern/re2/re2/testing/required_prefix_test.cc
vendored
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
|
#include "re2/prog.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
@ -19,15 +20,18 @@ struct PrefixTest {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static PrefixTest tests[] = {
|
static PrefixTest tests[] = {
|
||||||
// If the regexp is missing a ^, there's no required prefix.
|
// Empty cases.
|
||||||
{ "abc", false },
|
|
||||||
{ "", false },
|
{ "", false },
|
||||||
{ "(?m)^", false },
|
{ "(?m)^", false },
|
||||||
|
{ "(?-m)^", false },
|
||||||
|
|
||||||
|
// If the regexp has no ^, there's no required prefix.
|
||||||
|
{ "abc", false },
|
||||||
|
|
||||||
// If the regexp immediately goes into
|
// If the regexp immediately goes into
|
||||||
// something not a literal match, there's no required prefix.
|
// something not a literal match, there's no required prefix.
|
||||||
{ "^(abc)", false },
|
|
||||||
{ "^a*", false },
|
{ "^a*", false },
|
||||||
|
{ "^(abc)", false },
|
||||||
|
|
||||||
// Otherwise, it should work.
|
// Otherwise, it should work.
|
||||||
{ "^abc$", true, "abc", false, "(?-m:$)" },
|
{ "^abc$", true, "abc", false, "(?-m:$)" },
|
||||||
@ -53,15 +57,15 @@ TEST(RequiredPrefix, SimpleTests) {
|
|||||||
bool f;
|
bool f;
|
||||||
Regexp* s;
|
Regexp* s;
|
||||||
ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
|
ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
|
||||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf")
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
|
||||||
<< " " << re->Dump();
|
<< " " << re->Dump();
|
||||||
if (t.return_value) {
|
if (t.return_value) {
|
||||||
ASSERT_EQ(p, std::string(t.prefix))
|
ASSERT_EQ(p, std::string(t.prefix))
|
||||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||||
ASSERT_EQ(f, t.foldcase)
|
ASSERT_EQ(f, t.foldcase)
|
||||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||||
ASSERT_EQ(s->ToString(), std::string(t.suffix))
|
ASSERT_EQ(s->ToString(), std::string(t.suffix))
|
||||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||||
s->Decref();
|
s->Decref();
|
||||||
}
|
}
|
||||||
re->Decref();
|
re->Decref();
|
||||||
@ -69,4 +73,81 @@ TEST(RequiredPrefix, SimpleTests) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PrefixTest for_accel_tests[] = {
|
||||||
|
// Empty cases.
|
||||||
|
{ "", false },
|
||||||
|
{ "(?m)^", false },
|
||||||
|
{ "(?-m)^", false },
|
||||||
|
|
||||||
|
// If the regexp has a ^, there's no required prefix.
|
||||||
|
{ "^abc", false },
|
||||||
|
|
||||||
|
// If the regexp immediately goes into
|
||||||
|
// something not a literal match, there's no required prefix.
|
||||||
|
{ "a*", false },
|
||||||
|
|
||||||
|
// Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
|
||||||
|
// capturing groups, but doesn't try to glue prefix fragments together.
|
||||||
|
{ "(a?)def", false },
|
||||||
|
{ "(ab?)def", true, "a", false },
|
||||||
|
{ "(abc?)def", true, "ab", false },
|
||||||
|
{ "(()a)def", false },
|
||||||
|
{ "((a)b)def", true, "a", false },
|
||||||
|
{ "((ab)c)def", true, "ab", false },
|
||||||
|
|
||||||
|
// Otherwise, it should work.
|
||||||
|
{ "abc$", true, "abc", false },
|
||||||
|
{ "abc", true, "abc", false },
|
||||||
|
{ "(?i)abc", true, "abc", true },
|
||||||
|
{ "abcd*", true, "abc", false },
|
||||||
|
{ "[Aa][Bb]cd*", true, "ab", true },
|
||||||
|
{ "ab[Cc]d*", true, "ab", false },
|
||||||
|
{ "☺abc", true, "☺abc", false },
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST(RequiredPrefixForAccel, SimpleTests) {
|
||||||
|
for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
|
||||||
|
const PrefixTest& t = for_accel_tests[i];
|
||||||
|
for (size_t j = 0; j < 2; j++) {
|
||||||
|
Regexp::ParseFlags flags = Regexp::LikePerl;
|
||||||
|
if (j == 0)
|
||||||
|
flags = flags | Regexp::Latin1;
|
||||||
|
Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
|
||||||
|
ASSERT_TRUE(re != NULL) << " " << t.regexp;
|
||||||
|
|
||||||
|
std::string p;
|
||||||
|
bool f;
|
||||||
|
ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
|
||||||
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
|
||||||
|
<< " " << re->Dump();
|
||||||
|
if (t.return_value) {
|
||||||
|
ASSERT_EQ(p, std::string(t.prefix))
|
||||||
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||||
|
ASSERT_EQ(f, t.foldcase)
|
||||||
|
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||||
|
}
|
||||||
|
re->Decref();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PrefixAccel, BasicTest) {
|
||||||
|
Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
|
||||||
|
ASSERT_TRUE(re != NULL);
|
||||||
|
Prog* prog = re->CompileToProg(0);
|
||||||
|
ASSERT_TRUE(prog != NULL);
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
std::string text(i, 'a');
|
||||||
|
const char* p = reinterpret_cast<const char*>(
|
||||||
|
prog->PrefixAccel(text.data(), text.size()));
|
||||||
|
EXPECT_TRUE(p == NULL);
|
||||||
|
text.append("abc");
|
||||||
|
p = reinterpret_cast<const char*>(
|
||||||
|
prog->PrefixAccel(text.data(), text.size()));
|
||||||
|
EXPECT_EQ(i, p-text.data());
|
||||||
|
}
|
||||||
|
delete prog;
|
||||||
|
re->Decref();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
26
extern/re2/re2/testing/set_test.cc
vendored
26
extern/re2/re2/testing/set_test.cc
vendored
@ -5,6 +5,7 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
@ -201,4 +202,29 @@ TEST(Set, Prefix) {
|
|||||||
ASSERT_EQ(v[0], 0);
|
ASSERT_EQ(v[0], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Set, MoveSemantics) {
|
||||||
|
RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
|
||||||
|
ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
|
||||||
|
ASSERT_EQ(s1.Compile(), true);
|
||||||
|
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
|
||||||
|
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
|
||||||
|
|
||||||
|
// The moved-to object should do what the moved-from object did.
|
||||||
|
RE2::Set s2 = std::move(s1);
|
||||||
|
ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
|
||||||
|
ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
|
||||||
|
|
||||||
|
// The moved-from object should have been reset and be reusable.
|
||||||
|
ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);
|
||||||
|
ASSERT_EQ(s1.Compile(), true);
|
||||||
|
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
|
||||||
|
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
|
||||||
|
|
||||||
|
// Verify that "overwriting" works and also doesn't leak memory.
|
||||||
|
// (The latter will need a leak detector such as LeakSanitizer.)
|
||||||
|
s1 = std::move(s2);
|
||||||
|
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
|
||||||
|
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
27
extern/re2/re2/testing/string_generator.cc
vendored
27
extern/re2/re2/testing/string_generator.cc
vendored
@ -111,4 +111,31 @@ void StringGenerator::GenerateNULL() {
|
|||||||
hasnext_ = true;
|
hasnext_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string DeBruijnString(int n) {
|
||||||
|
CHECK_GE(n, 1);
|
||||||
|
CHECK_LE(n, 29);
|
||||||
|
const size_t size = size_t{1} << static_cast<size_t>(n);
|
||||||
|
const size_t mask = size - 1;
|
||||||
|
std::vector<bool> did(size, false);
|
||||||
|
std::string s;
|
||||||
|
s.reserve(static_cast<size_t>(n) + size);
|
||||||
|
for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
|
||||||
|
s += '0';
|
||||||
|
size_t bits = 0;
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
bits <<= 1;
|
||||||
|
bits &= mask;
|
||||||
|
if (!did[bits | 1]) {
|
||||||
|
bits |= 1;
|
||||||
|
s += '1';
|
||||||
|
} else {
|
||||||
|
s += '0';
|
||||||
|
}
|
||||||
|
CHECK(!did[bits]);
|
||||||
|
did[bits] = true;
|
||||||
|
}
|
||||||
|
CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
13
extern/re2/re2/testing/string_generator.h
vendored
13
extern/re2/re2/testing/string_generator.h
vendored
@ -58,6 +58,19 @@ class StringGenerator {
|
|||||||
StringGenerator& operator=(const StringGenerator&) = delete;
|
StringGenerator& operator=(const StringGenerator&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Generates and returns a string over binary alphabet {0,1} that contains
|
||||||
|
// all possible binary sequences of length n as subsequences. The obvious
|
||||||
|
// brute force method would generate a string of length n * 2^n, but this
|
||||||
|
// generates a string of length n-1 + 2^n called a De Bruijn cycle.
|
||||||
|
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
|
||||||
|
//
|
||||||
|
// Such a string is useful for testing a DFA. If you have a DFA
|
||||||
|
// where distinct last n bytes implies distinct states, then running on a
|
||||||
|
// DeBruijn string causes the DFA to need to create a new state at every
|
||||||
|
// position in the input, never reusing any states until it gets to the
|
||||||
|
// end of the string. This is the worst possible case for DFA execution.
|
||||||
|
std::string DeBruijnString(int n);
|
||||||
|
|
||||||
} // namespace re2
|
} // namespace re2
|
||||||
|
|
||||||
#endif // RE2_TESTING_STRING_GENERATOR_H_
|
#endif // RE2_TESTING_STRING_GENERATOR_H_
|
||||||
|
60
extern/re2/re2/testing/tester.cc
vendored
60
extern/re2/re2/testing/tester.cc
vendored
@ -18,14 +18,15 @@
|
|||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
#include "re2/regexp.h"
|
#include "re2/regexp.h"
|
||||||
|
|
||||||
DEFINE_bool(dump_prog, false, "dump regexp program");
|
DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
|
||||||
DEFINE_bool(log_okay, false, "log successful runs");
|
DEFINE_FLAG(bool, log_okay, false, "log successful runs");
|
||||||
DEFINE_bool(dump_rprog, false, "dump reversed regexp program");
|
DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
|
||||||
|
|
||||||
DEFINE_int32(max_regexp_failures, 100,
|
DEFINE_FLAG(int, max_regexp_failures, 100,
|
||||||
"maximum number of regexp test failures (-1 = unlimited)");
|
"maximum number of regexp test failures (-1 = unlimited)");
|
||||||
|
|
||||||
DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
|
DEFINE_FLAG(std::string, regexp_engines, "",
|
||||||
|
"pattern to select regexp engines to test");
|
||||||
|
|
||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
@ -62,11 +63,11 @@ static uint32_t Engines() {
|
|||||||
if (did_parse)
|
if (did_parse)
|
||||||
return cached_engines;
|
return cached_engines;
|
||||||
|
|
||||||
if (FLAGS_regexp_engines.empty()) {
|
if (GetFlag(FLAGS_regexp_engines).empty()) {
|
||||||
cached_engines = ~0;
|
cached_engines = ~0;
|
||||||
} else {
|
} else {
|
||||||
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
|
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
|
||||||
if (FLAGS_regexp_engines.find(EngineName(i)) != std::string::npos)
|
if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
|
||||||
cached_engines |= 1<<i;
|
cached_engines |= 1<<i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,6 +86,20 @@ static uint32_t Engines() {
|
|||||||
|
|
||||||
// The result of running a match.
|
// The result of running a match.
|
||||||
struct TestInstance::Result {
|
struct TestInstance::Result {
|
||||||
|
Result()
|
||||||
|
: skipped(false),
|
||||||
|
matched(false),
|
||||||
|
untrusted(false),
|
||||||
|
have_submatch(false),
|
||||||
|
have_submatch0(false) {
|
||||||
|
ClearSubmatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ClearSubmatch() {
|
||||||
|
for (int i = 0; i < kMaxSubmatch; i++)
|
||||||
|
submatch[i] = StringPiece();
|
||||||
|
}
|
||||||
|
|
||||||
bool skipped; // test skipped: wasn't applicable
|
bool skipped; // test skipped: wasn't applicable
|
||||||
bool matched; // found a match
|
bool matched; // found a match
|
||||||
bool untrusted; // don't really trust the answer
|
bool untrusted; // don't really trust the answer
|
||||||
@ -99,10 +114,11 @@ typedef TestInstance::Result Result;
|
|||||||
// where a and b are the starting and ending offsets of s in text.
|
// where a and b are the starting and ending offsets of s in text.
|
||||||
static std::string FormatCapture(const StringPiece& text,
|
static std::string FormatCapture(const StringPiece& text,
|
||||||
const StringPiece& s) {
|
const StringPiece& s) {
|
||||||
if (s.begin() == NULL)
|
if (s.data() == NULL)
|
||||||
return "(?,?)";
|
return "(?,?)";
|
||||||
return StringPrintf("(%td,%td)",
|
return StringPrintf("(%td,%td)",
|
||||||
s.begin() - text.begin(), s.end() - text.begin());
|
s.begin() - text.begin(),
|
||||||
|
s.end() - text.begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns whether text contains non-ASCII (>= 0x80) bytes.
|
// Returns whether text contains non-ASCII (>= 0x80) bytes.
|
||||||
@ -198,7 +214,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
|
|||||||
error_ = true;
|
error_ = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (FLAGS_dump_prog) {
|
if (GetFlag(FLAGS_dump_prog)) {
|
||||||
LOG(INFO) << "Prog for "
|
LOG(INFO) << "Prog for "
|
||||||
<< " regexp "
|
<< " regexp "
|
||||||
<< CEscape(regexp_str_)
|
<< CEscape(regexp_str_)
|
||||||
@ -216,7 +232,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
|
|||||||
error_ = true;
|
error_ = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (FLAGS_dump_rprog)
|
if (GetFlag(FLAGS_dump_rprog))
|
||||||
LOG(INFO) << rprog_->Dump();
|
LOG(INFO) << rprog_->Dump();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -290,9 +306,6 @@ void TestInstance::RunSearch(Engine type,
|
|||||||
const StringPiece& orig_context,
|
const StringPiece& orig_context,
|
||||||
Prog::Anchor anchor,
|
Prog::Anchor anchor,
|
||||||
Result* result) {
|
Result* result) {
|
||||||
// Result is not trivial, so we cannot freely clear it with memset(3),
|
|
||||||
// but zeroing objects like so is safe and expedient for our purposes.
|
|
||||||
memset(reinterpret_cast<void*>(result), 0, sizeof *result);
|
|
||||||
if (regexp_ == NULL) {
|
if (regexp_ == NULL) {
|
||||||
result->skipped = true;
|
result->skipped = true;
|
||||||
return;
|
return;
|
||||||
@ -476,7 +489,7 @@ void TestInstance::RunSearch(Engine type,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!result->matched)
|
if (!result->matched)
|
||||||
memset(result->submatch, 0, sizeof result->submatch);
|
result->ClearSubmatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks whether r is okay given that correct is the right answer.
|
// Checks whether r is okay given that correct is the right answer.
|
||||||
@ -489,7 +502,7 @@ static bool ResultOkay(const Result& r, const Result& correct) {
|
|||||||
return false;
|
return false;
|
||||||
if (r.have_submatch || r.have_submatch0) {
|
if (r.have_submatch || r.have_submatch0) {
|
||||||
for (int i = 0; i < kMaxSubmatch; i++) {
|
for (int i = 0; i < kMaxSubmatch; i++) {
|
||||||
if (correct.submatch[i].begin() != r.submatch[i].begin() ||
|
if (correct.submatch[i].data() != r.submatch[i].data() ||
|
||||||
correct.submatch[i].size() != r.submatch[i].size())
|
correct.submatch[i].size() != r.submatch[i].size())
|
||||||
return false;
|
return false;
|
||||||
if (!r.have_submatch)
|
if (!r.have_submatch)
|
||||||
@ -528,7 +541,7 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
|||||||
Result r;
|
Result r;
|
||||||
RunSearch(i, text, context, anchor, &r);
|
RunSearch(i, text, context, anchor, &r);
|
||||||
if (ResultOkay(r, correct)) {
|
if (ResultOkay(r, correct)) {
|
||||||
if (FLAGS_log_okay)
|
if (GetFlag(FLAGS_log_okay))
|
||||||
LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
|
LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -555,8 +568,8 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 1+num_captures_; i++) {
|
for (int i = 0; i < 1+num_captures_; i++) {
|
||||||
if (r.submatch[i].begin() != correct.submatch[i].begin() ||
|
if (r.submatch[i].data() != correct.submatch[i].data() ||
|
||||||
r.submatch[i].end() != correct.submatch[i].end()) {
|
r.submatch[i].size() != correct.submatch[i].size()) {
|
||||||
LOG(INFO) <<
|
LOG(INFO) <<
|
||||||
StringPrintf(" $%d: should be %s is %s",
|
StringPrintf(" $%d: should be %s is %s",
|
||||||
i,
|
i,
|
||||||
@ -571,7 +584,10 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!all_okay) {
|
if (!all_okay) {
|
||||||
if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
|
// This will be initialised once (after flags have been initialised)
|
||||||
|
// and that is desirable because we want to enforce a global limit.
|
||||||
|
static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
|
||||||
|
if (max_regexp_failures > 0 && --max_regexp_failures == 0)
|
||||||
LOG(QFATAL) << "Too many regexp failures.";
|
LOG(QFATAL) << "Too many regexp failures.";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -640,7 +656,7 @@ static Prog::Anchor anchors[] = {
|
|||||||
|
|
||||||
bool Tester::TestInput(const StringPiece& text) {
|
bool Tester::TestInput(const StringPiece& text) {
|
||||||
bool okay = TestInputInContext(text, text);
|
bool okay = TestInputInContext(text, text);
|
||||||
if (text.size() > 0) {
|
if (!text.empty()) {
|
||||||
StringPiece sp;
|
StringPiece sp;
|
||||||
sp = text;
|
sp = text;
|
||||||
sp.remove_prefix(1);
|
sp.remove_prefix(1);
|
||||||
|
2
extern/re2/re2/unicode.py
vendored
2
extern/re2/re2/unicode.py
vendored
@ -13,7 +13,7 @@ import re
|
|||||||
from six.moves import urllib
|
from six.moves import urllib
|
||||||
|
|
||||||
# Directory or URL where Unicode tables reside.
|
# Directory or URL where Unicode tables reside.
|
||||||
_UNICODE_DIR = "https://www.unicode.org/Public/12.1.0/ucd"
|
_UNICODE_DIR = "https://www.unicode.org/Public/13.0.0/ucd"
|
||||||
|
|
||||||
# Largest valid Unicode code value.
|
# Largest valid Unicode code value.
|
||||||
_RUNE_MAX = 0x10FFFF
|
_RUNE_MAX = 0x10FFFF
|
||||||
|
12
extern/re2/re2/unicode_casefold.cc
vendored
12
extern/re2/re2/unicode_casefold.cc
vendored
@ -7,7 +7,7 @@
|
|||||||
namespace re2 {
|
namespace re2 {
|
||||||
|
|
||||||
|
|
||||||
// 1381 groups, 2792 pairs, 356 ranges
|
// 1384 groups, 2798 pairs, 358 ranges
|
||||||
const CaseFold unicode_casefold[] = {
|
const CaseFold unicode_casefold[] = {
|
||||||
{ 65, 90, 32 },
|
{ 65, 90, 32 },
|
||||||
{ 97, 106, -32 },
|
{ 97, 106, -32 },
|
||||||
@ -349,6 +349,8 @@ const CaseFold unicode_casefold[] = {
|
|||||||
{ 42948, 42948, -48 },
|
{ 42948, 42948, -48 },
|
||||||
{ 42949, 42949, -42307 },
|
{ 42949, 42949, -42307 },
|
||||||
{ 42950, 42950, -35384 },
|
{ 42950, 42950, -35384 },
|
||||||
|
{ 42951, 42954, OddEven },
|
||||||
|
{ 42997, 42998, OddEven },
|
||||||
{ 43859, 43859, -928 },
|
{ 43859, 43859, -928 },
|
||||||
{ 43888, 43967, -38864 },
|
{ 43888, 43967, -38864 },
|
||||||
{ 65313, 65338, 32 },
|
{ 65313, 65338, 32 },
|
||||||
@ -366,9 +368,9 @@ const CaseFold unicode_casefold[] = {
|
|||||||
{ 125184, 125217, 34 },
|
{ 125184, 125217, 34 },
|
||||||
{ 125218, 125251, -34 },
|
{ 125218, 125251, -34 },
|
||||||
};
|
};
|
||||||
const int num_unicode_casefold = 356;
|
const int num_unicode_casefold = 358;
|
||||||
|
|
||||||
// 1381 groups, 1411 pairs, 198 ranges
|
// 1384 groups, 1414 pairs, 200 ranges
|
||||||
const CaseFold unicode_tolower[] = {
|
const CaseFold unicode_tolower[] = {
|
||||||
{ 65, 90, 32 },
|
{ 65, 90, 32 },
|
||||||
{ 181, 181, 775 },
|
{ 181, 181, 775 },
|
||||||
@ -560,6 +562,8 @@ const CaseFold unicode_tolower[] = {
|
|||||||
{ 42948, 42948, -48 },
|
{ 42948, 42948, -48 },
|
||||||
{ 42949, 42949, -42307 },
|
{ 42949, 42949, -42307 },
|
||||||
{ 42950, 42950, -35384 },
|
{ 42950, 42950, -35384 },
|
||||||
|
{ 42951, 42953, OddEvenSkip },
|
||||||
|
{ 42997, 42997, OddEven },
|
||||||
{ 43888, 43967, -38864 },
|
{ 43888, 43967, -38864 },
|
||||||
{ 65313, 65338, 32 },
|
{ 65313, 65338, 32 },
|
||||||
{ 66560, 66599, 40 },
|
{ 66560, 66599, 40 },
|
||||||
@ -569,7 +573,7 @@ const CaseFold unicode_tolower[] = {
|
|||||||
{ 93760, 93791, 32 },
|
{ 93760, 93791, 32 },
|
||||||
{ 125184, 125217, 34 },
|
{ 125184, 125217, 34 },
|
||||||
};
|
};
|
||||||
const int num_unicode_tolower = 198;
|
const int num_unicode_tolower = 200;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
361
extern/re2/re2/unicode_groups.cc
vendored
361
extern/re2/re2/unicode_groups.cc
vendored
File diff suppressed because it is too large
Load Diff
28
extern/re2/re2/walker-inl.h
vendored
28
extern/re2/re2/walker-inl.h
vendored
@ -89,7 +89,7 @@ template<typename T> class Regexp::Walker {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Walk state for the entire traversal.
|
// Walk state for the entire traversal.
|
||||||
std::stack<WalkState<T> >* stack_;
|
std::stack<WalkState<T>> stack_;
|
||||||
bool stopped_early_;
|
bool stopped_early_;
|
||||||
int max_visits_;
|
int max_visits_;
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ template<typename T> T Regexp::Walker<T>::Copy(T arg) {
|
|||||||
|
|
||||||
// State about a single level in the traversal.
|
// State about a single level in the traversal.
|
||||||
template<typename T> struct WalkState {
|
template<typename T> struct WalkState {
|
||||||
WalkState<T>(Regexp* re, T parent)
|
WalkState(Regexp* re, T parent)
|
||||||
: re(re),
|
: re(re),
|
||||||
n(-1),
|
n(-1),
|
||||||
parent_arg(parent),
|
parent_arg(parent),
|
||||||
@ -134,24 +134,22 @@ template<typename T> struct WalkState {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename T> Regexp::Walker<T>::Walker() {
|
template<typename T> Regexp::Walker<T>::Walker() {
|
||||||
stack_ = new std::stack<WalkState<T> >;
|
|
||||||
stopped_early_ = false;
|
stopped_early_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> Regexp::Walker<T>::~Walker() {
|
template<typename T> Regexp::Walker<T>::~Walker() {
|
||||||
Reset();
|
Reset();
|
||||||
delete stack_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clears the stack. Should never be necessary, since
|
// Clears the stack. Should never be necessary, since
|
||||||
// Walk always enters and exits with an empty stack.
|
// Walk always enters and exits with an empty stack.
|
||||||
// Logs DFATAL if stack is not already clear.
|
// Logs DFATAL if stack is not already clear.
|
||||||
template<typename T> void Regexp::Walker<T>::Reset() {
|
template<typename T> void Regexp::Walker<T>::Reset() {
|
||||||
if (stack_ && stack_->size() > 0) {
|
if (!stack_.empty()) {
|
||||||
LOG(DFATAL) << "Stack not empty.";
|
LOG(DFATAL) << "Stack not empty.";
|
||||||
while (stack_->size() > 0) {
|
while (!stack_.empty()) {
|
||||||
delete stack_->top().child_args;
|
delete[] stack_.top().child_args;
|
||||||
stack_->pop();
|
stack_.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -165,12 +163,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
|||||||
return top_arg;
|
return top_arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
stack_->push(WalkState<T>(re, top_arg));
|
stack_.push(WalkState<T>(re, top_arg));
|
||||||
|
|
||||||
WalkState<T>* s;
|
WalkState<T>* s;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
T t;
|
T t;
|
||||||
s = &stack_->top();
|
s = &stack_.top();
|
||||||
Regexp* re = s->re;
|
Regexp* re = s->re;
|
||||||
switch (s->n) {
|
switch (s->n) {
|
||||||
case -1: {
|
case -1: {
|
||||||
@ -201,7 +199,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
|||||||
s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
|
s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
|
||||||
s->n++;
|
s->n++;
|
||||||
} else {
|
} else {
|
||||||
stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
|
stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -214,12 +212,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've finished stack_->top().
|
// We've finished stack_.top().
|
||||||
// Update next guy down.
|
// Update next guy down.
|
||||||
stack_->pop();
|
stack_.pop();
|
||||||
if (stack_->size() == 0)
|
if (stack_.empty())
|
||||||
return t;
|
return t;
|
||||||
s = &stack_->top();
|
s = &stack_.top();
|
||||||
if (s->child_args != NULL)
|
if (s->child_args != NULL)
|
||||||
s->child_args[s->n] = t;
|
s->child_args[s->n] = t;
|
||||||
else
|
else
|
||||||
|
12
extern/re2/re2_test.bzl
vendored
12
extern/re2/re2_test.bzl
vendored
@ -1,12 +0,0 @@
|
|||||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
|
||||||
# Use of this source code is governed by a BSD-style
|
|
||||||
# license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
# Defines a Bazel macro that instantiates a native cc_test rule for an RE2 test.
|
|
||||||
def re2_test(name, deps=[], size="medium"):
|
|
||||||
native.cc_test(
|
|
||||||
name=name,
|
|
||||||
srcs=["re2/testing/%s.cc" % (name)],
|
|
||||||
deps=[":test"] + deps,
|
|
||||||
size=size,
|
|
||||||
)
|
|
0
extern/re2/runtests
vendored
Normal file → Executable file
0
extern/re2/runtests
vendored
Normal file → Executable file
35
extern/re2/testinstall.cc
vendored
35
extern/re2/testinstall.cc
vendored
@ -2,23 +2,26 @@
|
|||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#include <re2/re2.h>
|
|
||||||
#include <re2/filtered_re2.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <re2/filtered_re2.h>
|
||||||
|
#include <re2/re2.h>
|
||||||
|
|
||||||
int main(void) {
|
int main() {
|
||||||
re2::FilteredRE2 f;
|
re2::FilteredRE2 f;
|
||||||
int id;
|
int id;
|
||||||
f.Add("a.*b.*c", RE2::DefaultOptions, &id);
|
f.Add("a.*b.*c", RE2::DefaultOptions, &id);
|
||||||
std::vector<std::string> v;
|
std::vector<std::string> v;
|
||||||
f.Compile(&v);
|
f.Compile(&v);
|
||||||
std::vector<int> ids;
|
std::vector<int> ids;
|
||||||
f.FirstMatch("abbccc", ids);
|
f.FirstMatch("abbccc", ids);
|
||||||
|
|
||||||
if(RE2::FullMatch("axbyc", "a.*b.*c")) {
|
int n;
|
||||||
printf("PASS\n");
|
if (RE2::FullMatch("axbyc", "a.*b.*c") &&
|
||||||
return 0;
|
RE2::PartialMatch("foo123bar", "(\\d+)", &n) && n == 123) {
|
||||||
}
|
printf("PASS\n");
|
||||||
printf("FAIL\n");
|
return 0;
|
||||||
return 2;
|
}
|
||||||
|
|
||||||
|
printf("FAIL\n");
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
|
192
extern/re2/util/benchmark.cc
vendored
192
extern/re2/util/benchmark.cc
vendored
@ -7,155 +7,125 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#include "util/util.h"
|
|
||||||
#include "util/flags.h"
|
|
||||||
#include "util/benchmark.h"
|
#include "util/benchmark.h"
|
||||||
|
#include "util/flags.h"
|
||||||
#include "re2/re2.h"
|
#include "re2/re2.h"
|
||||||
|
|
||||||
DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define snprintf _snprintf
|
#define snprintf _snprintf
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using testing::Benchmark;
|
using ::testing::Benchmark;
|
||||||
|
|
||||||
static Benchmark* benchmarks[10000];
|
static Benchmark* benchmarks[10000];
|
||||||
static int nbenchmarks;
|
static int nbenchmarks;
|
||||||
|
|
||||||
void Benchmark::Register() {
|
void Benchmark::Register() {
|
||||||
benchmarks[nbenchmarks] = this;
|
lo_ = std::max(1, lo_);
|
||||||
if(lo < 1)
|
hi_ = std::max(lo_, hi_);
|
||||||
lo = 1;
|
benchmarks[nbenchmarks++] = this;
|
||||||
if(hi < lo)
|
|
||||||
hi = lo;
|
|
||||||
nbenchmarks++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int64_t nsec() {
|
static int64_t nsec() {
|
||||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(
|
return std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||||
std::chrono::steady_clock::now().time_since_epoch()).count();
|
std::chrono::steady_clock::now().time_since_epoch())
|
||||||
|
.count();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int64_t bytes;
|
|
||||||
static int64_t ns;
|
|
||||||
static int64_t t0;
|
static int64_t t0;
|
||||||
|
static int64_t ns;
|
||||||
|
static int64_t bytes;
|
||||||
static int64_t items;
|
static int64_t items;
|
||||||
|
|
||||||
void SetBenchmarkBytesProcessed(int64_t x) {
|
void StartBenchmarkTiming() {
|
||||||
bytes = x;
|
if (t0 == 0) {
|
||||||
|
t0 = nsec();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void StopBenchmarkTiming() {
|
void StopBenchmarkTiming() {
|
||||||
if(t0 != 0)
|
if (t0 != 0) {
|
||||||
ns += nsec() - t0;
|
ns += nsec() - t0;
|
||||||
t0 = 0;
|
t0 = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void StartBenchmarkTiming() {
|
void SetBenchmarkBytesProcessed(int64_t b) { bytes = b; }
|
||||||
if(t0 == 0)
|
|
||||||
t0 = nsec();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetBenchmarkItemsProcessed(int n) {
|
void SetBenchmarkItemsProcessed(int64_t i) { items = i; }
|
||||||
items = n;
|
|
||||||
}
|
|
||||||
|
|
||||||
void BenchmarkMemoryUsage() {
|
static void RunFunc(Benchmark* b, int iters, int arg) {
|
||||||
// TODO(rsc): Implement.
|
t0 = nsec();
|
||||||
}
|
ns = 0;
|
||||||
|
bytes = 0;
|
||||||
int NumCPUs() {
|
items = 0;
|
||||||
return static_cast<int>(std::thread::hardware_concurrency());
|
b->func()(iters, arg);
|
||||||
}
|
StopBenchmarkTiming();
|
||||||
|
|
||||||
static void runN(Benchmark *b, int n, int siz) {
|
|
||||||
bytes = 0;
|
|
||||||
items = 0;
|
|
||||||
ns = 0;
|
|
||||||
t0 = nsec();
|
|
||||||
if(b->fn)
|
|
||||||
b->fn(n);
|
|
||||||
else if(b->fnr)
|
|
||||||
b->fnr(n, siz);
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "%s: missing function\n", b->name);
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
if(t0 != 0)
|
|
||||||
ns += nsec() - t0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int round(int n) {
|
static int round(int n) {
|
||||||
int base = 1;
|
int base = 1;
|
||||||
|
while (base * 10 < n) base *= 10;
|
||||||
while(base*10 < n)
|
if (n < 2 * base) return 2 * base;
|
||||||
base *= 10;
|
if (n < 5 * base) return 5 * base;
|
||||||
if(n < 2*base)
|
return 10 * base;
|
||||||
return 2*base;
|
|
||||||
if(n < 5*base)
|
|
||||||
return 5*base;
|
|
||||||
return 10*base;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RunBench(Benchmark* b, int nthread, int siz) {
|
static void RunBench(Benchmark* b, int arg) {
|
||||||
int n, last;
|
int iters, last;
|
||||||
|
|
||||||
// TODO(rsc): Threaded benchmarks.
|
// Run once just in case it's expensive.
|
||||||
if(nthread != 1)
|
iters = 1;
|
||||||
return;
|
RunFunc(b, iters, arg);
|
||||||
|
while (ns < (int)1e9 && iters < (int)1e9) {
|
||||||
|
last = iters;
|
||||||
|
if (ns / iters == 0) {
|
||||||
|
iters = (int)1e9;
|
||||||
|
} else {
|
||||||
|
iters = (int)1e9 / static_cast<int>(ns / iters);
|
||||||
|
}
|
||||||
|
iters = std::max(last + 1, std::min(iters + iters / 2, 100 * last));
|
||||||
|
iters = round(iters);
|
||||||
|
RunFunc(b, iters, arg);
|
||||||
|
}
|
||||||
|
|
||||||
// run once in case it's expensive
|
char mb[100];
|
||||||
n = 1;
|
char suf[100];
|
||||||
runN(b, n, siz);
|
mb[0] = '\0';
|
||||||
while(ns < (int)1e9 && n < (int)1e9) {
|
suf[0] = '\0';
|
||||||
last = n;
|
if (ns > 0 && bytes > 0)
|
||||||
if(ns/n == 0)
|
snprintf(mb, sizeof mb, "\t%7.2f MB/s",
|
||||||
n = (int)1e9;
|
((double)bytes / 1e6) / ((double)ns / 1e9));
|
||||||
else
|
if (b->has_arg()) {
|
||||||
n = (int)1e9 / static_cast<int>(ns/n);
|
if (arg >= (1 << 20)) {
|
||||||
|
snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20));
|
||||||
n = std::max(last+1, std::min(n+n/2, 100*last));
|
} else if (arg >= (1 << 10)) {
|
||||||
n = round(n);
|
snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10));
|
||||||
runN(b, n, siz);
|
} else {
|
||||||
}
|
snprintf(suf, sizeof suf, "/%d", arg);
|
||||||
|
}
|
||||||
char mb[100];
|
}
|
||||||
char suf[100];
|
printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters,
|
||||||
mb[0] = '\0';
|
(long long)ns / iters, mb);
|
||||||
suf[0] = '\0';
|
fflush(stdout);
|
||||||
if(ns > 0 && bytes > 0)
|
|
||||||
snprintf(mb, sizeof mb, "\t%7.2f MB/s", ((double)bytes/1e6)/((double)ns/1e9));
|
|
||||||
if(b->fnr || b->lo != b->hi) {
|
|
||||||
if(siz >= (1<<20))
|
|
||||||
snprintf(suf, sizeof suf, "/%dM", siz/(1<<20));
|
|
||||||
else if(siz >= (1<<10))
|
|
||||||
snprintf(suf, sizeof suf, "/%dK", siz/(1<<10));
|
|
||||||
else
|
|
||||||
snprintf(suf, sizeof suf, "/%d", siz);
|
|
||||||
}
|
|
||||||
printf("%s%s\t%8lld\t%10lld ns/op%s\n", b->name, suf, (long long)n, (long long)ns/n, mb);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int match(const char* name, int argc, const char** argv) {
|
static bool WantBench(const char* name, int argc, const char** argv) {
|
||||||
if(argc == 1)
|
if (argc == 1) return true;
|
||||||
return 1;
|
for (int i = 1; i < argc; i++) {
|
||||||
for(int i = 1; i < argc; i++)
|
if (RE2::PartialMatch(name, argv[i]))
|
||||||
if(RE2::PartialMatch(name, argv[i]))
|
return true;
|
||||||
return 1;
|
}
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, const char** argv) {
|
int main(int argc, const char** argv) {
|
||||||
for(int i = 0; i < nbenchmarks; i++) {
|
for (int i = 0; i < nbenchmarks; i++) {
|
||||||
Benchmark* b = benchmarks[i];
|
Benchmark* b = benchmarks[i];
|
||||||
if(match(b->name, argc, argv))
|
if (!WantBench(b->name(), argc, argv))
|
||||||
for(int j = b->threadlo; j <= b->threadhi; j++)
|
continue;
|
||||||
for(int k = std::max(b->lo, 1); k <= std::max(b->hi, 1); k<<=1)
|
for (int arg = b->lo(); arg <= b->hi(); arg <<= 1)
|
||||||
RunBench(b, j, k);
|
RunBench(b, arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
163
extern/re2/util/benchmark.h
vendored
163
extern/re2/util/benchmark.h
vendored
@ -6,38 +6,151 @@
|
|||||||
#define UTIL_BENCHMARK_H_
|
#define UTIL_BENCHMARK_H_
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/util.h"
|
||||||
|
|
||||||
|
// Globals for the old benchmark API.
|
||||||
|
void StartBenchmarkTiming();
|
||||||
|
void StopBenchmarkTiming();
|
||||||
|
void SetBenchmarkBytesProcessed(int64_t b);
|
||||||
|
void SetBenchmarkItemsProcessed(int64_t i);
|
||||||
|
|
||||||
|
namespace benchmark {
|
||||||
|
|
||||||
|
// The new benchmark API implemented as a layer over the old benchmark API.
|
||||||
|
// (Please refer to https://github.com/google/benchmark for documentation.)
|
||||||
|
class State {
|
||||||
|
private:
|
||||||
|
class Iterator {
|
||||||
|
public:
|
||||||
|
// Benchmark code looks like this:
|
||||||
|
//
|
||||||
|
// for (auto _ : state) {
|
||||||
|
// // ...
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// We try to avoid compiler warnings about such variables being unused.
|
||||||
|
struct ATTRIBUTE_UNUSED Value {};
|
||||||
|
|
||||||
|
explicit Iterator(int64_t iters) : iters_(iters) {}
|
||||||
|
|
||||||
|
bool operator!=(const Iterator& that) const {
|
||||||
|
if (iters_ != that.iters_) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// We are about to stop the loop, so stop timing.
|
||||||
|
StopBenchmarkTiming();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Value operator*() const {
|
||||||
|
return Value();
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator& operator++() {
|
||||||
|
--iters_;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int64_t iters_;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit State(int64_t iters)
|
||||||
|
: iters_(iters), arg_(0), has_arg_(false) {}
|
||||||
|
|
||||||
|
State(int64_t iters, int64_t arg)
|
||||||
|
: iters_(iters), arg_(arg), has_arg_(true) {}
|
||||||
|
|
||||||
|
Iterator begin() {
|
||||||
|
// We are about to start the loop, so start timing.
|
||||||
|
StartBenchmarkTiming();
|
||||||
|
return Iterator(iters_);
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator end() {
|
||||||
|
return Iterator(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); }
|
||||||
|
void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); }
|
||||||
|
int64_t iterations() const { return iters_; }
|
||||||
|
// Pretend to support multiple arguments.
|
||||||
|
int64_t range(int pos) const { CHECK(has_arg_); return arg_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
int64_t iters_;
|
||||||
|
int64_t arg_;
|
||||||
|
bool has_arg_;
|
||||||
|
|
||||||
|
State(const State&) = delete;
|
||||||
|
State& operator=(const State&) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace benchmark
|
||||||
|
|
||||||
namespace testing {
|
namespace testing {
|
||||||
struct Benchmark {
|
|
||||||
const char* name;
|
|
||||||
void (*fn)(int);
|
|
||||||
void (*fnr)(int, int);
|
|
||||||
int lo;
|
|
||||||
int hi;
|
|
||||||
int threadlo;
|
|
||||||
int threadhi;
|
|
||||||
|
|
||||||
|
class Benchmark {
|
||||||
|
public:
|
||||||
|
Benchmark(const char* name, void (*func)(benchmark::State&))
|
||||||
|
: name_(name),
|
||||||
|
func_([func](int iters, int arg) {
|
||||||
|
benchmark::State state(iters);
|
||||||
|
func(state);
|
||||||
|
}),
|
||||||
|
lo_(0),
|
||||||
|
hi_(0),
|
||||||
|
has_arg_(false) {
|
||||||
|
Register();
|
||||||
|
}
|
||||||
|
|
||||||
|
Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi)
|
||||||
|
: name_(name),
|
||||||
|
func_([func](int iters, int arg) {
|
||||||
|
benchmark::State state(iters, arg);
|
||||||
|
func(state);
|
||||||
|
}),
|
||||||
|
lo_(lo),
|
||||||
|
hi_(hi),
|
||||||
|
has_arg_(true) {
|
||||||
|
Register();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pretend to support multiple threads.
|
||||||
|
Benchmark* ThreadRange(int lo, int hi) { return this; }
|
||||||
|
|
||||||
|
const char* name() const { return name_; }
|
||||||
|
const std::function<void(int, int)>& func() const { return func_; }
|
||||||
|
int lo() const { return lo_; }
|
||||||
|
int hi() const { return hi_; }
|
||||||
|
bool has_arg() const { return has_arg_; }
|
||||||
|
|
||||||
|
private:
|
||||||
void Register();
|
void Register();
|
||||||
Benchmark(const char* name, void (*f)(int)) { Clear(name); fn = f; Register(); }
|
|
||||||
Benchmark(const char* name, void (*f)(int, int), int l, int h) { Clear(name); fnr = f; lo = l; hi = h; Register(); }
|
const char* name_;
|
||||||
void Clear(const char* n) { name = n; fn = 0; fnr = 0; lo = 0; hi = 0; threadlo = 0; threadhi = 0; }
|
std::function<void(int, int)> func_;
|
||||||
Benchmark* ThreadRange(int lo, int hi) { threadlo = lo; threadhi = hi; return this; }
|
int lo_;
|
||||||
|
int hi_;
|
||||||
|
bool has_arg_;
|
||||||
|
|
||||||
|
Benchmark(const Benchmark&) = delete;
|
||||||
|
Benchmark& operator=(const Benchmark&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace testing
|
} // namespace testing
|
||||||
|
|
||||||
void SetBenchmarkBytesProcessed(int64_t);
|
#define BENCHMARK(f) \
|
||||||
void StopBenchmarkTiming();
|
::testing::Benchmark* _benchmark_##f = \
|
||||||
void StartBenchmarkTiming();
|
(new ::testing::Benchmark(#f, f))
|
||||||
void BenchmarkMemoryUsage();
|
|
||||||
void SetBenchmarkItemsProcessed(int);
|
|
||||||
|
|
||||||
int NumCPUs();
|
#define BENCHMARK_RANGE(f, lo, hi) \
|
||||||
|
::testing::Benchmark* _benchmark_##f = \
|
||||||
#define BENCHMARK(f) \
|
(new ::testing::Benchmark(#f, f, lo, hi))
|
||||||
::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
|
|
||||||
|
|
||||||
#define BENCHMARK_RANGE(f, lo, hi) \
|
|
||||||
::testing::Benchmark* _benchmark_##f = \
|
|
||||||
(new ::testing::Benchmark(#f, f, lo, hi))
|
|
||||||
|
|
||||||
#endif // UTIL_BENCHMARK_H_
|
#endif // UTIL_BENCHMARK_H_
|
||||||
|
19
extern/re2/util/flags.h
vendored
19
extern/re2/util/flags.h
vendored
@ -10,20 +10,17 @@
|
|||||||
// If you want to do that, see
|
// If you want to do that, see
|
||||||
// https://gflags.github.io/gflags/
|
// https://gflags.github.io/gflags/
|
||||||
|
|
||||||
#include <stdint.h>
|
#define DEFINE_FLAG(type, name, deflt, desc) \
|
||||||
|
|
||||||
#define DEFINE_flag(type, name, deflt, desc) \
|
|
||||||
namespace re2 { type FLAGS_##name = deflt; }
|
namespace re2 { type FLAGS_##name = deflt; }
|
||||||
|
|
||||||
#define DECLARE_flag(type, name) \
|
#define DECLARE_FLAG(type, name) \
|
||||||
namespace re2 { extern type FLAGS_##name; }
|
namespace re2 { extern type FLAGS_##name; }
|
||||||
|
|
||||||
#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
|
namespace re2 {
|
||||||
#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc)
|
template <typename T>
|
||||||
#define DEFINE_string(name, deflt, desc) DEFINE_flag(std::string, name, deflt, desc)
|
T GetFlag(const T& flag) {
|
||||||
|
return flag;
|
||||||
#define DECLARE_bool(name) DECLARE_flag(bool, name)
|
}
|
||||||
#define DECLARE_int32(name) DECLARE_flag(int32_t, name)
|
} // namespace re2
|
||||||
#define DECLARE_string(name) DECLARE_flag(std::string, name)
|
|
||||||
|
|
||||||
#endif // UTIL_FLAGS_H_
|
#endif // UTIL_FLAGS_H_
|
||||||
|
19
extern/re2/util/malloc_counter.h
vendored
Normal file
19
extern/re2/util/malloc_counter.h
vendored
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#ifndef UTIL_MALLOC_COUNTER_H_
|
||||||
|
#define UTIL_MALLOC_COUNTER_H_
|
||||||
|
|
||||||
|
namespace testing {
|
||||||
|
class MallocCounter {
|
||||||
|
public:
|
||||||
|
MallocCounter(int x) {}
|
||||||
|
static const int THIS_THREAD_ONLY = 0;
|
||||||
|
long long HeapGrowth() { return 0; }
|
||||||
|
long long PeakHeapGrowth() { return 0; }
|
||||||
|
void Reset() {}
|
||||||
|
};
|
||||||
|
} // namespace testing
|
||||||
|
|
||||||
|
#endif // UTIL_MALLOC_COUNTER_H_
|
23
extern/re2/util/mutex.h
vendored
23
extern/re2/util/mutex.h
vendored
@ -10,7 +10,13 @@
|
|||||||
* You should assume the locks are *not* re-entrant.
|
* You should assume the locks are *not* re-entrant.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if !defined(_WIN32)
|
#ifdef _WIN32
|
||||||
|
// Requires Windows Vista or Windows Server 2008 at minimum.
|
||||||
|
#include <windows.h>
|
||||||
|
#if defined(WINVER) && WINVER >= 0x0600
|
||||||
|
#define MUTEX_IS_WIN32_SRWLOCK
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
#ifndef _POSIX_C_SOURCE
|
#ifndef _POSIX_C_SOURCE
|
||||||
#define _POSIX_C_SOURCE 200809L
|
#define _POSIX_C_SOURCE 200809L
|
||||||
#endif
|
#endif
|
||||||
@ -20,7 +26,9 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(MUTEX_IS_PTHREAD_RWLOCK)
|
#if defined(MUTEX_IS_WIN32_SRWLOCK)
|
||||||
|
typedef SRWLOCK MutexType;
|
||||||
|
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
typedef pthread_rwlock_t MutexType;
|
typedef pthread_rwlock_t MutexType;
|
||||||
@ -56,7 +64,16 @@ class Mutex {
|
|||||||
Mutex& operator=(const Mutex&) = delete;
|
Mutex& operator=(const Mutex&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(MUTEX_IS_PTHREAD_RWLOCK)
|
#if defined(MUTEX_IS_WIN32_SRWLOCK)
|
||||||
|
|
||||||
|
Mutex::Mutex() { InitializeSRWLock(&mutex_); }
|
||||||
|
Mutex::~Mutex() { }
|
||||||
|
void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
|
||||||
|
void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
|
||||||
|
void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
|
||||||
|
void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
|
||||||
|
|
||||||
|
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||||
|
|
||||||
#define SAFE_PTHREAD(fncall) \
|
#define SAFE_PTHREAD(fncall) \
|
||||||
do { \
|
do { \
|
||||||
|
42
extern/re2/util/pcre.cc
vendored
42
extern/re2/util/pcre.cc
vendored
@ -22,9 +22,7 @@
|
|||||||
#include "util/strutil.h"
|
#include "util/strutil.h"
|
||||||
|
|
||||||
// Silence warnings about the wacky formatting in the operator() functions.
|
// Silence warnings about the wacky formatting in the operator() functions.
|
||||||
// Note that we test for Clang first because it defines __GNUC__ as well.
|
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
|
||||||
#if defined(__clang__)
|
|
||||||
#elif defined(__GNUC__) && __GNUC__ >= 6
|
|
||||||
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -35,9 +33,10 @@
|
|||||||
// not exceed main thread stacks. Note that other threads
|
// not exceed main thread stacks. Note that other threads
|
||||||
// often have smaller stacks, and therefore tightening
|
// often have smaller stacks, and therefore tightening
|
||||||
// regexp_stack_limit may frequently be necessary.
|
// regexp_stack_limit may frequently be necessary.
|
||||||
DEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)");
|
DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
|
||||||
DEFINE_int32(regexp_match_limit, 1000000,
|
"default PCRE stack limit (bytes)");
|
||||||
"default PCRE match limit (function calls)");
|
DEFINE_FLAG(int, regexp_match_limit, 1000000,
|
||||||
|
"default PCRE match limit (function calls)");
|
||||||
|
|
||||||
#ifndef USEPCRE
|
#ifndef USEPCRE
|
||||||
|
|
||||||
@ -523,12 +522,12 @@ int PCRE::TryMatch(const StringPiece& text,
|
|||||||
|
|
||||||
int match_limit = match_limit_;
|
int match_limit = match_limit_;
|
||||||
if (match_limit <= 0) {
|
if (match_limit <= 0) {
|
||||||
match_limit = FLAGS_regexp_match_limit;
|
match_limit = GetFlag(FLAGS_regexp_match_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
int stack_limit = stack_limit_;
|
int stack_limit = stack_limit_;
|
||||||
if (stack_limit <= 0) {
|
if (stack_limit <= 0) {
|
||||||
stack_limit = FLAGS_regexp_stack_limit;
|
stack_limit = GetFlag(FLAGS_regexp_stack_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
pcre_extra extra = { 0 };
|
pcre_extra extra = { 0 };
|
||||||
@ -977,32 +976,7 @@ static bool parse_double_float(const char* str, size_t n, bool isfloat,
|
|||||||
} else {
|
} else {
|
||||||
r = strtod(buf, &end);
|
r = strtod(buf, &end);
|
||||||
}
|
}
|
||||||
if (end != buf + n) {
|
if (end != buf + n) return false; // Leftover junk
|
||||||
#ifdef _WIN32
|
|
||||||
// Microsoft's strtod() doesn't handle inf and nan, so we have to
|
|
||||||
// handle it explicitly. Speed is not important here because this
|
|
||||||
// code is only called in unit tests.
|
|
||||||
bool pos = true;
|
|
||||||
const char* i = buf;
|
|
||||||
if ('-' == *i) {
|
|
||||||
pos = false;
|
|
||||||
++i;
|
|
||||||
} else if ('+' == *i) {
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
if (0 == _stricmp(i, "inf") || 0 == _stricmp(i, "infinity")) {
|
|
||||||
r = std::numeric_limits<double>::infinity();
|
|
||||||
if (!pos)
|
|
||||||
r = -r;
|
|
||||||
} else if (0 == _stricmp(i, "nan")) {
|
|
||||||
r = std::numeric_limits<double>::quiet_NaN();
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
return false; // Leftover junk
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if (errno) return false;
|
if (errno) return false;
|
||||||
if (dest == NULL) return true;
|
if (dest == NULL) return true;
|
||||||
if (isfloat) {
|
if (isfloat) {
|
||||||
|
6
extern/re2/util/pcre.h
vendored
6
extern/re2/util/pcre.h
vendored
@ -555,7 +555,7 @@ class PCRE_Options {
|
|||||||
// Hex/Octal/Binary?
|
// Hex/Octal/Binary?
|
||||||
|
|
||||||
// Special class for parsing into objects that define a ParseFrom() method
|
// Special class for parsing into objects that define a ParseFrom() method
|
||||||
template <class T>
|
template <typename T>
|
||||||
class _PCRE_MatchObject {
|
class _PCRE_MatchObject {
|
||||||
public:
|
public:
|
||||||
static inline bool Parse(const char* str, size_t n, void* dest) {
|
static inline bool Parse(const char* str, size_t n, void* dest) {
|
||||||
@ -600,9 +600,9 @@ class PCRE::Arg {
|
|||||||
#undef MAKE_PARSER
|
#undef MAKE_PARSER
|
||||||
|
|
||||||
// Generic constructor
|
// Generic constructor
|
||||||
template <class T> Arg(T*, Parser parser);
|
template <typename T> Arg(T*, Parser parser);
|
||||||
// Generic constructor template
|
// Generic constructor template
|
||||||
template <class T> Arg(T* p)
|
template <typename T> Arg(T* p)
|
||||||
: arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
|
: arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
5
extern/re2/util/test.cc
vendored
5
extern/re2/util/test.cc
vendored
@ -3,10 +3,13 @@
|
|||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#include "util/test.h"
|
#include "util/test.h"
|
||||||
|
|
||||||
DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
|
namespace testing {
|
||||||
|
std::string TempDir() { return "/tmp/"; }
|
||||||
|
} // namespace testing
|
||||||
|
|
||||||
struct Test {
|
struct Test {
|
||||||
void (*fn)(void);
|
void (*fn)(void);
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user