mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 20:03:02 +01:00
Update re2 to version 2021-04-01.
This commit is contained in:
parent
d9d8cc36d4
commit
2e35cc66e7
2
extern/re2/kokoro/bazel.sh → extern/re2/.github/bazel.sh
vendored
Normal file → Executable file
2
extern/re2/kokoro/bazel.sh → extern/re2/.github/bazel.sh
vendored
Normal file → Executable file
@ -1,8 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
cd git/re2
|
||||
|
||||
bazel clean
|
||||
bazel build --compilation_mode=dbg -- //:all
|
||||
bazel test --compilation_mode=dbg --test_output=errors -- //:all \
|
12
extern/re2/.github/cmake.sh
vendored
Executable file
12
extern/re2/.github/cmake.sh
vendored
Executable file
@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
cmake -D CMAKE_BUILD_TYPE=Debug
|
||||
cmake --build . --config Debug --clean-first
|
||||
ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
|
||||
|
||||
cmake -D CMAKE_BUILD_TYPE=Release
|
||||
cmake --build . --config Release --clean-first
|
||||
ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
|
||||
|
||||
exit 0
|
17
extern/re2/.github/workflows/ci-bazel.yml
vendored
Normal file
17
extern/re2/.github/workflows/ci-bazel.yml
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
name: CI (Bazel)
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest, ubuntu-latest, windows-latest]
|
||||
env:
|
||||
BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: .github/bazel.sh
|
||||
shell: bash
|
15
extern/re2/.github/workflows/ci-cmake.yml
vendored
Normal file
15
extern/re2/.github/workflows/ci-cmake.yml
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
name: CI (CMake)
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest, ubuntu-latest, windows-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: .github/cmake.sh
|
||||
shell: bash
|
51
extern/re2/.github/workflows/ci.yml
vendored
Normal file
51
extern/re2/.github/workflows/ci.yml
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
name: CI
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest, ubuntu-latest]
|
||||
env:
|
||||
CC: clang
|
||||
CXX: clang++
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: make && make test
|
||||
shell: bash
|
||||
build-clang:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
tag: [9, 10, 11]
|
||||
env:
|
||||
CC: clang-${{ matrix.tag }}
|
||||
CXX: clang++-${{ matrix.tag }}
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install Clang ${{ matrix.tag }}
|
||||
run: |
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x ./llvm.sh
|
||||
sudo ./llvm.sh ${{ matrix.tag }}
|
||||
shell: bash
|
||||
- run: make && make test
|
||||
shell: bash
|
||||
build-gcc:
|
||||
runs-on: ubuntu-latest
|
||||
container: gcc:${{ matrix.tag }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
tag: [4, 5, 6, 7, 8, 9, 10]
|
||||
env:
|
||||
CC: gcc
|
||||
CXX: g++
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: make && make test
|
||||
shell: bash
|
1
extern/re2/.gitignore
vendored
1
extern/re2/.gitignore
vendored
@ -3,4 +3,3 @@
|
||||
core
|
||||
obj/
|
||||
benchlog.*
|
||||
builds/
|
||||
|
179
extern/re2/.travis.yml
vendored
179
extern/re2/.travis.yml
vendored
@ -1,179 +0,0 @@
|
||||
language: cpp
|
||||
sudo: false
|
||||
dist: trusty
|
||||
script:
|
||||
- make
|
||||
- make test
|
||||
matrix:
|
||||
include:
|
||||
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-4.8 CXX=g++-4.8"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.9
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-4.9 CXX=g++-4.9"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-5
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-5 CXX=g++-5"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-6
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-6 CXX=g++-6"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-7
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-7 CXX=g++-7"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-8
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-8 CXX=g++-8"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-9
|
||||
env:
|
||||
- MATRIX_EVAL="CC=gcc-9 CXX=g++-9"
|
||||
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.5
|
||||
packages:
|
||||
- clang-3.5
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-3.5 CXX=clang++-3.5"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.6
|
||||
packages:
|
||||
- clang-3.6
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-3.6 CXX=clang++-3.6"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.7
|
||||
packages:
|
||||
- clang-3.7
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-3.7 CXX=clang++-3.7"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.8
|
||||
packages:
|
||||
- clang-3.8
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-3.8 CXX=clang++-3.8"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.9
|
||||
packages:
|
||||
- clang-3.9
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-3.9 CXX=clang++-3.9"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-trusty-4.0
|
||||
packages:
|
||||
- clang-4.0
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-4.0 CXX=clang++-4.0"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-trusty-5.0
|
||||
packages:
|
||||
- clang-5.0
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-5.0 CXX=clang++-5.0"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main'
|
||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
||||
packages:
|
||||
- clang-6.0
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
|
||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
||||
packages:
|
||||
- clang-7
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
|
||||
- os: linux
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main'
|
||||
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
|
||||
packages:
|
||||
- clang-8
|
||||
env:
|
||||
- MATRIX_EVAL="CC=clang-8 CXX=clang++-8"
|
||||
|
||||
before_install:
|
||||
- eval "${MATRIX_EVAL}"
|
151
extern/re2/BUILD
vendored
151
extern/re2/BUILD
vendored
@ -9,19 +9,21 @@ licenses(["notice"])
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
config_setting(
|
||||
name = "darwin",
|
||||
name = "macos",
|
||||
values = {"cpu": "darwin"},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "wasm",
|
||||
values = {"cpu": "wasm32"},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "windows",
|
||||
values = {"cpu": "x64_windows"},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "windows_msvc",
|
||||
values = {"cpu": "x64_windows_msvc"},
|
||||
)
|
||||
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
|
||||
|
||||
cc_library(
|
||||
name = "re2",
|
||||
@ -36,6 +38,7 @@ cc_library(
|
||||
"re2/onepass.cc",
|
||||
"re2/parse.cc",
|
||||
"re2/perl_groups.cc",
|
||||
"re2/pod_array.h",
|
||||
"re2/prefilter.cc",
|
||||
"re2/prefilter.h",
|
||||
"re2/prefilter_tree.cc",
|
||||
@ -47,6 +50,8 @@ cc_library(
|
||||
"re2/regexp.h",
|
||||
"re2/set.cc",
|
||||
"re2/simplify.cc",
|
||||
"re2/sparse_array.h",
|
||||
"re2/sparse_set.h",
|
||||
"re2/stringpiece.cc",
|
||||
"re2/tostring.cc",
|
||||
"re2/unicode_casefold.cc",
|
||||
@ -54,14 +59,10 @@ cc_library(
|
||||
"re2/unicode_groups.cc",
|
||||
"re2/unicode_groups.h",
|
||||
"re2/walker-inl.h",
|
||||
"util/flags.h",
|
||||
"util/logging.h",
|
||||
"util/mix.h",
|
||||
"util/mutex.h",
|
||||
"util/pod_array.h",
|
||||
"util/rune.cc",
|
||||
"util/sparse_array.h",
|
||||
"util/sparse_set.h",
|
||||
"util/strutil.cc",
|
||||
"util/strutil.h",
|
||||
"util/utf.h",
|
||||
@ -74,17 +75,17 @@ cc_library(
|
||||
"re2/stringpiece.h",
|
||||
],
|
||||
copts = select({
|
||||
":wasm": [],
|
||||
":windows": [],
|
||||
":windows_msvc": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
linkopts = select({
|
||||
# Darwin doesn't need `-pthread' when linking and it appears that
|
||||
# macOS doesn't need `-pthread' when linking and it appears that
|
||||
# older versions of Clang will warn about the unused command line
|
||||
# argument, so just don't pass it.
|
||||
":darwin": [],
|
||||
":macos": [],
|
||||
":wasm": [],
|
||||
":windows": [],
|
||||
":windows_msvc": [],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
@ -109,6 +110,8 @@ cc_library(
|
||||
"re2/testing/string_generator.h",
|
||||
"re2/testing/tester.h",
|
||||
"util/benchmark.h",
|
||||
"util/flags.h",
|
||||
"util/malloc_counter.h",
|
||||
"util/pcre.h",
|
||||
"util/test.h",
|
||||
],
|
||||
@ -122,106 +125,144 @@ cc_library(
|
||||
deps = [":testing"],
|
||||
)
|
||||
|
||||
load(":re2_test.bzl", "re2_test")
|
||||
|
||||
re2_test(
|
||||
"charclass_test",
|
||||
cc_test(
|
||||
name = "charclass_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/charclass_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"compile_test",
|
||||
cc_test(
|
||||
name = "compile_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/compile_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"filtered_re2_test",
|
||||
cc_test(
|
||||
name = "filtered_re2_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/filtered_re2_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"mimics_pcre_test",
|
||||
cc_test(
|
||||
name = "mimics_pcre_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/mimics_pcre_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"parse_test",
|
||||
cc_test(
|
||||
name = "parse_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/parse_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"possible_match_test",
|
||||
cc_test(
|
||||
name = "possible_match_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/possible_match_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"re2_arg_test",
|
||||
cc_test(
|
||||
name = "re2_arg_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/re2_arg_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"re2_test",
|
||||
cc_test(
|
||||
name = "re2_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/re2_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"regexp_test",
|
||||
cc_test(
|
||||
name = "regexp_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/regexp_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"required_prefix_test",
|
||||
cc_test(
|
||||
name = "required_prefix_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/required_prefix_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"search_test",
|
||||
cc_test(
|
||||
name = "search_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/search_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"set_test",
|
||||
cc_test(
|
||||
name = "set_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/set_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"simplify_test",
|
||||
cc_test(
|
||||
name = "simplify_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/simplify_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"string_generator_test",
|
||||
cc_test(
|
||||
name = "string_generator_test",
|
||||
size = "small",
|
||||
srcs = ["re2/testing/string_generator_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"dfa_test",
|
||||
cc_test(
|
||||
name = "dfa_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/dfa_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"exhaustive1_test",
|
||||
cc_test(
|
||||
name = "exhaustive1_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive1_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"exhaustive2_test",
|
||||
cc_test(
|
||||
name = "exhaustive2_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive2_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"exhaustive3_test",
|
||||
cc_test(
|
||||
name = "exhaustive3_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive3_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"exhaustive_test",
|
||||
cc_test(
|
||||
name = "exhaustive_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/exhaustive_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
re2_test(
|
||||
"random_test",
|
||||
cc_test(
|
||||
name = "random_test",
|
||||
size = "large",
|
||||
srcs = ["re2/testing/random_test.cc"],
|
||||
deps = [":test"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
|
43
extern/re2/CMakeLists.txt
vendored
43
extern/re2/CMakeLists.txt
vendored
@ -2,8 +2,8 @@
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Old enough to support Ubuntu Trusty.
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
# Old enough to support Ubuntu Xenial.
|
||||
cmake_minimum_required(VERSION 3.5.1)
|
||||
|
||||
if(POLICY CMP0048)
|
||||
cmake_policy(SET CMP0048 NEW)
|
||||
@ -11,6 +11,12 @@ endif()
|
||||
|
||||
project(RE2 CXX)
|
||||
include(CTest)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
if(NOT CMAKE_CXX_STANDARD)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
||||
option(USEPCRE "use PCRE in tests and benchmarks" OFF)
|
||||
@ -19,6 +25,10 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF)
|
||||
# so we provide an option similar to BUILD_TESTING, but just for RE2.
|
||||
option(RE2_BUILD_TESTING "enable testing for RE2" ON)
|
||||
|
||||
# ABI version
|
||||
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
||||
set(SONAME 9)
|
||||
|
||||
set(EXTRA_TARGET_LINK_LIBRARIES)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
||||
@ -27,7 +37,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
# See http://www.kitware.com/blog/home/post/939 for details.
|
||||
cmake_minimum_required(VERSION 3.4)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
# CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
|
||||
@ -36,13 +45,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
||||
# Without a byte order mark (BOM), Visual Studio assumes that the source
|
||||
# file is encoded using the current user code page, so we specify UTF-8.
|
||||
add_compile_options(/utf-8)
|
||||
# allow multi-processor compilation
|
||||
add_compile_options(/MP)
|
||||
elseif(CYGWIN OR MINGW)
|
||||
# See https://stackoverflow.com/questions/38139631 for details.
|
||||
add_compile_options(-std=gnu++11)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
||||
add_compile_options(-std=c++11)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
@ -58,8 +60,6 @@ if(USEPCRE)
|
||||
list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
set(RE2_SOURCES
|
||||
re2/bitstate.cc
|
||||
re2/compile.cc
|
||||
@ -86,6 +86,8 @@ set(RE2_SOURCES
|
||||
)
|
||||
|
||||
add_library(re2 ${RE2_SOURCES})
|
||||
target_include_directories(re2 PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
|
||||
set_target_properties(re2 PROPERTIES SOVERSION ${SONAME} VERSION ${SONAME}.0.0)
|
||||
add_library(re2::re2 ALIAS re2)
|
||||
|
||||
if(RE2_BUILD_TESTING)
|
||||
@ -101,6 +103,7 @@ if(RE2_BUILD_TESTING)
|
||||
)
|
||||
|
||||
add_library(testing STATIC ${TESTING_SOURCES})
|
||||
target_link_libraries(testing PUBLIC re2)
|
||||
|
||||
set(TEST_TARGETS
|
||||
charclass_test
|
||||
@ -132,13 +135,13 @@ if(RE2_BUILD_TESTING)
|
||||
|
||||
foreach(target ${TEST_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc util/test.cc)
|
||||
target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
add_test(NAME ${target} COMMAND ${target})
|
||||
endforeach(target)
|
||||
|
||||
foreach(target ${BENCHMARK_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc util/benchmark.cc)
|
||||
target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
endforeach(target)
|
||||
endif()
|
||||
|
||||
@ -149,6 +152,12 @@ set(RE2_HEADERS
|
||||
re2/stringpiece.h
|
||||
)
|
||||
|
||||
install(FILES ${RE2_HEADERS} DESTINATION include/re2)
|
||||
install(TARGETS re2 EXPORT re2Config ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin INCLUDES DESTINATION include)
|
||||
install(EXPORT re2Config DESTINATION lib/cmake/re2 NAMESPACE re2::)
|
||||
install(FILES ${RE2_HEADERS}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2)
|
||||
install(TARGETS re2 EXPORT re2Config
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install(EXPORT re2Config
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::)
|
||||
|
88
extern/re2/Makefile
vendored
88
extern/re2/Makefile
vendored
@ -44,7 +44,7 @@ endif
|
||||
|
||||
# ABI version
|
||||
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
|
||||
SONAME=0
|
||||
SONAME=9
|
||||
|
||||
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
|
||||
# access for Unicode data), uncomment the following line:
|
||||
@ -55,7 +55,7 @@ ifeq ($(shell uname),Darwin)
|
||||
SOEXT=dylib
|
||||
SOEXTVER=$(SONAME).$(SOEXT)
|
||||
SOEXTVER00=$(SONAME).0.0.$(SOEXT)
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
else ifeq ($(shell uname),SunOS)
|
||||
SOEXT=so
|
||||
SOEXTVER=$(SOEXT).$(SONAME)
|
||||
@ -68,6 +68,7 @@ SOEXTVER00=$(SOEXT).$(SONAME).0.0
|
||||
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
endif
|
||||
|
||||
.PHONY: all
|
||||
all: obj/libre2.a obj/so/libre2.$(SOEXT)
|
||||
|
||||
INSTALL_HFILES=\
|
||||
@ -80,24 +81,25 @@ HFILES=\
|
||||
util/benchmark.h\
|
||||
util/flags.h\
|
||||
util/logging.h\
|
||||
util/malloc_counter.h\
|
||||
util/mix.h\
|
||||
util/mutex.h\
|
||||
util/pcre.h\
|
||||
util/pod_array.h\
|
||||
util/sparse_array.h\
|
||||
util/sparse_set.h\
|
||||
util/strutil.h\
|
||||
util/test.h\
|
||||
util/utf.h\
|
||||
util/util.h\
|
||||
re2/bitmap256.h\
|
||||
re2/filtered_re2.h\
|
||||
re2/pod_array.h\
|
||||
re2/prefilter.h\
|
||||
re2/prefilter_tree.h\
|
||||
re2/prog.h\
|
||||
re2/re2.h\
|
||||
re2/regexp.h\
|
||||
re2/set.h\
|
||||
re2/sparse_array.h\
|
||||
re2/sparse_set.h\
|
||||
re2/stringpiece.h\
|
||||
re2/testing/exhaustive_tester.h\
|
||||
re2/testing/regexp_generator.h\
|
||||
@ -175,117 +177,156 @@ DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
|
||||
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
|
||||
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
|
||||
|
||||
.PRECIOUS: obj/%.o
|
||||
obj/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||
|
||||
.PRECIOUS: obj/dbg/%.o
|
||||
obj/dbg/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
|
||||
|
||||
.PRECIOUS: obj/so/%.o
|
||||
obj/so/%.o: %.cc $(HFILES)
|
||||
@mkdir -p $$(dirname $@)
|
||||
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
|
||||
|
||||
.PRECIOUS: obj/libre2.a
|
||||
obj/libre2.a: $(OFILES)
|
||||
@mkdir -p obj
|
||||
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
|
||||
|
||||
.PRECIOUS: obj/dbg/libre2.a
|
||||
obj/dbg/libre2.a: $(DOFILES)
|
||||
@mkdir -p obj/dbg
|
||||
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
|
||||
|
||||
obj/so/libre2.$(SOEXT): $(SOFILES)
|
||||
.PRECIOUS: obj/so/libre2.$(SOEXT)
|
||||
obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
|
||||
@mkdir -p obj/so
|
||||
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
|
||||
ln -sf libre2.$(SOEXTVER) $@
|
||||
|
||||
.PRECIOUS: obj/dbg/test/%
|
||||
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
|
||||
@mkdir -p obj/dbg/test
|
||||
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
.PRECIOUS: obj/test/%
|
||||
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
# Test the shared lib, falling back to the static lib for private symbols
|
||||
.PRECIOUS: obj/so/test/%
|
||||
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
|
||||
@mkdir -p obj/so/test
|
||||
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
# Filter out dump.o because testing::TempDir() isn't available for it.
|
||||
obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
|
||||
# is simply a way to check that the target builds and then to run it against a
|
||||
# fixed set of inputs. To perform real fuzzing, refer to the documentation for
|
||||
# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
|
||||
obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS)
|
||||
obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
|
||||
@mkdir -p obj/test
|
||||
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
|
||||
|
||||
ifdef REBUILD_TABLES
|
||||
.PRECIOUS: re2/perl_groups.cc
|
||||
re2/perl_groups.cc: re2/make_perl_groups.pl
|
||||
perl $< > $@
|
||||
|
||||
.PRECIOUS: re2/unicode_%.cc
|
||||
re2/unicode_%.cc: re2/make_unicode_%.py
|
||||
python $< > $@
|
||||
|
||||
.PRECIOUS: re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
||||
endif
|
||||
|
||||
.PHONY: distclean
|
||||
distclean: clean
|
||||
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf obj
|
||||
rm -f re2/*.pyc
|
||||
|
||||
.PHONY: testofiles
|
||||
testofiles: $(TESTOFILES)
|
||||
|
||||
.PHONY: test
|
||||
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
|
||||
|
||||
.PHONY: debug-test
|
||||
debug-test: $(DTESTS)
|
||||
@./runtests $(DTESTS)
|
||||
|
||||
.PHONY: static-test
|
||||
static-test: $(TESTS)
|
||||
@./runtests $(TESTS)
|
||||
|
||||
.PHONY: shared-test
|
||||
shared-test: $(STESTS)
|
||||
@./runtests -shared-library-path obj/so $(STESTS)
|
||||
|
||||
.PHONY: debug-bigtest
|
||||
debug-bigtest: $(DTESTS) $(DBIGTESTS)
|
||||
@./runtests $(DTESTS) $(DBIGTESTS)
|
||||
|
||||
.PHONY: static-bigtest
|
||||
static-bigtest: $(TESTS) $(BIGTESTS)
|
||||
@./runtests $(TESTS) $(BIGTESTS)
|
||||
|
||||
.PHONY: shared-bigtest
|
||||
shared-bigtest: $(STESTS) $(SBIGTESTS)
|
||||
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
|
||||
|
||||
.PHONY: benchmark
|
||||
benchmark: obj/test/regexp_benchmark
|
||||
|
||||
.PHONY: fuzz
|
||||
fuzz: obj/test/re2_fuzzer
|
||||
|
||||
install: obj/libre2.a obj/so/libre2.$(SOEXT)
|
||||
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
|
||||
.PHONY: install
|
||||
install: static-install shared-install
|
||||
|
||||
.PHONY: static
|
||||
static: obj/libre2.a
|
||||
|
||||
.PHONY: static-install
|
||||
static-install: obj/libre2.a common-install
|
||||
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
|
||||
|
||||
.PHONY: shared
|
||||
shared: obj/so/libre2.$(SOEXT)
|
||||
|
||||
.PHONY: shared-install
|
||||
shared-install: obj/so/libre2.$(SOEXT) common-install
|
||||
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
|
||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
|
||||
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
|
||||
$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@prefix@#${prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@exec_prefix@#${exec_prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@includedir@#${includedir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@libdir@#${libdir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
|
||||
.PHONY: common-install
|
||||
common-install:
|
||||
mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
|
||||
$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
$(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
|
||||
|
||||
.PHONY: testinstall
|
||||
testinstall: static-testinstall shared-testinstall
|
||||
@echo
|
||||
@echo Install tests passed.
|
||||
@echo
|
||||
|
||||
.PHONY: static-testinstall
|
||||
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
||||
static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
|
||||
static-testinstall:
|
||||
@ -300,6 +341,7 @@ else
|
||||
obj/testinstall
|
||||
endif
|
||||
|
||||
.PHONY: shared-testinstall
|
||||
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
|
||||
shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
|
||||
shared-testinstall:
|
||||
@ -312,19 +354,14 @@ else
|
||||
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
|
||||
endif
|
||||
|
||||
.PHONY: benchlog
|
||||
benchlog: obj/test/regexp_benchmark
|
||||
(echo '==BENCHMARK==' `hostname` `date`; \
|
||||
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
|
||||
echo; \
|
||||
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
|
||||
|
||||
# Keep gmake from deleting intermediate files it creates.
|
||||
# This makes repeated builds faster and preserves debug info on OS X.
|
||||
|
||||
.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \
|
||||
obj/dbg/libre2.a obj/so/libre2.a \
|
||||
obj/test/% obj/so/test/% obj/dbg/test/%
|
||||
|
||||
.PHONY: log
|
||||
log:
|
||||
$(MAKE) clean
|
||||
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
|
||||
@ -340,6 +377,3 @@ log:
|
||||
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
|
||||
echo '#' $$(date) >>re2-search.txt
|
||||
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
|
||||
|
||||
x: x.cc obj/libre2.a
|
||||
g++ -I. -o x x.cc obj/libre2.a
|
||||
|
5
extern/re2/README
vendored
5
extern/re2/README
vendored
@ -27,12 +27,15 @@ under the BSD-style license found in the LICENSE file.
|
||||
|
||||
RE2's native language is C++.
|
||||
|
||||
The Python wrapper is at https://github.com/google/re2/tree/abseil/python
|
||||
and on PyPI (https://pypi.org/project/google-re2/).
|
||||
|
||||
A C wrapper is at https://github.com/marcomaggi/cre2/.
|
||||
An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
|
||||
An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
|
||||
A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
|
||||
An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
|
||||
A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
|
||||
A Python wrapper is at https://github.com/facebook/pyre2/ and on PyPI (pypi.org).
|
||||
An R wrapper is at https://github.com/qinwf/re2r/ and on CRAN (cran.r-project.org).
|
||||
A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
|
||||
A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
|
||||
|
9
extern/re2/WORKSPACE
vendored
9
extern/re2/WORKSPACE
vendored
@ -3,4 +3,13 @@
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Bazel (http://bazel.io/) WORKSPACE file for RE2.
|
||||
|
||||
workspace(name = "com_googlesource_code_re2")
|
||||
|
||||
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
||||
|
||||
http_archive(
|
||||
name = "rules_cc",
|
||||
strip_prefix = "rules_cc-master",
|
||||
urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
|
||||
)
|
||||
|
0
extern/re2/benchlog/benchplot.py
vendored
Normal file → Executable file
0
extern/re2/benchlog/benchplot.py
vendored
Normal file → Executable file
0
extern/re2/benchlog/mktable
vendored
Normal file → Executable file
0
extern/re2/benchlog/mktable
vendored
Normal file → Executable file
1
extern/re2/doc/mksyntaxgo
vendored
Normal file → Executable file
1
extern/re2/doc/mksyntaxgo
vendored
Normal file → Executable file
@ -33,6 +33,7 @@ Parts of the syntax can be disabled by passing alternate flags to Parse.
|
||||
|
||||
.
|
||||
$a
|
||||
Unicode character classes are those in unicode.Categories and unicode.Scripts.
|
||||
*/
|
||||
package syntax
|
||||
.
|
||||
|
0
extern/re2/doc/mksyntaxhtml
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxhtml
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxwiki
vendored
Normal file → Executable file
0
extern/re2/doc/mksyntaxwiki
vendored
Normal file → Executable file
273
extern/re2/doc/syntax.html
vendored
273
extern/re2/doc/syntax.html
vendored
@ -47,6 +47,10 @@
|
||||
<tr><td><code><font color=#808080>x{-n}</font></code></td><td>(≡ <code>x{n}?</code>) <font size=-2>VIM</font></td></tr>
|
||||
<tr><td><code><font color=#808080>x=</font></code></td><td>(≡ <code>x?</code>) <font size=-2>VIM</font></td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2>Implementation restriction: The counting forms <code>x{n,m}</code>, <code>x{n,}</code>, and <code>x{n}</code></td></tr>
|
||||
<tr><td colspan=2>reject forms that create a minimum or maximum repetition count above 1000.</td></tr>
|
||||
<tr><td colspan=2>Unlimited repetitions are not subject to this restriction.</td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2><b>Possessive repetitions:</b></td></tr>
|
||||
<tr><td><code><font color=#808080>x*+</font></code></td><td>zero or more <code>x</code>, possessive </td></tr>
|
||||
<tr><td><code><font color=#808080>x++</font></code></td><td>one or more <code>x</code>, possessive </td></tr>
|
||||
@ -56,10 +60,10 @@
|
||||
<tr><td><code><font color=#808080>x{n}+</font></code></td><td>exactly <code>n</code> <code>x</code>, possessive </td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2><b>Grouping:</b></td></tr>
|
||||
<tr><td><code>(re)</code></td><td>numbered capturing group</td></tr>
|
||||
<tr><td><code>(?P&lt;name&gt;re)</code></td><td>named &amp; numbered capturing group</td></tr>
|
||||
<tr><td><code><font color=#808080>(?&lt;name&gt;re)</font></code></td><td>named &amp; numbered capturing group </td></tr>
|
||||
<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named &amp; numbered capturing group </td></tr>
|
||||
<tr><td><code>(re)</code></td><td>numbered capturing group (submatch)</td></tr>
|
||||
<tr><td><code>(?P&lt;name&gt;re)</code></td><td>named &amp; numbered capturing group (submatch)</td></tr>
|
||||
<tr><td><code><font color=#808080>(?&lt;name&gt;re)</font></code></td><td>named &amp; numbered capturing group (submatch) </td></tr>
|
||||
<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named &amp; numbered capturing group (submatch) </td></tr>
|
||||
<tr><td><code>(?:re)</code></td><td>non-capturing group</td></tr>
|
||||
<tr><td><code>(?flags)</code></td><td>set flags within current group; non-capturing</td></tr>
|
||||
<tr><td><code>(?flags:re)</code></td><td>set flags during re; non-capturing</td></tr>
|
||||
@ -80,8 +84,8 @@
|
||||
<tr><td><code>^</code></td><td>at beginning of text or line (<code>m</code>=true)</td></tr>
|
||||
<tr><td><code>$</code></td><td>at end of text (like <code>\z</code> not <code>\Z</code>) or line (<code>m</code>=true)</td></tr>
|
||||
<tr><td><code>\A</code></td><td>at beginning of text</td></tr>
|
||||
<tr><td><code>\b</code></td><td>at word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
|
||||
<tr><td><code>\B</code></td><td>not a word boundary</td></tr>
|
||||
<tr><td><code>\b</code></td><td>at ASCII word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
|
||||
<tr><td><code>\B</code></td><td>not at ASCII word boundary</td></tr>
|
||||
<tr><td><code><font color=#808080>\G</font></code></td><td>at beginning of subtext being searched <font size=-2>PCRE</font></td></tr>
|
||||
<tr><td><code><font color=#808080>\G</font></code></td><td>at end of last match <font size=-2>PERL</font></td></tr>
|
||||
<tr><td><code><font color=#808080>\Z</font></code></td><td>at end of text, or before newline at end of text </td></tr>
|
||||
@ -166,7 +170,7 @@
|
||||
<tr><td><code>[\p{Name}]</code></td><td>named Unicode property inside character class (≡ <code>\p{Name}</code>)</td></tr>
|
||||
<tr><td><code>[^\p{Name}]</code></td><td>named Unicode property inside negated character class (≡ <code>\P{Name}</code>)</td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2><b>Perl character classes:</b></td></tr>
|
||||
<tr><td colspan=2><b>Perl character classes (all ASCII-only):</b></td></tr>
|
||||
<tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>
|
||||
<tr><td><code>\D</code></td><td>not digits (≡ <code>[^0-9]</code>)</td></tr>
|
||||
<tr><td><code>\s</code></td><td>whitespace (≡ <code>[\t\n\f\r ]</code>)</td></tr>
|
||||
@ -237,105 +241,162 @@
|
||||
<tr><td><code>Zs</code></td><td>space separator</td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2><b>Unicode character class names--scripts:</b></td></tr>
|
||||
<tr><td><code>Arabic</code></td><td>Arabic</td></tr>
|
||||
<tr><td><code>Armenian</code></td><td>Armenian</td></tr>
|
||||
<tr><td><code>Balinese</code></td><td>Balinese</td></tr>
|
||||
<tr><td><code>Bamum</code></td><td>Bamum</td></tr>
|
||||
<tr><td><code>Batak</code></td><td>Batak</td></tr>
|
||||
<tr><td><code>Bengali</code></td><td>Bengali</td></tr>
|
||||
<tr><td><code>Bopomofo</code></td><td>Bopomofo</td></tr>
|
||||
<tr><td><code>Brahmi</code></td><td>Brahmi</td></tr>
|
||||
<tr><td><code>Braille</code></td><td>Braille</td></tr>
|
||||
<tr><td><code>Buginese</code></td><td>Buginese</td></tr>
|
||||
<tr><td><code>Buhid</code></td><td>Buhid</td></tr>
|
||||
<tr><td><code>Canadian_Aboriginal</code></td><td>Canadian Aboriginal</td></tr>
|
||||
<tr><td><code>Carian</code></td><td>Carian</td></tr>
|
||||
<tr><td><code>Chakma</code></td><td>Chakma</td></tr>
|
||||
<tr><td><code>Cham</code></td><td>Cham</td></tr>
|
||||
<tr><td><code>Cherokee</code></td><td>Cherokee</td></tr>
|
||||
<tr><td><code>Common</code></td><td>characters not specific to one script</td></tr>
|
||||
<tr><td><code>Coptic</code></td><td>Coptic</td></tr>
|
||||
<tr><td><code>Cuneiform</code></td><td>Cuneiform</td></tr>
|
||||
<tr><td><code>Cypriot</code></td><td>Cypriot</td></tr>
|
||||
<tr><td><code>Cyrillic</code></td><td>Cyrillic</td></tr>
|
||||
<tr><td><code>Deseret</code></td><td>Deseret</td></tr>
|
||||
<tr><td><code>Devanagari</code></td><td>Devanagari</td></tr>
|
||||
<tr><td><code>Egyptian_Hieroglyphs</code></td><td>Egyptian Hieroglyphs</td></tr>
|
||||
<tr><td><code>Ethiopic</code></td><td>Ethiopic</td></tr>
|
||||
<tr><td><code>Georgian</code></td><td>Georgian</td></tr>
|
||||
<tr><td><code>Glagolitic</code></td><td>Glagolitic</td></tr>
|
||||
<tr><td><code>Gothic</code></td><td>Gothic</td></tr>
|
||||
<tr><td><code>Greek</code></td><td>Greek</td></tr>
|
||||
<tr><td><code>Gujarati</code></td><td>Gujarati</td></tr>
|
||||
<tr><td><code>Gurmukhi</code></td><td>Gurmukhi</td></tr>
|
||||
<tr><td><code>Han</code></td><td>Han</td></tr>
|
||||
<tr><td><code>Hangul</code></td><td>Hangul</td></tr>
|
||||
<tr><td><code>Hanunoo</code></td><td>Hanunoo</td></tr>
|
||||
<tr><td><code>Hebrew</code></td><td>Hebrew</td></tr>
|
||||
<tr><td><code>Hiragana</code></td><td>Hiragana</td></tr>
|
||||
<tr><td><code>Imperial_Aramaic</code></td><td>Imperial Aramaic</td></tr>
|
||||
<tr><td><code>Inherited</code></td><td>inherit script from previous character</td></tr>
|
||||
<tr><td><code>Inscriptional_Pahlavi</code></td><td>Inscriptional Pahlavi</td></tr>
|
||||
<tr><td><code>Inscriptional_Parthian</code></td><td>Inscriptional Parthian</td></tr>
|
||||
<tr><td><code>Javanese</code></td><td>Javanese</td></tr>
|
||||
<tr><td><code>Kaithi</code></td><td>Kaithi</td></tr>
|
||||
<tr><td><code>Kannada</code></td><td>Kannada</td></tr>
|
||||
<tr><td><code>Katakana</code></td><td>Katakana</td></tr>
|
||||
<tr><td><code>Kayah_Li</code></td><td>Kayah Li</td></tr>
|
||||
<tr><td><code>Kharoshthi</code></td><td>Kharoshthi</td></tr>
|
||||
<tr><td><code>Khmer</code></td><td>Khmer</td></tr>
|
||||
<tr><td><code>Lao</code></td><td>Lao</td></tr>
|
||||
<tr><td><code>Latin</code></td><td>Latin</td></tr>
|
||||
<tr><td><code>Lepcha</code></td><td>Lepcha</td></tr>
|
||||
<tr><td><code>Limbu</code></td><td>Limbu</td></tr>
|
||||
<tr><td><code>Linear_B</code></td><td>Linear B</td></tr>
|
||||
<tr><td><code>Lycian</code></td><td>Lycian</td></tr>
|
||||
<tr><td><code>Lydian</code></td><td>Lydian</td></tr>
|
||||
<tr><td><code>Malayalam</code></td><td>Malayalam</td></tr>
|
||||
<tr><td><code>Mandaic</code></td><td>Mandaic</td></tr>
|
||||
<tr><td><code>Meetei_Mayek</code></td><td>Meetei Mayek</td></tr>
|
||||
<tr><td><code>Meroitic_Cursive</code></td><td>Meroitic Cursive</td></tr>
|
||||
<tr><td><code>Meroitic_Hieroglyphs</code></td><td>Meroitic Hieroglyphs</td></tr>
|
||||
<tr><td><code>Miao</code></td><td>Miao</td></tr>
|
||||
<tr><td><code>Mongolian</code></td><td>Mongolian</td></tr>
|
||||
<tr><td><code>Myanmar</code></td><td>Myanmar</td></tr>
|
||||
<tr><td><code>New_Tai_Lue</code></td><td>New Tai Lue (aka Simplified Tai Lue)</td></tr>
|
||||
<tr><td><code>Nko</code></td><td>Nko</td></tr>
|
||||
<tr><td><code>Ogham</code></td><td>Ogham</td></tr>
|
||||
<tr><td><code>Ol_Chiki</code></td><td>Ol Chiki</td></tr>
|
||||
<tr><td><code>Old_Italic</code></td><td>Old Italic</td></tr>
|
||||
<tr><td><code>Old_Persian</code></td><td>Old Persian</td></tr>
|
||||
<tr><td><code>Old_South_Arabian</code></td><td>Old South Arabian</td></tr>
|
||||
<tr><td><code>Old_Turkic</code></td><td>Old Turkic</td></tr>
|
||||
<tr><td><code>Oriya</code></td><td>Oriya</td></tr>
|
||||
<tr><td><code>Osmanya</code></td><td>Osmanya</td></tr>
|
||||
<tr><td><code>Phags_Pa</code></td><td>'Phags Pa</td></tr>
|
||||
<tr><td><code>Phoenician</code></td><td>Phoenician</td></tr>
|
||||
<tr><td><code>Rejang</code></td><td>Rejang</td></tr>
|
||||
<tr><td><code>Runic</code></td><td>Runic</td></tr>
|
||||
<tr><td><code>Saurashtra</code></td><td>Saurashtra</td></tr>
|
||||
<tr><td><code>Sharada</code></td><td>Sharada</td></tr>
|
||||
<tr><td><code>Shavian</code></td><td>Shavian</td></tr>
|
||||
<tr><td><code>Sinhala</code></td><td>Sinhala</td></tr>
|
||||
<tr><td><code>Sora_Sompeng</code></td><td>Sora Sompeng</td></tr>
|
||||
<tr><td><code>Sundanese</code></td><td>Sundanese</td></tr>
|
||||
<tr><td><code>Syloti_Nagri</code></td><td>Syloti Nagri</td></tr>
|
||||
<tr><td><code>Syriac</code></td><td>Syriac</td></tr>
|
||||
<tr><td><code>Tagalog</code></td><td>Tagalog</td></tr>
|
||||
<tr><td><code>Tagbanwa</code></td><td>Tagbanwa</td></tr>
|
||||
<tr><td><code>Tai_Le</code></td><td>Tai Le</td></tr>
|
||||
<tr><td><code>Tai_Tham</code></td><td>Tai Tham</td></tr>
|
||||
<tr><td><code>Tai_Viet</code></td><td>Tai Viet</td></tr>
|
||||
<tr><td><code>Takri</code></td><td>Takri</td></tr>
|
||||
<tr><td><code>Tamil</code></td><td>Tamil</td></tr>
|
||||
<tr><td><code>Telugu</code></td><td>Telugu</td></tr>
|
||||
<tr><td><code>Thaana</code></td><td>Thaana</td></tr>
|
||||
<tr><td><code>Thai</code></td><td>Thai</td></tr>
|
||||
<tr><td><code>Tibetan</code></td><td>Tibetan</td></tr>
|
||||
<tr><td><code>Tifinagh</code></td><td>Tifinagh</td></tr>
|
||||
<tr><td><code>Ugaritic</code></td><td>Ugaritic</td></tr>
|
||||
<tr><td><code>Vai</code></td><td>Vai</td></tr>
|
||||
<tr><td><code>Yi</code></td><td>Yi</td></tr>
|
||||
<tr><td colspan=2>Adlam</td></tr>
|
||||
<tr><td colspan=2>Ahom</td></tr>
|
||||
<tr><td colspan=2>Anatolian_Hieroglyphs</td></tr>
|
||||
<tr><td colspan=2>Arabic</td></tr>
|
||||
<tr><td colspan=2>Armenian</td></tr>
|
||||
<tr><td colspan=2>Avestan</td></tr>
|
||||
<tr><td colspan=2>Balinese</td></tr>
|
||||
<tr><td colspan=2>Bamum</td></tr>
|
||||
<tr><td colspan=2>Bassa_Vah</td></tr>
|
||||
<tr><td colspan=2>Batak</td></tr>
|
||||
<tr><td colspan=2>Bengali</td></tr>
|
||||
<tr><td colspan=2>Bhaiksuki</td></tr>
|
||||
<tr><td colspan=2>Bopomofo</td></tr>
|
||||
<tr><td colspan=2>Brahmi</td></tr>
|
||||
<tr><td colspan=2>Braille</td></tr>
|
||||
<tr><td colspan=2>Buginese</td></tr>
|
||||
<tr><td colspan=2>Buhid</td></tr>
|
||||
<tr><td colspan=2>Canadian_Aboriginal</td></tr>
|
||||
<tr><td colspan=2>Carian</td></tr>
|
||||
<tr><td colspan=2>Caucasian_Albanian</td></tr>
|
||||
<tr><td colspan=2>Chakma</td></tr>
|
||||
<tr><td colspan=2>Cham</td></tr>
|
||||
<tr><td colspan=2>Cherokee</td></tr>
|
||||
<tr><td colspan=2>Chorasmian</td></tr>
|
||||
<tr><td colspan=2>Common</td></tr>
|
||||
<tr><td colspan=2>Coptic</td></tr>
|
||||
<tr><td colspan=2>Cuneiform</td></tr>
|
||||
<tr><td colspan=2>Cypriot</td></tr>
|
||||
<tr><td colspan=2>Cyrillic</td></tr>
|
||||
<tr><td colspan=2>Deseret</td></tr>
|
||||
<tr><td colspan=2>Devanagari</td></tr>
|
||||
<tr><td colspan=2>Dives_Akuru</td></tr>
|
||||
<tr><td colspan=2>Dogra</td></tr>
|
||||
<tr><td colspan=2>Duployan</td></tr>
|
||||
<tr><td colspan=2>Egyptian_Hieroglyphs</td></tr>
|
||||
<tr><td colspan=2>Elbasan</td></tr>
|
||||
<tr><td colspan=2>Elymaic</td></tr>
|
||||
<tr><td colspan=2>Ethiopic</td></tr>
|
||||
<tr><td colspan=2>Georgian</td></tr>
|
||||
<tr><td colspan=2>Glagolitic</td></tr>
|
||||
<tr><td colspan=2>Gothic</td></tr>
|
||||
<tr><td colspan=2>Grantha</td></tr>
|
||||
<tr><td colspan=2>Greek</td></tr>
|
||||
<tr><td colspan=2>Gujarati</td></tr>
|
||||
<tr><td colspan=2>Gunjala_Gondi</td></tr>
|
||||
<tr><td colspan=2>Gurmukhi</td></tr>
|
||||
<tr><td colspan=2>Han</td></tr>
|
||||
<tr><td colspan=2>Hangul</td></tr>
|
||||
<tr><td colspan=2>Hanifi_Rohingya</td></tr>
|
||||
<tr><td colspan=2>Hanunoo</td></tr>
|
||||
<tr><td colspan=2>Hatran</td></tr>
|
||||
<tr><td colspan=2>Hebrew</td></tr>
|
||||
<tr><td colspan=2>Hiragana</td></tr>
|
||||
<tr><td colspan=2>Imperial_Aramaic</td></tr>
|
||||
<tr><td colspan=2>Inherited</td></tr>
|
||||
<tr><td colspan=2>Inscriptional_Pahlavi</td></tr>
|
||||
<tr><td colspan=2>Inscriptional_Parthian</td></tr>
|
||||
<tr><td colspan=2>Javanese</td></tr>
|
||||
<tr><td colspan=2>Kaithi</td></tr>
|
||||
<tr><td colspan=2>Kannada</td></tr>
|
||||
<tr><td colspan=2>Katakana</td></tr>
|
||||
<tr><td colspan=2>Kayah_Li</td></tr>
|
||||
<tr><td colspan=2>Kharoshthi</td></tr>
|
||||
<tr><td colspan=2>Khitan_Small_Script</td></tr>
|
||||
<tr><td colspan=2>Khmer</td></tr>
|
||||
<tr><td colspan=2>Khojki</td></tr>
|
||||
<tr><td colspan=2>Khudawadi</td></tr>
|
||||
<tr><td colspan=2>Lao</td></tr>
|
||||
<tr><td colspan=2>Latin</td></tr>
|
||||
<tr><td colspan=2>Lepcha</td></tr>
|
||||
<tr><td colspan=2>Limbu</td></tr>
|
||||
<tr><td colspan=2>Linear_A</td></tr>
|
||||
<tr><td colspan=2>Linear_B</td></tr>
|
||||
<tr><td colspan=2>Lisu</td></tr>
|
||||
<tr><td colspan=2>Lycian</td></tr>
|
||||
<tr><td colspan=2>Lydian</td></tr>
|
||||
<tr><td colspan=2>Mahajani</td></tr>
|
||||
<tr><td colspan=2>Makasar</td></tr>
|
||||
<tr><td colspan=2>Malayalam</td></tr>
|
||||
<tr><td colspan=2>Mandaic</td></tr>
|
||||
<tr><td colspan=2>Manichaean</td></tr>
|
||||
<tr><td colspan=2>Marchen</td></tr>
|
||||
<tr><td colspan=2>Masaram_Gondi</td></tr>
|
||||
<tr><td colspan=2>Medefaidrin</td></tr>
|
||||
<tr><td colspan=2>Meetei_Mayek</td></tr>
|
||||
<tr><td colspan=2>Mende_Kikakui</td></tr>
|
||||
<tr><td colspan=2>Meroitic_Cursive</td></tr>
|
||||
<tr><td colspan=2>Meroitic_Hieroglyphs</td></tr>
|
||||
<tr><td colspan=2>Miao</td></tr>
|
||||
<tr><td colspan=2>Modi</td></tr>
|
||||
<tr><td colspan=2>Mongolian</td></tr>
|
||||
<tr><td colspan=2>Mro</td></tr>
|
||||
<tr><td colspan=2>Multani</td></tr>
|
||||
<tr><td colspan=2>Myanmar</td></tr>
|
||||
<tr><td colspan=2>Nabataean</td></tr>
|
||||
<tr><td colspan=2>Nandinagari</td></tr>
|
||||
<tr><td colspan=2>New_Tai_Lue</td></tr>
|
||||
<tr><td colspan=2>Newa</td></tr>
|
||||
<tr><td colspan=2>Nko</td></tr>
|
||||
<tr><td colspan=2>Nushu</td></tr>
|
||||
<tr><td colspan=2>Nyiakeng_Puachue_Hmong</td></tr>
|
||||
<tr><td colspan=2>Ogham</td></tr>
|
||||
<tr><td colspan=2>Ol_Chiki</td></tr>
|
||||
<tr><td colspan=2>Old_Hungarian</td></tr>
|
||||
<tr><td colspan=2>Old_Italic</td></tr>
|
||||
<tr><td colspan=2>Old_North_Arabian</td></tr>
|
||||
<tr><td colspan=2>Old_Permic</td></tr>
|
||||
<tr><td colspan=2>Old_Persian</td></tr>
|
||||
<tr><td colspan=2>Old_Sogdian</td></tr>
|
||||
<tr><td colspan=2>Old_South_Arabian</td></tr>
|
||||
<tr><td colspan=2>Old_Turkic</td></tr>
|
||||
<tr><td colspan=2>Oriya</td></tr>
|
||||
<tr><td colspan=2>Osage</td></tr>
|
||||
<tr><td colspan=2>Osmanya</td></tr>
|
||||
<tr><td colspan=2>Pahawh_Hmong</td></tr>
|
||||
<tr><td colspan=2>Palmyrene</td></tr>
|
||||
<tr><td colspan=2>Pau_Cin_Hau</td></tr>
|
||||
<tr><td colspan=2>Phags_Pa</td></tr>
|
||||
<tr><td colspan=2>Phoenician</td></tr>
|
||||
<tr><td colspan=2>Psalter_Pahlavi</td></tr>
|
||||
<tr><td colspan=2>Rejang</td></tr>
|
||||
<tr><td colspan=2>Runic</td></tr>
|
||||
<tr><td colspan=2>Samaritan</td></tr>
|
||||
<tr><td colspan=2>Saurashtra</td></tr>
|
||||
<tr><td colspan=2>Sharada</td></tr>
|
||||
<tr><td colspan=2>Shavian</td></tr>
|
||||
<tr><td colspan=2>Siddham</td></tr>
|
||||
<tr><td colspan=2>SignWriting</td></tr>
|
||||
<tr><td colspan=2>Sinhala</td></tr>
|
||||
<tr><td colspan=2>Sogdian</td></tr>
|
||||
<tr><td colspan=2>Sora_Sompeng</td></tr>
|
||||
<tr><td colspan=2>Soyombo</td></tr>
|
||||
<tr><td colspan=2>Sundanese</td></tr>
|
||||
<tr><td colspan=2>Syloti_Nagri</td></tr>
|
||||
<tr><td colspan=2>Syriac</td></tr>
|
||||
<tr><td colspan=2>Tagalog</td></tr>
|
||||
<tr><td colspan=2>Tagbanwa</td></tr>
|
||||
<tr><td colspan=2>Tai_Le</td></tr>
|
||||
<tr><td colspan=2>Tai_Tham</td></tr>
|
||||
<tr><td colspan=2>Tai_Viet</td></tr>
|
||||
<tr><td colspan=2>Takri</td></tr>
|
||||
<tr><td colspan=2>Tamil</td></tr>
|
||||
<tr><td colspan=2>Tangut</td></tr>
|
||||
<tr><td colspan=2>Telugu</td></tr>
|
||||
<tr><td colspan=2>Thaana</td></tr>
|
||||
<tr><td colspan=2>Thai</td></tr>
|
||||
<tr><td colspan=2>Tibetan</td></tr>
|
||||
<tr><td colspan=2>Tifinagh</td></tr>
|
||||
<tr><td colspan=2>Tirhuta</td></tr>
|
||||
<tr><td colspan=2>Ugaritic</td></tr>
|
||||
<tr><td colspan=2>Vai</td></tr>
|
||||
<tr><td colspan=2>Wancho</td></tr>
|
||||
<tr><td colspan=2>Warang_Citi</td></tr>
|
||||
<tr><td colspan=2>Yezidi</td></tr>
|
||||
<tr><td colspan=2>Yi</td></tr>
|
||||
<tr><td colspan=2>Zanabazar_Square</td></tr>
|
||||
<tr><td></td></tr>
|
||||
<tr><td colspan=2><b>Vim character classes:</b></td></tr>
|
||||
<tr><td><code><font color=#808080>\i</font></code></td><td>identifier character <font size=-2>VIM</font></td></tr>
|
||||
|
4
extern/re2/doc/syntax.txt
vendored
4
extern/re2/doc/syntax.txt
vendored
@ -253,6 +253,7 @@ Caucasian_Albanian
|
||||
Chakma
|
||||
Cham
|
||||
Cherokee
|
||||
Chorasmian
|
||||
Common
|
||||
Coptic
|
||||
Cuneiform
|
||||
@ -260,6 +261,7 @@ Cypriot
|
||||
Cyrillic
|
||||
Deseret
|
||||
Devanagari
|
||||
Dives_Akuru
|
||||
Dogra
|
||||
Duployan
|
||||
Egyptian_Hieroglyphs
|
||||
@ -291,6 +293,7 @@ Kannada
|
||||
Katakana
|
||||
Kayah_Li
|
||||
Kharoshthi
|
||||
Khitan_Small_Script
|
||||
Khmer
|
||||
Khojki
|
||||
Khudawadi
|
||||
@ -380,6 +383,7 @@ Ugaritic
|
||||
Vai
|
||||
Wancho
|
||||
Warang_Citi
|
||||
Yezidi
|
||||
Yi
|
||||
Zanabazar_Square
|
||||
|
||||
|
25
extern/re2/kokoro/cmake.sh
vendored
25
extern/re2/kokoro/cmake.sh
vendored
@ -1,25 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
cd git/re2
|
||||
|
||||
case "${KOKORO_JOB_NAME}" in
|
||||
*/windows-*)
|
||||
CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64')
|
||||
;;
|
||||
*)
|
||||
CMAKE_G_A_FLAGS=()
|
||||
# Work around a bug in older versions of bash. :/
|
||||
set +u
|
||||
;;
|
||||
esac
|
||||
|
||||
cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" .
|
||||
cmake --build . --config Debug --clean-first
|
||||
ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
|
||||
|
||||
cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" .
|
||||
cmake --build . --config Release --clean-first
|
||||
ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
|
||||
|
||||
exit 0
|
1
extern/re2/kokoro/macos-bazel.cfg
vendored
1
extern/re2/kokoro/macos-bazel.cfg
vendored
@ -1 +0,0 @@
|
||||
build_file: "re2/kokoro/macos-bazel.sh"
|
4
extern/re2/kokoro/macos-bazel.sh
vendored
4
extern/re2/kokoro/macos-bazel.sh
vendored
@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
bash git/re2/kokoro/bazel.sh
|
||||
exit $?
|
1
extern/re2/kokoro/macos-cmake.cfg
vendored
1
extern/re2/kokoro/macos-cmake.cfg
vendored
@ -1 +0,0 @@
|
||||
build_file: "re2/kokoro/macos-cmake.sh"
|
4
extern/re2/kokoro/macos-cmake.sh
vendored
4
extern/re2/kokoro/macos-cmake.sh
vendored
@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
bash git/re2/kokoro/cmake.sh
|
||||
exit $?
|
1
extern/re2/kokoro/ubuntu-bazel.cfg
vendored
1
extern/re2/kokoro/ubuntu-bazel.cfg
vendored
@ -1 +0,0 @@
|
||||
build_file: "re2/kokoro/ubuntu-bazel.sh"
|
4
extern/re2/kokoro/ubuntu-bazel.sh
vendored
4
extern/re2/kokoro/ubuntu-bazel.sh
vendored
@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
bash git/re2/kokoro/bazel.sh
|
||||
exit $?
|
2
extern/re2/kokoro/windows-bazel.bat
vendored
2
extern/re2/kokoro/windows-bazel.bat
vendored
@ -1,2 +0,0 @@
|
||||
bash git/re2/kokoro/bazel.sh
|
||||
EXIT /B %ERRORLEVEL%
|
1
extern/re2/kokoro/windows-bazel.cfg
vendored
1
extern/re2/kokoro/windows-bazel.cfg
vendored
@ -1 +0,0 @@
|
||||
build_file: "re2/kokoro/windows-bazel.bat"
|
2
extern/re2/kokoro/windows-cmake.bat
vendored
2
extern/re2/kokoro/windows-cmake.bat
vendored
@ -1,2 +0,0 @@
|
||||
bash git/re2/kokoro/cmake.sh
|
||||
EXIT /B %ERRORLEVEL%
|
1
extern/re2/kokoro/windows-cmake.cfg
vendored
1
extern/re2/kokoro/windows-cmake.cfg
vendored
@ -1 +0,0 @@
|
||||
build_file: "re2/kokoro/windows-cmake.bat"
|
0
extern/re2/lib/git/commit-msg.hook
vendored
Normal file → Executable file
0
extern/re2/lib/git/commit-msg.hook
vendored
Normal file → Executable file
3
extern/re2/libre2.symbols
vendored
3
extern/re2/libre2.symbols
vendored
@ -11,6 +11,9 @@
|
||||
# re2::FilteredRE2*
|
||||
_ZN3re211FilteredRE2*;
|
||||
_ZNK3re211FilteredRE2*;
|
||||
# re2::re2_internal*
|
||||
_ZN3re212re2_internal*;
|
||||
_ZNK3re212re2_internal*;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
3
extern/re2/libre2.symbols.darwin
vendored
3
extern/re2/libre2.symbols.darwin
vendored
@ -10,3 +10,6 @@ __ZN3re2ls*
|
||||
# re2::FilteredRE2*
|
||||
__ZN3re211FilteredRE2*
|
||||
__ZNK3re211FilteredRE2*
|
||||
# re2::re2_internal*
|
||||
__ZN3re212re2_internal*
|
||||
__ZNK3re212re2_internal*
|
||||
|
2
extern/re2/re2.pc
vendored
2
extern/re2/re2.pc
vendored
@ -1,5 +1,3 @@
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
includedir=@includedir@
|
||||
libdir=@libdir@
|
||||
|
||||
|
7
extern/re2/re2/bitmap256.h
vendored
7
extern/re2/re2/bitmap256.h
vendored
@ -32,7 +32,7 @@ class Bitmap256 {
|
||||
DCHECK_GE(c, 0);
|
||||
DCHECK_LE(c, 255);
|
||||
|
||||
return (words_[c / 64] & (1ULL << (c % 64))) != 0;
|
||||
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
|
||||
}
|
||||
|
||||
// Sets the bit with index c.
|
||||
@ -40,7 +40,7 @@ class Bitmap256 {
|
||||
DCHECK_GE(c, 0);
|
||||
DCHECK_LE(c, 255);
|
||||
|
||||
words_[c / 64] |= (1ULL << (c % 64));
|
||||
words_[c / 64] |= (uint64_t{1} << (c % 64));
|
||||
}
|
||||
|
||||
// Finds the next non-zero bit with index >= c.
|
||||
@ -51,7 +51,6 @@ class Bitmap256 {
|
||||
// Finds the least significant non-zero bit in n.
|
||||
static int FindLSBSet(uint64_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
|
||||
#if defined(__GNUC__)
|
||||
return __builtin_ctzll(n);
|
||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||
@ -89,7 +88,7 @@ int Bitmap256::FindNextSetBit(int c) const {
|
||||
|
||||
// Check the word that contains the bit. Mask out any lower bits.
|
||||
int i = c / 64;
|
||||
uint64_t word = words_[i] & (~0ULL << (c % 64));
|
||||
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
|
||||
if (word != 0)
|
||||
return (i * 64) + FindLSBSet(word);
|
||||
|
||||
|
47
extern/re2/re2/bitstate.cc
vendored
47
extern/re2/re2/bitstate.cc
vendored
@ -7,7 +7,7 @@
|
||||
// Prog::SearchBitState is a regular expression search with submatch
|
||||
// tracking for small regular expressions and texts. Similarly to
|
||||
// testing/backtrack.cc, it allocates a bitmap with (count of
|
||||
// lists) * (length of prog) bits to make sure it never explores the
|
||||
// lists) * (length of text) bits to make sure it never explores the
|
||||
// same (instruction list, character position) multiple times. This
|
||||
// limits the search to run in time linear in the length of the text.
|
||||
//
|
||||
@ -24,7 +24,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
@ -63,11 +63,14 @@ class BitState {
|
||||
int nsubmatch_; // # of submatches to fill in
|
||||
|
||||
// Search state
|
||||
static const int VisitedBits = 32;
|
||||
PODArray<uint32_t> visited_; // bitmap: (list ID, char*) pairs visited
|
||||
static constexpr int kVisitedBits = 64;
|
||||
PODArray<uint64_t> visited_; // bitmap: (list ID, char*) pairs visited
|
||||
PODArray<const char*> cap_; // capture registers
|
||||
PODArray<Job> job_; // stack of text positions to explore
|
||||
int njob_; // stack size
|
||||
|
||||
BitState(const BitState&) = delete;
|
||||
BitState& operator=(const BitState&) = delete;
|
||||
};
|
||||
|
||||
BitState::BitState(Prog* prog)
|
||||
@ -86,10 +89,10 @@ BitState::BitState(Prog* prog)
|
||||
// we don't repeat the visit.
|
||||
bool BitState::ShouldVisit(int id, const char* p) {
|
||||
int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
|
||||
static_cast<int>(p-text_.begin());
|
||||
if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
|
||||
static_cast<int>(p-text_.data());
|
||||
if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1))))
|
||||
return false;
|
||||
visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
|
||||
visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -134,7 +137,7 @@ void BitState::Push(int id, const char* p) {
|
||||
// Return whether it succeeded.
|
||||
bool BitState::TrySearch(int id0, const char* p0) {
|
||||
bool matched = false;
|
||||
const char* end = text_.end();
|
||||
const char* end = text_.data() + text_.size();
|
||||
njob_ = 0;
|
||||
// Push() no longer checks ShouldVisit(),
|
||||
// so we must perform the check ourselves.
|
||||
@ -251,7 +254,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
|
||||
matched = true;
|
||||
cap_[1] = p;
|
||||
if (submatch_[0].data() == NULL ||
|
||||
(longest_ && p > submatch_[0].end())) {
|
||||
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
|
||||
for (int i = 0; i < nsubmatch_; i++)
|
||||
submatch_[i] =
|
||||
StringPiece(cap_[2 * i],
|
||||
@ -288,7 +291,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
||||
// Search parameters.
|
||||
text_ = text;
|
||||
context_ = context;
|
||||
if (context_.begin() == NULL)
|
||||
if (context_.data() == NULL)
|
||||
context_ = text;
|
||||
if (prog_->anchor_start() && context_.begin() != text.begin())
|
||||
return false;
|
||||
@ -304,8 +307,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
||||
|
||||
// Allocate scratch space.
|
||||
int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
|
||||
nvisited = (nvisited + VisitedBits-1) / VisitedBits;
|
||||
visited_ = PODArray<uint32_t>(nvisited);
|
||||
nvisited = (nvisited + kVisitedBits-1) / kVisitedBits;
|
||||
visited_ = PODArray<uint64_t>(nvisited);
|
||||
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
|
||||
|
||||
int ncap = 2*nsubmatch;
|
||||
@ -319,8 +322,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
||||
|
||||
// Anchored search must start at text.begin().
|
||||
if (anchored_) {
|
||||
cap_[0] = text.begin();
|
||||
return TrySearch(prog_->start(), text.begin());
|
||||
cap_[0] = text.data();
|
||||
return TrySearch(prog_->start(), text.data());
|
||||
}
|
||||
|
||||
// Unanchored search, starting from each possible text position.
|
||||
@ -329,18 +332,22 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
|
||||
// This looks like it's quadratic in the size of the text,
|
||||
// but we are not clearing visited_ between calls to TrySearch,
|
||||
// so no work is duplicated and it ends up still being linear.
|
||||
for (const char* p = text.begin(); p <= text.end(); p++) {
|
||||
// Try to use memchr to find the first byte quickly.
|
||||
int fb = prog_->first_byte();
|
||||
if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
|
||||
p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
|
||||
const char* etext = text.data() + text.size();
|
||||
for (const char* p = text.data(); p <= etext; p++) {
|
||||
// Try to use prefix accel (e.g. memchr) to skip ahead.
|
||||
if (p < etext && prog_->can_prefix_accel()) {
|
||||
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
|
||||
if (p == NULL)
|
||||
p = text.end();
|
||||
p = etext;
|
||||
}
|
||||
|
||||
cap_[0] = p;
|
||||
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
|
||||
return true;
|
||||
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||
// by simply not continuing the loop.
|
||||
if (p == NULL)
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
238
extern/re2/re2/compile.cc
vendored
238
extern/re2/re2/compile.cc
vendored
@ -14,8 +14,8 @@
|
||||
#include <utility>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
@ -30,91 +30,57 @@ namespace re2 {
|
||||
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
|
||||
//
|
||||
// Because the out and out1 fields in Inst are no longer pointers,
|
||||
// we can't use pointers directly here either. Instead, p refers
|
||||
// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
|
||||
// p == 0 represents the NULL list. This is okay because instruction #0
|
||||
// we can't use pointers directly here either. Instead, head refers
|
||||
// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
|
||||
// head == 0 represents the NULL list. This is okay because instruction #0
|
||||
// is always the fail instruction, which never appears on a list.
|
||||
|
||||
struct PatchList {
|
||||
uint32_t p;
|
||||
|
||||
// Returns patch list containing just p.
|
||||
static PatchList Mk(uint32_t p);
|
||||
static PatchList Mk(uint32_t p) {
|
||||
return {p, p};
|
||||
}
|
||||
|
||||
// Patches all the entries on l to have value v.
|
||||
// Patches all the entries on l to have value p.
|
||||
// Caller must not ever use patch list again.
|
||||
static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
|
||||
|
||||
// Deref returns the next pointer pointed at by p.
|
||||
static PatchList Deref(Prog::Inst *inst0, PatchList l);
|
||||
static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
|
||||
while (l.head != 0) {
|
||||
Prog::Inst* ip = &inst0[l.head>>1];
|
||||
if (l.head&1) {
|
||||
l.head = ip->out1();
|
||||
ip->out1_ = p;
|
||||
} else {
|
||||
l.head = ip->out();
|
||||
ip->set_out(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Appends two patch lists and returns result.
|
||||
static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
|
||||
static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
|
||||
if (l1.head == 0)
|
||||
return l2;
|
||||
if (l2.head == 0)
|
||||
return l1;
|
||||
Prog::Inst* ip = &inst0[l1.tail>>1];
|
||||
if (l1.tail&1)
|
||||
ip->out1_ = l2.head;
|
||||
else
|
||||
ip->set_out(l2.head);
|
||||
return {l1.head, l2.tail};
|
||||
}
|
||||
|
||||
uint32_t head;
|
||||
uint32_t tail; // for constant-time append
|
||||
};
|
||||
|
||||
static PatchList nullPatchList = { 0 };
|
||||
|
||||
// Returns patch list containing just p.
|
||||
PatchList PatchList::Mk(uint32_t p) {
|
||||
PatchList l;
|
||||
l.p = p;
|
||||
return l;
|
||||
}
|
||||
|
||||
// Returns the next pointer pointed at by l.
|
||||
PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
|
||||
Prog::Inst* ip = &inst0[l.p>>1];
|
||||
if (l.p&1)
|
||||
l.p = ip->out1();
|
||||
else
|
||||
l.p = ip->out();
|
||||
return l;
|
||||
}
|
||||
|
||||
// Patches all the entries on l to have value v.
|
||||
void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) {
|
||||
while (l.p != 0) {
|
||||
Prog::Inst* ip = &inst0[l.p>>1];
|
||||
if (l.p&1) {
|
||||
l.p = ip->out1();
|
||||
ip->out1_ = val;
|
||||
} else {
|
||||
l.p = ip->out();
|
||||
ip->set_out(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Appends two patch lists and returns result.
|
||||
PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
|
||||
if (l1.p == 0)
|
||||
return l2;
|
||||
if (l2.p == 0)
|
||||
return l1;
|
||||
|
||||
PatchList l = l1;
|
||||
for (;;) {
|
||||
PatchList next = PatchList::Deref(inst0, l);
|
||||
if (next.p == 0)
|
||||
break;
|
||||
l = next;
|
||||
}
|
||||
|
||||
Prog::Inst* ip = &inst0[l.p>>1];
|
||||
if (l.p&1)
|
||||
ip->out1_ = l2.p;
|
||||
else
|
||||
ip->set_out(l2.p);
|
||||
|
||||
return l1;
|
||||
}
|
||||
static const PatchList kNullPatchList = {0, 0};
|
||||
|
||||
// Compiled program fragment.
|
||||
struct Frag {
|
||||
uint32_t begin;
|
||||
PatchList end;
|
||||
|
||||
Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
|
||||
Frag() : begin(0) { end.head = 0; } // needed so Frag can go in vector
|
||||
Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
|
||||
};
|
||||
|
||||
@ -212,8 +178,8 @@ class Compiler : public Regexp::Walker<Frag> {
|
||||
int AddSuffixRecursive(int root, int id);
|
||||
|
||||
// Finds the trie node for the given suffix. Returns a Frag in order to
|
||||
// distinguish between pointing at the root node directly (end.p == 0)
|
||||
// and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
|
||||
// distinguish between pointing at the root node directly (end.head == 0)
|
||||
// and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
|
||||
Frag FindByteRange(int root, int id);
|
||||
|
||||
// Compares two ByteRanges and returns true iff they are equal.
|
||||
@ -225,8 +191,8 @@ class Compiler : public Regexp::Walker<Frag> {
|
||||
// Single rune.
|
||||
Frag Literal(Rune r, bool foldcase);
|
||||
|
||||
void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor);
|
||||
Prog* Finish();
|
||||
void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
|
||||
Prog* Finish(Regexp* re);
|
||||
|
||||
// Returns .* where dot = any byte
|
||||
Frag DotStar();
|
||||
@ -298,7 +264,7 @@ int Compiler::AllocInst(int n) {
|
||||
|
||||
// Returns an unmatchable fragment.
|
||||
Frag Compiler::NoMatch() {
|
||||
return Frag(0, nullPatchList);
|
||||
return Frag(0, kNullPatchList);
|
||||
}
|
||||
|
||||
// Is a an unmatchable fragment?
|
||||
@ -314,7 +280,7 @@ Frag Compiler::Cat(Frag a, Frag b) {
|
||||
// Elide no-op.
|
||||
Prog::Inst* begin = &inst_[a.begin];
|
||||
if (begin->opcode() == kInstNop &&
|
||||
a.end.p == (a.begin << 1) &&
|
||||
a.end.head == (a.begin << 1) &&
|
||||
begin->out() == 0) {
|
||||
// in case refs to a somewhere
|
||||
PatchList::Patch(inst_.data(), a.end, b.begin);
|
||||
@ -419,7 +385,7 @@ Frag Compiler::Match(int32_t match_id) {
|
||||
if (id < 0)
|
||||
return NoMatch();
|
||||
inst_[id].InitMatch(match_id);
|
||||
return Frag(id, nullPatchList);
|
||||
return Frag(id, kNullPatchList);
|
||||
}
|
||||
|
||||
// Returns a fragment matching a particular empty-width op (like ^ or $)
|
||||
@ -467,7 +433,7 @@ static int MaxRune(int len) {
|
||||
void Compiler::BeginRange() {
|
||||
rune_cache_.clear();
|
||||
rune_range_.begin = 0;
|
||||
rune_range_.end = nullPatchList;
|
||||
rune_range_.end = kNullPatchList;
|
||||
}
|
||||
|
||||
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
|
||||
@ -548,9 +514,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
|
||||
}
|
||||
|
||||
int br;
|
||||
if (f.end.p == 0)
|
||||
if (f.end.head == 0)
|
||||
br = root;
|
||||
else if (f.end.p&1)
|
||||
else if (f.end.head&1)
|
||||
br = inst_[f.begin].out1();
|
||||
else
|
||||
br = inst_[f.begin].out();
|
||||
@ -566,9 +532,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
|
||||
// Ensure that the parent points to the clone, not to the original.
|
||||
// Note that this could leave the head unreachable except via the cache.
|
||||
br = byterange;
|
||||
if (f.end.p == 0)
|
||||
if (f.end.head == 0)
|
||||
root = br;
|
||||
else if (f.end.p&1)
|
||||
else if (f.end.head&1)
|
||||
inst_[f.begin].out1_ = br;
|
||||
else
|
||||
inst_[f.begin].set_out(br);
|
||||
@ -601,7 +567,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
|
||||
Frag Compiler::FindByteRange(int root, int id) {
|
||||
if (inst_[root].opcode() == kInstByteRange) {
|
||||
if (ByteRangeEqual(root, id))
|
||||
return Frag(root, nullPatchList);
|
||||
return Frag(root, kNullPatchList);
|
||||
else
|
||||
return NoMatch();
|
||||
}
|
||||
@ -662,48 +628,43 @@ void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
|
||||
static_cast<uint8_t>(hi), foldcase, 0));
|
||||
}
|
||||
|
||||
// Table describing how to make a UTF-8 matching machine
|
||||
// for the rune range 80-10FFFF (Runeself-Runemax).
|
||||
// This range happens frequently enough (for example /./ and /[^a-z]/)
|
||||
// and the rune_cache_ map is slow enough that this is worth
|
||||
// special handling. Makes compilation of a small expression
|
||||
// with a dot in it about 10% faster.
|
||||
// The * in the comments below mark whole sequences.
|
||||
static struct ByteRangeProg {
|
||||
int next;
|
||||
int lo;
|
||||
int hi;
|
||||
} prog_80_10ffff[] = {
|
||||
// Two-byte
|
||||
{ -1, 0x80, 0xBF, }, // 0: 80-BF
|
||||
{ 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF*
|
||||
|
||||
// Three-byte
|
||||
{ 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF
|
||||
{ 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF*
|
||||
{ 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF
|
||||
{ 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF*
|
||||
|
||||
// Four-byte
|
||||
{ 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF
|
||||
{ 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF*
|
||||
{ 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF
|
||||
{ 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF*
|
||||
{ 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF
|
||||
{ 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF*
|
||||
};
|
||||
|
||||
void Compiler::Add_80_10ffff() {
|
||||
int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning
|
||||
for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) {
|
||||
const ByteRangeProg& p = prog_80_10ffff[i];
|
||||
int next = 0;
|
||||
if (p.next >= 0)
|
||||
next = inst[p.next];
|
||||
inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo),
|
||||
static_cast<uint8_t>(p.hi), false, next);
|
||||
if ((p.lo & 0xC0) != 0x80)
|
||||
AddSuffix(inst[i]);
|
||||
// The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
|
||||
// (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
|
||||
// permitting overlong encodings in E0 and F0 sequences and code points
|
||||
// over 10FFFF in F4 sequences, the size of the bytecode and the number
|
||||
// of equivalence classes are reduced significantly.
|
||||
int id;
|
||||
if (reversed_) {
|
||||
// Prefix factoring matters, but we don't have to handle it here
|
||||
// because the rune range trie logic takes care of that already.
|
||||
id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
AddSuffix(id);
|
||||
|
||||
id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
AddSuffix(id);
|
||||
|
||||
id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
|
||||
AddSuffix(id);
|
||||
} else {
|
||||
// Suffix factoring matters - and we do have to handle it here.
|
||||
int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
|
||||
id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
|
||||
AddSuffix(id);
|
||||
|
||||
int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
|
||||
id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
|
||||
AddSuffix(id);
|
||||
|
||||
int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
|
||||
id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
|
||||
AddSuffix(id);
|
||||
}
|
||||
}
|
||||
|
||||
@ -711,9 +672,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
|
||||
if (lo > hi)
|
||||
return;
|
||||
|
||||
// Pick off 80-10FFFF as a common special case
|
||||
// that can bypass the slow rune_cache_.
|
||||
if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
|
||||
// Pick off 80-10FFFF as a common special case.
|
||||
if (lo == 0x80 && hi == 0x10ffff) {
|
||||
Add_80_10ffff();
|
||||
return;
|
||||
}
|
||||
@ -1095,8 +1055,6 @@ static bool IsAnchorEnd(Regexp** pre, int depth) {
|
||||
|
||||
void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
|
||||
RE2::Anchor anchor) {
|
||||
prog_->set_flags(flags);
|
||||
|
||||
if (flags & Regexp::Latin1)
|
||||
encoding_ = kEncodingLatin1;
|
||||
max_mem_ = max_mem;
|
||||
@ -1117,14 +1075,11 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
|
||||
// on the program.)
|
||||
if (m >= 1<<24)
|
||||
m = 1<<24;
|
||||
|
||||
// Inst imposes its own limit (currently bigger than 2^24 but be safe).
|
||||
if (m > Prog::Inst::kMaxInst)
|
||||
m = Prog::Inst::kMaxInst;
|
||||
|
||||
max_ninst_ = static_cast<int>(m);
|
||||
}
|
||||
|
||||
anchor_ = anchor;
|
||||
}
|
||||
|
||||
@ -1178,10 +1133,10 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
|
||||
c.prog_->set_start_unanchored(all.begin);
|
||||
|
||||
// Hand ownership of prog_ to caller.
|
||||
return c.Finish();
|
||||
return c.Finish(re);
|
||||
}
|
||||
|
||||
Prog* Compiler::Finish() {
|
||||
Prog* Compiler::Finish(Regexp* re) {
|
||||
if (failed_)
|
||||
return NULL;
|
||||
|
||||
@ -1198,6 +1153,17 @@ Prog* Compiler::Finish() {
|
||||
prog_->Flatten();
|
||||
prog_->ComputeByteMap();
|
||||
|
||||
if (!prog_->reversed()) {
|
||||
std::string prefix;
|
||||
bool prefix_foldcase;
|
||||
if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase) &&
|
||||
!prefix_foldcase) {
|
||||
prog_->prefix_size_ = prefix.size();
|
||||
prog_->prefix_front_ = prefix.front();
|
||||
prog_->prefix_back_ = prefix.back();
|
||||
}
|
||||
}
|
||||
|
||||
// Record remaining memory for DFA.
|
||||
if (max_mem_ <= 0) {
|
||||
prog_->set_dfa_mem(1<<20);
|
||||
@ -1254,7 +1220,7 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
|
||||
c.prog_->set_start(all.begin);
|
||||
c.prog_->set_start_unanchored(all.begin);
|
||||
|
||||
Prog* prog = c.Finish();
|
||||
Prog* prog = c.Finish(re);
|
||||
if (prog == NULL)
|
||||
return NULL;
|
||||
|
||||
|
263
extern/re2/re2/dfa.cc
vendored
263
extern/re2/re2/dfa.cc
vendored
@ -39,10 +39,11 @@
|
||||
#include "util/logging.h"
|
||||
#include "util/mix.h"
|
||||
#include "util/mutex.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/sparse_set.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/sparse_set.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
|
||||
@ -52,17 +53,6 @@
|
||||
|
||||
namespace re2 {
|
||||
|
||||
#if !defined(__linux__) /* only Linux seems to have memrchr */
|
||||
static void* memrchr(const void* s, int c, size_t n) {
|
||||
const unsigned char* p = (const unsigned char*)s;
|
||||
for (p += n; n > 0; n--)
|
||||
if (*--p == c)
|
||||
return (void*)p;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Controls whether the DFA should bail out early if the NFA would be faster.
|
||||
static bool dfa_should_bail_when_slow = true;
|
||||
|
||||
@ -177,11 +167,8 @@ class DFA {
|
||||
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
|
||||
|
||||
private:
|
||||
// Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
|
||||
enum {
|
||||
kFbUnknown = -1, // No analysis has been performed.
|
||||
kFbNone = -2, // The first-byte trick cannot be used.
|
||||
};
|
||||
// Make it easier to swap in a scalable reader-writer mutex.
|
||||
using CacheMutex = Mutex;
|
||||
|
||||
enum {
|
||||
// Indices into start_ for unanchored searches.
|
||||
@ -249,12 +236,13 @@ class DFA {
|
||||
struct SearchParams {
|
||||
SearchParams(const StringPiece& text, const StringPiece& context,
|
||||
RWLocker* cache_lock)
|
||||
: text(text), context(context),
|
||||
: text(text),
|
||||
context(context),
|
||||
anchored(false),
|
||||
can_prefix_accel(false),
|
||||
want_earliest_match(false),
|
||||
run_forward(false),
|
||||
start(NULL),
|
||||
first_byte(kFbUnknown),
|
||||
cache_lock(cache_lock),
|
||||
failed(false),
|
||||
ep(NULL),
|
||||
@ -263,10 +251,10 @@ class DFA {
|
||||
StringPiece text;
|
||||
StringPiece context;
|
||||
bool anchored;
|
||||
bool can_prefix_accel;
|
||||
bool want_earliest_match;
|
||||
bool run_forward;
|
||||
State* start;
|
||||
int first_byte;
|
||||
RWLocker* cache_lock;
|
||||
bool failed; // "out" parameter: whether search gave up
|
||||
const char* ep; // "out" parameter: end pointer for match
|
||||
@ -278,15 +266,13 @@ class DFA {
|
||||
};
|
||||
|
||||
// Before each search, the parameters to Search are analyzed by
|
||||
// AnalyzeSearch to determine the state in which to start and the
|
||||
// "first_byte" for that state, if any.
|
||||
// AnalyzeSearch to determine the state in which to start.
|
||||
struct StartInfo {
|
||||
StartInfo() : start(NULL), first_byte(kFbUnknown) {}
|
||||
State* start;
|
||||
std::atomic<int> first_byte;
|
||||
StartInfo() : start(NULL) {}
|
||||
std::atomic<State*> start;
|
||||
};
|
||||
|
||||
// Fills in params->start and params->first_byte using
|
||||
// Fills in params->start and params->can_prefix_accel using
|
||||
// the other search parameters. Returns true on success,
|
||||
// false on failure.
|
||||
// cache_mutex_.r <= L < mutex_
|
||||
@ -297,10 +283,10 @@ class DFA {
|
||||
// The generic search loop, inlined to create specialized versions.
|
||||
// cache_mutex_.r <= L < mutex_
|
||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
||||
inline bool InlinedSearchLoop(SearchParams* params,
|
||||
bool have_first_byte,
|
||||
template <bool can_prefix_accel,
|
||||
bool want_earliest_match,
|
||||
bool run_forward);
|
||||
bool run_forward>
|
||||
inline bool InlinedSearchLoop(SearchParams* params);
|
||||
|
||||
// The specialized versions of InlinedSearchLoop. The three letters
|
||||
// at the ends of the name denote the true/false values used as the
|
||||
@ -322,13 +308,6 @@ class DFA {
|
||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
||||
bool FastSearchLoop(SearchParams* params);
|
||||
|
||||
// For debugging, a slow search loop that calls InlinedSearchLoop
|
||||
// directly -- because the booleans passed are not constants, the
|
||||
// loop is not specialized like the SearchFFF etc. versions, so it
|
||||
// runs much more slowly. Useful only for debugging.
|
||||
// cache_mutex_.r <= L < mutex_
|
||||
// Might unlock and relock cache_mutex_ via params->cache_lock.
|
||||
bool SlowSearchLoop(SearchParams* params);
|
||||
|
||||
// Looks up bytes in bytemap_ but handles case c == kByteEndText too.
|
||||
int ByteMap(int c) {
|
||||
@ -355,11 +334,14 @@ class DFA {
|
||||
// while holding cache_mutex_ for writing, to avoid interrupting other
|
||||
// readers. Any State* pointers are only valid while cache_mutex_
|
||||
// is held.
|
||||
Mutex cache_mutex_;
|
||||
CacheMutex cache_mutex_;
|
||||
int64_t mem_budget_; // Total memory budget for all States.
|
||||
int64_t state_budget_; // Amount of memory remaining for new States.
|
||||
StateSet state_cache_; // All States computed so far.
|
||||
StartInfo start_[kMaxStart];
|
||||
|
||||
DFA(const DFA&) = delete;
|
||||
DFA& operator=(const DFA&) = delete;
|
||||
};
|
||||
|
||||
// Shorthand for casting to uint8_t*.
|
||||
@ -442,7 +424,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
|
||||
q1_(NULL),
|
||||
mem_budget_(max_mem) {
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
|
||||
fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
|
||||
int nmark = 0;
|
||||
if (kind_ == Prog::kLongestMatch)
|
||||
nmark = prog_->size();
|
||||
@ -613,7 +595,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
|
||||
// those are the only operators with any effect in
|
||||
// RunWorkqOnEmptyString or RunWorkqOnByte.
|
||||
int* inst = new int[q->size()];
|
||||
PODArray<int> inst(q->size());
|
||||
int n = 0;
|
||||
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
|
||||
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
|
||||
@ -643,7 +625,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
(it == q->begin() && ip->greedy(prog_))) &&
|
||||
(kind_ != Prog::kLongestMatch || !sawmark) &&
|
||||
(flag & kFlagMatch)) {
|
||||
delete[] inst;
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, " -> FullMatchState\n");
|
||||
return FullMatchState;
|
||||
@ -690,7 +671,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
// the execution loop can stop early. This is only okay
|
||||
// if the state is *not* a matching state.
|
||||
if (n == 0 && flag == 0) {
|
||||
delete[] inst;
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, " -> DeadState\n");
|
||||
return DeadState;
|
||||
@ -700,7 +680,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
// unordered state sets separated by Marks. Sort each set
|
||||
// to canonicalize, to reduce the number of distinct sets stored.
|
||||
if (kind_ == Prog::kLongestMatch) {
|
||||
int* ip = inst;
|
||||
int* ip = inst.data();
|
||||
int* ep = ip + n;
|
||||
while (ip < ep) {
|
||||
int* markp = ip;
|
||||
@ -717,7 +697,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
// we have an unordered set of states (i.e. we don't have Marks)
|
||||
// and sorting will reduce the number of distinct sets stored.
|
||||
if (kind_ == Prog::kManyMatch) {
|
||||
int* ip = inst;
|
||||
int* ip = inst.data();
|
||||
int* ep = ip + n;
|
||||
std::sort(ip, ep);
|
||||
}
|
||||
@ -736,8 +716,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
// Save the needed empty-width flags in the top bits for use later.
|
||||
flag |= needflags << kFlagNeedShift;
|
||||
|
||||
State* state = CachedState(inst, n, flag);
|
||||
delete[] inst;
|
||||
State* state = CachedState(inst.data(), n, flag);
|
||||
return state;
|
||||
}
|
||||
|
||||
@ -971,8 +950,21 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
||||
break;
|
||||
|
||||
case kInstByteRange: // can follow if c is in range
|
||||
if (ip->Matches(c))
|
||||
if (!ip->Matches(c))
|
||||
break;
|
||||
AddToQueue(newq, ip->out(), flag);
|
||||
if (ip->hint() != 0) {
|
||||
// We have a hint, but we must cancel out the
|
||||
// increment that will occur after the break.
|
||||
i += ip->hint() - 1;
|
||||
} else {
|
||||
// We have no hint, so we must find the end
|
||||
// of the current list and then skip to it.
|
||||
Prog::Inst* ip0 = ip;
|
||||
while (!ip->last())
|
||||
++ip;
|
||||
i += ip - ip0;
|
||||
}
|
||||
break;
|
||||
|
||||
case kInstMatch:
|
||||
@ -989,8 +981,8 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
||||
}
|
||||
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
|
||||
c, flag, DumpWorkq(newq).c_str(), *ismatch);
|
||||
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
|
||||
DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
|
||||
}
|
||||
|
||||
// Processes input byte c in state, returning new state.
|
||||
@ -1117,7 +1109,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
||||
|
||||
class DFA::RWLocker {
|
||||
public:
|
||||
explicit RWLocker(Mutex* mu);
|
||||
explicit RWLocker(CacheMutex* mu);
|
||||
~RWLocker();
|
||||
|
||||
// If the lock is only held for reading right now,
|
||||
@ -1127,19 +1119,19 @@ class DFA::RWLocker {
|
||||
void LockForWriting();
|
||||
|
||||
private:
|
||||
Mutex* mu_;
|
||||
CacheMutex* mu_;
|
||||
bool writing_;
|
||||
|
||||
RWLocker(const RWLocker&) = delete;
|
||||
RWLocker& operator=(const RWLocker&) = delete;
|
||||
};
|
||||
|
||||
DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
|
||||
DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
|
||||
mu_->ReaderLock();
|
||||
}
|
||||
|
||||
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
|
||||
// does not support lock upgrade.
|
||||
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
|
||||
// the annotations don't support lock upgrade.
|
||||
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
|
||||
if (!writing_) {
|
||||
mu_->ReaderUnlock();
|
||||
@ -1171,11 +1163,14 @@ void DFA::ResetCache(RWLocker* cache_lock) {
|
||||
// Re-acquire the cache_mutex_ for writing (exclusive use).
|
||||
cache_lock->LockForWriting();
|
||||
|
||||
hooks::GetDFAStateCacheResetHook()({
|
||||
state_budget_,
|
||||
state_cache_.size(),
|
||||
});
|
||||
|
||||
// Clear the cache, reset the memory budget.
|
||||
for (int i = 0; i < kMaxStart; i++) {
|
||||
start_[i].start = NULL;
|
||||
start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
|
||||
}
|
||||
for (int i = 0; i < kMaxStart; i++)
|
||||
start_[i].start.store(NULL, std::memory_order_relaxed);
|
||||
ClearCache();
|
||||
mem_budget_ = state_budget_;
|
||||
}
|
||||
@ -1290,8 +1285,7 @@ DFA::State* DFA::StateSaver::Restore() {
|
||||
// situation, the DFA can do better than executing the simple loop.
|
||||
// Instead, it can call memchr to search very quickly for the byte c.
|
||||
// Whether the start state has this property is determined during a
|
||||
// pre-compilation pass, and if so, the byte b is passed to the search
|
||||
// loop as the "first_byte" argument, along with a boolean "have_first_byte".
|
||||
// pre-compilation pass and the "can_prefix_accel" argument is set.
|
||||
//
|
||||
// Fourth, the desired behavior is to search for the leftmost-best match
|
||||
// (approximately, the same one that Perl would find), which is not
|
||||
@ -1323,14 +1317,15 @@ DFA::State* DFA::StateSaver::Restore() {
|
||||
// The bools are equal to the same-named variables in params, but
|
||||
// making them function arguments lets the inliner specialize
|
||||
// this function to each combination (see two paragraphs above).
|
||||
inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
||||
bool have_first_byte,
|
||||
template <bool can_prefix_accel,
|
||||
bool want_earliest_match,
|
||||
bool run_forward) {
|
||||
bool run_forward>
|
||||
inline bool DFA::InlinedSearchLoop(SearchParams* params) {
|
||||
State* start = params->start;
|
||||
const uint8_t* bp = BytePtr(params->text.begin()); // start of text
|
||||
const uint8_t* bp = BytePtr(params->text.data()); // start of text
|
||||
const uint8_t* p = bp; // text scanning point
|
||||
const uint8_t* ep = BytePtr(params->text.end()); // end of text
|
||||
const uint8_t* ep = BytePtr(params->text.data() +
|
||||
params->text.size()); // end of text
|
||||
const uint8_t* resetp = NULL; // p at last cache reset
|
||||
if (!run_forward) {
|
||||
using std::swap;
|
||||
@ -1366,26 +1361,17 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
||||
|
||||
while (p != ep) {
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "@%td: %s\n",
|
||||
p - bp, DumpState(s).c_str());
|
||||
fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
|
||||
|
||||
if (have_first_byte && s == start) {
|
||||
// In start state, only way out is to find first_byte,
|
||||
// so use optimized assembly in memchr to skip ahead.
|
||||
// If first_byte isn't found, we can skip to the end
|
||||
// of the string.
|
||||
if (run_forward) {
|
||||
if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
|
||||
if (can_prefix_accel && s == start) {
|
||||
// In start state, only way out is to find the prefix,
|
||||
// so we use prefix accel (e.g. memchr) to skip ahead.
|
||||
// If not found, we can skip to the end of the string.
|
||||
p = BytePtr(prog_->PrefixAccel(p, ep - p));
|
||||
if (p == NULL) {
|
||||
p = ep;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if ((p = BytePtr(memrchr(ep, params->first_byte, p - ep))) == NULL) {
|
||||
p = ep;
|
||||
break;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
int c;
|
||||
@ -1475,8 +1461,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
||||
else
|
||||
lastmatch = p + 1;
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "match @%td! [%s]\n",
|
||||
lastmatch - bp, DumpState(s).c_str());
|
||||
fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
|
||||
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
|
||||
for (int i = s->ninst_ - 1; i >= 0; i--) {
|
||||
int id = s->inst_[i];
|
||||
@ -1560,36 +1545,28 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
||||
|
||||
// Inline specializations of the general loop.
|
||||
bool DFA::SearchFFF(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 0, 0, 0);
|
||||
return InlinedSearchLoop<false, false, false>(params);
|
||||
}
|
||||
bool DFA::SearchFFT(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 0, 0, 1);
|
||||
return InlinedSearchLoop<false, false, true>(params);
|
||||
}
|
||||
bool DFA::SearchFTF(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 0, 1, 0);
|
||||
return InlinedSearchLoop<false, true, false>(params);
|
||||
}
|
||||
bool DFA::SearchFTT(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 0, 1, 1);
|
||||
return InlinedSearchLoop<false, true, true>(params);
|
||||
}
|
||||
bool DFA::SearchTFF(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 1, 0, 0);
|
||||
return InlinedSearchLoop<true, false, false>(params);
|
||||
}
|
||||
bool DFA::SearchTFT(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 1, 0, 1);
|
||||
return InlinedSearchLoop<true, false, true>(params);
|
||||
}
|
||||
bool DFA::SearchTTF(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 1, 1, 0);
|
||||
return InlinedSearchLoop<true, true, false>(params);
|
||||
}
|
||||
bool DFA::SearchTTT(SearchParams* params) {
|
||||
return InlinedSearchLoop(params, 1, 1, 1);
|
||||
}
|
||||
|
||||
// For debugging, calls the general code directly.
|
||||
bool DFA::SlowSearchLoop(SearchParams* params) {
|
||||
return InlinedSearchLoop(params,
|
||||
params->first_byte >= 0,
|
||||
params->want_earliest_match,
|
||||
params->run_forward);
|
||||
return InlinedSearchLoop<true, true, true>(params);
|
||||
}
|
||||
|
||||
// For performance, calls the appropriate specialized version
|
||||
@ -1608,8 +1585,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
|
||||
&DFA::SearchTTT,
|
||||
};
|
||||
|
||||
bool have_first_byte = params->first_byte >= 0;
|
||||
int index = 4 * have_first_byte +
|
||||
int index = 4 * params->can_prefix_accel +
|
||||
2 * params->want_earliest_match +
|
||||
1 * params->run_forward;
|
||||
return (this->*Searches[index])(params);
|
||||
@ -1701,13 +1677,22 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
||||
}
|
||||
}
|
||||
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
|
||||
params->anchored, params->run_forward, flags,
|
||||
DumpState(info->start).c_str(), info->first_byte.load());
|
||||
params->start = info->start.load(std::memory_order_acquire);
|
||||
|
||||
params->start = info->start;
|
||||
params->first_byte = info->first_byte.load(std::memory_order_acquire);
|
||||
// Even if we could prefix accel, we cannot do so when anchored and,
|
||||
// less obviously, we cannot do so when we are going to need flags.
|
||||
// This trick works only when there is a single byte that leads to a
|
||||
// different state!
|
||||
if (prog_->can_prefix_accel() &&
|
||||
!params->anchored &&
|
||||
params->start > SpecialStateMax &&
|
||||
params->start->flag_ >> kFlagNeedShift == 0)
|
||||
params->can_prefix_accel = true;
|
||||
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
|
||||
params->anchored, params->run_forward, flags,
|
||||
DumpState(params->start).c_str(), params->can_prefix_accel);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1716,47 +1701,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
||||
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
||||
uint32_t flags) {
|
||||
// Quick check.
|
||||
int fb = info->first_byte.load(std::memory_order_acquire);
|
||||
if (fb != kFbUnknown)
|
||||
State* start = info->start.load(std::memory_order_acquire);
|
||||
if (start != NULL)
|
||||
return true;
|
||||
|
||||
MutexLock l(&mutex_);
|
||||
fb = info->first_byte.load(std::memory_order_relaxed);
|
||||
if (fb != kFbUnknown)
|
||||
start = info->start.load(std::memory_order_relaxed);
|
||||
if (start != NULL)
|
||||
return true;
|
||||
|
||||
q0_->clear();
|
||||
AddToQueue(q0_,
|
||||
params->anchored ? prog_->start() : prog_->start_unanchored(),
|
||||
flags);
|
||||
info->start = WorkqToCachedState(q0_, NULL, flags);
|
||||
if (info->start == NULL)
|
||||
start = WorkqToCachedState(q0_, NULL, flags);
|
||||
if (start == NULL)
|
||||
return false;
|
||||
|
||||
if (info->start == DeadState) {
|
||||
// Synchronize with "quick check" above.
|
||||
info->first_byte.store(kFbNone, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (info->start == FullMatchState) {
|
||||
// Synchronize with "quick check" above.
|
||||
info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
|
||||
return true;
|
||||
}
|
||||
|
||||
// Even if we have a first_byte, we cannot use it when anchored and,
|
||||
// less obviously, we cannot use it when we are going to need flags.
|
||||
// This trick works only when there is a single byte that leads to a
|
||||
// different state!
|
||||
int first_byte = prog_->first_byte();
|
||||
if (first_byte == -1 ||
|
||||
params->anchored ||
|
||||
info->start->flag_ >> kFlagNeedShift != 0)
|
||||
first_byte = kFbNone;
|
||||
|
||||
// Synchronize with "quick check" above.
|
||||
info->first_byte.store(first_byte, std::memory_order_release);
|
||||
info->start.store(start, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1779,8 +1742,7 @@ bool DFA::Search(const StringPiece& text,
|
||||
if (ExtraDebug) {
|
||||
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
|
||||
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
|
||||
std::string(text).c_str(), anchored, want_earliest_match,
|
||||
run_forward, kind_);
|
||||
std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
|
||||
}
|
||||
|
||||
RWLocker l(&cache_mutex_);
|
||||
@ -1798,9 +1760,9 @@ bool DFA::Search(const StringPiece& text,
|
||||
return false;
|
||||
if (params.start == FullMatchState) {
|
||||
if (run_forward == want_earliest_match)
|
||||
*epp = text.begin();
|
||||
*epp = text.data();
|
||||
else
|
||||
*epp = text.end();
|
||||
*epp = text.data() + text.size();
|
||||
return true;
|
||||
}
|
||||
if (ExtraDebug)
|
||||
@ -1863,15 +1825,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
||||
*failed = false;
|
||||
|
||||
StringPiece context = const_context;
|
||||
if (context.begin() == NULL)
|
||||
if (context.data() == NULL)
|
||||
context = text;
|
||||
bool carat = anchor_start();
|
||||
bool caret = anchor_start();
|
||||
bool dollar = anchor_end();
|
||||
if (reversed_) {
|
||||
using std::swap;
|
||||
swap(carat, dollar);
|
||||
swap(caret, dollar);
|
||||
}
|
||||
if (carat && context.begin() != text.begin())
|
||||
if (caret && context.begin() != text.begin())
|
||||
return false;
|
||||
if (dollar && context.end() != text.end())
|
||||
return false;
|
||||
@ -1906,11 +1868,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
||||
bool matched = dfa->Search(text, context, anchored,
|
||||
want_earliest_match, !reversed_,
|
||||
failed, &ep, matches);
|
||||
if (*failed)
|
||||
if (*failed) {
|
||||
hooks::GetDFASearchFailureHook()({
|
||||
// Nothing yet...
|
||||
});
|
||||
return false;
|
||||
}
|
||||
if (!matched)
|
||||
return false;
|
||||
if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
|
||||
if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
|
||||
return false;
|
||||
|
||||
// If caller cares, record the boundary of the match.
|
||||
@ -1918,10 +1884,11 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
||||
// as the beginning.
|
||||
if (match0) {
|
||||
if (reversed_)
|
||||
*match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep));
|
||||
*match0 =
|
||||
StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
|
||||
else
|
||||
*match0 =
|
||||
StringPiece(text.begin(), static_cast<size_t>(ep - text.begin()));
|
||||
StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
20
extern/re2/re2/filtered_re2.cc
vendored
20
extern/re2/re2/filtered_re2.cc
vendored
@ -6,6 +6,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
@ -27,7 +28,22 @@ FilteredRE2::FilteredRE2(int min_atom_len)
|
||||
FilteredRE2::~FilteredRE2() {
|
||||
for (size_t i = 0; i < re2_vec_.size(); i++)
|
||||
delete re2_vec_[i];
|
||||
delete prefilter_tree_;
|
||||
}
|
||||
|
||||
// Move constructor. Takes over other's regexp vector, its compiled_ flag and
// its prefilter tree, then leaves other holding no regexps, marked as not
// compiled, and owning a freshly allocated PrefilterTree.
FilteredRE2::FilteredRE2(FilteredRE2&& other)
    : re2_vec_(std::move(other.re2_vec_)),
      compiled_(other.compiled_),
      prefilter_tree_(std::move(other.prefilter_tree_)) {
  other.re2_vec_.clear();
  other.re2_vec_.shrink_to_fit();
  other.compiled_ = false;
  other.prefilter_tree_.reset(new PrefilterTree());
}

// Move assignment. Implemented as "destroy, then move-construct in place":
// the destructor releases everything *this currently owns and placement new
// rebuilds *this from other via the move constructor.
FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
  // Self-move must be a no-op. Without this check the destructor would tear
  // down *this and the move constructor would then read from the very object
  // that was just destroyed.
  if (this == &other)
    return *this;

  this->~FilteredRE2();
  (void) new (this) FilteredRE2(std::move(other));
  return *this;
}
|
||||
|
||||
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
|
||||
@ -38,7 +54,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
|
||||
if (!re->ok()) {
|
||||
if (options.log_errors()) {
|
||||
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
|
||||
<< re << " due to error " << re->error();
|
||||
<< pattern << " due to error " << re->error();
|
||||
}
|
||||
delete re;
|
||||
} else {
|
||||
|
37
extern/re2/re2/filtered_re2.h
vendored
37
extern/re2/re2/filtered_re2.h
vendored
@ -10,17 +10,18 @@
|
||||
// number of regexps that need to be actually searched.
|
||||
//
|
||||
// By design, it does not include a string matching engine. This is to
|
||||
// allow the user of the class to use their favorite string match
|
||||
// allow the user of the class to use their favorite string matching
|
||||
// engine. The overall flow is: Add all the regexps using Add, then
|
||||
// Compile the FilteredRE2. The compile returns strings that need to
|
||||
// be matched. Note that all returned strings are lowercase. For
|
||||
// applying regexps to a search text, the caller does the string
|
||||
// matching using the strings returned. When doing the string match,
|
||||
// note that the caller has to do that on lower cased version of the
|
||||
// search text. Then call FirstMatch or AllMatches with a vector of
|
||||
// indices of strings that were found in the text to get the actual
|
||||
// regexp matches.
|
||||
// Compile the FilteredRE2. Compile returns strings that need to be
|
||||
// matched. Note that the returned strings are lowercased and distinct.
|
||||
// For applying regexps to a search text, the caller does the string
|
||||
// matching using the returned strings. When doing the string match,
|
||||
// note that the caller has to do that in a case-insensitive way or
|
||||
// on a lowercased version of the search text. Then call FirstMatch
|
||||
// or AllMatches with a vector of indices of strings that were found
|
||||
// in the text to get the actual regexp matches.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -36,6 +37,13 @@ class FilteredRE2 {
|
||||
explicit FilteredRE2(int min_atom_len);
|
||||
~FilteredRE2();
|
||||
|
||||
// Not copyable.
|
||||
FilteredRE2(const FilteredRE2&) = delete;
|
||||
FilteredRE2& operator=(const FilteredRE2&) = delete;
|
||||
// Movable.
|
||||
FilteredRE2(FilteredRE2&& other);
|
||||
FilteredRE2& operator=(FilteredRE2&& other);
|
||||
|
||||
// Uses RE2 constructor to create a RE2 object (re). Returns
|
||||
// re->error_code(). If error_code is other than NoError, then re is
|
||||
// deleted and not added to re2_vec_.
|
||||
@ -45,9 +53,9 @@ class FilteredRE2 {
|
||||
|
||||
// Prepares the regexps added by Add for filtering. Returns a set
|
||||
// of strings that the caller should check for in candidate texts.
|
||||
// The returned strings are lowercased. When doing string matching,
|
||||
// the search text should be lowercased first to find matching
|
||||
// strings from the set of strings returned by Compile. Call after
|
||||
// The returned strings are lowercased and distinct. When doing
|
||||
// string matching, it should be performed in a case-insensitive
|
||||
// way or the search text should be lowercased first. Call after
|
||||
// all Add calls are done.
|
||||
void Compile(std::vector<std::string>* strings_to_match);
|
||||
|
||||
@ -98,10 +106,7 @@ class FilteredRE2 {
|
||||
bool compiled_;
|
||||
|
||||
// An AND-OR tree of string atoms used for filtering regexps.
|
||||
PrefilterTree* prefilter_tree_;
|
||||
|
||||
FilteredRE2(const FilteredRE2&) = delete;
|
||||
FilteredRE2& operator=(const FilteredRE2&) = delete;
|
||||
std::unique_ptr<PrefilterTree> prefilter_tree_;
|
||||
};
|
||||
|
||||
} // namespace re2
|
||||
|
219
extern/re2/re2/fuzzing/compiler-rt/LICENSE
vendored
Normal file
219
extern/re2/re2/fuzzing/compiler-rt/LICENSE
vendored
Normal file
@ -0,0 +1,219 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
--- LLVM Exceptions to the Apache 2.0 License ----
|
||||
|
||||
As an exception, if, as a result of your compiling your source code, portions
|
||||
of this Software are embedded into an Object form of such source code, you
|
||||
may redistribute such embedded portions in such Object form without complying
|
||||
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
|
||||
|
||||
In addition, if you combine or link compiled forms of this Software with
|
||||
software that is licensed under the GPLv2 ("Combined Software") and if a
|
||||
court of competent jurisdiction determines that the patent provision (Section
|
||||
3), the indemnity provision (Section 9) or other Section of the License
|
||||
conflicts with the conditions of the GPLv2, you may retroactively and
|
||||
prospectively choose to deem waived or otherwise exclude such Section(s) of
|
||||
the License, but only in their entirety and only with respect to the Combined
|
||||
Software.
|
||||
|
305
extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
vendored
Normal file
305
extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
vendored
Normal file
@ -0,0 +1,305 @@
|
||||
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// A single header library providing a utility class to break up an array of
|
||||
// bytes. Whenever run on the same input, provides the same output, as long as
|
||||
// its methods are called in the same order, with the same arguments.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
||||
#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// In addition to the comments below, the API is also briefly documented at
|
||||
// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
|
||||
class FuzzedDataProvider {
|
||||
public:
|
||||
// |data| is an array of length |size| that the FuzzedDataProvider wraps to
|
||||
// provide more granular access. |data| must outlive the FuzzedDataProvider.
|
||||
FuzzedDataProvider(const uint8_t *data, size_t size)
|
||||
: data_ptr_(data), remaining_bytes_(size) {}
|
||||
~FuzzedDataProvider() = default;
|
||||
|
||||
// Returns a std::vector containing |num_bytes| of input data. If fewer than
|
||||
// |num_bytes| of data remain, returns a shorter std::vector containing all
|
||||
// of the data that's left. Can be used with any byte sized type, such as
|
||||
// char, unsigned char, uint8_t, etc.
|
||||
template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
  // Never hand out more than what is still left in the input; the clamped
  // count is used both as the vector size and as the number of bytes to copy.
  const size_t available = std::min(num_bytes, remaining_bytes_);
  return ConsumeBytes<T>(available, available);
}
|
||||
|
||||
// Similar to |ConsumeBytes|, but also appends the terminator value at the end
|
||||
// of the resulting vector. Useful, when a mutable null-terminated C-string is
|
||||
// needed, for example. But that is a rare case. Better avoid it, if possible,
|
||||
// and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
|
||||
template <typename T>
|
||||
std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
|
||||
T terminator = 0) {
|
||||
num_bytes = std::min(num_bytes, remaining_bytes_);
|
||||
std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
|
||||
result.back() = terminator;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a std::string containing |num_bytes| of input data. Using this and
|
||||
// |.c_str()| on the resulting string is the best way to get an immutable
|
||||
// null-terminated C string. If fewer than |num_bytes| of data remain, returns
|
||||
// a shorter std::string containing all of the data that's left.
|
||||
std::string ConsumeBytesAsString(size_t num_bytes) {
|
||||
static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
|
||||
"ConsumeBytesAsString cannot convert the data to a string.");
|
||||
|
||||
num_bytes = std::min(num_bytes, remaining_bytes_);
|
||||
std::string result(
|
||||
reinterpret_cast<const std::string::value_type *>(data_ptr_),
|
||||
num_bytes);
|
||||
Advance(num_bytes);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a number in the range [min, max] by consuming bytes from the
|
||||
// input data. The value might not be uniformly distributed in the given
|
||||
// range. If there's no input data left, always returns |min|. |min| must
|
||||
// be less than or equal to |max|.
|
||||
template <typename T> T ConsumeIntegralInRange(T min, T max) {
|
||||
static_assert(std::is_integral<T>::value, "An integral type is required.");
|
||||
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
|
||||
|
||||
if (min > max)
|
||||
abort();
|
||||
|
||||
// Use the biggest type possible to hold the range and the result.
|
||||
uint64_t range = static_cast<uint64_t>(max) - min;
|
||||
uint64_t result = 0;
|
||||
size_t offset = 0;
|
||||
|
||||
while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
|
||||
remaining_bytes_ != 0) {
|
||||
// Pull bytes off the end of the seed data. Experimentally, this seems to
|
||||
// allow the fuzzer to more easily explore the input space. This makes
|
||||
// sense, since it works by modifying inputs that caused new code to run,
|
||||
// and this data is often used to encode length of data read by
|
||||
// |ConsumeBytes|. Separating out read lengths makes it easier modify the
|
||||
// contents of the data that is actually read.
|
||||
--remaining_bytes_;
|
||||
result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
|
||||
offset += CHAR_BIT;
|
||||
}
|
||||
|
||||
// Avoid division by 0, in case |range + 1| results in overflow.
|
||||
if (range != std::numeric_limits<decltype(range)>::max())
|
||||
result = result % (range + 1);
|
||||
|
||||
return static_cast<T>(min + result);
|
||||
}
|
||||
|
||||
// Returns a std::string of length from 0 to |max_length|. When it runs out of
|
||||
// input data, returns what remains of the input. Designed to be more stable
|
||||
// with respect to a fuzzer inserting characters than just picking a random
|
||||
// length and then consuming that many bytes with |ConsumeBytes|.
|
||||
std::string ConsumeRandomLengthString(size_t max_length) {
|
||||
// Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
|
||||
// followed by anything else to the end of the string. As a result of this
|
||||
// logic, a fuzzer can insert characters into the string, and the string
|
||||
// will be lengthened to include those new characters, resulting in a more
|
||||
// stable fuzzer than picking the length of a string independently from
|
||||
// picking its contents.
|
||||
std::string result;
|
||||
|
||||
// Reserve the anticipated capaticity to prevent several reallocations.
|
||||
result.reserve(std::min(max_length, remaining_bytes_));
|
||||
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
|
||||
char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
|
||||
Advance(1);
|
||||
if (next == '\\' && remaining_bytes_ != 0) {
|
||||
next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
|
||||
Advance(1);
|
||||
if (next != '\\')
|
||||
break;
|
||||
}
|
||||
result += next;
|
||||
}
|
||||
|
||||
result.shrink_to_fit();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a std::vector containing all remaining bytes of the input data.
|
||||
template <typename T> std::vector<T> ConsumeRemainingBytes() {
|
||||
return ConsumeBytes<T>(remaining_bytes_);
|
||||
}
|
||||
|
||||
// Returns a std::string containing all remaining bytes of the input data.
|
||||
// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
|
||||
// object.
|
||||
std::string ConsumeRemainingBytesAsString() {
|
||||
return ConsumeBytesAsString(remaining_bytes_);
|
||||
}
|
||||
|
||||
// Returns a number in the range [Type's min, Type's max]. The value might
|
||||
// not be uniformly distributed in the given range. If there's no input data
|
||||
// left, always returns |min|.
|
||||
template <typename T> T ConsumeIntegral() {
|
||||
return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
|
||||
std::numeric_limits<T>::max());
|
||||
}
|
||||
|
||||
// Reads one byte and returns a bool, or false when no data remains.
|
||||
bool ConsumeBool() {
  // Only the lowest bit of the consumed byte decides the result.
  return (ConsumeIntegral<uint8_t>() & 1) != 0;
}
|
||||
|
||||
// Returns a copy of the value selected from the given fixed-size |array|.
|
||||
template <typename T, size_t size>
|
||||
T PickValueInArray(const T (&array)[size]) {
|
||||
static_assert(size > 0, "The array must be non empty.");
|
||||
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T PickValueInArray(std::initializer_list<const T> list) {
|
||||
// TODO(Dor1s): switch to static_assert once C++14 is allowed.
|
||||
if (!list.size())
|
||||
abort();
|
||||
|
||||
return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
|
||||
}
|
||||
|
||||
// Returns an enum value. The enum must start at 0 and be contiguous. It must
|
||||
// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
|
||||
// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
|
||||
template <typename T> T ConsumeEnum() {
|
||||
static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
|
||||
return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
|
||||
0, static_cast<uint32_t>(T::kMaxValue)));
|
||||
}
|
||||
|
||||
// Returns a floating point number in the range [0.0, 1.0]. If there's no
|
||||
// input data left, always returns 0.
|
||||
template <typename T> T ConsumeProbability() {
|
||||
static_assert(std::is_floating_point<T>::value,
|
||||
"A floating point type is required.");
|
||||
|
||||
// Use different integral types for different floating point types in order
|
||||
// to provide better density of the resulting values.
|
||||
using IntegralType =
|
||||
typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
|
||||
uint64_t>::type;
|
||||
|
||||
T result = static_cast<T>(ConsumeIntegral<IntegralType>());
|
||||
result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a floating point value in the range [Type's lowest, Type's max] by
|
||||
// consuming bytes from the input data. If there's no input data left, always
|
||||
// returns approximately 0.
|
||||
template <typename T> T ConsumeFloatingPoint() {
|
||||
return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
|
||||
std::numeric_limits<T>::max());
|
||||
}
|
||||
|
||||
// Returns a floating point value in the given range by consuming bytes from
|
||||
// the input data. If there's no input data left, returns |min|. Note that
|
||||
// |min| must be less than or equal to |max|.
|
||||
template <typename T> T ConsumeFloatingPointInRange(T min, T max) {
|
||||
if (min > max)
|
||||
abort();
|
||||
|
||||
T range = .0;
|
||||
T result = min;
|
||||
constexpr T zero(.0);
|
||||
if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
|
||||
// The diff |max - min| would overflow the given floating point type. Use
|
||||
// the half of the diff as the range and consume a bool to decide whether
|
||||
// the result is in the first of the second part of the diff.
|
||||
range = (max / 2.0) - (min / 2.0);
|
||||
if (ConsumeBool()) {
|
||||
result += range;
|
||||
}
|
||||
} else {
|
||||
range = max - min;
|
||||
}
|
||||
|
||||
return result + range * ConsumeProbability<T>();
|
||||
}
|
||||
|
||||
// Reports the remaining bytes available for fuzzed input.
|
||||
size_t remaining_bytes() { return remaining_bytes_; }
|
||||
|
||||
private:
|
||||
FuzzedDataProvider(const FuzzedDataProvider &) = delete;
|
||||
FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
|
||||
|
||||
// Marks |num_bytes| of the input as consumed by moving the read pointer
// forward and shrinking the remaining byte count. Aborts if fewer than
// |num_bytes| bytes remain.
void Advance(size_t num_bytes) {
  if (remaining_bytes_ < num_bytes)
    abort();

  remaining_bytes_ -= num_bytes;
  data_ptr_ += num_bytes;
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
|
||||
static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
|
||||
|
||||
// The point of using the size-based constructor below is to increase the
|
||||
// odds of having a vector object with capacity being equal to the length.
|
||||
// That part is always implementation specific, but at least both libc++ and
|
||||
// libstdc++ allocate the requested number of bytes in that constructor,
|
||||
// which seems to be a natural choice for other implementations as well.
|
||||
// To increase the odds even more, we also call |shrink_to_fit| below.
|
||||
std::vector<T> result(size);
|
||||
if (size == 0) {
|
||||
if (num_bytes_to_consume != 0)
|
||||
abort();
|
||||
return result;
|
||||
}
|
||||
|
||||
std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
|
||||
Advance(num_bytes_to_consume);
|
||||
|
||||
// Even though |shrink_to_fit| is also implementation specific, we expect it
|
||||
// to provide an additional assurance in case vector's constructor allocated
|
||||
// a buffer which is larger than the actual amount of data we put inside it.
|
||||
result.shrink_to_fit();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reinterprets the unsigned |value| as the signed type TS of the same width,
// mapping values above TS's maximum onto the negative range. TU must be
// unsigned and exactly as wide as TS.
template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
  static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
  static_assert(!std::numeric_limits<TU>::is_signed,
                "Source type must be unsigned.");

  // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
  if (std::numeric_limits<TS>::is_modulo)
    return static_cast<TS>(value);

  // Avoid using implementation-defined unsigned to signed conversions.
  // To learn more, see https://stackoverflow.com/questions/13150449.
  if (value <= std::numeric_limits<TS>::max()) {
    return static_cast<TS>(value);
  } else {
    constexpr auto TS_min = std::numeric_limits<TS>::min();
    // The offset from TS_min must be expressed in TS itself. Casting it to
    // char would truncate it to 8 bits and yield wrong results (or signed
    // overflow) for any TS wider than char.
    return TS_min + static_cast<TS>(value - TS_min);
  }
}
|
||||
|
||||
const uint8_t *data_ptr_;
|
||||
size_t remaining_bytes_;
|
||||
};
|
||||
|
||||
#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
|
122
extern/re2/re2/fuzzing/re2_fuzzer.cc
vendored
122
extern/re2/re2/fuzzing/re2_fuzzer.cc
vendored
@ -2,12 +2,13 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <fuzzer/FuzzedDataProvider.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "re2/prefilter.h"
|
||||
#include "re2/re2.h"
|
||||
@ -17,7 +18,38 @@ using re2::StringPiece;
|
||||
// NOT static, NOT signed.
|
||||
uint8_t dummy = 0;
|
||||
|
||||
void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
||||
void TestOneInput(StringPiece pattern, const RE2::Options& options,
|
||||
StringPiece text) {
|
||||
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
|
||||
// Otherwise, we will waste time on inputs that have long runs of various
|
||||
// character classes. The fuzzer has shown itself to be easily capable of
|
||||
// generating such patterns that fall within the other limits, but result
|
||||
// in timeouts nonetheless. The marginal cost is high - even more so when
|
||||
// counted repetition is involved - whereas the marginal benefit is zero.
|
||||
// TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
|
||||
int char_class = 0;
|
||||
int backslash_p = 0; // very expensive, so handle specially
|
||||
for (size_t i = 0; i < pattern.size(); i++) {
|
||||
if (pattern[i] == '.')
|
||||
char_class++;
|
||||
if (pattern[i] != '\\')
|
||||
continue;
|
||||
i++;
|
||||
if (i >= pattern.size())
|
||||
break;
|
||||
if (pattern[i] == 'p' || pattern[i] == 'P' ||
|
||||
pattern[i] == 'd' || pattern[i] == 'D' ||
|
||||
pattern[i] == 's' || pattern[i] == 'S' ||
|
||||
pattern[i] == 'w' || pattern[i] == 'W')
|
||||
char_class++;
|
||||
if (pattern[i] == 'p' || pattern[i] == 'P')
|
||||
backslash_p++;
|
||||
}
|
||||
if (char_class > 9)
|
||||
return;
|
||||
if (backslash_p > 1)
|
||||
return;
|
||||
|
||||
RE2 re(pattern, options);
|
||||
if (!re.ok())
|
||||
return;
|
||||
@ -55,7 +87,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
||||
|
||||
// Don't waste time fuzzing high-fanout programs.
|
||||
// They can cause bug reports due to fuzzer timeouts.
|
||||
std::map<int, int> histogram;
|
||||
std::vector<int> histogram;
|
||||
int fanout = re.ProgramFanout(&histogram);
|
||||
if (fanout > 9)
|
||||
return;
|
||||
@ -102,72 +134,38 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
|
||||
|
||||
// Entry point for libFuzzer.
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
if (size == 0 || size > 999)
|
||||
// An input larger than 4 KiB probably isn't interesting. (This limit
|
||||
// allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
|
||||
if (size == 0 || size > 4096)
|
||||
return 0;
|
||||
|
||||
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
|
||||
// Otherwise, we will waste time on inputs that have long runs of various
|
||||
// character classes. The fuzzer has shown itself to be easily capable of
|
||||
// generating such patterns that fall within the other limits, but result
|
||||
// in timeouts nonetheless. The marginal cost is high - even more so when
|
||||
// counted repetition is involved - whereas the marginal benefit is zero.
|
||||
// TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
|
||||
int char_class = 0;
|
||||
int backslash_p = 0; // very expensive, so handle specially
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (data[i] == '.')
|
||||
char_class++;
|
||||
if (data[i] != '\\')
|
||||
continue;
|
||||
i++;
|
||||
if (i >= size)
|
||||
break;
|
||||
if (data[i] == 'p' || data[i] == 'P' ||
|
||||
data[i] == 'd' || data[i] == 'D' ||
|
||||
data[i] == 's' || data[i] == 'S' ||
|
||||
data[i] == 'w' || data[i] == 'W')
|
||||
char_class++;
|
||||
if (data[i] == 'p' || data[i] == 'P')
|
||||
backslash_p++;
|
||||
}
|
||||
if (char_class > 9)
|
||||
return 0;
|
||||
if (backslash_p > 1)
|
||||
return 0;
|
||||
|
||||
// The one-at-a-time hash by Bob Jenkins.
|
||||
uint32_t hash = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
hash += data[i];
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
}
|
||||
hash += (hash << 3);
|
||||
hash ^= (hash >> 11);
|
||||
hash += (hash << 15);
|
||||
FuzzedDataProvider fdp(data, size);
|
||||
|
||||
// The convention here is that fdp.ConsumeBool() returning false sets
|
||||
// the default value whereas returning true sets the alternate value:
|
||||
// most options default to false and so can be set directly; encoding
|
||||
// defaults to UTF-8; case_sensitive defaults to true. We do NOT want
|
||||
// to log errors. max_mem is 64 MiB because we can afford to use more
|
||||
// RAM in exchange for (hopefully) faster fuzzing.
|
||||
RE2::Options options;
|
||||
options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
|
||||
: RE2::Options::EncodingUTF8);
|
||||
options.set_posix_syntax(fdp.ConsumeBool());
|
||||
options.set_longest_match(fdp.ConsumeBool());
|
||||
options.set_log_errors(false);
|
||||
options.set_max_mem(64 << 20);
|
||||
options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
|
||||
: RE2::Options::EncodingUTF8);
|
||||
options.set_posix_syntax(hash & 2);
|
||||
options.set_longest_match(hash & 4);
|
||||
options.set_literal(hash & 8);
|
||||
options.set_never_nl(hash & 16);
|
||||
options.set_dot_nl(hash & 32);
|
||||
options.set_never_capture(hash & 64);
|
||||
options.set_case_sensitive(hash & 128);
|
||||
options.set_perl_classes(hash & 256);
|
||||
options.set_word_boundary(hash & 512);
|
||||
options.set_one_line(hash & 1024);
|
||||
options.set_literal(fdp.ConsumeBool());
|
||||
options.set_never_nl(fdp.ConsumeBool());
|
||||
options.set_dot_nl(fdp.ConsumeBool());
|
||||
options.set_never_capture(fdp.ConsumeBool());
|
||||
options.set_case_sensitive(!fdp.ConsumeBool());
|
||||
options.set_perl_classes(fdp.ConsumeBool());
|
||||
options.set_word_boundary(fdp.ConsumeBool());
|
||||
options.set_one_line(fdp.ConsumeBool());
|
||||
|
||||
const char* ptr = reinterpret_cast<const char*>(data);
|
||||
int len = static_cast<int>(size);
|
||||
|
||||
StringPiece pattern(ptr, len);
|
||||
StringPiece text(ptr, len);
|
||||
Test(pattern, options, text);
|
||||
std::string pattern = fdp.ConsumeRandomLengthString(999);
|
||||
std::string text = fdp.ConsumeRandomLengthString(999);
|
||||
|
||||
TestOneInput(pattern, options, text);
|
||||
return 0;
|
||||
}
|
||||
|
2
extern/re2/re2/make_perl_groups.pl
vendored
Normal file → Executable file
2
extern/re2/re2/make_perl_groups.pl
vendored
Normal file → Executable file
@ -76,7 +76,7 @@ sub PrintClass($$@) {
|
||||
} else {
|
||||
$negname =~ y/a-z/A-Z/;
|
||||
}
|
||||
return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
|
||||
return "{ \"$escname\", +1, code$cnum, $n, 0, 0 }", "{ \"$negname\", -1, code$cnum, $n, 0, 0 }";
|
||||
}
|
||||
|
||||
my $cnum = 0;
|
||||
|
0
extern/re2/re2/make_unicode_casefold.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_casefold.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_groups.py
vendored
Normal file → Executable file
0
extern/re2/re2/make_unicode_groups.py
vendored
Normal file → Executable file
26
extern/re2/re2/mimics_pcre.cc
vendored
26
extern/re2/re2/mimics_pcre.cc
vendored
@ -38,14 +38,21 @@ static bool CanBeEmptyString(Regexp *re);
|
||||
class PCREWalker : public Regexp::Walker<bool> {
|
||||
public:
|
||||
PCREWalker() {}
|
||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
|
||||
int nchild_args);
|
||||
|
||||
bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
||||
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||
bool* child_args, int nchild_args);
|
||||
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "PCREWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
||||
private:
|
||||
PCREWalker(const PCREWalker&) = delete;
|
||||
PCREWalker& operator=(const PCREWalker&) = delete;
|
||||
};
|
||||
|
||||
// Called after visiting each of re's children and accumulating
|
||||
@ -115,12 +122,15 @@ bool Regexp::MimicsPCRE() {
|
||||
class EmptyStringWalker : public Regexp::Walker<bool> {
|
||||
public:
|
||||
EmptyStringWalker() {}
|
||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||
|
||||
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||
bool* child_args, int nchild_args);
|
||||
|
||||
bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
||||
|
214
extern/re2/re2/nfa.cc
vendored
214
extern/re2/re2/nfa.cc
vendored
@ -27,17 +27,18 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "util/sparse_set.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/sparse_array.h"
|
||||
#include "re2/sparse_set.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -107,18 +108,21 @@ class NFA {
|
||||
// Returns text version of capture information, for debugging.
|
||||
std::string FormatCapture(const char** capture);
|
||||
|
||||
inline void CopyCapture(const char** dst, const char** src);
|
||||
void CopyCapture(const char** dst, const char** src) {
|
||||
memmove(dst, src, ncapture_*sizeof src[0]);
|
||||
}
|
||||
|
||||
Prog* prog_; // underlying program
|
||||
int start_; // start instruction in program
|
||||
int ncapture_; // number of submatches to track
|
||||
bool longest_; // whether searching for longest match
|
||||
bool endmatch_; // whether match must end at text.end()
|
||||
const char* btext_; // beginning of text being matched (for FormatSubmatch)
|
||||
const char* etext_; // end of text being matched (for endmatch_)
|
||||
const char* btext_; // beginning of text (for FormatSubmatch)
|
||||
const char* etext_; // end of text (for endmatch_)
|
||||
Threadq q0_, q1_; // pre-allocated for Search.
|
||||
PODArray<AddState> stack_; // pre-allocated for AddToThreadq
|
||||
Thread* free_threads_; // free list
|
||||
std::deque<Thread> arena_; // thread arena
|
||||
Thread* freelist_; // thread freelist
|
||||
const char** match_; // best match so far
|
||||
bool matched_; // any match so far?
|
||||
|
||||
@ -141,31 +145,30 @@ NFA::NFA(Prog* prog) {
|
||||
prog_->inst_count(kInstEmptyWidth) +
|
||||
prog_->inst_count(kInstNop) + 1; // + 1 for start inst
|
||||
stack_ = PODArray<AddState>(nstack);
|
||||
free_threads_ = NULL;
|
||||
freelist_ = NULL;
|
||||
match_ = NULL;
|
||||
matched_ = false;
|
||||
}
|
||||
|
||||
NFA::~NFA() {
|
||||
delete[] match_;
|
||||
Thread* next;
|
||||
for (Thread* t = free_threads_; t; t = next) {
|
||||
next = t->next;
|
||||
delete[] t->capture;
|
||||
delete t;
|
||||
}
|
||||
for (const Thread& t : arena_)
|
||||
delete[] t.capture;
|
||||
}
|
||||
|
||||
NFA::Thread* NFA::AllocThread() {
|
||||
Thread* t = free_threads_;
|
||||
if (t == NULL) {
|
||||
t = new Thread;
|
||||
Thread* t = freelist_;
|
||||
if (t != NULL) {
|
||||
freelist_ = t->next;
|
||||
t->ref = 1;
|
||||
t->capture = new const char*[ncapture_];
|
||||
// We don't need to touch t->capture because
|
||||
// the caller will immediately overwrite it.
|
||||
return t;
|
||||
}
|
||||
free_threads_ = t->next;
|
||||
arena_.emplace_back();
|
||||
t = &arena_.back();
|
||||
t->ref = 1;
|
||||
t->capture = new const char*[ncapture_];
|
||||
return t;
|
||||
}
|
||||
|
||||
@ -176,21 +179,13 @@ NFA::Thread* NFA::Incref(Thread* t) {
|
||||
}
|
||||
|
||||
void NFA::Decref(Thread* t) {
|
||||
if (t == NULL)
|
||||
return;
|
||||
DCHECK(t != NULL);
|
||||
t->ref--;
|
||||
if (t->ref > 0)
|
||||
return;
|
||||
DCHECK_EQ(t->ref, 0);
|
||||
t->next = free_threads_;
|
||||
free_threads_ = t;
|
||||
}
|
||||
|
||||
void NFA::CopyCapture(const char** dst, const char** src) {
|
||||
for (int i = 0; i < ncapture_; i+=2) {
|
||||
dst[i] = src[i];
|
||||
dst[i+1] = src[i+1];
|
||||
}
|
||||
t->next = freelist_;
|
||||
freelist_ = t;
|
||||
}
|
||||
|
||||
// Follows all empty arrows from id0 and enqueues all the states reached.
|
||||
@ -372,8 +367,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
||||
matched_ = true;
|
||||
|
||||
Decref(t);
|
||||
for (++i; i != runq->end(); ++i)
|
||||
for (++i; i != runq->end(); ++i) {
|
||||
if (i->value() != NULL)
|
||||
Decref(i->value());
|
||||
}
|
||||
runq->clear();
|
||||
if (ip->greedy(prog_))
|
||||
return ip->out1();
|
||||
@ -382,10 +379,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
||||
break;
|
||||
|
||||
case kInstMatch: {
|
||||
// Avoid invoking undefined behavior when p happens
|
||||
// to be null - and p-1 would be meaningless anyway.
|
||||
if (p == NULL)
|
||||
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||
// by storing p instead of p-1. (What would the latter even mean?!)
|
||||
// This complements the special case in NFA::Search().
|
||||
if (p == NULL) {
|
||||
CopyCapture(match_, t->capture);
|
||||
match_[1] = p;
|
||||
matched_ = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (endmatch_ && p-1 != etext_)
|
||||
break;
|
||||
@ -411,8 +413,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
|
||||
// worse than the one we just found: don't run the
|
||||
// rest of the current Threadq.
|
||||
Decref(t);
|
||||
for (++i; i != runq->end(); ++i)
|
||||
for (++i; i != runq->end(); ++i) {
|
||||
if (i->value() != NULL)
|
||||
Decref(i->value());
|
||||
}
|
||||
runq->clear();
|
||||
return 0;
|
||||
}
|
||||
@ -431,12 +435,12 @@ std::string NFA::FormatCapture(const char** capture) {
|
||||
if (capture[i] == NULL)
|
||||
s += "(?,?)";
|
||||
else if (capture[i+1] == NULL)
|
||||
s += StringPrintf("(%d,?)",
|
||||
(int)(capture[i] - btext_));
|
||||
s += StringPrintf("(%td,?)",
|
||||
capture[i] - btext_);
|
||||
else
|
||||
s += StringPrintf("(%d,%d)",
|
||||
(int)(capture[i] - btext_),
|
||||
(int)(capture[i+1] - btext_));
|
||||
s += StringPrintf("(%td,%td)",
|
||||
capture[i] - btext_,
|
||||
capture[i+1] - btext_);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -448,7 +452,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
return false;
|
||||
|
||||
StringPiece context = const_context;
|
||||
if (context.begin() == NULL)
|
||||
if (context.data() == NULL)
|
||||
context = text;
|
||||
|
||||
// Sanity check: make sure that text lies within context.
|
||||
@ -465,7 +469,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
if (prog_->anchor_end()) {
|
||||
longest = true;
|
||||
endmatch_ = true;
|
||||
etext_ = text.end();
|
||||
}
|
||||
|
||||
if (nsubmatch < 0) {
|
||||
@ -485,32 +488,33 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
}
|
||||
|
||||
match_ = new const char*[ncapture_];
|
||||
memset(match_, 0, ncapture_*sizeof match_[0]);
|
||||
matched_ = false;
|
||||
|
||||
// For debugging prints.
|
||||
btext_ = context.begin();
|
||||
btext_ = context.data();
|
||||
// For convenience.
|
||||
etext_ = text.data() + text.size();
|
||||
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
|
||||
std::string(text).c_str(), std::string(context).c_str(), anchored,
|
||||
longest);
|
||||
std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
|
||||
|
||||
// Set up search.
|
||||
Threadq* runq = &q0_;
|
||||
Threadq* nextq = &q1_;
|
||||
runq->clear();
|
||||
nextq->clear();
|
||||
memset(&match_[0], 0, ncapture_*sizeof match_[0]);
|
||||
|
||||
// Loop over the text, stepping the machine.
|
||||
for (const char* p = text.begin();; p++) {
|
||||
for (const char* p = text.data();; p++) {
|
||||
if (ExtraDebug) {
|
||||
int c = 0;
|
||||
if (p == context.begin())
|
||||
if (p == btext_)
|
||||
c = '^';
|
||||
else if (p > text.end())
|
||||
else if (p > etext_)
|
||||
c = '$';
|
||||
else if (p < text.end())
|
||||
else if (p < etext_)
|
||||
c = p[0] & 0xFF;
|
||||
|
||||
fprintf(stderr, "%c:", c);
|
||||
@ -524,14 +528,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
}
|
||||
|
||||
// This is a no-op the first time around the loop because runq is empty.
|
||||
int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
|
||||
int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
|
||||
DCHECK_EQ(runq->size(), 0);
|
||||
using std::swap;
|
||||
swap(nextq, runq);
|
||||
nextq->clear();
|
||||
if (id != 0) {
|
||||
// We're done: full match ahead.
|
||||
p = text.end();
|
||||
p = etext_;
|
||||
for (;;) {
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
switch (ip->opcode()) {
|
||||
@ -559,30 +563,28 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
break;
|
||||
}
|
||||
|
||||
if (p > text.end())
|
||||
if (p > etext_)
|
||||
break;
|
||||
|
||||
// Start a new thread if there have not been any matches.
|
||||
// (No point in starting a new thread if there have been
|
||||
// matches, since it would be to the right of the match
|
||||
// we already found.)
|
||||
if (!matched_ && (!anchored || p == text.begin())) {
|
||||
// If there's a required first byte for an unanchored search
|
||||
// and we're not in the middle of any possible matches,
|
||||
// use memchr to search for the byte quickly.
|
||||
int fb = prog_->first_byte();
|
||||
if (!matched_ && (!anchored || p == text.data())) {
|
||||
// Try to use prefix accel (e.g. memchr) to skip ahead.
|
||||
// The search must be unanchored and there must be zero
|
||||
// possible matches already.
|
||||
if (!anchored && runq->size() == 0 &&
|
||||
fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
|
||||
p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
|
||||
if (p == NULL) {
|
||||
p = text.end();
|
||||
}
|
||||
p < etext_ && prog_->can_prefix_accel()) {
|
||||
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
|
||||
if (p == NULL)
|
||||
p = etext_;
|
||||
}
|
||||
|
||||
Thread* t = AllocThread();
|
||||
CopyCapture(t->capture, match_);
|
||||
t->capture[0] = p;
|
||||
AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
|
||||
AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
|
||||
t);
|
||||
Decref(t);
|
||||
}
|
||||
@ -593,10 +595,24 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
fprintf(stderr, "dead\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||
// by simply not continuing the loop.
|
||||
// This complements the special case in NFA::Step().
|
||||
if (p == NULL) {
|
||||
(void) Step(runq, nextq, -1, context, p);
|
||||
DCHECK_EQ(runq->size(), 0);
|
||||
using std::swap;
|
||||
swap(nextq, runq);
|
||||
nextq->clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
|
||||
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
|
||||
if (i->value() != NULL)
|
||||
Decref(i->value());
|
||||
}
|
||||
|
||||
if (matched_) {
|
||||
for (int i = 0; i < nsubmatch; i++)
|
||||
@ -605,73 +621,13 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
|
||||
static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
|
||||
if (ExtraDebug)
|
||||
fprintf(stderr, "match (%td,%td)\n",
|
||||
match_[0] - btext_, match_[1] - btext_);
|
||||
match_[0] - btext_,
|
||||
match_[1] - btext_);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Computes whether all successful matches have a common first byte,
|
||||
// and if so, returns that byte. If not, returns -1.
|
||||
int Prog::ComputeFirstByte() {
|
||||
int b = -1;
|
||||
SparseSet q(size());
|
||||
q.insert(start());
|
||||
for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
|
||||
int id = *it;
|
||||
Prog::Inst* ip = inst(id);
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
|
||||
break;
|
||||
|
||||
case kInstMatch:
|
||||
// The empty string matches: no first byte.
|
||||
return -1;
|
||||
|
||||
case kInstByteRange:
|
||||
if (!ip->last())
|
||||
q.insert(id+1);
|
||||
|
||||
// Must match only a single byte
|
||||
if (ip->lo() != ip->hi())
|
||||
return -1;
|
||||
if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
|
||||
return -1;
|
||||
// If we haven't seen any bytes yet, record it;
|
||||
// otherwise must match the one we saw before.
|
||||
if (b == -1)
|
||||
b = ip->lo();
|
||||
else if (b != ip->lo())
|
||||
return -1;
|
||||
break;
|
||||
|
||||
case kInstNop:
|
||||
case kInstCapture:
|
||||
case kInstEmptyWidth:
|
||||
if (!ip->last())
|
||||
q.insert(id+1);
|
||||
|
||||
// Continue on.
|
||||
// Ignore ip->empty() flags for kInstEmptyWidth
|
||||
// in order to be as conservative as possible
|
||||
// (assume all possible empty-width flags are true).
|
||||
if (ip->out())
|
||||
q.insert(ip->out());
|
||||
break;
|
||||
|
||||
case kInstAltMatch:
|
||||
DCHECK(!ip->last());
|
||||
q.insert(id+1);
|
||||
break;
|
||||
|
||||
case kInstFail:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
bool
|
||||
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
|
||||
Anchor anchor, MatchKind kind,
|
||||
|
14
extern/re2/re2/onepass.cc
vendored
14
extern/re2/re2/onepass.cc
vendored
@ -59,11 +59,11 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/sparse_set.h"
|
||||
#include "util/strutil.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/sparse_set.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
// Silence "zero-sized array in struct/union" warning for OneState::action.
|
||||
@ -235,7 +235,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
|
||||
matchcap[i] = NULL;
|
||||
|
||||
StringPiece context = const_context;
|
||||
if (context.begin() == NULL)
|
||||
if (context.data() == NULL)
|
||||
context = text;
|
||||
if (anchor_start() && context.begin() != text.begin())
|
||||
return false;
|
||||
@ -249,8 +249,8 @@ bool Prog::SearchOnePass(const StringPiece& text,
|
||||
// start() is always mapped to the zeroth OneState.
|
||||
OneState* state = IndexToNode(nodes, statesize, 0);
|
||||
uint8_t* bytemap = bytemap_;
|
||||
const char* bp = text.begin();
|
||||
const char* ep = text.end();
|
||||
const char* bp = text.data();
|
||||
const char* ep = text.data() + text.size();
|
||||
const char* p;
|
||||
bool matched = false;
|
||||
matchcap[0] = bp;
|
||||
@ -550,7 +550,7 @@ bool Prog::IsOnePass() {
|
||||
if (!AddQ(&workq, ip->out())) {
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << StringPrintf(
|
||||
"Not OnePass: multiple paths %d -> %d\n", *it, ip->out());
|
||||
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
|
||||
goto fail;
|
||||
}
|
||||
id = ip->out();
|
||||
@ -561,7 +561,7 @@ bool Prog::IsOnePass() {
|
||||
// (3) is violated
|
||||
if (ExtraDebug)
|
||||
LOG(ERROR) << StringPrintf(
|
||||
"Not OnePass: multiple matches from %d\n", *it);
|
||||
"Not OnePass: multiple matches from %d", *it);
|
||||
goto fail;
|
||||
}
|
||||
matched = true;
|
||||
|
113
extern/re2/re2/parse.cc
vendored
113
extern/re2/re2/parse.cc
vendored
@ -27,9 +27,9 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/strutil.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/stringpiece.h"
|
||||
#include "re2/unicode_casefold.h"
|
||||
@ -93,7 +93,7 @@ class Regexp::ParseState {
|
||||
bool PushSimpleOp(RegexpOp op);
|
||||
|
||||
// Pushes a ^ onto the stack.
|
||||
bool PushCarat();
|
||||
bool PushCaret();
|
||||
|
||||
// Pushes a \b (word == true) or \B (word == false) onto the stack.
|
||||
bool PushWordBoundary(bool word);
|
||||
@ -423,7 +423,7 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
|
||||
}
|
||||
|
||||
// Pushes a ^ onto the stack.
|
||||
bool Regexp::ParseState::PushCarat() {
|
||||
bool Regexp::ParseState::PushCaret() {
|
||||
if (flags_ & OneLine) {
|
||||
return PushSimpleOp(kRegexpBeginText);
|
||||
}
|
||||
@ -556,9 +556,10 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
|
||||
}
|
||||
|
||||
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
|
||||
// This should never be called, since we use Walk and not
|
||||
// WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -684,7 +685,7 @@ bool Regexp::ParseState::DoRightParen() {
|
||||
if ((r1 = stacktop_) == NULL ||
|
||||
(r2 = r1->down_) == NULL ||
|
||||
r2->op() != kLeftParen) {
|
||||
status_->set_code(kRegexpMissingParen);
|
||||
status_->set_code(kRegexpUnexpectedParen);
|
||||
status_->set_error_arg(whole_regexp_);
|
||||
return false;
|
||||
}
|
||||
@ -1323,14 +1324,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
|
||||
// Parses a decimal integer, storing it in *np.
|
||||
// Sets *s to span the remainder of the string.
|
||||
static bool ParseInteger(StringPiece* s, int* np) {
|
||||
if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
|
||||
if (s->empty() || !isdigit((*s)[0] & 0xFF))
|
||||
return false;
|
||||
// Disallow leading zeros.
|
||||
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
|
||||
return false;
|
||||
int n = 0;
|
||||
int c;
|
||||
while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
|
||||
while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
|
||||
// Avoid overflow.
|
||||
if (n >= 100000000)
|
||||
return false;
|
||||
@ -1352,16 +1353,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
|
||||
// s must NOT be edited unless MaybeParseRepetition returns true.
|
||||
static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
|
||||
StringPiece s = *sp;
|
||||
if (s.size() == 0 || s[0] != '{')
|
||||
if (s.empty() || s[0] != '{')
|
||||
return false;
|
||||
s.remove_prefix(1); // '{'
|
||||
if (!ParseInteger(&s, lo))
|
||||
return false;
|
||||
if (s.size() == 0)
|
||||
if (s.empty())
|
||||
return false;
|
||||
if (s[0] == ',') {
|
||||
s.remove_prefix(1); // ','
|
||||
if (s.size() == 0)
|
||||
if (s.empty())
|
||||
return false;
|
||||
if (s[0] == '}') {
|
||||
// {2,} means at least 2
|
||||
@ -1375,7 +1376,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
|
||||
// {2} means exactly two
|
||||
*hi = *lo;
|
||||
}
|
||||
if (s.size() == 0 || s[0] != '}')
|
||||
if (s.empty() || s[0] != '}')
|
||||
return false;
|
||||
s.remove_prefix(1); // '}'
|
||||
*sp = s;
|
||||
@ -1416,7 +1417,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
|
||||
static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
|
||||
StringPiece t = s;
|
||||
Rune r;
|
||||
while (t.size() > 0) {
|
||||
while (!t.empty()) {
|
||||
if (StringPieceToRune(&r, &t, status) < 0)
|
||||
return false;
|
||||
}
|
||||
@ -1447,14 +1448,14 @@ static int UnHex(int c) {
|
||||
// Sets *rp to the named character.
|
||||
static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||
RegexpStatus* status, int rune_max) {
|
||||
const char* begin = s->begin();
|
||||
if (s->size() < 1 || (*s)[0] != '\\') {
|
||||
const char* begin = s->data();
|
||||
if (s->empty() || (*s)[0] != '\\') {
|
||||
// Should not happen - caller always checks.
|
||||
status->set_code(kRegexpInternalError);
|
||||
status->set_error_arg(StringPiece());
|
||||
return false;
|
||||
}
|
||||
if (s->size() < 2) {
|
||||
if (s->size() == 1) {
|
||||
status->set_code(kRegexpTrailingBackslash);
|
||||
status->set_error_arg(StringPiece());
|
||||
return false;
|
||||
@ -1485,16 +1486,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||
case '6':
|
||||
case '7':
|
||||
// Single non-zero octal digit is a backreference; not supported.
|
||||
if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
|
||||
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
|
||||
goto BadEscape;
|
||||
FALLTHROUGH_INTENDED;
|
||||
case '0':
|
||||
// consume up to three octal digits; already have one.
|
||||
code = c - '0';
|
||||
if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
|
||||
if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
|
||||
code = code * 8 + c - '0';
|
||||
s->remove_prefix(1); // digit
|
||||
if (s->size() > 0) {
|
||||
if (!s->empty()) {
|
||||
c = (*s)[0];
|
||||
if ('0' <= c && c <= '7') {
|
||||
code = code * 8 + c - '0';
|
||||
@ -1509,7 +1510,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||
|
||||
// Hexadecimal escapes
|
||||
case 'x':
|
||||
if (s->size() == 0)
|
||||
if (s->empty())
|
||||
goto BadEscape;
|
||||
if (StringPieceToRune(&c, s, status) < 0)
|
||||
return false;
|
||||
@ -1529,7 +1530,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||
code = code * 16 + UnHex(c);
|
||||
if (code > rune_max)
|
||||
goto BadEscape;
|
||||
if (s->size() == 0)
|
||||
if (s->empty())
|
||||
goto BadEscape;
|
||||
if (StringPieceToRune(&c, s, status) < 0)
|
||||
return false;
|
||||
@ -1540,7 +1541,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
||||
return true;
|
||||
}
|
||||
// Easy case: two hex digits.
|
||||
if (s->size() == 0)
|
||||
if (s->empty())
|
||||
goto BadEscape;
|
||||
if (StringPieceToRune(&c1, s, status) < 0)
|
||||
return false;
|
||||
@ -1590,7 +1591,7 @@ BadEscape:
|
||||
// Unrecognized escape sequence.
|
||||
status->set_code(kRegexpBadEscape);
|
||||
status->set_error_arg(
|
||||
StringPiece(begin, static_cast<size_t>(s->begin() - begin)));
|
||||
StringPiece(begin, static_cast<size_t>(s->data() - begin)));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1710,7 +1711,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
|
||||
return NULL;
|
||||
// Could use StringPieceToRune, but there aren't
|
||||
// any non-ASCII Perl group names.
|
||||
StringPiece name(s->begin(), 2);
|
||||
StringPiece name(s->data(), 2);
|
||||
const UGroup *g = LookupPerlGroup(name);
|
||||
if (g == NULL)
|
||||
return NULL;
|
||||
@ -1750,8 +1751,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
||||
return kParseError;
|
||||
if (c != '{') {
|
||||
// Name is the bit of string we just skipped over for c.
|
||||
const char* p = seq.begin() + 2;
|
||||
name = StringPiece(p, static_cast<size_t>(s->begin() - p));
|
||||
const char* p = seq.data() + 2;
|
||||
name = StringPiece(p, static_cast<size_t>(s->data() - p));
|
||||
} else {
|
||||
// Name is in braces. Look for closing }
|
||||
size_t end = s->find('}', 0);
|
||||
@ -1762,16 +1763,16 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
||||
status->set_error_arg(seq);
|
||||
return kParseError;
|
||||
}
|
||||
name = StringPiece(s->begin(), end); // without '}'
|
||||
name = StringPiece(s->data(), end); // without '}'
|
||||
s->remove_prefix(end + 1); // with '}'
|
||||
if (!IsValidUTF8(name, status))
|
||||
return kParseError;
|
||||
}
|
||||
|
||||
// Chop seq where s now begins.
|
||||
seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin()));
|
||||
seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
|
||||
|
||||
if (name.size() > 0 && name[0] == '^') {
|
||||
if (!name.empty() && name[0] == '^') {
|
||||
sign = -sign;
|
||||
name.remove_prefix(1); // '^'
|
||||
}
|
||||
@ -1801,14 +1802,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
|
||||
|
||||
// Convert the UnicodeSet to a URange32 and UGroup that we can add.
|
||||
int nr = uset.getRangeCount();
|
||||
URange32* r = new URange32[nr];
|
||||
PODArray<URange32> r(nr);
|
||||
for (int i = 0; i < nr; i++) {
|
||||
r[i].lo = uset.getRangeStart(i);
|
||||
r[i].hi = uset.getRangeEnd(i);
|
||||
}
|
||||
UGroup g = {"", +1, 0, 0, r, nr};
|
||||
UGroup g = {"", +1, 0, 0, r.data(), nr};
|
||||
AddUGroup(cc, &g, sign, parse_flags);
|
||||
delete[] r;
|
||||
#endif
|
||||
|
||||
return kParseOk;
|
||||
@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
|
||||
bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
|
||||
const StringPiece& whole_class,
|
||||
RegexpStatus* status) {
|
||||
if (s->size() == 0) {
|
||||
if (s->empty()) {
|
||||
status->set_code(kRegexpMissingBracket);
|
||||
status->set_error_arg(whole_class);
|
||||
return false;
|
||||
@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
|
||||
|
||||
// Allow regular escape sequences even though
|
||||
// many need not be escaped in this context.
|
||||
if (s->size() >= 1 && (*s)[0] == '\\')
|
||||
if ((*s)[0] == '\\')
|
||||
return ParseEscape(s, rp, status, rune_max_);
|
||||
|
||||
// Otherwise take the next rune.
|
||||
@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
||||
Regexp** out_re,
|
||||
RegexpStatus* status) {
|
||||
StringPiece whole_class = *s;
|
||||
if (s->size() == 0 || (*s)[0] != '[') {
|
||||
if (s->empty() || (*s)[0] != '[') {
|
||||
// Caller checked this.
|
||||
status->set_code(kRegexpInternalError);
|
||||
status->set_error_arg(StringPiece());
|
||||
@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
||||
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
|
||||
re->ccb_ = new CharClassBuilder;
|
||||
s->remove_prefix(1); // '['
|
||||
if (s->size() > 0 && (*s)[0] == '^') {
|
||||
if (!s->empty() && (*s)[0] == '^') {
|
||||
s->remove_prefix(1); // '^'
|
||||
negated = true;
|
||||
if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
|
||||
@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
||||
}
|
||||
}
|
||||
bool first = true; // ] is okay as first char in class
|
||||
while (s->size() > 0 && ((*s)[0] != ']' || first)) {
|
||||
while (!s->empty() && ((*s)[0] != ']' || first)) {
|
||||
// - is only okay unescaped as first or last in class.
|
||||
// Except that Perl allows - anywhere.
|
||||
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
|
||||
@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
||||
// in the flags.
|
||||
re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
|
||||
}
|
||||
if (s->size() == 0) {
|
||||
if (s->empty()) {
|
||||
status->set_code(kRegexpMissingBracket);
|
||||
status->set_error_arg(whole_class);
|
||||
re->Decref();
|
||||
@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
|
||||
// Python rejects names starting with digits.
|
||||
// We don't enforce either of those.
|
||||
static bool IsValidCaptureName(const StringPiece& name) {
|
||||
if (name.size() == 0)
|
||||
if (name.empty())
|
||||
return false;
|
||||
for (size_t i = 0; i < name.size(); i++) {
|
||||
int c = name[i];
|
||||
@ -2074,8 +2074,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
||||
}
|
||||
|
||||
// t is "P<name>...", t[end] == '>'
|
||||
StringPiece capture(t.begin()-2, end+3); // "(?P<name>"
|
||||
StringPiece name(t.begin()+2, end-2); // "name"
|
||||
StringPiece capture(t.data()-2, end+3); // "(?P<name>"
|
||||
StringPiece name(t.data()+2, end-2); // "name"
|
||||
if (!IsValidUTF8(name, status_))
|
||||
return false;
|
||||
if (!IsValidCaptureName(name)) {
|
||||
@ -2089,7 +2089,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
||||
return false;
|
||||
}
|
||||
|
||||
s->remove_prefix(static_cast<size_t>(capture.end() - s->begin()));
|
||||
s->remove_prefix(
|
||||
static_cast<size_t>(capture.data() + capture.size() - s->data()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2098,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
||||
int nflags = flags_;
|
||||
Rune c;
|
||||
for (bool done = false; !done; ) {
|
||||
if (t.size() == 0)
|
||||
if (t.empty())
|
||||
goto BadPerlOp;
|
||||
if (StringPieceToRune(&c, &t, status_) < 0)
|
||||
return false;
|
||||
@ -2173,7 +2174,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
||||
BadPerlOp:
|
||||
status_->set_code(kRegexpBadPerlOp);
|
||||
status_->set_error_arg(
|
||||
StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin())));
|
||||
StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -2216,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
|
||||
if (global_flags & Literal) {
|
||||
// Special parse loop for literal string.
|
||||
while (t.size() > 0) {
|
||||
while (!t.empty()) {
|
||||
Rune r;
|
||||
if (StringPieceToRune(&r, &t, status) < 0)
|
||||
return NULL;
|
||||
@ -2227,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
}
|
||||
|
||||
StringPiece lastunary = StringPiece();
|
||||
while (t.size() > 0) {
|
||||
while (!t.empty()) {
|
||||
StringPiece isunary = StringPiece();
|
||||
switch (t[0]) {
|
||||
default: {
|
||||
@ -2270,7 +2271,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
break;
|
||||
|
||||
case '^': // Beginning of line.
|
||||
if (!ps.PushCarat())
|
||||
if (!ps.PushCaret())
|
||||
return NULL;
|
||||
t.remove_prefix(1); // '^'
|
||||
break;
|
||||
@ -2311,18 +2312,18 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
bool nongreedy = false;
|
||||
t.remove_prefix(1); // '*' or '+' or '?'
|
||||
if (ps.flags() & PerlX) {
|
||||
if (t.size() > 0 && t[0] == '?') {
|
||||
if (!t.empty() && t[0] == '?') {
|
||||
nongreedy = true;
|
||||
t.remove_prefix(1); // '?'
|
||||
}
|
||||
if (lastunary.size() > 0) {
|
||||
if (!lastunary.empty()) {
|
||||
// In Perl it is not allowed to stack repetition operators:
|
||||
// a** is a syntax error, not a double-star.
|
||||
// (and a++ means something else entirely, which we don't support!)
|
||||
status->set_code(kRegexpRepeatOp);
|
||||
status->set_error_arg(StringPiece(
|
||||
lastunary.begin(),
|
||||
static_cast<size_t>(t.begin() - lastunary.begin())));
|
||||
lastunary.data(),
|
||||
static_cast<size_t>(t.data() - lastunary.data())));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -2346,16 +2347,16 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
}
|
||||
bool nongreedy = false;
|
||||
if (ps.flags() & PerlX) {
|
||||
if (t.size() > 0 && t[0] == '?') {
|
||||
if (!t.empty() && t[0] == '?') {
|
||||
nongreedy = true;
|
||||
t.remove_prefix(1); // '?'
|
||||
}
|
||||
if (lastunary.size() > 0) {
|
||||
if (!lastunary.empty()) {
|
||||
// Not allowed to stack repetition operators.
|
||||
status->set_code(kRegexpRepeatOp);
|
||||
status->set_error_arg(StringPiece(
|
||||
lastunary.begin(),
|
||||
static_cast<size_t>(t.begin() - lastunary.begin())));
|
||||
lastunary.data(),
|
||||
static_cast<size_t>(t.data() - lastunary.data())));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -2404,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
|
||||
|
||||
if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
|
||||
t.remove_prefix(2); // '\\', 'Q'
|
||||
while (t.size() > 0) {
|
||||
while (!t.empty()) {
|
||||
if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
|
||||
t.remove_prefix(2); // '\\', 'E'
|
||||
break;
|
||||
|
68
extern/re2/re2/perl_groups.cc
vendored
68
extern/re2/re2/perl_groups.cc
vendored
@ -20,12 +20,12 @@ static const URange16 code3[] = { /* \w */
|
||||
{ 0x61, 0x7a },
|
||||
};
|
||||
const UGroup perl_groups[] = {
|
||||
{ "\\d", +1, code1, 1 },
|
||||
{ "\\D", -1, code1, 1 },
|
||||
{ "\\s", +1, code2, 3 },
|
||||
{ "\\S", -1, code2, 3 },
|
||||
{ "\\w", +1, code3, 4 },
|
||||
{ "\\W", -1, code3, 4 },
|
||||
{ "\\d", +1, code1, 1, 0, 0 },
|
||||
{ "\\D", -1, code1, 1, 0, 0 },
|
||||
{ "\\s", +1, code2, 3, 0, 0 },
|
||||
{ "\\S", -1, code2, 3, 0, 0 },
|
||||
{ "\\w", +1, code3, 4, 0, 0 },
|
||||
{ "\\W", -1, code3, 4, 0, 0 },
|
||||
};
|
||||
const int num_perl_groups = 6;
|
||||
static const URange16 code4[] = { /* [:alnum:] */
|
||||
@ -85,34 +85,34 @@ static const URange16 code17[] = { /* [:xdigit:] */
|
||||
{ 0x61, 0x66 },
|
||||
};
|
||||
const UGroup posix_groups[] = {
|
||||
{ "[:alnum:]", +1, code4, 3 },
|
||||
{ "[:^alnum:]", -1, code4, 3 },
|
||||
{ "[:alpha:]", +1, code5, 2 },
|
||||
{ "[:^alpha:]", -1, code5, 2 },
|
||||
{ "[:ascii:]", +1, code6, 1 },
|
||||
{ "[:^ascii:]", -1, code6, 1 },
|
||||
{ "[:blank:]", +1, code7, 2 },
|
||||
{ "[:^blank:]", -1, code7, 2 },
|
||||
{ "[:cntrl:]", +1, code8, 2 },
|
||||
{ "[:^cntrl:]", -1, code8, 2 },
|
||||
{ "[:digit:]", +1, code9, 1 },
|
||||
{ "[:^digit:]", -1, code9, 1 },
|
||||
{ "[:graph:]", +1, code10, 1 },
|
||||
{ "[:^graph:]", -1, code10, 1 },
|
||||
{ "[:lower:]", +1, code11, 1 },
|
||||
{ "[:^lower:]", -1, code11, 1 },
|
||||
{ "[:print:]", +1, code12, 1 },
|
||||
{ "[:^print:]", -1, code12, 1 },
|
||||
{ "[:punct:]", +1, code13, 4 },
|
||||
{ "[:^punct:]", -1, code13, 4 },
|
||||
{ "[:space:]", +1, code14, 2 },
|
||||
{ "[:^space:]", -1, code14, 2 },
|
||||
{ "[:upper:]", +1, code15, 1 },
|
||||
{ "[:^upper:]", -1, code15, 1 },
|
||||
{ "[:word:]", +1, code16, 4 },
|
||||
{ "[:^word:]", -1, code16, 4 },
|
||||
{ "[:xdigit:]", +1, code17, 3 },
|
||||
{ "[:^xdigit:]", -1, code17, 3 },
|
||||
{ "[:alnum:]", +1, code4, 3, 0, 0 },
|
||||
{ "[:^alnum:]", -1, code4, 3, 0, 0 },
|
||||
{ "[:alpha:]", +1, code5, 2, 0, 0 },
|
||||
{ "[:^alpha:]", -1, code5, 2, 0, 0 },
|
||||
{ "[:ascii:]", +1, code6, 1, 0, 0 },
|
||||
{ "[:^ascii:]", -1, code6, 1, 0, 0 },
|
||||
{ "[:blank:]", +1, code7, 2, 0, 0 },
|
||||
{ "[:^blank:]", -1, code7, 2, 0, 0 },
|
||||
{ "[:cntrl:]", +1, code8, 2, 0, 0 },
|
||||
{ "[:^cntrl:]", -1, code8, 2, 0, 0 },
|
||||
{ "[:digit:]", +1, code9, 1, 0, 0 },
|
||||
{ "[:^digit:]", -1, code9, 1, 0, 0 },
|
||||
{ "[:graph:]", +1, code10, 1, 0, 0 },
|
||||
{ "[:^graph:]", -1, code10, 1, 0, 0 },
|
||||
{ "[:lower:]", +1, code11, 1, 0, 0 },
|
||||
{ "[:^lower:]", -1, code11, 1, 0, 0 },
|
||||
{ "[:print:]", +1, code12, 1, 0, 0 },
|
||||
{ "[:^print:]", -1, code12, 1, 0, 0 },
|
||||
{ "[:punct:]", +1, code13, 4, 0, 0 },
|
||||
{ "[:^punct:]", -1, code13, 4, 0, 0 },
|
||||
{ "[:space:]", +1, code14, 2, 0, 0 },
|
||||
{ "[:^space:]", -1, code14, 2, 0, 0 },
|
||||
{ "[:upper:]", +1, code15, 1, 0, 0 },
|
||||
{ "[:^upper:]", -1, code15, 1, 0, 0 },
|
||||
{ "[:word:]", +1, code16, 4, 0, 0 },
|
||||
{ "[:^word:]", -1, code16, 4, 0, 0 },
|
||||
{ "[:xdigit:]", +1, code17, 3, 0, 0 },
|
||||
{ "[:^xdigit:]", -1, code17, 3, 0, 0 },
|
||||
};
|
||||
const int num_posix_groups = 28;
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef UTIL_POD_ARRAY_H_
|
||||
#define UTIL_POD_ARRAY_H_
|
||||
#ifndef RE2_POD_ARRAY_H_
|
||||
#define RE2_POD_ARRAY_H_
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
@ -13,7 +13,7 @@ namespace re2 {
|
||||
template <typename T>
|
||||
class PODArray {
|
||||
public:
|
||||
static_assert(std::is_pod<T>::value,
|
||||
static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
|
||||
"T must be POD");
|
||||
|
||||
PODArray()
|
||||
@ -52,4 +52,4 @@ class PODArray {
|
||||
|
||||
} // namespace re2
|
||||
|
||||
#endif // UTIL_POD_ARRAY_H_
|
||||
#endif // RE2_POD_ARRAY_H_
|
5
extern/re2/re2/prefilter.cc
vendored
5
extern/re2/re2/prefilter.cc
vendored
@ -648,14 +648,15 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
|
||||
return NULL;
|
||||
|
||||
Regexp* simple = re->Simplify();
|
||||
Prefilter::Info *info = BuildInfo(simple);
|
||||
if (simple == NULL)
|
||||
return NULL;
|
||||
|
||||
Prefilter::Info* info = BuildInfo(simple);
|
||||
simple->Decref();
|
||||
if (info == NULL)
|
||||
return NULL;
|
||||
|
||||
Prefilter* m = info->TakeMatch();
|
||||
|
||||
delete info;
|
||||
return m;
|
||||
}
|
||||
|
4
extern/re2/re2/prefilter_tree.cc
vendored
4
extern/re2/re2/prefilter_tree.cc
vendored
@ -107,7 +107,7 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
|
||||
|
||||
Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
|
||||
std::string node_string = NodeString(node);
|
||||
std::map<std::string, Prefilter*>::iterator iter = nodes->find(node_string);
|
||||
NodeMap::iterator iter = nodes->find(node_string);
|
||||
if (iter == nodes->end())
|
||||
return NULL;
|
||||
return (*iter).second;
|
||||
@ -377,7 +377,7 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
|
||||
LOG(ERROR) << it->first;
|
||||
}
|
||||
LOG(ERROR) << "Map:";
|
||||
for (std::map<std::string, Prefilter*>::const_iterator iter = nodes->begin();
|
||||
for (NodeMap::const_iterator iter = nodes->begin();
|
||||
iter != nodes->end(); ++iter)
|
||||
LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
|
||||
<< " Str: " << (*iter).first;
|
||||
|
2
extern/re2/re2/prefilter_tree.h
vendored
2
extern/re2/re2/prefilter_tree.h
vendored
@ -21,8 +21,8 @@
|
||||
#include <vector>
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "re2/prefilter.h"
|
||||
#include "re2/sparse_array.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
|
149
extern/re2/re2/prog.cc
vendored
149
extern/re2/re2/prog.cc
vendored
@ -7,6 +7,12 @@
|
||||
|
||||
#include "re2/prog.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#include <immintrin.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
@ -109,8 +115,9 @@ Prog::Prog()
|
||||
start_unanchored_(0),
|
||||
size_(0),
|
||||
bytemap_range_(0),
|
||||
first_byte_(-1),
|
||||
flags_(0),
|
||||
prefix_size_(0),
|
||||
prefix_front_(-1),
|
||||
prefix_back_(-1),
|
||||
list_count_(0),
|
||||
dfa_mem_(0),
|
||||
dfa_first_(NULL),
|
||||
@ -185,14 +192,31 @@ std::string Prog::DumpByteMap() {
|
||||
return map;
|
||||
}
|
||||
|
||||
int Prog::first_byte() {
|
||||
std::call_once(first_byte_once_, [](Prog* prog) {
|
||||
prog->first_byte_ = prog->ComputeFirstByte();
|
||||
}, this);
|
||||
return first_byte_;
|
||||
}
|
||||
// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
||||
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
||||
for (;;) {
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
||||
return false;
|
||||
|
||||
static bool IsMatch(Prog*, Prog::Inst*);
|
||||
case kInstAlt:
|
||||
case kInstAltMatch:
|
||||
case kInstByteRange:
|
||||
case kInstFail:
|
||||
case kInstEmptyWidth:
|
||||
return false;
|
||||
|
||||
case kInstCapture:
|
||||
case kInstNop:
|
||||
ip = prog->inst(ip->out());
|
||||
break;
|
||||
|
||||
case kInstMatch:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Peep-hole optimizer.
|
||||
void Prog::Optimize() {
|
||||
@ -258,54 +282,28 @@ void Prog::Optimize() {
|
||||
}
|
||||
}
|
||||
|
||||
// Is ip a guaranteed match at end of text, perhaps after some capturing?
|
||||
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
|
||||
for (;;) {
|
||||
switch (ip->opcode()) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
|
||||
return false;
|
||||
|
||||
case kInstAlt:
|
||||
case kInstAltMatch:
|
||||
case kInstByteRange:
|
||||
case kInstFail:
|
||||
case kInstEmptyWidth:
|
||||
return false;
|
||||
|
||||
case kInstCapture:
|
||||
case kInstNop:
|
||||
ip = prog->inst(ip->out());
|
||||
break;
|
||||
|
||||
case kInstMatch:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
|
||||
int flags = 0;
|
||||
|
||||
// ^ and \A
|
||||
if (p == text.begin())
|
||||
if (p == text.data())
|
||||
flags |= kEmptyBeginText | kEmptyBeginLine;
|
||||
else if (p[-1] == '\n')
|
||||
flags |= kEmptyBeginLine;
|
||||
|
||||
// $ and \z
|
||||
if (p == text.end())
|
||||
if (p == text.data() + text.size())
|
||||
flags |= kEmptyEndText | kEmptyEndLine;
|
||||
else if (p < text.end() && p[0] == '\n')
|
||||
else if (p < text.data() + text.size() && p[0] == '\n')
|
||||
flags |= kEmptyEndLine;
|
||||
|
||||
// \b and \B
|
||||
if (p == text.begin() && p == text.end()) {
|
||||
if (p == text.data() && p == text.data() + text.size()) {
|
||||
// no word boundary here
|
||||
} else if (p == text.begin()) {
|
||||
} else if (p == text.data()) {
|
||||
if (IsWordChar(p[0]))
|
||||
flags |= kEmptyWordBoundary;
|
||||
} else if (p == text.end()) {
|
||||
} else if (p == text.data() + text.size()) {
|
||||
if (IsWordChar(p[-1]))
|
||||
flags |= kEmptyWordBoundary;
|
||||
} else {
|
||||
@ -918,4 +916,73 @@ void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// Finds the least significant non-zero bit in n.
|
||||
static int FindLSBSet(uint32_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
#if defined(__GNUC__)
|
||||
return __builtin_ctz(n);
|
||||
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||
unsigned long c;
|
||||
_BitScanForward(&c, n);
|
||||
return static_cast<int>(c);
|
||||
#else
|
||||
int c = 31;
|
||||
for (int shift = 1 << 4; shift != 0; shift >>= 1) {
|
||||
uint32_t word = n << shift;
|
||||
if (word != 0) {
|
||||
n = word;
|
||||
c -= shift;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
|
||||
DCHECK_GE(prefix_size_, 2);
|
||||
if (size < prefix_size_)
|
||||
return NULL;
|
||||
// Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
|
||||
// This also means that probing for prefix_back_ doesn't go out of bounds.
|
||||
size -= prefix_size_-1;
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
|
||||
if (size >= sizeof(__m256i)) {
|
||||
const __m256i* fp = reinterpret_cast<const __m256i*>(
|
||||
reinterpret_cast<const char*>(data));
|
||||
const __m256i* bp = reinterpret_cast<const __m256i*>(
|
||||
reinterpret_cast<const char*>(data) + prefix_size_-1);
|
||||
const __m256i* endfp = fp + size/sizeof(__m256i);
|
||||
const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
|
||||
const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
|
||||
while (fp != endfp) {
|
||||
const __m256i f_loadu = _mm256_loadu_si256(fp++);
|
||||
const __m256i b_loadu = _mm256_loadu_si256(bp++);
|
||||
const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
|
||||
const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
|
||||
const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
|
||||
if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
|
||||
const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
|
||||
const int fb_movemask = _mm256_movemask_epi8(fb_and);
|
||||
const int fb_ctz = FindLSBSet(fb_movemask);
|
||||
return reinterpret_cast<const char*>(fp-1) + fb_ctz;
|
||||
}
|
||||
}
|
||||
data = fp;
|
||||
size = size%sizeof(__m256i);
|
||||
}
|
||||
#endif
|
||||
|
||||
const char* p0 = reinterpret_cast<const char*>(data);
|
||||
for (const char* p = p0;; p++) {
|
||||
DCHECK_GE(size, static_cast<size_t>(p-p0));
|
||||
p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
|
||||
if (p == NULL || p[prefix_size_-1] == prefix_back_)
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
36
extern/re2/re2/prog.h
vendored
36
extern/re2/re2/prog.h
vendored
@ -18,10 +18,10 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "util/sparse_set.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/sparse_array.h"
|
||||
#include "re2/sparse_set.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -198,8 +198,8 @@ class Prog {
|
||||
|
||||
Inst *inst(int id) { return &inst_[id]; }
|
||||
int start() { return start_; }
|
||||
int start_unanchored() { return start_unanchored_; }
|
||||
void set_start(int start) { start_ = start; }
|
||||
int start_unanchored() { return start_unanchored_; }
|
||||
void set_start_unanchored(int start) { start_unanchored_ = start; }
|
||||
int size() { return size_; }
|
||||
bool reversed() { return reversed_; }
|
||||
@ -207,19 +207,27 @@ class Prog {
|
||||
int list_count() { return list_count_; }
|
||||
int inst_count(InstOp op) { return inst_count_[op]; }
|
||||
uint16_t* list_heads() { return list_heads_.data(); }
|
||||
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
|
||||
int64_t dfa_mem() { return dfa_mem_; }
|
||||
int flags() { return flags_; }
|
||||
void set_flags(int flags) { flags_ = flags; }
|
||||
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
|
||||
bool anchor_start() { return anchor_start_; }
|
||||
void set_anchor_start(bool b) { anchor_start_ = b; }
|
||||
bool anchor_end() { return anchor_end_; }
|
||||
void set_anchor_end(bool b) { anchor_end_ = b; }
|
||||
int bytemap_range() { return bytemap_range_; }
|
||||
const uint8_t* bytemap() { return bytemap_; }
|
||||
bool can_prefix_accel() { return prefix_size_ != 0; }
|
||||
|
||||
// Lazily computed.
|
||||
int first_byte();
|
||||
// Accelerates to the first likely occurrence of the prefix.
|
||||
// Returns a pointer to the first byte or NULL if not found.
|
||||
const void* PrefixAccel(const void* data, size_t size) {
|
||||
DCHECK_GE(prefix_size_, 1);
|
||||
return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
|
||||
: PrefixAccel_FrontAndBack(data, size);
|
||||
}
|
||||
|
||||
// An implementation of prefix accel that looks for prefix_front_ and
|
||||
// prefix_back_ to return fewer false positives than memchr(3) alone.
|
||||
const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
|
||||
|
||||
// Returns string representation of program for debugging.
|
||||
std::string Dump();
|
||||
@ -297,10 +305,6 @@ class Prog {
|
||||
// Compute bytemap.
|
||||
void ComputeByteMap();
|
||||
|
||||
// Computes whether all matches must begin with the same first
|
||||
// byte, and if so, returns that byte. If not, returns -1.
|
||||
int ComputeFirstByte();
|
||||
|
||||
// Run peep-hole optimizer on program.
|
||||
void Optimize();
|
||||
|
||||
@ -402,8 +406,9 @@ class Prog {
|
||||
int start_unanchored_; // unanchored entry point for program
|
||||
int size_; // number of instructions
|
||||
int bytemap_range_; // bytemap_[x] < bytemap_range_
|
||||
int first_byte_; // required first byte for match, or -1 if none
|
||||
int flags_; // regexp parse flags
|
||||
size_t prefix_size_; // size of prefix (0 if no prefix)
|
||||
int prefix_front_; // first byte of prefix (-1 if no prefix)
|
||||
int prefix_back_; // last byte of prefix (-1 if no prefix)
|
||||
|
||||
int list_count_; // count of lists (see above)
|
||||
int inst_count_[kNumInst]; // count of instructions by opcode
|
||||
@ -419,7 +424,6 @@ class Prog {
|
||||
|
||||
uint8_t bytemap_[256]; // map from input bytes to byte classes
|
||||
|
||||
std::once_flag first_byte_once_;
|
||||
std::once_flag dfa_first_once_;
|
||||
std::once_flag dfa_longest_once_;
|
||||
|
||||
|
405
extern/re2/re2/re2.cc
vendored
405
extern/re2/re2/re2.cc
vendored
@ -12,10 +12,14 @@
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
@ -24,11 +28,11 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "util/strutil.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/sparse_array.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -79,6 +83,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
|
||||
return RE2::ErrorMissingBracket;
|
||||
case re2::kRegexpMissingParen:
|
||||
return RE2::ErrorMissingParen;
|
||||
case re2::kRegexpUnexpectedParen:
|
||||
return RE2::ErrorUnexpectedParen;
|
||||
case re2::kRegexpTrailingBackslash:
|
||||
return RE2::ErrorTrailingBackslash;
|
||||
case re2::kRegexpRepeatArgument:
|
||||
@ -172,15 +178,20 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
|
||||
empty_group_names = new std::map<int, std::string>;
|
||||
});
|
||||
|
||||
pattern_ = std::string(pattern);
|
||||
pattern_.assign(pattern.data(), pattern.size());
|
||||
options_.Copy(options);
|
||||
entire_regexp_ = NULL;
|
||||
error_ = empty_string;
|
||||
error_code_ = NoError;
|
||||
error_arg_.clear();
|
||||
prefix_.clear();
|
||||
prefix_foldcase_ = false;
|
||||
suffix_regexp_ = NULL;
|
||||
prog_ = NULL;
|
||||
num_captures_ = -1;
|
||||
is_one_pass_ = false;
|
||||
|
||||
rprog_ = NULL;
|
||||
error_ = empty_string;
|
||||
error_code_ = NoError;
|
||||
named_groups_ = NULL;
|
||||
group_names_ = NULL;
|
||||
|
||||
@ -239,9 +250,11 @@ re2::Prog* RE2::ReverseProg() const {
|
||||
if (re->rprog_ == NULL) {
|
||||
if (re->options_.log_errors())
|
||||
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
|
||||
re->error_ =
|
||||
new std::string("pattern too large - reverse compile failed");
|
||||
re->error_code_ = RE2::ErrorPatternTooLarge;
|
||||
// We no longer touch error_ and error_code_ because failing to compile
|
||||
// the reverse Prog is not a showstopper: falling back to NFA execution
|
||||
// is fine. More importantly, an RE2 object is supposed to be logically
|
||||
// immutable: whatever ok() would have returned after Init() completed,
|
||||
// it should continue to return that no matter what ReverseProg() does.
|
||||
}
|
||||
}, this);
|
||||
return rprog_;
|
||||
@ -277,28 +290,54 @@ int RE2::ReverseProgramSize() const {
|
||||
return prog->size();
|
||||
}
|
||||
|
||||
static int Fanout(Prog* prog, std::map<int, int>* histogram) {
|
||||
SparseArray<int> fanout(prog->size());
|
||||
prog->Fanout(&fanout);
|
||||
histogram->clear();
|
||||
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
|
||||
// TODO(junyer): Optimise this?
|
||||
int bucket = 0;
|
||||
while (1 << bucket < i->value()) {
|
||||
bucket++;
|
||||
// Finds the most significant non-zero bit in n.
|
||||
static int FindMSBSet(uint32_t n) {
|
||||
DCHECK_NE(n, 0);
|
||||
#if defined(__GNUC__)
|
||||
return 31 ^ __builtin_clz(n);
|
||||
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||
unsigned long c;
|
||||
_BitScanReverse(&c, n);
|
||||
return static_cast<int>(c);
|
||||
#else
|
||||
int c = 0;
|
||||
for (int shift = 1 << 4; shift != 0; shift >>= 1) {
|
||||
uint32_t word = n >> shift;
|
||||
if (word != 0) {
|
||||
n = word;
|
||||
c += shift;
|
||||
}
|
||||
(*histogram)[bucket]++;
|
||||
}
|
||||
return histogram->rbegin()->first;
|
||||
return c;
|
||||
#endif
|
||||
}
|
||||
|
||||
int RE2::ProgramFanout(std::map<int, int>* histogram) const {
|
||||
static int Fanout(Prog* prog, std::vector<int>* histogram) {
|
||||
SparseArray<int> fanout(prog->size());
|
||||
prog->Fanout(&fanout);
|
||||
int data[32] = {};
|
||||
int size = 0;
|
||||
for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
|
||||
if (i->value() == 0)
|
||||
continue;
|
||||
uint32_t value = i->value();
|
||||
int bucket = FindMSBSet(value);
|
||||
bucket += value & (value-1) ? 1 : 0;
|
||||
++data[bucket];
|
||||
size = std::max(size, bucket+1);
|
||||
}
|
||||
if (histogram != NULL)
|
||||
histogram->assign(data, data+size);
|
||||
return size-1;
|
||||
}
|
||||
|
||||
int RE2::ProgramFanout(std::vector<int>* histogram) const {
|
||||
if (prog_ == NULL)
|
||||
return -1;
|
||||
return Fanout(prog_, histogram);
|
||||
}
|
||||
|
||||
int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
|
||||
int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
|
||||
if (prog_ == NULL)
|
||||
return -1;
|
||||
Prog* prog = ReverseProg();
|
||||
@ -368,6 +407,8 @@ bool RE2::Replace(std::string* str,
|
||||
const StringPiece& rewrite) {
|
||||
StringPiece vec[kVecSize];
|
||||
int nvec = 1 + MaxSubmatch(rewrite);
|
||||
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||
return false;
|
||||
if (nvec > static_cast<int>(arraysize(vec)))
|
||||
return false;
|
||||
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
|
||||
@ -377,8 +418,8 @@ bool RE2::Replace(std::string* str,
|
||||
if (!re.Rewrite(&s, rewrite, vec, nvec))
|
||||
return false;
|
||||
|
||||
assert(vec[0].begin() >= str->data());
|
||||
assert(vec[0].end() <= str->data()+str->size());
|
||||
assert(vec[0].data() >= str->data());
|
||||
assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
|
||||
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
|
||||
return true;
|
||||
}
|
||||
@ -388,6 +429,8 @@ int RE2::GlobalReplace(std::string* str,
|
||||
const StringPiece& rewrite) {
|
||||
StringPiece vec[kVecSize];
|
||||
int nvec = 1 + MaxSubmatch(rewrite);
|
||||
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||
return false;
|
||||
if (nvec > static_cast<int>(arraysize(vec)))
|
||||
return false;
|
||||
|
||||
@ -406,9 +449,9 @@ int RE2::GlobalReplace(std::string* str,
|
||||
if (!re.Match(*str, static_cast<size_t>(p - str->data()),
|
||||
str->size(), UNANCHORED, vec, nvec))
|
||||
break;
|
||||
if (p < vec[0].begin())
|
||||
out.append(p, vec[0].begin() - p);
|
||||
if (vec[0].begin() == lastend && vec[0].size() == 0) {
|
||||
if (p < vec[0].data())
|
||||
out.append(p, vec[0].data() - p);
|
||||
if (vec[0].data() == lastend && vec[0].empty()) {
|
||||
// Disallow empty match at end of last match: skip ahead.
|
||||
//
|
||||
// fullrune() takes int, not ptrdiff_t. However, it just looks
|
||||
@ -439,7 +482,7 @@ int RE2::GlobalReplace(std::string* str,
|
||||
continue;
|
||||
}
|
||||
re.Rewrite(&out, rewrite, vec, nvec);
|
||||
p = vec[0].end();
|
||||
p = vec[0].data() + vec[0].size();
|
||||
lastend = p;
|
||||
count++;
|
||||
}
|
||||
@ -460,9 +503,10 @@ bool RE2::Extract(const StringPiece& text,
|
||||
std::string* out) {
|
||||
StringPiece vec[kVecSize];
|
||||
int nvec = 1 + MaxSubmatch(rewrite);
|
||||
if (nvec > 1 + re.NumberOfCapturingGroups())
|
||||
return false;
|
||||
if (nvec > static_cast<int>(arraysize(vec)))
|
||||
return false;
|
||||
|
||||
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
|
||||
return false;
|
||||
|
||||
@ -610,6 +654,8 @@ bool RE2::Match(const StringPiece& text,
|
||||
// If the regexp is anchored explicitly, must not be in middle of text.
|
||||
if (prog_->anchor_start() && startpos != 0)
|
||||
return false;
|
||||
if (prog_->anchor_end() && endpos != text.size())
|
||||
return false;
|
||||
|
||||
// If the regexp is anchored explicitly, update re_anchor
|
||||
// so that we can potentially fall into a faster case below.
|
||||
@ -643,7 +689,6 @@ bool RE2::Match(const StringPiece& text,
|
||||
Prog::MatchKind kind = Prog::kFirstMatch;
|
||||
if (options_.longest_match())
|
||||
kind = Prog::kLongestMatch;
|
||||
bool skipped_test = false;
|
||||
|
||||
bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
|
||||
|
||||
@ -655,38 +700,82 @@ bool RE2::Match(const StringPiece& text,
|
||||
bool can_bit_state = prog_->CanBitState();
|
||||
size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
|
||||
|
||||
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||
hooks::context = this;
|
||||
#endif
|
||||
bool dfa_failed = false;
|
||||
bool skipped_test = false;
|
||||
switch (re_anchor) {
|
||||
default:
|
||||
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
|
||||
return false;
|
||||
|
||||
case UNANCHORED: {
|
||||
if (!prog_->SearchDFA(subtext, text, anchor, kind,
|
||||
matchp, &dfa_failed, NULL)) {
|
||||
if (prog_->anchor_end()) {
|
||||
// This is a very special case: we don't need the forward DFA because
|
||||
// we already know where the match must end! Instead, the reverse DFA
|
||||
// can say whether there is a match and (optionally) where it starts.
|
||||
Prog* prog = ReverseProg();
|
||||
if (prog == NULL) {
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
}
|
||||
if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
|
||||
Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
||||
<< "list count " << prog_->list_count();
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_.size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (matchp == NULL) // Matched. Don't care where
|
||||
if (matchp == NULL) // Matched. Don't care where.
|
||||
return true;
|
||||
// SearchDFA set match[0].end() but didn't know where the
|
||||
// match started. Run the regexp backward from match[0].end()
|
||||
break;
|
||||
}
|
||||
|
||||
if (!prog_->SearchDFA(subtext, text, anchor, kind,
|
||||
matchp, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_.size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (matchp == NULL) // Matched. Don't care where.
|
||||
return true;
|
||||
// SearchDFA set match.end() but didn't know where the
|
||||
// match started. Run the regexp backward from match.end()
|
||||
// to find the longest possible match -- that's where it started.
|
||||
Prog* prog = ReverseProg();
|
||||
if (prog == NULL)
|
||||
return false;
|
||||
if (prog == NULL) {
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
}
|
||||
if (!prog->SearchDFA(match, text, Prog::kAnchored,
|
||||
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range() << ", "
|
||||
<< "list count " << prog->list_count();
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_.size() << ", "
|
||||
<< "program size " << prog->size() << ", "
|
||||
<< "list count " << prog->list_count() << ", "
|
||||
<< "bytemap range " << prog->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
@ -724,9 +813,11 @@ bool RE2::Match(const StringPiece& text,
|
||||
&match, &dfa_failed, NULL)) {
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
||||
<< "list count " << prog_->list_count();
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "pattern length " << pattern_.size() << ", "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
// Fall back to NFA below.
|
||||
skipped_test = true;
|
||||
break;
|
||||
@ -928,13 +1019,13 @@ bool RE2::Rewrite(std::string* out,
|
||||
int n = (c - '0');
|
||||
if (n >= veclen) {
|
||||
if (options_.log_errors()) {
|
||||
LOG(ERROR) << "requested group " << n
|
||||
<< " in regexp " << rewrite.data();
|
||||
LOG(ERROR) << "invalid substitution \\" << n
|
||||
<< " from " << veclen << " groups";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
StringPiece snip = vec[n];
|
||||
if (snip.size() > 0)
|
||||
if (!snip.empty())
|
||||
out->append(snip.data(), snip.size());
|
||||
} else if (c == '\\') {
|
||||
out->push_back('\\');
|
||||
@ -949,41 +1040,49 @@ bool RE2::Rewrite(std::string* out,
|
||||
|
||||
/***** Parsers for various types *****/
|
||||
|
||||
bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
|
||||
namespace re2_internal {
|
||||
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, void* dest) {
|
||||
// We fail if somebody asked us to store into a non-NULL void* pointer
|
||||
return (dest == NULL);
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, std::string* dest) {
|
||||
if (dest == NULL) return true;
|
||||
reinterpret_cast<std::string*>(dest)->assign(str, n);
|
||||
dest->assign(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, StringPiece* dest) {
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
|
||||
*dest = StringPiece(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, char* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<char*>(dest)) = str[0];
|
||||
*dest = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, signed char* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<signed char*>(dest)) = str[0];
|
||||
*dest = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, unsigned char* dest) {
|
||||
if (n != 1) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned char*>(dest)) = str[0];
|
||||
*dest = str[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1047,10 +1146,40 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
|
||||
return buf;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_long_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, float* dest) {
|
||||
if (n == 0) return false;
|
||||
static const int kMaxLength = 200;
|
||||
char buf[kMaxLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
||||
char* end;
|
||||
errno = 0;
|
||||
float r = strtof(str, &end);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, double* dest) {
|
||||
if (n == 0) return false;
|
||||
static const int kMaxLength = 200;
|
||||
char buf[kMaxLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
||||
char* end;
|
||||
errno = 0;
|
||||
double r = strtod(str, &end);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, long* dest, int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||
@ -1060,14 +1189,12 @@ bool RE2::Arg::parse_long_radix(const char* str,
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<long*>(dest)) = r;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_ulong_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||
@ -1083,62 +1210,52 @@ bool RE2::Arg::parse_ulong_radix(const char* str,
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned long*>(dest)) = r;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_short_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, short* dest, int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||
if ((short)r != r) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<short*>(dest)) = (short)r;
|
||||
*dest = (short)r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_ushort_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||
if ((unsigned short)r != r) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
|
||||
*dest = (unsigned short)r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_int_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, int* dest, int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||
if ((int)r != r) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<int*>(dest)) = (int)r;
|
||||
*dest = (int)r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_uint_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (!Parse(str, n, &r, radix)) return false; // Could not parse
|
||||
if ((unsigned int)r != r) return false; // Out of range
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
|
||||
*dest = (unsigned int)r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_longlong_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, long long* dest, int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||
@ -1148,14 +1265,12 @@ bool RE2::Arg::parse_longlong_radix(const char* str,
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<long long*>(dest)) = r;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
||||
size_t n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
template <>
|
||||
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
||||
@ -1170,67 +1285,47 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
*(reinterpret_cast<unsigned long long*>(dest)) = r;
|
||||
*dest = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_double_float(const char* str, size_t n, bool isfloat,
|
||||
void* dest) {
|
||||
if (n == 0) return false;
|
||||
static const int kMaxLength = 200;
|
||||
char buf[kMaxLength+1];
|
||||
str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
||||
char* end;
|
||||
errno = 0;
|
||||
double r;
|
||||
if (isfloat) {
|
||||
r = strtof(str, &end);
|
||||
} else {
|
||||
r = strtod(str, &end);
|
||||
}
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
if (isfloat) {
|
||||
*(reinterpret_cast<float*>(dest)) = (float)r;
|
||||
} else {
|
||||
*(reinterpret_cast<double*>(dest)) = r;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace re2_internal
|
||||
|
||||
bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
|
||||
return parse_double_float(str, n, false, dest);
|
||||
}
|
||||
namespace hooks {
|
||||
|
||||
bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
|
||||
return parse_double_float(str, n, true, dest);
|
||||
}
|
||||
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||
thread_local const RE2* context = NULL;
|
||||
#endif
|
||||
|
||||
#define DEFINE_INTEGER_PARSER(name) \
|
||||
bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 10); \
|
||||
} \
|
||||
bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 16); \
|
||||
} \
|
||||
bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 8); \
|
||||
} \
|
||||
bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n, \
|
||||
void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 0); \
|
||||
}
|
||||
template <typename T>
|
||||
union Hook {
|
||||
void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
|
||||
T* Load() const { return cb_.load(std::memory_order_acquire); }
|
||||
|
||||
DEFINE_INTEGER_PARSER(short);
|
||||
DEFINE_INTEGER_PARSER(ushort);
|
||||
DEFINE_INTEGER_PARSER(int);
|
||||
DEFINE_INTEGER_PARSER(uint);
|
||||
DEFINE_INTEGER_PARSER(long);
|
||||
DEFINE_INTEGER_PARSER(ulong);
|
||||
DEFINE_INTEGER_PARSER(longlong);
|
||||
DEFINE_INTEGER_PARSER(ulonglong);
|
||||
#if !defined(__clang__) && defined(_MSC_VER)
|
||||
// Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
|
||||
// this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
|
||||
static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
|
||||
"std::atomic<T*> must be always lock-free");
|
||||
T* cb_for_constinit_;
|
||||
#endif
|
||||
|
||||
#undef DEFINE_INTEGER_PARSER
|
||||
std::atomic<T*> cb_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
static void DoNothing(const T&) {}
|
||||
|
||||
#define DEFINE_HOOK(type, name) \
|
||||
static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
|
||||
void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
|
||||
type##Callback* Get##type##Hook() { return name##_hook.Load(); }
|
||||
|
||||
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
|
||||
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
|
||||
|
||||
#undef DEFINE_HOOK
|
||||
|
||||
} // namespace hooks
|
||||
|
||||
} // namespace re2
|
||||
|
405
extern/re2/re2/re2.h
vendored
405
extern/re2/re2/re2.h
vendored
@ -30,6 +30,19 @@
|
||||
// "(?i)hello" -- (?i) turns on case-insensitive matching
|
||||
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
|
||||
//
|
||||
// The double backslashes are needed when writing C++ string literals.
|
||||
// However, they should NOT be used when writing C++11 raw string literals:
|
||||
//
|
||||
// R"(hello (\w+) world)" -- \w matches a "word" character
|
||||
// R"(version (\d+))" -- \d matches a digit
|
||||
// R"(hello\s+world)" -- \s matches any whitespace character
|
||||
// R"(\b(\w+)\b)" -- \b matches non-empty string at word boundary
|
||||
// R"((?i)hello)" -- (?i) turns on case-insensitive matching
|
||||
// R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible
|
||||
//
|
||||
// When using UTF-8 encoding, case-insensitive matching will perform
|
||||
// simple case folding, not full case folding.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING INTERFACE:
|
||||
//
|
||||
@ -195,6 +208,12 @@
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
@ -229,6 +248,7 @@ class RE2 {
|
||||
ErrorBadCharRange, // bad character class range
|
||||
ErrorMissingBracket, // missing closing ]
|
||||
ErrorMissingParen, // missing closing )
|
||||
ErrorUnexpectedParen, // unexpected closing )
|
||||
ErrorTrailingBackslash, // trailing \ at end of regexp
|
||||
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
|
||||
ErrorRepeatSize, // bad repetition argument
|
||||
@ -287,11 +307,11 @@ class RE2 {
|
||||
int ProgramSize() const;
|
||||
int ReverseProgramSize() const;
|
||||
|
||||
// EXPERIMENTAL! SUBJECT TO CHANGE!
|
||||
// Outputs the program fanout as a histogram bucketed by powers of 2.
|
||||
// If histogram is not null, outputs the program fanout
|
||||
// as a histogram bucketed by powers of 2.
|
||||
// Returns the number of the largest non-empty bucket.
|
||||
int ProgramFanout(std::map<int, int>* histogram) const;
|
||||
int ReverseProgramFanout(std::map<int, int>* histogram) const;
|
||||
int ProgramFanout(std::vector<int>* histogram) const;
|
||||
int ReverseProgramFanout(std::vector<int>* histogram) const;
|
||||
|
||||
// Returns the underlying Regexp; not for general use.
|
||||
// Returns entire_regexp_ so that callers don't need
|
||||
@ -349,12 +369,12 @@ class RE2 {
|
||||
// (void*)NULL (the corresponding matched sub-pattern is not copied)
|
||||
//
|
||||
// Returns true iff all of the following conditions are satisfied:
|
||||
// a. "text" matches "re" exactly
|
||||
// b. The number of matched sub-patterns is >= number of supplied pointers
|
||||
// a. "text" matches "re" fully - from the beginning to the end of "text".
|
||||
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||
// c. The "i"th argument has a suitable type for holding the
|
||||
// string captured as the "i"th sub-pattern. If you pass in
|
||||
// NULL for the "i"th argument, or pass fewer arguments than
|
||||
// number of sub-patterns, "i"th captured sub-pattern is
|
||||
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||
// ignored.
|
||||
//
|
||||
// CAVEAT: An optional sub-pattern that does not exist in the
|
||||
@ -368,8 +388,17 @@ class RE2 {
|
||||
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||
}
|
||||
|
||||
// Exactly like FullMatch(), except that "re" is allowed to match
|
||||
// a substring of "text".
|
||||
// Like FullMatch(), except that "re" is allowed to match a substring
|
||||
// of "text".
|
||||
//
|
||||
// Returns true iff all of the following conditions are satisfied:
|
||||
// a. "text" matches "re" partially - for some substring of "text".
|
||||
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||
// c. The "i"th argument has a suitable type for holding the
|
||||
// string captured as the "i"th sub-pattern. If you pass in
|
||||
// NULL for the "i"th argument, or pass fewer arguments than
|
||||
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||
// ignored.
|
||||
template <typename... A>
|
||||
static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
|
||||
return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||
@ -378,7 +407,16 @@ class RE2 {
|
||||
// Like FullMatch() and PartialMatch(), except that "re" has to match
|
||||
// a prefix of the text, and "input" is advanced past the matched
|
||||
// text. Note: "input" is modified iff this routine returns true
|
||||
// and "re" matched a non-empty substring of "text".
|
||||
// and "re" matched a non-empty substring of "input".
|
||||
//
|
||||
// Returns true iff all of the following conditions are satisfied:
|
||||
// a. "input" matches "re" partially - for some prefix of "input".
|
||||
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||
// c. The "i"th argument has a suitable type for holding the
|
||||
// string captured as the "i"th sub-pattern. If you pass in
|
||||
// NULL for the "i"th argument, or pass fewer arguments than
|
||||
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||
// ignored.
|
||||
template <typename... A>
|
||||
static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
|
||||
return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
|
||||
@ -388,6 +426,15 @@ class RE2 {
|
||||
// the text. That is, "re" need not start its match at the beginning
|
||||
// of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
|
||||
// the next word in "s" and stores it in "word".
|
||||
//
|
||||
// Returns true iff all of the following conditions are satisfied:
|
||||
// a. "input" matches "re" partially - for some substring of "input".
|
||||
// b. The number of matched sub-patterns is >= number of supplied pointers.
|
||||
// c. The "i"th argument has a suitable type for holding the
|
||||
// string captured as the "i"th sub-pattern. If you pass in
|
||||
// NULL for the "i"th argument, or pass fewer arguments than
|
||||
// number of sub-patterns, the "i"th captured sub-pattern is
|
||||
// ignored.
|
||||
template <typename... A>
|
||||
static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
|
||||
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
|
||||
@ -443,7 +490,7 @@ class RE2 {
|
||||
|
||||
// Escapes all potentially meaningful regexp characters in
|
||||
// 'unquoted'. The returned string, used as a regular expression,
|
||||
// will exactly match the original string. For example,
|
||||
// will match exactly the original string. For example,
|
||||
// 1.5-2.0?
|
||||
// may become:
|
||||
// 1\.5\-2\.0\?
|
||||
@ -626,17 +673,6 @@ class RE2 {
|
||||
Encoding encoding() const { return encoding_; }
|
||||
void set_encoding(Encoding encoding) { encoding_ = encoding; }
|
||||
|
||||
// Legacy interface to encoding.
|
||||
// TODO(rsc): Remove once clients have been converted.
|
||||
bool utf8() const { return encoding_ == EncodingUTF8; }
|
||||
void set_utf8(bool b) {
|
||||
if (b) {
|
||||
encoding_ = EncodingUTF8;
|
||||
} else {
|
||||
encoding_ = EncodingLatin1;
|
||||
}
|
||||
}
|
||||
|
||||
bool posix_syntax() const { return posix_syntax_; }
|
||||
void set_posix_syntax(bool b) { posix_syntax_ = b; }
|
||||
|
||||
@ -699,32 +735,12 @@ class RE2 {
|
||||
const Options& options() const { return options_; }
|
||||
|
||||
// Argument converters; see below.
|
||||
static inline Arg CRadix(short* x);
|
||||
static inline Arg CRadix(unsigned short* x);
|
||||
static inline Arg CRadix(int* x);
|
||||
static inline Arg CRadix(unsigned int* x);
|
||||
static inline Arg CRadix(long* x);
|
||||
static inline Arg CRadix(unsigned long* x);
|
||||
static inline Arg CRadix(long long* x);
|
||||
static inline Arg CRadix(unsigned long long* x);
|
||||
|
||||
static inline Arg Hex(short* x);
|
||||
static inline Arg Hex(unsigned short* x);
|
||||
static inline Arg Hex(int* x);
|
||||
static inline Arg Hex(unsigned int* x);
|
||||
static inline Arg Hex(long* x);
|
||||
static inline Arg Hex(unsigned long* x);
|
||||
static inline Arg Hex(long long* x);
|
||||
static inline Arg Hex(unsigned long long* x);
|
||||
|
||||
static inline Arg Octal(short* x);
|
||||
static inline Arg Octal(unsigned short* x);
|
||||
static inline Arg Octal(int* x);
|
||||
static inline Arg Octal(unsigned int* x);
|
||||
static inline Arg Octal(long* x);
|
||||
static inline Arg Octal(unsigned long* x);
|
||||
static inline Arg Octal(long long* x);
|
||||
static inline Arg Octal(unsigned long long* x);
|
||||
template <typename T>
|
||||
static Arg CRadix(T* ptr);
|
||||
template <typename T>
|
||||
static Arg Hex(T* ptr);
|
||||
template <typename T>
|
||||
static Arg Octal(T* ptr);
|
||||
|
||||
private:
|
||||
void Init(const StringPiece& pattern, const Options& options);
|
||||
@ -739,27 +755,24 @@ class RE2 {
|
||||
|
||||
std::string pattern_; // string regular expression
|
||||
Options options_; // option flags
|
||||
std::string prefix_; // required prefix (before regexp_)
|
||||
bool prefix_foldcase_; // prefix is ASCII case-insensitive
|
||||
re2::Regexp* entire_regexp_; // parsed regular expression
|
||||
re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed
|
||||
const std::string* error_; // error indicator (or points to empty string)
|
||||
ErrorCode error_code_; // error code
|
||||
std::string error_arg_; // fragment of regexp showing error
|
||||
std::string prefix_; // required prefix (before suffix_regexp_)
|
||||
bool prefix_foldcase_; // prefix_ is ASCII case-insensitive
|
||||
re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
|
||||
re2::Prog* prog_; // compiled program for regexp
|
||||
int num_captures_; // Number of capturing groups
|
||||
int num_captures_; // number of capturing groups
|
||||
bool is_one_pass_; // can use prog_->SearchOnePass?
|
||||
|
||||
mutable re2::Prog* rprog_; // reverse program for regexp
|
||||
mutable const std::string* error_; // Error indicator
|
||||
// (or points to empty string)
|
||||
mutable ErrorCode error_code_; // Error code
|
||||
mutable std::string error_arg_; // Fragment of regexp showing error
|
||||
|
||||
// Reverse Prog for DFA execution only
|
||||
mutable re2::Prog* rprog_;
|
||||
// Map from capture names to indices
|
||||
mutable const std::map<std::string, int>* named_groups_;
|
||||
|
||||
// Map from capture indices to names
|
||||
mutable const std::map<int, std::string>* group_names_;
|
||||
|
||||
// Onces for lazy computations.
|
||||
mutable std::once_flag rprog_once_;
|
||||
mutable std::once_flag named_groups_once_;
|
||||
mutable std::once_flag group_names_once_;
|
||||
@ -770,137 +783,134 @@ class RE2 {
|
||||
|
||||
/***** Implementation details *****/
|
||||
|
||||
// Hex/Octal/Binary?
|
||||
namespace re2_internal {
|
||||
|
||||
// Special class for parsing into objects that define a ParseFrom() method
|
||||
template <class T>
|
||||
class _RE2_MatchObject {
|
||||
public:
|
||||
static inline bool Parse(const char* str, size_t n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
T* object = reinterpret_cast<T*>(dest);
|
||||
return object->ParseFrom(str, n);
|
||||
}
|
||||
};
|
||||
// Types for which the 3-ary Parse() function template has specializations.
|
||||
template <typename T> struct Parse3ary : public std::false_type {};
|
||||
template <> struct Parse3ary<void> : public std::true_type {};
|
||||
template <> struct Parse3ary<std::string> : public std::true_type {};
|
||||
template <> struct Parse3ary<StringPiece> : public std::true_type {};
|
||||
template <> struct Parse3ary<char> : public std::true_type {};
|
||||
template <> struct Parse3ary<signed char> : public std::true_type {};
|
||||
template <> struct Parse3ary<unsigned char> : public std::true_type {};
|
||||
template <> struct Parse3ary<float> : public std::true_type {};
|
||||
template <> struct Parse3ary<double> : public std::true_type {};
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, T* dest);
|
||||
|
||||
// Types for which the 4-ary Parse() function template has specializations.
|
||||
template <typename T> struct Parse4ary : public std::false_type {};
|
||||
template <> struct Parse4ary<long> : public std::true_type {};
|
||||
template <> struct Parse4ary<unsigned long> : public std::true_type {};
|
||||
template <> struct Parse4ary<short> : public std::true_type {};
|
||||
template <> struct Parse4ary<unsigned short> : public std::true_type {};
|
||||
template <> struct Parse4ary<int> : public std::true_type {};
|
||||
template <> struct Parse4ary<unsigned int> : public std::true_type {};
|
||||
template <> struct Parse4ary<long long> : public std::true_type {};
|
||||
template <> struct Parse4ary<unsigned long long> : public std::true_type {};
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, T* dest, int radix);
|
||||
|
||||
} // namespace re2_internal
|
||||
|
||||
class RE2::Arg {
|
||||
public:
|
||||
// Empty constructor so we can declare arrays of RE2::Arg
|
||||
Arg();
|
||||
private:
|
||||
template <typename T>
|
||||
using CanParse3ary = typename std::enable_if<
|
||||
re2_internal::Parse3ary<T>::value,
|
||||
int>::type;
|
||||
|
||||
// Constructor specially designed for NULL arguments
|
||||
Arg(void*);
|
||||
Arg(std::nullptr_t);
|
||||
template <typename T>
|
||||
using CanParse4ary = typename std::enable_if<
|
||||
re2_internal::Parse4ary<T>::value,
|
||||
int>::type;
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T>
|
||||
using CanParseFrom = typename std::enable_if<
|
||||
std::is_member_function_pointer<
|
||||
decltype(static_cast<bool (T::*)(const char*, size_t)>(
|
||||
&T::ParseFrom))>::value,
|
||||
int>::type;
|
||||
#endif
|
||||
|
||||
public:
|
||||
Arg() : Arg(nullptr) {}
|
||||
Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
|
||||
|
||||
template <typename T, CanParse3ary<T> = 0>
|
||||
Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
|
||||
|
||||
template <typename T, CanParse4ary<T> = 0>
|
||||
Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T, CanParseFrom<T> = 0>
|
||||
Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
|
||||
#endif
|
||||
|
||||
typedef bool (*Parser)(const char* str, size_t n, void* dest);
|
||||
|
||||
// Type-specific parsers
|
||||
#define MAKE_PARSER(type, name) \
|
||||
Arg(type* p) : arg_(p), parser_(name) {} \
|
||||
Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
|
||||
template <typename T>
|
||||
Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}
|
||||
|
||||
MAKE_PARSER(char, parse_char)
|
||||
MAKE_PARSER(signed char, parse_schar)
|
||||
MAKE_PARSER(unsigned char, parse_uchar)
|
||||
MAKE_PARSER(float, parse_float)
|
||||
MAKE_PARSER(double, parse_double)
|
||||
MAKE_PARSER(std::string, parse_string)
|
||||
MAKE_PARSER(StringPiece, parse_stringpiece)
|
||||
|
||||
MAKE_PARSER(short, parse_short)
|
||||
MAKE_PARSER(unsigned short, parse_ushort)
|
||||
MAKE_PARSER(int, parse_int)
|
||||
MAKE_PARSER(unsigned int, parse_uint)
|
||||
MAKE_PARSER(long, parse_long)
|
||||
MAKE_PARSER(unsigned long, parse_ulong)
|
||||
MAKE_PARSER(long long, parse_longlong)
|
||||
MAKE_PARSER(unsigned long long, parse_ulonglong)
|
||||
|
||||
#undef MAKE_PARSER
|
||||
|
||||
// Generic constructor templates
|
||||
template <class T> Arg(T* p)
|
||||
: arg_(p), parser_(_RE2_MatchObject<T>::Parse) { }
|
||||
template <class T> Arg(T* p, Parser parser)
|
||||
: arg_(p), parser_(parser) { }
|
||||
|
||||
// Parse the data
|
||||
bool Parse(const char* str, size_t n) const;
|
||||
|
||||
private:
|
||||
void* arg_;
|
||||
Parser parser_;
|
||||
|
||||
static bool parse_null (const char* str, size_t n, void* dest);
|
||||
static bool parse_char (const char* str, size_t n, void* dest);
|
||||
static bool parse_schar (const char* str, size_t n, void* dest);
|
||||
static bool parse_uchar (const char* str, size_t n, void* dest);
|
||||
static bool parse_float (const char* str, size_t n, void* dest);
|
||||
static bool parse_double (const char* str, size_t n, void* dest);
|
||||
static bool parse_string (const char* str, size_t n, void* dest);
|
||||
static bool parse_stringpiece (const char* str, size_t n, void* dest);
|
||||
|
||||
#define DECLARE_INTEGER_PARSER(name) \
|
||||
private: \
|
||||
static bool parse_##name(const char* str, size_t n, void* dest); \
|
||||
static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
|
||||
int radix); \
|
||||
\
|
||||
public: \
|
||||
static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
|
||||
static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
|
||||
static bool parse_##name##_cradix(const char* str, size_t n, void* dest);
|
||||
|
||||
DECLARE_INTEGER_PARSER(short)
|
||||
DECLARE_INTEGER_PARSER(ushort)
|
||||
DECLARE_INTEGER_PARSER(int)
|
||||
DECLARE_INTEGER_PARSER(uint)
|
||||
DECLARE_INTEGER_PARSER(long)
|
||||
DECLARE_INTEGER_PARSER(ulong)
|
||||
DECLARE_INTEGER_PARSER(longlong)
|
||||
DECLARE_INTEGER_PARSER(ulonglong)
|
||||
|
||||
#undef DECLARE_INTEGER_PARSER
|
||||
|
||||
};
|
||||
|
||||
inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
|
||||
inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
|
||||
inline RE2::Arg::Arg(std::nullptr_t p) : arg_(p), parser_(parse_null) { }
|
||||
|
||||
inline bool RE2::Arg::Parse(const char* str, size_t n) const {
|
||||
bool Parse(const char* str, size_t n) const {
|
||||
return (*parser_)(str, n, arg_);
|
||||
}
|
||||
|
||||
// This part of the parser, appropriate only for ints, deals with bases
|
||||
#define MAKE_INTEGER_PARSER(type, name) \
|
||||
inline RE2::Arg RE2::Hex(type* ptr) { \
|
||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_hex); \
|
||||
} \
|
||||
inline RE2::Arg RE2::Octal(type* ptr) { \
|
||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_octal); \
|
||||
} \
|
||||
inline RE2::Arg RE2::CRadix(type* ptr) { \
|
||||
return RE2::Arg(ptr, RE2::Arg::parse_##name##_cradix); \
|
||||
private:
|
||||
static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MAKE_INTEGER_PARSER(short, short)
|
||||
MAKE_INTEGER_PARSER(unsigned short, ushort)
|
||||
MAKE_INTEGER_PARSER(int, int)
|
||||
MAKE_INTEGER_PARSER(unsigned int, uint)
|
||||
MAKE_INTEGER_PARSER(long, long)
|
||||
MAKE_INTEGER_PARSER(unsigned long, ulong)
|
||||
MAKE_INTEGER_PARSER(long long, longlong)
|
||||
MAKE_INTEGER_PARSER(unsigned long long, ulonglong)
|
||||
template <typename T>
|
||||
static bool DoParse3ary(const char* str, size_t n, void* dest) {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
|
||||
}
|
||||
|
||||
#undef MAKE_INTEGER_PARSER
|
||||
template <typename T>
|
||||
static bool DoParse4ary(const char* str, size_t n, void* dest) {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
|
||||
}
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
template <typename T>
|
||||
static bool DoParseFrom(const char* str, size_t n, void* dest) {
|
||||
if (dest == NULL) return true;
|
||||
return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
|
||||
}
|
||||
#endif
|
||||
|
||||
void* arg_;
|
||||
Parser parser_;
|
||||
};
|
||||
|
||||
// Builds an Arg that stores its result through ptr and parses with the
// 4-ary re2_internal::Parse() using radix 0.
// NOTE(review): radix 0 presumably means "detect the base from a C-style
// prefix", as with strtol(); Parse() is defined elsewhere, so confirm there.
// The lambda captures nothing, so it converts to the plain function
// pointer type RE2::Arg::Parser.
template <typename T>
|
||||
inline RE2::Arg RE2::CRadix(T* ptr) {
|
||||
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
|
||||
});
|
||||
}
|
||||
|
||||
// Builds an Arg that stores its result through ptr and parses with the
// 4-ary re2_internal::Parse() using radix 16.
// The lambda captures nothing, so it converts to the plain function
// pointer type RE2::Arg::Parser.
template <typename T>
|
||||
inline RE2::Arg RE2::Hex(T* ptr) {
|
||||
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
|
||||
});
|
||||
}
|
||||
|
||||
// Builds an Arg that stores its result through ptr and parses with the
// 4-ary re2_internal::Parse() using radix 8.
// The lambda captures nothing, so it converts to the plain function
// pointer type RE2::Arg::Parser.
template <typename T>
|
||||
inline RE2::Arg RE2::Octal(T* ptr) {
|
||||
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
|
||||
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
|
||||
});
|
||||
}
|
||||
|
||||
#ifndef SWIG
|
||||
|
||||
// Silence warnings about missing initializers for members of LazyRE2.
|
||||
// Note that we test for Clang first because it defines __GNUC__ as well.
|
||||
#if defined(__clang__)
|
||||
#elif defined(__GNUC__) && __GNUC__ >= 6
|
||||
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
|
||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||
#endif
|
||||
|
||||
@ -949,7 +959,52 @@ class LazyRE2 {
|
||||
|
||||
void operator=(const LazyRE2&); // disallowed
|
||||
};
|
||||
#endif // SWIG
|
||||
#endif
|
||||
|
||||
namespace hooks {
|
||||
|
||||
// Most platforms support thread_local. Older versions of iOS don't support
|
||||
// thread_local, but for the sake of brevity, we lump together all versions
|
||||
// of Apple platforms that aren't macOS. If an iOS application really needs
|
||||
// the context pointee someday, we can get more specific then...
|
||||
#define RE2_HAVE_THREAD_LOCAL
|
||||
#if defined(__APPLE__) && !TARGET_OS_OSX
|
||||
#undef RE2_HAVE_THREAD_LOCAL
|
||||
#endif
|
||||
|
||||
// A hook must not make any assumptions regarding the lifetime of the context
|
||||
// pointee beyond the current invocation of the hook. Pointers and references
|
||||
// obtained via the context pointee should be considered invalidated when the
|
||||
// hook returns. Hence, any data about the context pointee (e.g. its pattern)
|
||||
// would have to be copied in order for it to be kept for an indefinite time.
|
||||
//
|
||||
// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
|
||||
// could result in infinite mutual recursion. To discourage that possibility,
|
||||
// RE2 will not maintain the context pointer correctly when used in that way.
|
||||
#ifdef RE2_HAVE_THREAD_LOCAL
|
||||
extern thread_local const RE2* context;
|
||||
#endif
|
||||
|
||||
// Argument type handed (by const reference) to the DFAStateCacheReset
// callback declared via DECLARE_HOOK below.
// NOTE(review): the exact meaning of the two fields is not visible in this
// header; presumably they describe the DFA's budget and cache size at the
// time of the reset. Confirm against the code that fires the hook.
struct DFAStateCacheReset {
|
||||
int64_t state_budget;
|
||||
size_t state_cache_size;
|
||||
};
|
||||
|
|
||||
// Argument type handed (by const reference) to the DFASearchFailure
// callback declared via DECLARE_HOOK below. It currently carries no data.
struct DFASearchFailure {
|
||||
// Nothing yet...
|
||||
};
|
||||
|
|
||||
// For a hook payload type `type`, DECLARE_HOOK declares three things:
//   - typeCallback:      alias for the function type void(const type&)
//   - SettypeHook(cb):   takes a pointer to such a callback
//   - GettypeHook():     returns a pointer to such a callback
// Only declarations are produced here; the definitions live elsewhere.
#define DECLARE_HOOK(type) \
|
||||
using type##Callback = void(const type&); \
|
||||
void Set##type##Hook(type##Callback* cb); \
|
||||
type##Callback* Get##type##Hook();
|
||||
|
|
||||
// One set of declarations per payload type defined above.
DECLARE_HOOK(DFAStateCacheReset)
|
||||
DECLARE_HOOK(DFASearchFailure)
|
||||
|
|
||||
// The macro is only a local helper; do not leak it to includers.
#undef DECLARE_HOOK
|
||||
|
||||
} // namespace hooks
|
||||
|
||||
} // namespace re2
|
||||
|
||||
|
148
extern/re2/re2/regexp.cc
vendored
148
extern/re2/re2/regexp.cc
vendored
@ -20,6 +20,7 @@
|
||||
#include "util/logging.h"
|
||||
#include "util/mutex.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/stringpiece.h"
|
||||
#include "re2/walker-inl.h"
|
||||
|
||||
@ -243,16 +244,15 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
||||
return new Regexp(kRegexpEmptyMatch, flags);
|
||||
}
|
||||
|
||||
Regexp** subcopy = NULL;
|
||||
PODArray<Regexp*> subcopy;
|
||||
if (op == kRegexpAlternate && can_factor) {
|
||||
// Going to edit sub; make a copy so we don't step on caller.
|
||||
subcopy = new Regexp*[nsub];
|
||||
memmove(subcopy, sub, nsub * sizeof sub[0]);
|
||||
sub = subcopy;
|
||||
subcopy = PODArray<Regexp*>(nsub);
|
||||
memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
|
||||
sub = subcopy.data();
|
||||
nsub = FactorAlternation(sub, nsub, flags);
|
||||
if (nsub == 1) {
|
||||
Regexp* re = sub[0];
|
||||
delete[] subcopy;
|
||||
return re;
|
||||
}
|
||||
}
|
||||
@ -269,7 +269,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
||||
subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
|
||||
nsub - (nbigsub-1)*kMaxNsub, flags,
|
||||
false);
|
||||
delete[] subcopy;
|
||||
return re;
|
||||
}
|
||||
|
||||
@ -278,8 +277,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
|
||||
Regexp** subs = re->sub();
|
||||
for (int i = 0; i < nsub; i++)
|
||||
subs[i] = sub[i];
|
||||
|
||||
delete[] subcopy;
|
||||
return re;
|
||||
}
|
||||
|
||||
@ -501,6 +498,7 @@ static const char *kErrorStrings[] = {
|
||||
"invalid character class range",
|
||||
"missing ]",
|
||||
"missing )",
|
||||
"unexpected )",
|
||||
"trailing \\",
|
||||
"no argument for repetition operator",
|
||||
"invalid repetition size",
|
||||
@ -544,9 +542,12 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
|
||||
ncapture_++;
|
||||
return ignored;
|
||||
}
|
||||
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
|
||||
@ -575,7 +576,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
|
||||
return m;
|
||||
}
|
||||
|
||||
Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||
virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
||||
// Allocate map once we find a name.
|
||||
if (map_ == NULL)
|
||||
@ -591,8 +592,10 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
|
||||
}
|
||||
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
|
||||
@ -621,7 +624,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
|
||||
return m;
|
||||
}
|
||||
|
||||
Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||
virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
|
||||
if (re->op() == kRegexpCapture && re->name() != NULL) {
|
||||
// Allocate map once we find a name.
|
||||
if (map_ == NULL)
|
||||
@ -633,8 +636,10 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
|
||||
}
|
||||
|
||||
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
|
||||
#endif
|
||||
return ignored;
|
||||
}
|
||||
|
||||
@ -651,78 +656,89 @@ std::map<int, std::string>* Regexp::CaptureNames() {
|
||||
return w.TakeMap();
|
||||
}
|
||||
|
||||
// Encodes the nrunes runes starting at runes into *bytes, overwriting
// whatever *bytes held before. With latin1 set, every rune is narrowed to
// a single char; otherwise each rune is encoded via runetochar().
void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
|
||||
std::string* bytes) {
|
||||
if (latin1) {
|
||||
bytes->resize(nrunes);  // exactly one byte per rune
|
||||
for (int i = 0; i < nrunes; i++)
|
||||
(*bytes)[i] = static_cast<char>(runes[i]);
|
||||
} else {
|
||||
bytes->resize(nrunes * UTFmax); // worst case: UTFmax bytes per rune
|
||||
char* p = &(*bytes)[0];
|
||||
for (int i = 0; i < nrunes; i++)
|
||||
p += runetochar(p, &runes[i]);  // return value is used as the byte count written
|
||||
bytes->resize(p - &(*bytes)[0]);  // trim to the bytes actually written
|
||||
bytes->shrink_to_fit();  // request release of the unused worst-case capacity
|
||||
}
|
||||
}
|
||||
|
||||
// Determines whether regexp matches must be anchored
|
||||
// with a fixed string prefix. If so, returns the prefix and
|
||||
// the regexp that remains after the prefix. The prefix might
|
||||
// be ASCII case-insensitive.
|
||||
bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
|
||||
Regexp** suffix) {
|
||||
prefix->clear();
|
||||
*foldcase = false;
|
||||
*suffix = NULL;
|
||||
|
||||
// No need for a walker: the regexp must be of the form
|
||||
// 1. some number of ^ anchors
|
||||
// 2. a literal char or string
|
||||
// 3. the rest
|
||||
prefix->clear();
|
||||
*foldcase = false;
|
||||
*suffix = NULL;
|
||||
if (op_ != kRegexpConcat)
|
||||
return false;
|
||||
|
||||
// Some number of anchors, then a literal or concatenation.
|
||||
int i = 0;
|
||||
Regexp** sub = this->sub();
|
||||
while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
|
||||
while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
|
||||
i++;
|
||||
if (i == 0 || i >= nsub_)
|
||||
return false;
|
||||
|
||||
Regexp* re = sub[i];
|
||||
switch (re->op_) {
|
||||
default:
|
||||
Regexp* re = sub()[i];
|
||||
if (re->op_ != kRegexpLiteral &&
|
||||
re->op_ != kRegexpLiteralString)
|
||||
return false;
|
||||
|
||||
case kRegexpLiteralString:
|
||||
// Convert to string in proper encoding.
|
||||
if (re->parse_flags() & Latin1) {
|
||||
prefix->resize(re->nrunes_);
|
||||
for (int j = 0; j < re->nrunes_; j++)
|
||||
(*prefix)[j] = static_cast<char>(re->runes_[j]);
|
||||
} else {
|
||||
// Convert to UTF-8 in place.
|
||||
// Assume worst-case space and then trim.
|
||||
prefix->resize(re->nrunes_ * UTFmax);
|
||||
char *p = &(*prefix)[0];
|
||||
for (int j = 0; j < re->nrunes_; j++) {
|
||||
Rune r = re->runes_[j];
|
||||
if (r < Runeself)
|
||||
*p++ = static_cast<char>(r);
|
||||
else
|
||||
p += runetochar(p, &r);
|
||||
}
|
||||
prefix->resize(p - &(*prefix)[0]);
|
||||
}
|
||||
break;
|
||||
|
||||
case kRegexpLiteral:
|
||||
if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
|
||||
prefix->append(1, static_cast<char>(re->rune_));
|
||||
} else {
|
||||
char buf[UTFmax];
|
||||
prefix->append(buf, runetochar(buf, &re->rune_));
|
||||
}
|
||||
break;
|
||||
}
|
||||
*foldcase = (sub[i]->parse_flags() & FoldCase) != 0;
|
||||
i++;
|
||||
|
||||
// The rest.
|
||||
if (i < nsub_) {
|
||||
for (int j = i; j < nsub_; j++)
|
||||
sub[j]->Incref();
|
||||
re = Concat(sub + i, nsub_ - i, parse_flags());
|
||||
sub()[j]->Incref();
|
||||
*suffix = Concat(sub() + i, nsub_ - i, parse_flags());
|
||||
} else {
|
||||
re = new Regexp(kRegexpEmptyMatch, parse_flags());
|
||||
*suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
|
||||
}
|
||||
*suffix = re;
|
||||
|
||||
bool latin1 = (re->parse_flags() & Latin1) != 0;
|
||||
Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
|
||||
int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
|
||||
ConvertRunesToBytes(latin1, runes, nrunes, prefix);
|
||||
*foldcase = (re->parse_flags() & FoldCase) != 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Determines whether regexp matches must be unanchored
|
||||
// with a fixed string prefix. If so, returns the prefix.
|
||||
// The prefix might be ASCII case-insensitive.
|
||||
// Returns true and fills *prefix / *foldcase when this regexp starts with
// (or is) a literal char or literal string; returns false otherwise, in
// which case *prefix is empty and *foldcase is false.
bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
|
||||
prefix->clear();  // outputs are reset first so they are "zeroed" on every return path
|
||||
*foldcase = false;
|
||||
|
|
||||
// No need for a walker: the regexp must either begin with or be
|
||||
// a literal char or string. We "see through" capturing groups,
|
||||
// but make no effort to glue multiple prefix fragments together.
|
||||
Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;  // first element of a concat, else the regexp itself
|
||||
while (re->op_ == kRegexpCapture) {  // unwrap (possibly nested) capture groups
|
||||
re = re->sub()[0];
|
||||
if (re->op_ == kRegexpConcat && re->nsub_ > 0)  // a concat inside the group: look at its first element
|
||||
re = re->sub()[0];
|
||||
}
|
||||
if (re->op_ != kRegexpLiteral &&
|
||||
re->op_ != kRegexpLiteralString)
|
||||
return false;  // not a literal, so there is no prefix to report
|
||||
|
|
||||
bool latin1 = (re->parse_flags() & Latin1) != 0;
|
||||
Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;  // a single literal is handled as a one-rune string
|
||||
int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
|
||||
ConvertRunesToBytes(latin1, runes, nrunes, prefix);
|
||||
*foldcase = (re->parse_flags() & FoldCase) != 0;  // taken from the literal's own parse flags
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -903,7 +919,7 @@ void CharClassBuilder::Negate() {
|
||||
// The ranges are allocated in the same block as the header,
|
||||
// necessitating a special allocator and Delete method.
|
||||
|
||||
CharClass* CharClass::New(int maxranges) {
|
||||
CharClass* CharClass::New(size_t maxranges) {
|
||||
CharClass* cc;
|
||||
uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
|
||||
cc = reinterpret_cast<CharClass*>(data);
|
||||
@ -920,7 +936,7 @@ void CharClass::Delete() {
|
||||
}
|
||||
|
||||
CharClass* CharClass::Negate() {
|
||||
CharClass* cc = CharClass::New(nranges_+1);
|
||||
CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1));
|
||||
cc->folds_ascii_ = folds_ascii_;
|
||||
cc->nrunes_ = Runemax + 1 - nrunes_;
|
||||
int n = 0;
|
||||
@ -957,7 +973,7 @@ bool CharClass::Contains(Rune r) {
|
||||
}
|
||||
|
||||
CharClass* CharClassBuilder::GetCharClass() {
|
||||
CharClass* cc = CharClass::New(static_cast<int>(ranges_.size()));
|
||||
CharClass* cc = CharClass::New(ranges_.size());
|
||||
int n = 0;
|
||||
for (iterator it = begin(); it != end(); ++it)
|
||||
cc->ranges_[n++] = *it;
|
||||
|
11
extern/re2/re2/regexp.h
vendored
11
extern/re2/re2/regexp.h
vendored
@ -86,6 +86,7 @@
|
||||
// form accessible to clients, so that client code can analyze the
|
||||
// parsed regular expressions.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
#include <set>
|
||||
@ -177,6 +178,7 @@ enum RegexpStatusCode {
|
||||
kRegexpBadCharRange, // bad character class range
|
||||
kRegexpMissingBracket, // missing closing ]
|
||||
kRegexpMissingParen, // missing closing )
|
||||
kRegexpUnexpectedParen, // unexpected closing )
|
||||
kRegexpTrailingBackslash, // at end of regexp
|
||||
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
|
||||
kRegexpRepeatSize, // bad repetition argument
|
||||
@ -258,7 +260,7 @@ class CharClass {
|
||||
private:
|
||||
CharClass(); // not implemented
|
||||
~CharClass(); // not implemented
|
||||
static CharClass* New(int maxranges);
|
||||
static CharClass* New(size_t maxranges);
|
||||
|
||||
friend class CharClassBuilder;
|
||||
|
||||
@ -440,6 +442,13 @@ class Regexp {
|
||||
bool RequiredPrefix(std::string* prefix, bool* foldcase,
|
||||
Regexp** suffix);
|
||||
|
||||
// Whether every match of this regexp must be unanchored and
|
||||
// begin with a non-empty fixed string (perhaps after ASCII
|
||||
// case-folding). If so, returns the prefix.
|
||||
// Callers should expect *prefix and *foldcase to be "zeroed"
|
||||
// regardless of the return value.
|
||||
bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
|
||||
|
||||
private:
|
||||
// Constructor allocates vectors as appropriate for operator.
|
||||
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
|
||||
|
48
extern/re2/re2/set.cc
vendored
48
extern/re2/re2/set.cc
vendored
@ -7,30 +7,49 @@
|
||||
#include <stddef.h>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "re2/stringpiece.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
|
||||
options_.Copy(options);
|
||||
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
|
||||
: options_(options),
|
||||
anchor_(anchor),
|
||||
compiled_(false),
|
||||
size_(0) {
|
||||
options_.set_never_capture(true); // might unblock some optimisations
|
||||
anchor_ = anchor;
|
||||
prog_ = NULL;
|
||||
compiled_ = false;
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
RE2::Set::~Set() {
|
||||
for (size_t i = 0; i < elem_.size(); i++)
|
||||
elem_[i].second->Decref();
|
||||
delete prog_;
|
||||
}
|
||||
|
||||
// Move constructor. options_, anchor_, compiled_ and size_ are copied from
// other; elem_ and prog_ are moved. Afterwards other is explicitly reset to
// an empty, uncompiled set.
RE2::Set::Set(Set&& other)
|
||||
: options_(other.options_),
|
||||
anchor_(other.anchor_),
|
||||
elem_(std::move(other.elem_)),
|
||||
compiled_(other.compiled_),
|
||||
size_(other.size_),
|
||||
prog_(std::move(other.prog_)) {
|
||||
// Do not rely on the moved-from state of the members: ~Set() calls
// Decref() on every entry still present in elem_, so other must end up
// with none of the entries that now belong to *this.
other.elem_.clear();
|
||||
other.elem_.shrink_to_fit();
|
||||
other.compiled_ = false;
|
||||
other.size_ = 0;
|
||||
other.prog_.reset();
|
||||
}
|
||||
|
||||
// Move assignment, implemented as destroy-then-reconstruct: the current
// object is destroyed in place and a new Set is move-constructed from other
// in the same storage.
// NOTE(review): there is no self-assignment guard. With `s = std::move(s)`
// the destructor would run first and the move constructor would then read
// from the already-destroyed object; consider `if (this != &other)`.
RE2::Set& RE2::Set::operator=(Set&& other) {
|
||||
this->~Set();
|
||||
(void) new (this) Set(std::move(other));  // placement new; the returned pointer is intentionally unused
|
||||
return *this;
|
||||
}
|
||||
|
||||
int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
|
||||
@ -97,9 +116,9 @@ bool RE2::Set::Compile() {
|
||||
options_.ParseFlags());
|
||||
re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
|
||||
|
||||
prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
|
||||
prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
|
||||
re->Decref();
|
||||
return prog_ != NULL;
|
||||
return prog_ != nullptr;
|
||||
}
|
||||
|
||||
bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
|
||||
@ -124,9 +143,10 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
|
||||
NULL, &dfa_failed, matches.get());
|
||||
if (dfa_failed) {
|
||||
if (options_.log_errors())
|
||||
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range() << ", "
|
||||
<< "list count " << prog_->list_count();
|
||||
LOG(ERROR) << "DFA out of memory: "
|
||||
<< "program size " << prog_->size() << ", "
|
||||
<< "list count " << prog_->list_count() << ", "
|
||||
<< "bytemap range " << prog_->bytemap_range();
|
||||
if (error_info != NULL)
|
||||
error_info->kind = kOutOfMemory;
|
||||
return false;
|
||||
|
13
extern/re2/re2/set.h
vendored
13
extern/re2/re2/set.h
vendored
@ -5,6 +5,7 @@
|
||||
#ifndef RE2_SET_H_
|
||||
#define RE2_SET_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -36,6 +37,13 @@ class RE2::Set {
|
||||
Set(const RE2::Options& options, RE2::Anchor anchor);
|
||||
~Set();
|
||||
|
||||
// Not copyable.
|
||||
Set(const Set&) = delete;
|
||||
Set& operator=(const Set&) = delete;
|
||||
// Movable.
|
||||
Set(Set&& other);
|
||||
Set& operator=(Set&& other);
|
||||
|
||||
// Adds pattern to the set using the options passed to the constructor.
|
||||
// Returns the index that will identify the regexp in the output of Match(),
|
||||
// or -1 if the regexp cannot be parsed.
|
||||
@ -67,12 +75,9 @@ class RE2::Set {
|
||||
RE2::Options options_;
|
||||
RE2::Anchor anchor_;
|
||||
std::vector<Elem> elem_;
|
||||
re2::Prog* prog_;
|
||||
bool compiled_;
|
||||
int size_;
|
||||
|
||||
Set(const Set&) = delete;
|
||||
Set& operator=(const Set&) = delete;
|
||||
std::unique_ptr<re2::Prog> prog_;
|
||||
};
|
||||
|
||||
} // namespace re2
|
||||
|
26
extern/re2/re2/simplify.cc
vendored
26
extern/re2/re2/simplify.cc
vendored
@ -10,8 +10,8 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/pod_array.h"
|
||||
#include "util/utf.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/regexp.h"
|
||||
#include "re2/walker-inl.h"
|
||||
|
||||
@ -28,8 +28,6 @@ bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
|
||||
Regexp* sre = re->Simplify();
|
||||
re->Decref();
|
||||
if (sre == NULL) {
|
||||
// Should not happen, since Simplify never fails.
|
||||
LOG(ERROR) << "Simplify failed on " << src;
|
||||
if (status) {
|
||||
status->set_code(kRegexpInternalError);
|
||||
status->set_error_arg(src);
|
||||
@ -180,10 +178,20 @@ Regexp* Regexp::Simplify() {
|
||||
CoalesceWalker cw;
|
||||
Regexp* cre = cw.Walk(this, NULL);
|
||||
if (cre == NULL)
|
||||
return cre;
|
||||
return NULL;
|
||||
if (cw.stopped_early()) {
|
||||
cre->Decref();
|
||||
return NULL;
|
||||
}
|
||||
SimplifyWalker sw;
|
||||
Regexp* sre = sw.Walk(cre, NULL);
|
||||
cre->Decref();
|
||||
if (sre == NULL)
|
||||
return NULL;
|
||||
if (sw.stopped_early()) {
|
||||
sre->Decref();
|
||||
return NULL;
|
||||
}
|
||||
return sre;
|
||||
}
|
||||
|
||||
@ -212,9 +220,10 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
|
||||
}
|
||||
|
||||
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||
// This should never be called, since we use Walk and not
|
||||
// WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
|
||||
#endif
|
||||
return re->Incref();
|
||||
}
|
||||
|
||||
@ -437,9 +446,10 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
|
||||
}
|
||||
|
||||
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
|
||||
// This should never be called, since we use Walk and not
|
||||
// WalkExponential.
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
|
||||
#endif
|
||||
return re->Incref();
|
||||
}
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef UTIL_SPARSE_ARRAY_H_
|
||||
#define UTIL_SPARSE_ARRAY_H_
|
||||
#ifndef RE2_SPARSE_ARRAY_H_
|
||||
#define RE2_SPARSE_ARRAY_H_
|
||||
|
||||
// DESCRIPTION
|
||||
//
|
||||
@ -102,7 +102,7 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "util/pod_array.h"
|
||||
#include "re2/pod_array.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -389,4 +389,4 @@ template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
|
||||
|
||||
} // namespace re2
|
||||
|
||||
#endif // UTIL_SPARSE_ARRAY_H_
|
||||
#endif // RE2_SPARSE_ARRAY_H_
|
@ -2,8 +2,8 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef UTIL_SPARSE_SET_H_
|
||||
#define UTIL_SPARSE_SET_H_
|
||||
#ifndef RE2_SPARSE_SET_H_
|
||||
#define RE2_SPARSE_SET_H_
|
||||
|
||||
// DESCRIPTION
|
||||
//
|
||||
@ -61,7 +61,7 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "util/pod_array.h"
|
||||
#include "re2/pod_array.h"
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -261,4 +261,4 @@ typedef SparseSetT<void> SparseSet;
|
||||
|
||||
} // namespace re2
|
||||
|
||||
#endif // UTIL_SPARSE_SET_H_
|
||||
#endif // RE2_SPARSE_SET_H_
|
52
extern/re2/re2/testing/backtrack.cc
vendored
52
extern/re2/re2/testing/backtrack.cc
vendored
@ -29,6 +29,7 @@
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/pod_array.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
@ -53,7 +54,6 @@ namespace re2 {
|
||||
class Backtracker {
|
||||
public:
|
||||
explicit Backtracker(Prog* prog);
|
||||
~Backtracker();
|
||||
|
||||
bool Search(const StringPiece& text, const StringPiece& context,
|
||||
bool anchored, bool longest,
|
||||
@ -80,8 +80,10 @@ class Backtracker {
|
||||
|
||||
// Search state
|
||||
const char* cap_[64]; // capture registers
|
||||
uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked
|
||||
size_t nvisited_; // # of words in bitmap
|
||||
PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
|
||||
|
||||
Backtracker(const Backtracker&) = delete;
|
||||
Backtracker& operator=(const Backtracker&) = delete;
|
||||
};
|
||||
|
||||
Backtracker::Backtracker(Prog* prog)
|
||||
@ -90,13 +92,7 @@ Backtracker::Backtracker(Prog* prog)
|
||||
longest_(false),
|
||||
endmatch_(false),
|
||||
submatch_(NULL),
|
||||
nsubmatch_(0),
|
||||
visited_(NULL),
|
||||
nvisited_(0) {
|
||||
}
|
||||
|
||||
Backtracker::~Backtracker() {
|
||||
delete[] visited_;
|
||||
nsubmatch_(0) {
|
||||
}
|
||||
|
||||
// Runs a backtracking search.
|
||||
@ -105,7 +101,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
|
||||
StringPiece* submatch, int nsubmatch) {
|
||||
text_ = text;
|
||||
context_ = context;
|
||||
if (context_.begin() == NULL)
|
||||
if (context_.data() == NULL)
|
||||
context_ = text;
|
||||
if (prog_->anchor_start() && text.begin() > context_.begin())
|
||||
return false;
|
||||
@ -130,24 +126,28 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
|
||||
|
||||
// Allocate new visited_ bitmap -- size is proportional
|
||||
// to text, so have to reallocate on each call to Search.
|
||||
delete[] visited_;
|
||||
nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
|
||||
visited_ = new uint32_t[nvisited_];
|
||||
memset(visited_, 0, nvisited_*sizeof visited_[0]);
|
||||
int nvisited = prog_->size() * static_cast<int>(text.size()+1);
|
||||
nvisited = (nvisited + 31) / 32;
|
||||
visited_ = PODArray<uint32_t>(nvisited);
|
||||
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
|
||||
|
||||
// Anchored search must start at text.begin().
|
||||
if (anchored_) {
|
||||
cap_[0] = text.begin();
|
||||
return Visit(prog_->start(), text.begin());
|
||||
cap_[0] = text.data();
|
||||
return Visit(prog_->start(), text.data());
|
||||
}
|
||||
|
||||
// Unanchored search, starting from each possible text position.
|
||||
// Notice that we have to try the empty string at the end of
|
||||
// the text, so the loop condition is p <= text.end(), not p < text.end().
|
||||
for (const char* p = text.begin(); p <= text.end(); p++) {
|
||||
for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
|
||||
cap_[0] = p;
|
||||
if (Visit(prog_->start(), p)) // Match must be leftmost; done.
|
||||
return true;
|
||||
// Avoid invoking undefined behavior (arithmetic on a null pointer)
|
||||
// by simply not continuing the loop.
|
||||
if (p == NULL)
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -158,9 +158,10 @@ bool Backtracker::Visit(int id, const char* p) {
|
||||
// Check bitmap. If we've already explored from here,
|
||||
// either it didn't match or it did but we're hoping for a better match.
|
||||
// Either way, don't go down that road again.
|
||||
CHECK(p <= text_.end());
|
||||
size_t n = id*(text_.size()+1) + (p - text_.begin());
|
||||
CHECK_LT(n/32, nvisited_);
|
||||
CHECK(p <= text_.data() + text_.size());
|
||||
int n = id * static_cast<int>(text_.size()+1) +
|
||||
static_cast<int>(p-text_.data());
|
||||
CHECK_LT(n/32, visited_.size());
|
||||
if (visited_[n/32] & (1 << (n&31)))
|
||||
return false;
|
||||
visited_[n/32] |= 1 << (n&31);
|
||||
@ -182,7 +183,7 @@ bool Backtracker::Try(int id, const char* p) {
|
||||
// Pick out byte at current position. If at end of string,
|
||||
// have to explore in hope of finishing a match. Use impossible byte -1.
|
||||
int c = -1;
|
||||
if (p < text_.end())
|
||||
if (p < text_.data() + text_.size())
|
||||
c = *p & 0xFF;
|
||||
|
||||
Prog::Inst* ip = prog_->inst(id);
|
||||
@ -224,11 +225,12 @@ bool Backtracker::Try(int id, const char* p) {
|
||||
case kInstMatch:
|
||||
// We found a match. If it's the best so far, record the
|
||||
// parameters in the caller's submatch_ array.
|
||||
if (endmatch_ && p != context_.end())
|
||||
if (endmatch_ && p != context_.data() + context_.size())
|
||||
return false;
|
||||
cap_[1] = p;
|
||||
if (submatch_[0].data() == NULL || // First match so far ...
|
||||
(longest_ && p > submatch_[0].end())) { // ... or better match
|
||||
if (submatch_[0].data() == NULL ||
|
||||
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
|
||||
// First match so far - or better match.
|
||||
for (int i = 0; i < nsubmatch_; i++)
|
||||
submatch_[i] = StringPiece(
|
||||
cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
|
||||
|
4
extern/re2/re2/testing/charclass_test.cc
vendored
4
extern/re2/re2/testing/charclass_test.cc
vendored
@ -85,7 +85,7 @@ static CCTest tests[] = {
|
||||
{ {-1} } },
|
||||
};
|
||||
|
||||
template<class CharClass>
|
||||
template <typename CharClass>
|
||||
static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
|
||||
if (t == NULL) {
|
||||
printf("\t%s:", desc);
|
||||
@ -136,7 +136,7 @@ void Delete(CharClassBuilder* cc) {
|
||||
delete cc;
|
||||
}
|
||||
|
||||
template<class CharClass>
|
||||
template <typename CharClass>
|
||||
bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
|
||||
typename CharClass::iterator it = cc->begin();
|
||||
int size = 0;
|
||||
|
52
extern/re2/re2/testing/compile_test.cc
vendored
52
extern/re2/re2/testing/compile_test.cc
vendored
@ -147,10 +147,19 @@ static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
|
||||
Regexp* re = Regexp::Parse(pattern, flags, NULL);
|
||||
EXPECT_TRUE(re != NULL);
|
||||
|
||||
{
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
EXPECT_TRUE(prog != NULL);
|
||||
*bytemap = prog->DumpByteMap();
|
||||
delete prog;
|
||||
}
|
||||
|
||||
{
|
||||
Prog* prog = re->CompileToReverseProg(0);
|
||||
EXPECT_TRUE(prog != NULL);
|
||||
EXPECT_EQ(*bytemap, prog->DumpByteMap());
|
||||
delete prog;
|
||||
}
|
||||
|
||||
re->Decref();
|
||||
}
|
||||
@ -213,16 +222,11 @@ TEST(TestCompile, UTF8Ranges) {
|
||||
EXPECT_EQ("[00-09] -> 0\n"
|
||||
"[0a-0a] -> 1\n"
|
||||
"[0b-7f] -> 0\n"
|
||||
"[80-8f] -> 2\n"
|
||||
"[90-9f] -> 3\n"
|
||||
"[a0-bf] -> 4\n"
|
||||
"[80-bf] -> 2\n"
|
||||
"[c0-c1] -> 1\n"
|
||||
"[c2-df] -> 5\n"
|
||||
"[e0-e0] -> 6\n"
|
||||
"[e1-ef] -> 7\n"
|
||||
"[f0-f0] -> 8\n"
|
||||
"[f1-f3] -> 9\n"
|
||||
"[f4-f4] -> 10\n"
|
||||
"[c2-df] -> 3\n"
|
||||
"[e0-ef] -> 4\n"
|
||||
"[f0-f4] -> 5\n"
|
||||
"[f5-ff] -> 1\n",
|
||||
bytemap);
|
||||
}
|
||||
@ -232,7 +236,7 @@ TEST(TestCompile, InsufficientMemory) {
|
||||
"^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
|
||||
Regexp::LikePerl, NULL);
|
||||
EXPECT_TRUE(re != NULL);
|
||||
Prog* prog = re->CompileToProg(920);
|
||||
Prog* prog = re->CompileToProg(850);
|
||||
// If the memory budget has been exhausted, compilation should fail
|
||||
// and return NULL instead of trying to do anything with NoMatch().
|
||||
EXPECT_TRUE(prog == NULL);
|
||||
@ -299,20 +303,22 @@ TEST(TestCompile, Bug26705922) {
|
||||
"8. byte [f0-f0] 0 -> 7\n",
|
||||
reverse);
|
||||
|
||||
Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
|
||||
EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
|
||||
"4+ byte [c2-df] 0 -> 7\n"
|
||||
"5+ byte [a0-bf] 1 -> 8\n"
|
||||
"6. byte [80-bf] 0 -> 9\n"
|
||||
Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
|
||||
EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
|
||||
"4+ byte [e0-ef] 0 -> 8\n"
|
||||
"5. byte [f0-f4] 0 -> 9\n"
|
||||
"6. byte [80-bf] 0 -> 7\n"
|
||||
"7. match! 0\n"
|
||||
"8. byte [e0-e0] 0 -> 7\n"
|
||||
"9+ byte [e1-ef] 0 -> 7\n"
|
||||
"10+ byte [90-bf] 1 -> 13\n"
|
||||
"11+ byte [80-bf] 1 -> 14\n"
|
||||
"12. byte [80-8f] 0 -> 15\n"
|
||||
"13. byte [f0-f0] 0 -> 7\n"
|
||||
"14. byte [f1-f3] 0 -> 7\n"
|
||||
"15. byte [f4-f4] 0 -> 7\n",
|
||||
"8. byte [80-bf] 0 -> 6\n"
|
||||
"9. byte [80-bf] 0 -> 8\n",
|
||||
forward);
|
||||
EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
|
||||
"4+ byte [c2-df] 0 -> 6\n"
|
||||
"5. byte [80-bf] 0 -> 7\n"
|
||||
"6. match! 0\n"
|
||||
"7+ byte [e0-ef] 0 -> 6\n"
|
||||
"8. byte [80-bf] 0 -> 9\n"
|
||||
"9. byte [f0-f4] 0 -> 6\n",
|
||||
reverse);
|
||||
}
|
||||
|
||||
|
82
extern/re2/re2/testing/dfa_test.cc
vendored
82
extern/re2/re2/testing/dfa_test.cc
vendored
@ -8,7 +8,9 @@
|
||||
#include <vector>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/flags.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/malloc_counter.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/re2.h"
|
||||
@ -18,12 +20,26 @@
|
||||
|
||||
static const bool UsingMallocCounter = false;
|
||||
|
||||
DEFINE_int32(size, 8, "log2(number of DFA nodes)");
|
||||
DEFINE_int32(repeat, 2, "Repetition count.");
|
||||
DEFINE_int32(threads, 4, "number of threads");
|
||||
DEFINE_FLAG(int, size, 8, "log2(number of DFA nodes)");
|
||||
DEFINE_FLAG(int, repeat, 2, "Repetition count.");
|
||||
DEFINE_FLAG(int, threads, 4, "number of threads");
|
||||
|
||||
namespace re2 {
|
||||
|
||||
static int state_cache_resets = 0;
|
||||
static int search_failures = 0;
|
||||
|
||||
struct SetHooks {
|
||||
SetHooks() {
|
||||
hooks::SetDFAStateCacheResetHook([](const hooks::DFAStateCacheReset&) {
|
||||
++state_cache_resets;
|
||||
});
|
||||
hooks::SetDFASearchFailureHook([](const hooks::DFASearchFailure&) {
|
||||
++search_failures;
|
||||
});
|
||||
}
|
||||
} set_hooks;
|
||||
|
||||
// Check that multithreaded access to DFA class works.
|
||||
|
||||
// Helper function: builds entire DFA for prog.
|
||||
@ -34,7 +50,7 @@ static void DoBuild(Prog* prog) {
|
||||
TEST(Multithreaded, BuildEntireDFA) {
|
||||
// Create regexp with 2^FLAGS_size states in DFA.
|
||||
std::string s = "a";
|
||||
for (int i = 0; i < FLAGS_size; i++)
|
||||
for (int i = 0; i < GetFlag(FLAGS_size); i++)
|
||||
s += "[ab]";
|
||||
s += "b";
|
||||
Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
|
||||
@ -52,14 +68,14 @@ TEST(Multithreaded, BuildEntireDFA) {
|
||||
}
|
||||
|
||||
// Build the DFA simultaneously in a bunch of threads.
|
||||
for (int i = 0; i < FLAGS_repeat; i++) {
|
||||
for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
ASSERT_TRUE(prog != NULL);
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
for (int j = 0; j < FLAGS_threads; j++)
|
||||
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||
threads.emplace_back(DoBuild, prog);
|
||||
for (int j = 0; j < FLAGS_threads; j++)
|
||||
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||
threads[j].join();
|
||||
|
||||
// One more compile, to make sure everything is okay.
|
||||
@ -106,44 +122,6 @@ TEST(SingleThreaded, BuildEntireDFA) {
|
||||
re->Decref();
|
||||
}
|
||||
|
||||
// Generates and returns a string over binary alphabet {0,1} that contains
|
||||
// all possible binary sequences of length n as subsequences. The obvious
|
||||
// brute force method would generate a string of length n * 2^n, but this
|
||||
// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
|
||||
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
|
||||
// Such a string is useful for testing a DFA. If you have a DFA
|
||||
// where distinct last n bytes implies distinct states, then running on a
|
||||
// DeBruijn string causes the DFA to need to create a new state at every
|
||||
// position in the input, never reusing any states until it gets to the
|
||||
// end of the string. This is the worst possible case for DFA execution.
|
||||
static std::string DeBruijnString(int n) {
|
||||
CHECK_LT(n, static_cast<int>(8*sizeof(int)));
|
||||
CHECK_GT(n, 0);
|
||||
|
||||
std::vector<bool> did(size_t{1}<<n);
|
||||
for (int i = 0; i < 1<<n; i++)
|
||||
did[i] = false;
|
||||
|
||||
std::string s;
|
||||
for (int i = 0; i < n-1; i++)
|
||||
s.append("0");
|
||||
int bits = 0;
|
||||
int mask = (1<<n) - 1;
|
||||
for (int i = 0; i < (1<<n); i++) {
|
||||
bits <<= 1;
|
||||
bits &= mask;
|
||||
if (!did[bits|1]) {
|
||||
bits |= 1;
|
||||
s.append("1");
|
||||
} else {
|
||||
s.append("0");
|
||||
}
|
||||
CHECK(!did[bits]);
|
||||
did[bits] = true;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Test that the DFA gets the right result even if it runs
|
||||
// out of memory during a search. The regular expression
|
||||
// 0[01]{n}$ matches a binary string of 0s and 1s only if
|
||||
@ -166,6 +144,8 @@ TEST(SingleThreaded, SearchDFA) {
|
||||
// if it can't get a good cache hit rate.)
|
||||
// Tell the DFA to trudge along instead.
|
||||
Prog::TEST_dfa_should_bail_when_slow(false);
|
||||
state_cache_resets = 0;
|
||||
search_failures = 0;
|
||||
|
||||
// Choice of n is mostly arbitrary, except that:
|
||||
// * making n too big makes the test run for too long.
|
||||
@ -215,6 +195,8 @@ TEST(SingleThreaded, SearchDFA) {
|
||||
|
||||
// Reset to original behaviour.
|
||||
Prog::TEST_dfa_should_bail_when_slow(true);
|
||||
ASSERT_GT(state_cache_resets, 0);
|
||||
ASSERT_EQ(search_failures, 0);
|
||||
}
|
||||
|
||||
// Helper function: searches for match, which should match,
|
||||
@ -237,6 +219,8 @@ static void DoSearch(Prog* prog, const StringPiece& match,
|
||||
|
||||
TEST(Multithreaded, SearchDFA) {
|
||||
Prog::TEST_dfa_should_bail_when_slow(false);
|
||||
state_cache_resets = 0;
|
||||
search_failures = 0;
|
||||
|
||||
// Same as single-threaded test above.
|
||||
const int n = 18;
|
||||
@ -259,14 +243,14 @@ TEST(Multithreaded, SearchDFA) {
|
||||
|
||||
// Run the search simultaneously in a bunch of threads.
|
||||
// Reuse same flags for Multithreaded.BuildDFA above.
|
||||
for (int i = 0; i < FLAGS_repeat; i++) {
|
||||
for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
|
||||
Prog* prog = re->CompileToProg(1<<n);
|
||||
ASSERT_TRUE(prog != NULL);
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
for (int j = 0; j < FLAGS_threads; j++)
|
||||
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||
threads.emplace_back(DoSearch, prog, match, no_match);
|
||||
for (int j = 0; j < FLAGS_threads; j++)
|
||||
for (int j = 0; j < GetFlag(FLAGS_threads); j++)
|
||||
threads[j].join();
|
||||
|
||||
delete prog;
|
||||
@ -276,6 +260,8 @@ TEST(Multithreaded, SearchDFA) {
|
||||
|
||||
// Reset to original behaviour.
|
||||
Prog::TEST_dfa_should_bail_when_slow(true);
|
||||
ASSERT_GT(state_cache_resets, 0);
|
||||
ASSERT_EQ(search_failures, 0);
|
||||
}
|
||||
|
||||
struct ReverseTest {
|
||||
|
14
extern/re2/re2/testing/dump.cc
vendored
14
extern/re2/re2/testing/dump.cc
vendored
@ -25,9 +25,6 @@
|
||||
#include "re2/stringpiece.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
// Cause a link error if this file is used outside of testing.
|
||||
DECLARE_string(test_tmpdir);
|
||||
|
||||
namespace re2 {
|
||||
|
||||
static const char* kOpcodeNames[] = {
|
||||
@ -154,14 +151,11 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
|
||||
}
|
||||
|
||||
std::string Regexp::Dump() {
|
||||
// Make sure that we are being called from a unit test.
|
||||
// Should cause a link error if used outside of testing.
|
||||
CHECK(!::testing::TempDir().empty());
|
||||
|
||||
std::string s;
|
||||
|
||||
// Make sure being called from a unit test.
|
||||
if (FLAGS_test_tmpdir.empty()) {
|
||||
LOG(ERROR) << "Cannot use except for testing.";
|
||||
return s;
|
||||
}
|
||||
|
||||
DumpRegexpAppending(this, &s);
|
||||
return s;
|
||||
}
|
||||
|
5
extern/re2/re2/testing/exhaustive1_test.cc
vendored
5
extern/re2/re2/testing/exhaustive1_test.cc
vendored
@ -10,8 +10,6 @@
|
||||
#include "util/test.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
|
||||
DECLARE_string(regexp_engines);
|
||||
|
||||
namespace re2 {
|
||||
|
||||
// Test simple repetition operators
|
||||
@ -34,9 +32,6 @@ TEST(Repetition, Capturing) {
|
||||
"%s* %s+ %s? %s*? %s+? %s??");
|
||||
ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
|
||||
7, Explode("ab"), "(?:%s)", "");
|
||||
|
||||
// This would be a great test, but it runs forever when PCRE is enabled.
|
||||
if (FLAGS_regexp_engines.find("PCRE") == std::string::npos)
|
||||
ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
|
||||
50, Explode("a"), "(?:%s)", "");
|
||||
}
|
||||
|
1
extern/re2/re2/testing/exhaustive2_test.cc
vendored
1
extern/re2/re2/testing/exhaustive2_test.cc
vendored
@ -10,7 +10,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "re2/re2.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
|
||||
namespace re2 {
|
||||
|
17
extern/re2/re2/testing/exhaustive_tester.cc
vendored
17
extern/re2/re2/testing/exhaustive_tester.cc
vendored
@ -14,6 +14,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/flags.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/strutil.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
@ -24,9 +25,9 @@
|
||||
#define LOGGING 0
|
||||
#endif
|
||||
|
||||
DEFINE_bool(show_regexps, false, "show regexps during testing");
|
||||
DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
|
||||
|
||||
DEFINE_int32(max_bad_regexp_inputs, 1,
|
||||
DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
|
||||
"Stop testing a regular expression after finding this many "
|
||||
"strings that break it.");
|
||||
|
||||
@ -62,11 +63,12 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (i > 0)
|
||||
printf(" ");
|
||||
if (m[i].begin() == NULL)
|
||||
if (m[i].data() == NULL)
|
||||
printf("-");
|
||||
else
|
||||
printf("%td-%td",
|
||||
m[i].begin() - input.begin(), m[i].end() - input.begin());
|
||||
m[i].begin() - input.begin(),
|
||||
m[i].end() - input.begin());
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,10 +78,11 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
|
||||
void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
||||
regexps_++;
|
||||
std::string regexp = const_regexp;
|
||||
if (!topwrapper_.empty())
|
||||
if (!topwrapper_.empty()) {
|
||||
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
|
||||
}
|
||||
|
||||
if (FLAGS_show_regexps) {
|
||||
if (GetFlag(FLAGS_show_regexps)) {
|
||||
printf("\r%s", regexp.c_str());
|
||||
fflush(stdout);
|
||||
}
|
||||
@ -134,7 +137,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
|
||||
tests_++;
|
||||
if (!tester.TestInput(strgen_.Next())) {
|
||||
failures_++;
|
||||
if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
|
||||
if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
46
extern/re2/re2/testing/filtered_re2_test.cc
vendored
46
extern/re2/re2/testing/filtered_re2_test.cc
vendored
@ -7,6 +7,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/logging.h"
|
||||
@ -291,4 +292,49 @@ TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
|
||||
"EmptyStringInStringSetBug", &v));
|
||||
}
|
||||
|
||||
TEST(FilteredRE2Test, MoveSemantics) {
|
||||
FilterTestVars v1;
|
||||
int id;
|
||||
v1.f.Add("foo\\d+", v1.opts, &id);
|
||||
EXPECT_EQ(0, id);
|
||||
v1.f.Compile(&v1.atoms);
|
||||
EXPECT_EQ(1, v1.atoms.size());
|
||||
EXPECT_EQ("foo", v1.atoms[0]);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
|
||||
// The moved-to object should do what the moved-from object did.
|
||||
FilterTestVars v2;
|
||||
v2.f = std::move(v1.f);
|
||||
v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
|
||||
EXPECT_EQ(1, v2.matches.size());
|
||||
EXPECT_EQ(0, v2.matches[0]);
|
||||
v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
|
||||
EXPECT_EQ(0, v2.matches.size());
|
||||
|
||||
// The moved-from object should have been reset and be reusable.
|
||||
v1.f.Add("bar\\d+", v1.opts, &id);
|
||||
EXPECT_EQ(0, id);
|
||||
v1.f.Compile(&v1.atoms);
|
||||
EXPECT_EQ(1, v1.atoms.size());
|
||||
EXPECT_EQ("bar", v1.atoms[0]);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
|
||||
// Verify that "overwriting" works and also doesn't leak memory.
|
||||
// (The latter will need a leak detector such as LeakSanitizer.)
|
||||
v1.f = std::move(v2.f);
|
||||
v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(1, v1.matches.size());
|
||||
EXPECT_EQ(0, v1.matches[0]);
|
||||
v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
|
||||
EXPECT_EQ(0, v1.matches.size());
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
9
extern/re2/re2/testing/null_walker.cc
vendored
9
extern/re2/re2/testing/null_walker.cc
vendored
@ -14,12 +14,15 @@ namespace re2 {
|
||||
class NullWalker : public Regexp::Walker<bool> {
|
||||
public:
|
||||
NullWalker() {}
|
||||
bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||
|
||||
virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
|
||||
bool* child_args, int nchild_args);
|
||||
|
||||
bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk not WalkExponential.
|
||||
virtual bool ShortVisit(Regexp* re, bool a) {
|
||||
// Should never be called: we use Walk(), not WalkExponential().
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
LOG(DFATAL) << "NullWalker::ShortVisit called";
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
|
||||
|
16
extern/re2/re2/testing/random_test.cc
vendored
16
extern/re2/re2/testing/random_test.cc
vendored
@ -9,12 +9,13 @@
|
||||
#include <vector>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/flags.h"
|
||||
#include "re2/testing/exhaustive_tester.h"
|
||||
|
||||
DEFINE_int32(regexpseed, 404, "Random regexp seed.");
|
||||
DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
|
||||
DEFINE_int32(stringseed, 200, "Random string seed.");
|
||||
DEFINE_int32(stringcount, 100, "How many random strings to generate.");
|
||||
DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed.");
|
||||
DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
|
||||
DEFINE_FLAG(int, stringseed, 200, "Random string seed.");
|
||||
DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate.");
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -37,8 +38,10 @@ static void RandomTest(int maxatoms, int maxops,
|
||||
|
||||
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
|
||||
maxstrlen, stralphabet, wrapper, "");
|
||||
t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
|
||||
t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
|
||||
t.RandomStrings(GetFlag(FLAGS_stringseed),
|
||||
GetFlag(FLAGS_stringcount));
|
||||
t.GenerateRandom(GetFlag(FLAGS_regexpseed),
|
||||
GetFlag(FLAGS_regexpcount));
|
||||
printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
|
||||
t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
|
||||
EXPECT_EQ(0, t.failures());
|
||||
@ -96,4 +99,3 @@ TEST(Random, Complicated) {
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
|
25
extern/re2/re2/testing/re2_arg_test.cc
vendored
25
extern/re2/re2/testing/re2_arg_test.cc
vendored
@ -11,6 +11,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -132,4 +133,28 @@ TEST(RE2ArgTest, Uint64Test) {
|
||||
PARSE_FOR_TYPE(uint64_t, 5);
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, ParseFromTest) {
|
||||
#if !defined(_MSC_VER)
|
||||
struct {
|
||||
bool ParseFrom(const char* str, size_t n) {
|
||||
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
return true;
|
||||
}
|
||||
} obj1;
|
||||
RE2::Arg arg1(&obj1);
|
||||
EXPECT_TRUE(arg1.Parse("one", 3));
|
||||
|
||||
struct {
|
||||
bool ParseFrom(const char* str, size_t n) {
|
||||
LOG(INFO) << "str = " << str << ", n = " << n;
|
||||
return false;
|
||||
}
|
||||
// Ensure that RE2::Arg works even with overloaded ParseFrom().
|
||||
void ParseFrom(const char* str) {}
|
||||
} obj2;
|
||||
RE2::Arg arg2(&obj2);
|
||||
EXPECT_FALSE(arg2.Parse("two", 3));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
79
extern/re2/re2/testing/re2_test.cc
vendored
79
extern/re2/re2/testing/re2_test.cc
vendored
@ -12,6 +12,7 @@
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h> /* for sysconf */
|
||||
@ -223,6 +224,15 @@ TEST(RE2, Extract) {
|
||||
ASSERT_EQ(s, "'foo'");
|
||||
}
|
||||
|
||||
TEST(RE2, MaxSubmatchTooLarge) {
|
||||
std::string s;
|
||||
ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
|
||||
s = "foo";
|
||||
ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
|
||||
s = "foo";
|
||||
ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
|
||||
}
|
||||
|
||||
TEST(RE2, Consume) {
|
||||
RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
|
||||
std::string word;
|
||||
@ -473,28 +483,27 @@ TEST(ProgramFanout, BigProgram) {
|
||||
RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
|
||||
RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
|
||||
|
||||
std::map<int, int> histogram;
|
||||
std::vector<int> histogram;
|
||||
|
||||
// 3 is the largest non-empty bucket and has 1 element.
|
||||
ASSERT_EQ(3, re1.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(1, histogram[3]);
|
||||
|
||||
// 7 is the largest non-empty bucket and has 10 elements.
|
||||
ASSERT_EQ(7, re10.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(10, histogram[7]);
|
||||
// 6 is the largest non-empty bucket and has 10 elements.
|
||||
ASSERT_EQ(6, re10.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(10, histogram[6]);
|
||||
|
||||
// 10 is the largest non-empty bucket and has 100 elements.
|
||||
ASSERT_EQ(10, re100.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(100, histogram[10]);
|
||||
// 9 is the largest non-empty bucket and has 100 elements.
|
||||
ASSERT_EQ(9, re100.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(100, histogram[9]);
|
||||
|
||||
// 13 is the largest non-empty bucket and has 1000 elements.
|
||||
ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
|
||||
ASSERT_EQ(1000, histogram[13]);
|
||||
|
||||
// 2 is the largest non-empty bucket and has 3 elements.
|
||||
// This differs from the others due to how reverse `.' works.
|
||||
// 2 is the largest non-empty bucket and has 1 element.
|
||||
ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
|
||||
ASSERT_EQ(3, histogram[2]);
|
||||
ASSERT_EQ(1, histogram[2]);
|
||||
|
||||
// 5 is the largest non-empty bucket and has 10 elements.
|
||||
ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
|
||||
@ -1232,11 +1241,10 @@ TEST(RE2, DeepRecursion) {
|
||||
// Suggested by Josh Hyman. Failed when SearchOnePass was
|
||||
// not implementing case-folding.
|
||||
TEST(CaseInsensitive, MatchAndConsume) {
|
||||
std::string result;
|
||||
std::string text = "A fish named *Wanda*";
|
||||
StringPiece sp(text);
|
||||
|
||||
EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
|
||||
StringPiece result;
|
||||
EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
|
||||
EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
|
||||
}
|
||||
|
||||
@ -1269,38 +1277,43 @@ TEST(RE2, CL8622304) {
|
||||
EXPECT_EQ(val, "1,0x2F,030,4,5");
|
||||
}
|
||||
|
||||
|
||||
// Check that RE2 returns correct regexp pieces on error.
|
||||
// In particular, make sure it returns whole runes
|
||||
// and that it always reports invalid UTF-8.
|
||||
// Also check that Perl error flag piece is big enough.
|
||||
static struct ErrorTest {
|
||||
const char *regexp;
|
||||
const char *error;
|
||||
RE2::ErrorCode error_code;
|
||||
const char *error_arg;
|
||||
} error_tests[] = {
|
||||
{ "ab\\αcd", "\\α" },
|
||||
{ "ef\\x☺01", "\\x☺0" },
|
||||
{ "gh\\x1☺01", "\\x1☺" },
|
||||
{ "ij\\x1", "\\x1" },
|
||||
{ "kl\\x", "\\x" },
|
||||
{ "uv\\x{0000☺}", "\\x{0000☺" },
|
||||
{ "wx\\p{ABC", "\\p{ABC" },
|
||||
{ "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
|
||||
{ "aa(?sm☺i", "(?sm☺" },
|
||||
{ "bb[abc", "[abc" },
|
||||
{ "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
|
||||
{ "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
|
||||
{ "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
|
||||
{ "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
|
||||
{ "kl\\x", RE2::ErrorBadEscape, "\\x" },
|
||||
{ "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
|
||||
{ "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
|
||||
// used to return (?s but the error is X
|
||||
{ "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
|
||||
{ "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
|
||||
{ "bb[abc", RE2::ErrorMissingBracket, "[abc" },
|
||||
{ "abc(def", RE2::ErrorMissingParen, "abc(def" },
|
||||
{ "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
|
||||
|
||||
{ "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
|
||||
{ "op\377qr", "" },
|
||||
{ "st\\x{00000\377", "" },
|
||||
{ "zz\\p{\377}", "" },
|
||||
{ "zz\\x{00\377}", "" },
|
||||
{ "zz(?P<name\377>abc)", "" },
|
||||
// no argument string returned for invalid UTF-8
|
||||
{ "mn\\x1\377", RE2::ErrorBadUTF8, "" },
|
||||
{ "op\377qr", RE2::ErrorBadUTF8, "" },
|
||||
{ "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
|
||||
{ "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
|
||||
{ "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
|
||||
{ "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
|
||||
};
|
||||
TEST(RE2, ErrorArgs) {
|
||||
TEST(RE2, ErrorCodeAndArg) {
|
||||
for (size_t i = 0; i < arraysize(error_tests); i++) {
|
||||
RE2 re(error_tests[i].regexp, RE2::Quiet);
|
||||
EXPECT_FALSE(re.ok());
|
||||
EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
|
||||
EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
|
||||
EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
|
||||
}
|
||||
}
|
||||
|
||||
|
1385
extern/re2/re2/testing/regexp_benchmark.cc
vendored
1385
extern/re2/re2/testing/regexp_benchmark.cc
vendored
File diff suppressed because it is too large
Load Diff
12
extern/re2/re2/testing/regexp_generator.cc
vendored
12
extern/re2/re2/testing/regexp_generator.cc
vendored
@ -241,7 +241,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
|
||||
std::vector<std::string> Explode(const StringPiece& s) {
|
||||
std::vector<std::string> v;
|
||||
|
||||
for (const char *q = s.begin(); q < s.end(); ) {
|
||||
for (const char *q = s.data(); q < s.data() + s.size(); ) {
|
||||
const char* p = q;
|
||||
Rune r;
|
||||
q += chartorune(&r, q);
|
||||
@ -256,11 +256,11 @@ std::vector<std::string> Explode(const StringPiece& s) {
|
||||
std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
|
||||
std::vector<std::string> v;
|
||||
|
||||
if (sep.size() == 0)
|
||||
if (sep.empty())
|
||||
return Explode(s);
|
||||
|
||||
const char *p = s.begin();
|
||||
for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
|
||||
const char *p = s.data();
|
||||
for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
|
||||
if (StringPiece(q, sep.size()) == sep) {
|
||||
v.push_back(std::string(p, q - p));
|
||||
p = q + sep.size();
|
||||
@ -268,8 +268,8 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (p < s.end())
|
||||
v.push_back(std::string(p, s.end() - p));
|
||||
if (p < s.data() + s.size())
|
||||
v.push_back(std::string(p, s.data() + s.size() - p));
|
||||
return v;
|
||||
}
|
||||
|
||||
|
95
extern/re2/re2/testing/required_prefix_test.cc
vendored
95
extern/re2/re2/testing/required_prefix_test.cc
vendored
@ -6,6 +6,7 @@
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/logging.h"
|
||||
#include "re2/prog.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
namespace re2 {
|
||||
@ -19,15 +20,18 @@ struct PrefixTest {
|
||||
};
|
||||
|
||||
static PrefixTest tests[] = {
|
||||
// If the regexp is missing a ^, there's no required prefix.
|
||||
{ "abc", false },
|
||||
// Empty cases.
|
||||
{ "", false },
|
||||
{ "(?m)^", false },
|
||||
{ "(?-m)^", false },
|
||||
|
||||
// If the regexp has no ^, there's no required prefix.
|
||||
{ "abc", false },
|
||||
|
||||
// If the regexp immediately goes into
|
||||
// something not a literal match, there's no required prefix.
|
||||
{ "^(abc)", false },
|
||||
{ "^a*", false },
|
||||
{ "^(abc)", false },
|
||||
|
||||
// Otherwise, it should work.
|
||||
{ "^abc$", true, "abc", false, "(?-m:$)" },
|
||||
@ -53,15 +57,15 @@ TEST(RequiredPrefix, SimpleTests) {
|
||||
bool f;
|
||||
Regexp* s;
|
||||
ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf")
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
|
||||
<< " " << re->Dump();
|
||||
if (t.return_value) {
|
||||
ASSERT_EQ(p, std::string(t.prefix))
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||
ASSERT_EQ(f, t.foldcase)
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||
ASSERT_EQ(s->ToString(), std::string(t.suffix))
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||
s->Decref();
|
||||
}
|
||||
re->Decref();
|
||||
@ -69,4 +73,81 @@ TEST(RequiredPrefix, SimpleTests) {
|
||||
}
|
||||
}
|
||||
|
||||
static PrefixTest for_accel_tests[] = {
|
||||
// Empty cases.
|
||||
{ "", false },
|
||||
{ "(?m)^", false },
|
||||
{ "(?-m)^", false },
|
||||
|
||||
// If the regexp has a ^, there's no required prefix.
|
||||
{ "^abc", false },
|
||||
|
||||
// If the regexp immediately goes into
|
||||
// something not a literal match, there's no required prefix.
|
||||
{ "a*", false },
|
||||
|
||||
// Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
|
||||
// capturing groups, but doesn't try to glue prefix fragments together.
|
||||
{ "(a?)def", false },
|
||||
{ "(ab?)def", true, "a", false },
|
||||
{ "(abc?)def", true, "ab", false },
|
||||
{ "(()a)def", false },
|
||||
{ "((a)b)def", true, "a", false },
|
||||
{ "((ab)c)def", true, "ab", false },
|
||||
|
||||
// Otherwise, it should work.
|
||||
{ "abc$", true, "abc", false },
|
||||
{ "abc", true, "abc", false },
|
||||
{ "(?i)abc", true, "abc", true },
|
||||
{ "abcd*", true, "abc", false },
|
||||
{ "[Aa][Bb]cd*", true, "ab", true },
|
||||
{ "ab[Cc]d*", true, "ab", false },
|
||||
{ "☺abc", true, "☺abc", false },
|
||||
};
|
||||
|
||||
TEST(RequiredPrefixForAccel, SimpleTests) {
|
||||
for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
|
||||
const PrefixTest& t = for_accel_tests[i];
|
||||
for (size_t j = 0; j < 2; j++) {
|
||||
Regexp::ParseFlags flags = Regexp::LikePerl;
|
||||
if (j == 0)
|
||||
flags = flags | Regexp::Latin1;
|
||||
Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
|
||||
ASSERT_TRUE(re != NULL) << " " << t.regexp;
|
||||
|
||||
std::string p;
|
||||
bool f;
|
||||
ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
|
||||
<< " " << re->Dump();
|
||||
if (t.return_value) {
|
||||
ASSERT_EQ(p, std::string(t.prefix))
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||
ASSERT_EQ(f, t.foldcase)
|
||||
<< " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
|
||||
}
|
||||
re->Decref();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(PrefixAccel, BasicTest) {
|
||||
Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
|
||||
ASSERT_TRUE(re != NULL);
|
||||
Prog* prog = re->CompileToProg(0);
|
||||
ASSERT_TRUE(prog != NULL);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
std::string text(i, 'a');
|
||||
const char* p = reinterpret_cast<const char*>(
|
||||
prog->PrefixAccel(text.data(), text.size()));
|
||||
EXPECT_TRUE(p == NULL);
|
||||
text.append("abc");
|
||||
p = reinterpret_cast<const char*>(
|
||||
prog->PrefixAccel(text.data(), text.size()));
|
||||
EXPECT_EQ(i, p-text.data());
|
||||
}
|
||||
delete prog;
|
||||
re->Decref();
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
26
extern/re2/re2/testing/set_test.cc
vendored
26
extern/re2/re2/testing/set_test.cc
vendored
@ -5,6 +5,7 @@
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "util/test.h"
|
||||
#include "util/logging.h"
|
||||
@ -201,4 +202,29 @@ TEST(Set, Prefix) {
|
||||
ASSERT_EQ(v[0], 0);
|
||||
}
|
||||
|
||||
TEST(Set, MoveSemantics) {
|
||||
RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
|
||||
ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
|
||||
ASSERT_EQ(s1.Compile(), true);
|
||||
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
|
||||
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
|
||||
|
||||
// The moved-to object should do what the moved-from object did.
|
||||
RE2::Set s2 = std::move(s1);
|
||||
ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
|
||||
ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
|
||||
|
||||
// The moved-from object should have been reset and be reusable.
|
||||
ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);
|
||||
ASSERT_EQ(s1.Compile(), true);
|
||||
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
|
||||
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
|
||||
|
||||
// Verify that "overwriting" works and also doesn't leak memory.
|
||||
// (The latter will need a leak detector such as LeakSanitizer.)
|
||||
s1 = std::move(s2);
|
||||
ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
|
||||
ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
27
extern/re2/re2/testing/string_generator.cc
vendored
27
extern/re2/re2/testing/string_generator.cc
vendored
@ -111,4 +111,31 @@ void StringGenerator::GenerateNULL() {
|
||||
hasnext_ = true;
|
||||
}
|
||||
|
||||
std::string DeBruijnString(int n) {
|
||||
CHECK_GE(n, 1);
|
||||
CHECK_LE(n, 29);
|
||||
const size_t size = size_t{1} << static_cast<size_t>(n);
|
||||
const size_t mask = size - 1;
|
||||
std::vector<bool> did(size, false);
|
||||
std::string s;
|
||||
s.reserve(static_cast<size_t>(n) + size);
|
||||
for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
|
||||
s += '0';
|
||||
size_t bits = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
bits <<= 1;
|
||||
bits &= mask;
|
||||
if (!did[bits | 1]) {
|
||||
bits |= 1;
|
||||
s += '1';
|
||||
} else {
|
||||
s += '0';
|
||||
}
|
||||
CHECK(!did[bits]);
|
||||
did[bits] = true;
|
||||
}
|
||||
CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
13
extern/re2/re2/testing/string_generator.h
vendored
13
extern/re2/re2/testing/string_generator.h
vendored
@ -58,6 +58,19 @@ class StringGenerator {
|
||||
StringGenerator& operator=(const StringGenerator&) = delete;
|
||||
};
|
||||
|
||||
// Generates and returns a string over binary alphabet {0,1} that contains
|
||||
// all possible binary sequences of length n as subsequences. The obvious
|
||||
// brute force method would generate a string of length n * 2^n, but this
|
||||
// generates a string of length n-1 + 2^n called a De Bruijn cycle.
|
||||
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
|
||||
//
|
||||
// Such a string is useful for testing a DFA. If you have a DFA
|
||||
// where distinct last n bytes implies distinct states, then running on a
|
||||
// DeBruijn string causes the DFA to need to create a new state at every
|
||||
// position in the input, never reusing any states until it gets to the
|
||||
// end of the string. This is the worst possible case for DFA execution.
|
||||
std::string DeBruijnString(int n);
|
||||
|
||||
} // namespace re2
|
||||
|
||||
#endif // RE2_TESTING_STRING_GENERATOR_H_
|
||||
|
58
extern/re2/re2/testing/tester.cc
vendored
58
extern/re2/re2/testing/tester.cc
vendored
@ -18,14 +18,15 @@
|
||||
#include "re2/re2.h"
|
||||
#include "re2/regexp.h"
|
||||
|
||||
DEFINE_bool(dump_prog, false, "dump regexp program");
|
||||
DEFINE_bool(log_okay, false, "log successful runs");
|
||||
DEFINE_bool(dump_rprog, false, "dump reversed regexp program");
|
||||
DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
|
||||
DEFINE_FLAG(bool, log_okay, false, "log successful runs");
|
||||
DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
|
||||
|
||||
DEFINE_int32(max_regexp_failures, 100,
|
||||
DEFINE_FLAG(int, max_regexp_failures, 100,
|
||||
"maximum number of regexp test failures (-1 = unlimited)");
|
||||
|
||||
DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
|
||||
DEFINE_FLAG(std::string, regexp_engines, "",
|
||||
"pattern to select regexp engines to test");
|
||||
|
||||
namespace re2 {
|
||||
|
||||
@ -62,11 +63,11 @@ static uint32_t Engines() {
|
||||
if (did_parse)
|
||||
return cached_engines;
|
||||
|
||||
if (FLAGS_regexp_engines.empty()) {
|
||||
if (GetFlag(FLAGS_regexp_engines).empty()) {
|
||||
cached_engines = ~0;
|
||||
} else {
|
||||
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
|
||||
if (FLAGS_regexp_engines.find(EngineName(i)) != std::string::npos)
|
||||
if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
|
||||
cached_engines |= 1<<i;
|
||||
}
|
||||
|
||||
@ -85,6 +86,20 @@ static uint32_t Engines() {
|
||||
|
||||
// The result of running a match.
|
||||
struct TestInstance::Result {
|
||||
Result()
|
||||
: skipped(false),
|
||||
matched(false),
|
||||
untrusted(false),
|
||||
have_submatch(false),
|
||||
have_submatch0(false) {
|
||||
ClearSubmatch();
|
||||
}
|
||||
|
||||
void ClearSubmatch() {
|
||||
for (int i = 0; i < kMaxSubmatch; i++)
|
||||
submatch[i] = StringPiece();
|
||||
}
|
||||
|
||||
bool skipped; // test skipped: wasn't applicable
|
||||
bool matched; // found a match
|
||||
bool untrusted; // don't really trust the answer
|
||||
@ -99,10 +114,11 @@ typedef TestInstance::Result Result;
|
||||
// where a and b are the starting and ending offsets of s in text.
|
||||
static std::string FormatCapture(const StringPiece& text,
|
||||
const StringPiece& s) {
|
||||
if (s.begin() == NULL)
|
||||
if (s.data() == NULL)
|
||||
return "(?,?)";
|
||||
return StringPrintf("(%td,%td)",
|
||||
s.begin() - text.begin(), s.end() - text.begin());
|
||||
s.begin() - text.begin(),
|
||||
s.end() - text.begin());
|
||||
}
|
||||
|
||||
// Returns whether text contains non-ASCII (>= 0x80) bytes.
|
||||
@ -198,7 +214,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
if (FLAGS_dump_prog) {
|
||||
if (GetFlag(FLAGS_dump_prog)) {
|
||||
LOG(INFO) << "Prog for "
|
||||
<< " regexp "
|
||||
<< CEscape(regexp_str_)
|
||||
@ -216,7 +232,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
|
||||
error_ = true;
|
||||
return;
|
||||
}
|
||||
if (FLAGS_dump_rprog)
|
||||
if (GetFlag(FLAGS_dump_rprog))
|
||||
LOG(INFO) << rprog_->Dump();
|
||||
}
|
||||
|
||||
@ -290,9 +306,6 @@ void TestInstance::RunSearch(Engine type,
|
||||
const StringPiece& orig_context,
|
||||
Prog::Anchor anchor,
|
||||
Result* result) {
|
||||
// Result is not trivial, so we cannot freely clear it with memset(3),
|
||||
// but zeroing objects like so is safe and expedient for our purposes.
|
||||
memset(reinterpret_cast<void*>(result), 0, sizeof *result);
|
||||
if (regexp_ == NULL) {
|
||||
result->skipped = true;
|
||||
return;
|
||||
@ -476,7 +489,7 @@ void TestInstance::RunSearch(Engine type,
|
||||
}
|
||||
|
||||
if (!result->matched)
|
||||
memset(result->submatch, 0, sizeof result->submatch);
|
||||
result->ClearSubmatch();
|
||||
}
|
||||
|
||||
// Checks whether r is okay given that correct is the right answer.
|
||||
@ -489,7 +502,7 @@ static bool ResultOkay(const Result& r, const Result& correct) {
|
||||
return false;
|
||||
if (r.have_submatch || r.have_submatch0) {
|
||||
for (int i = 0; i < kMaxSubmatch; i++) {
|
||||
if (correct.submatch[i].begin() != r.submatch[i].begin() ||
|
||||
if (correct.submatch[i].data() != r.submatch[i].data() ||
|
||||
correct.submatch[i].size() != r.submatch[i].size())
|
||||
return false;
|
||||
if (!r.have_submatch)
|
||||
@ -528,7 +541,7 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
||||
Result r;
|
||||
RunSearch(i, text, context, anchor, &r);
|
||||
if (ResultOkay(r, correct)) {
|
||||
if (FLAGS_log_okay)
|
||||
if (GetFlag(FLAGS_log_okay))
|
||||
LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
|
||||
continue;
|
||||
}
|
||||
@ -555,8 +568,8 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 1+num_captures_; i++) {
|
||||
if (r.submatch[i].begin() != correct.submatch[i].begin() ||
|
||||
r.submatch[i].end() != correct.submatch[i].end()) {
|
||||
if (r.submatch[i].data() != correct.submatch[i].data() ||
|
||||
r.submatch[i].size() != correct.submatch[i].size()) {
|
||||
LOG(INFO) <<
|
||||
StringPrintf(" $%d: should be %s is %s",
|
||||
i,
|
||||
@ -571,7 +584,10 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
|
||||
}
|
||||
|
||||
if (!all_okay) {
|
||||
if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
|
||||
// This will be initialised once (after flags have been initialised)
|
||||
// and that is desirable because we want to enforce a global limit.
|
||||
static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
|
||||
if (max_regexp_failures > 0 && --max_regexp_failures == 0)
|
||||
LOG(QFATAL) << "Too many regexp failures.";
|
||||
}
|
||||
|
||||
@ -640,7 +656,7 @@ static Prog::Anchor anchors[] = {
|
||||
|
||||
bool Tester::TestInput(const StringPiece& text) {
|
||||
bool okay = TestInputInContext(text, text);
|
||||
if (text.size() > 0) {
|
||||
if (!text.empty()) {
|
||||
StringPiece sp;
|
||||
sp = text;
|
||||
sp.remove_prefix(1);
|
||||
|
2
extern/re2/re2/unicode.py
vendored
2
extern/re2/re2/unicode.py
vendored
@ -13,7 +13,7 @@ import re
|
||||
from six.moves import urllib
|
||||
|
||||
# Directory or URL where Unicode tables reside.
|
||||
_UNICODE_DIR = "https://www.unicode.org/Public/12.1.0/ucd"
|
||||
_UNICODE_DIR = "https://www.unicode.org/Public/13.0.0/ucd"
|
||||
|
||||
# Largest valid Unicode code value.
|
||||
_RUNE_MAX = 0x10FFFF
|
||||
|
12
extern/re2/re2/unicode_casefold.cc
vendored
12
extern/re2/re2/unicode_casefold.cc
vendored
@ -7,7 +7,7 @@
|
||||
namespace re2 {
|
||||
|
||||
|
||||
// 1381 groups, 2792 pairs, 356 ranges
|
||||
// 1384 groups, 2798 pairs, 358 ranges
|
||||
const CaseFold unicode_casefold[] = {
|
||||
{ 65, 90, 32 },
|
||||
{ 97, 106, -32 },
|
||||
@ -349,6 +349,8 @@ const CaseFold unicode_casefold[] = {
|
||||
{ 42948, 42948, -48 },
|
||||
{ 42949, 42949, -42307 },
|
||||
{ 42950, 42950, -35384 },
|
||||
{ 42951, 42954, OddEven },
|
||||
{ 42997, 42998, OddEven },
|
||||
{ 43859, 43859, -928 },
|
||||
{ 43888, 43967, -38864 },
|
||||
{ 65313, 65338, 32 },
|
||||
@ -366,9 +368,9 @@ const CaseFold unicode_casefold[] = {
|
||||
{ 125184, 125217, 34 },
|
||||
{ 125218, 125251, -34 },
|
||||
};
|
||||
const int num_unicode_casefold = 356;
|
||||
const int num_unicode_casefold = 358;
|
||||
|
||||
// 1381 groups, 1411 pairs, 198 ranges
|
||||
// 1384 groups, 1414 pairs, 200 ranges
|
||||
const CaseFold unicode_tolower[] = {
|
||||
{ 65, 90, 32 },
|
||||
{ 181, 181, 775 },
|
||||
@ -560,6 +562,8 @@ const CaseFold unicode_tolower[] = {
|
||||
{ 42948, 42948, -48 },
|
||||
{ 42949, 42949, -42307 },
|
||||
{ 42950, 42950, -35384 },
|
||||
{ 42951, 42953, OddEvenSkip },
|
||||
{ 42997, 42997, OddEven },
|
||||
{ 43888, 43967, -38864 },
|
||||
{ 65313, 65338, 32 },
|
||||
{ 66560, 66599, 40 },
|
||||
@ -569,7 +573,7 @@ const CaseFold unicode_tolower[] = {
|
||||
{ 93760, 93791, 32 },
|
||||
{ 125184, 125217, 34 },
|
||||
};
|
||||
const int num_unicode_tolower = 198;
|
||||
const int num_unicode_tolower = 200;
|
||||
|
||||
|
||||
|
||||
|
361
extern/re2/re2/unicode_groups.cc
vendored
361
extern/re2/re2/unicode_groups.cc
vendored
File diff suppressed because it is too large
Load Diff
28
extern/re2/re2/walker-inl.h
vendored
28
extern/re2/re2/walker-inl.h
vendored
@ -89,7 +89,7 @@ template<typename T> class Regexp::Walker {
|
||||
|
||||
private:
|
||||
// Walk state for the entire traversal.
|
||||
std::stack<WalkState<T> >* stack_;
|
||||
std::stack<WalkState<T>> stack_;
|
||||
bool stopped_early_;
|
||||
int max_visits_;
|
||||
|
||||
@ -119,7 +119,7 @@ template<typename T> T Regexp::Walker<T>::Copy(T arg) {
|
||||
|
||||
// State about a single level in the traversal.
|
||||
template<typename T> struct WalkState {
|
||||
WalkState<T>(Regexp* re, T parent)
|
||||
WalkState(Regexp* re, T parent)
|
||||
: re(re),
|
||||
n(-1),
|
||||
parent_arg(parent),
|
||||
@ -134,24 +134,22 @@ template<typename T> struct WalkState {
|
||||
};
|
||||
|
||||
template<typename T> Regexp::Walker<T>::Walker() {
|
||||
stack_ = new std::stack<WalkState<T> >;
|
||||
stopped_early_ = false;
|
||||
}
|
||||
|
||||
template<typename T> Regexp::Walker<T>::~Walker() {
|
||||
Reset();
|
||||
delete stack_;
|
||||
}
|
||||
|
||||
// Clears the stack. Should never be necessary, since
|
||||
// Walk always enters and exits with an empty stack.
|
||||
// Logs DFATAL if stack is not already clear.
|
||||
template<typename T> void Regexp::Walker<T>::Reset() {
|
||||
if (stack_ && stack_->size() > 0) {
|
||||
if (!stack_.empty()) {
|
||||
LOG(DFATAL) << "Stack not empty.";
|
||||
while (stack_->size() > 0) {
|
||||
delete stack_->top().child_args;
|
||||
stack_->pop();
|
||||
while (!stack_.empty()) {
|
||||
delete[] stack_.top().child_args;
|
||||
stack_.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -165,12 +163,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
return top_arg;
|
||||
}
|
||||
|
||||
stack_->push(WalkState<T>(re, top_arg));
|
||||
stack_.push(WalkState<T>(re, top_arg));
|
||||
|
||||
WalkState<T>* s;
|
||||
for (;;) {
|
||||
T t;
|
||||
s = &stack_->top();
|
||||
s = &stack_.top();
|
||||
Regexp* re = s->re;
|
||||
switch (s->n) {
|
||||
case -1: {
|
||||
@ -201,7 +199,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
|
||||
s->n++;
|
||||
} else {
|
||||
stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
|
||||
stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -214,12 +212,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
}
|
||||
}
|
||||
|
||||
// We've finished stack_->top().
|
||||
// We've finished stack_.top().
|
||||
// Update next guy down.
|
||||
stack_->pop();
|
||||
if (stack_->size() == 0)
|
||||
stack_.pop();
|
||||
if (stack_.empty())
|
||||
return t;
|
||||
s = &stack_->top();
|
||||
s = &stack_.top();
|
||||
if (s->child_args != NULL)
|
||||
s->child_args[s->n] = t;
|
||||
else
|
||||
|
12
extern/re2/re2_test.bzl
vendored
12
extern/re2/re2_test.bzl
vendored
@ -1,12 +0,0 @@
|
||||
# Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Defines a Bazel macro that instantiates a native cc_test rule for an RE2 test.
|
||||
def re2_test(name, deps=[], size="medium"):
|
||||
native.cc_test(
|
||||
name=name,
|
||||
srcs=["re2/testing/%s.cc" % (name)],
|
||||
deps=[":test"] + deps,
|
||||
size=size,
|
||||
)
|
0
extern/re2/runtests
vendored
Normal file → Executable file
0
extern/re2/runtests
vendored
Normal file → Executable file
11
extern/re2/testinstall.cc
vendored
11
extern/re2/testinstall.cc
vendored
@ -2,11 +2,11 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <re2/re2.h>
|
||||
#include <re2/filtered_re2.h>
|
||||
#include <stdio.h>
|
||||
#include <re2/filtered_re2.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
int main(void) {
|
||||
int main() {
|
||||
re2::FilteredRE2 f;
|
||||
int id;
|
||||
f.Add("a.*b.*c", RE2::DefaultOptions, &id);
|
||||
@ -15,10 +15,13 @@ int main(void) {
|
||||
std::vector<int> ids;
|
||||
f.FirstMatch("abbccc", ids);
|
||||
|
||||
if(RE2::FullMatch("axbyc", "a.*b.*c")) {
|
||||
int n;
|
||||
if (RE2::FullMatch("axbyc", "a.*b.*c") &&
|
||||
RE2::PartialMatch("foo123bar", "(\\d+)", &n) && n == 123) {
|
||||
printf("PASS\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("FAIL\n");
|
||||
return 2;
|
||||
}
|
||||
|
156
extern/re2/util/benchmark.cc
vendored
156
extern/re2/util/benchmark.cc
vendored
@ -7,119 +7,87 @@
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include "util/util.h"
|
||||
#include "util/flags.h"
|
||||
#include "util/benchmark.h"
|
||||
#include "util/flags.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
|
||||
|
||||
#ifdef _WIN32
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
using testing::Benchmark;
|
||||
using ::testing::Benchmark;
|
||||
|
||||
static Benchmark* benchmarks[10000];
|
||||
static int nbenchmarks;
|
||||
|
||||
void Benchmark::Register() {
|
||||
benchmarks[nbenchmarks] = this;
|
||||
if(lo < 1)
|
||||
lo = 1;
|
||||
if(hi < lo)
|
||||
hi = lo;
|
||||
nbenchmarks++;
|
||||
lo_ = std::max(1, lo_);
|
||||
hi_ = std::max(lo_, hi_);
|
||||
benchmarks[nbenchmarks++] = this;
|
||||
}
|
||||
|
||||
static int64_t nsec() {
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
std::chrono::steady_clock::now().time_since_epoch()).count();
|
||||
std::chrono::steady_clock::now().time_since_epoch())
|
||||
.count();
|
||||
}
|
||||
|
||||
static int64_t bytes;
|
||||
static int64_t ns;
|
||||
static int64_t t0;
|
||||
static int64_t ns;
|
||||
static int64_t bytes;
|
||||
static int64_t items;
|
||||
|
||||
void SetBenchmarkBytesProcessed(int64_t x) {
|
||||
bytes = x;
|
||||
void StartBenchmarkTiming() {
|
||||
if (t0 == 0) {
|
||||
t0 = nsec();
|
||||
}
|
||||
}
|
||||
|
||||
void StopBenchmarkTiming() {
|
||||
if(t0 != 0)
|
||||
if (t0 != 0) {
|
||||
ns += nsec() - t0;
|
||||
t0 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void StartBenchmarkTiming() {
|
||||
if(t0 == 0)
|
||||
void SetBenchmarkBytesProcessed(int64_t b) { bytes = b; }
|
||||
|
||||
void SetBenchmarkItemsProcessed(int64_t i) { items = i; }
|
||||
|
||||
static void RunFunc(Benchmark* b, int iters, int arg) {
|
||||
t0 = nsec();
|
||||
}
|
||||
|
||||
void SetBenchmarkItemsProcessed(int n) {
|
||||
items = n;
|
||||
}
|
||||
|
||||
void BenchmarkMemoryUsage() {
|
||||
// TODO(rsc): Implement.
|
||||
}
|
||||
|
||||
int NumCPUs() {
|
||||
return static_cast<int>(std::thread::hardware_concurrency());
|
||||
}
|
||||
|
||||
static void runN(Benchmark *b, int n, int siz) {
|
||||
ns = 0;
|
||||
bytes = 0;
|
||||
items = 0;
|
||||
ns = 0;
|
||||
t0 = nsec();
|
||||
if(b->fn)
|
||||
b->fn(n);
|
||||
else if(b->fnr)
|
||||
b->fnr(n, siz);
|
||||
else {
|
||||
fprintf(stderr, "%s: missing function\n", b->name);
|
||||
abort();
|
||||
}
|
||||
if(t0 != 0)
|
||||
ns += nsec() - t0;
|
||||
b->func()(iters, arg);
|
||||
StopBenchmarkTiming();
|
||||
}
|
||||
|
||||
static int round(int n) {
|
||||
int base = 1;
|
||||
|
||||
while(base*10 < n)
|
||||
base *= 10;
|
||||
if(n < 2*base)
|
||||
return 2*base;
|
||||
if(n < 5*base)
|
||||
return 5*base;
|
||||
while (base * 10 < n) base *= 10;
|
||||
if (n < 2 * base) return 2 * base;
|
||||
if (n < 5 * base) return 5 * base;
|
||||
return 10 * base;
|
||||
}
|
||||
|
||||
void RunBench(Benchmark* b, int nthread, int siz) {
|
||||
int n, last;
|
||||
static void RunBench(Benchmark* b, int arg) {
|
||||
int iters, last;
|
||||
|
||||
// TODO(rsc): Threaded benchmarks.
|
||||
if(nthread != 1)
|
||||
return;
|
||||
|
||||
// run once in case it's expensive
|
||||
n = 1;
|
||||
runN(b, n, siz);
|
||||
while(ns < (int)1e9 && n < (int)1e9) {
|
||||
last = n;
|
||||
if(ns/n == 0)
|
||||
n = (int)1e9;
|
||||
else
|
||||
n = (int)1e9 / static_cast<int>(ns/n);
|
||||
|
||||
n = std::max(last+1, std::min(n+n/2, 100*last));
|
||||
n = round(n);
|
||||
runN(b, n, siz);
|
||||
// Run once just in case it's expensive.
|
||||
iters = 1;
|
||||
RunFunc(b, iters, arg);
|
||||
while (ns < (int)1e9 && iters < (int)1e9) {
|
||||
last = iters;
|
||||
if (ns / iters == 0) {
|
||||
iters = (int)1e9;
|
||||
} else {
|
||||
iters = (int)1e9 / static_cast<int>(ns / iters);
|
||||
}
|
||||
iters = std::max(last + 1, std::min(iters + iters / 2, 100 * last));
|
||||
iters = round(iters);
|
||||
RunFunc(b, iters, arg);
|
||||
}
|
||||
|
||||
char mb[100];
|
||||
@ -127,35 +95,37 @@ void RunBench(Benchmark* b, int nthread, int siz) {
|
||||
mb[0] = '\0';
|
||||
suf[0] = '\0';
|
||||
if (ns > 0 && bytes > 0)
|
||||
snprintf(mb, sizeof mb, "\t%7.2f MB/s", ((double)bytes/1e6)/((double)ns/1e9));
|
||||
if(b->fnr || b->lo != b->hi) {
|
||||
if(siz >= (1<<20))
|
||||
snprintf(suf, sizeof suf, "/%dM", siz/(1<<20));
|
||||
else if(siz >= (1<<10))
|
||||
snprintf(suf, sizeof suf, "/%dK", siz/(1<<10));
|
||||
else
|
||||
snprintf(suf, sizeof suf, "/%d", siz);
|
||||
snprintf(mb, sizeof mb, "\t%7.2f MB/s",
|
||||
((double)bytes / 1e6) / ((double)ns / 1e9));
|
||||
if (b->has_arg()) {
|
||||
if (arg >= (1 << 20)) {
|
||||
snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20));
|
||||
} else if (arg >= (1 << 10)) {
|
||||
snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10));
|
||||
} else {
|
||||
snprintf(suf, sizeof suf, "/%d", arg);
|
||||
}
|
||||
printf("%s%s\t%8lld\t%10lld ns/op%s\n", b->name, suf, (long long)n, (long long)ns/n, mb);
|
||||
}
|
||||
printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters,
|
||||
(long long)ns / iters, mb);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
static int match(const char* name, int argc, const char** argv) {
|
||||
if(argc == 1)
|
||||
return 1;
|
||||
for(int i = 1; i < argc; i++)
|
||||
static bool WantBench(const char* name, int argc, const char** argv) {
|
||||
if (argc == 1) return true;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (RE2::PartialMatch(name, argv[i]))
|
||||
return 1;
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
for (int i = 0; i < nbenchmarks; i++) {
|
||||
Benchmark* b = benchmarks[i];
|
||||
if(match(b->name, argc, argv))
|
||||
for(int j = b->threadlo; j <= b->threadhi; j++)
|
||||
for(int k = std::max(b->lo, 1); k <= std::max(b->hi, 1); k<<=1)
|
||||
RunBench(b, j, k);
|
||||
if (!WantBench(b->name(), argc, argv))
|
||||
continue;
|
||||
for (int arg = b->lo(); arg <= b->hi(); arg <<= 1)
|
||||
RunBench(b, arg);
|
||||
}
|
||||
}
|
||||
|
||||
|
155
extern/re2/util/benchmark.h
vendored
155
extern/re2/util/benchmark.h
vendored
@ -6,35 +6,148 @@
|
||||
#define UTIL_BENCHMARK_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <functional>
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/util.h"
|
||||
|
||||
// Globals for the old benchmark API.
|
||||
void StartBenchmarkTiming();
|
||||
void StopBenchmarkTiming();
|
||||
void SetBenchmarkBytesProcessed(int64_t b);
|
||||
void SetBenchmarkItemsProcessed(int64_t i);
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
// The new benchmark API implemented as a layer over the old benchmark API.
|
||||
// (Please refer to https://github.com/google/benchmark for documentation.)
|
||||
class State {
|
||||
private:
|
||||
class Iterator {
|
||||
public:
|
||||
// Benchmark code looks like this:
|
||||
//
|
||||
// for (auto _ : state) {
|
||||
// // ...
|
||||
// }
|
||||
//
|
||||
// We try to avoid compiler warnings about such variables being unused.
|
||||
struct ATTRIBUTE_UNUSED Value {};
|
||||
|
||||
explicit Iterator(int64_t iters) : iters_(iters) {}
|
||||
|
||||
bool operator!=(const Iterator& that) const {
|
||||
if (iters_ != that.iters_) {
|
||||
return true;
|
||||
} else {
|
||||
// We are about to stop the loop, so stop timing.
|
||||
StopBenchmarkTiming();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Value operator*() const {
|
||||
return Value();
|
||||
}
|
||||
|
||||
Iterator& operator++() {
|
||||
--iters_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t iters_;
|
||||
};
|
||||
|
||||
public:
|
||||
explicit State(int64_t iters)
|
||||
: iters_(iters), arg_(0), has_arg_(false) {}
|
||||
|
||||
State(int64_t iters, int64_t arg)
|
||||
: iters_(iters), arg_(arg), has_arg_(true) {}
|
||||
|
||||
Iterator begin() {
|
||||
// We are about to start the loop, so start timing.
|
||||
StartBenchmarkTiming();
|
||||
return Iterator(iters_);
|
||||
}
|
||||
|
||||
Iterator end() {
|
||||
return Iterator(0);
|
||||
}
|
||||
|
||||
void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); }
|
||||
void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); }
|
||||
int64_t iterations() const { return iters_; }
|
||||
// Pretend to support multiple arguments.
|
||||
int64_t range(int pos) const { CHECK(has_arg_); return arg_; }
|
||||
|
||||
private:
|
||||
int64_t iters_;
|
||||
int64_t arg_;
|
||||
bool has_arg_;
|
||||
|
||||
State(const State&) = delete;
|
||||
State& operator=(const State&) = delete;
|
||||
};
|
||||
|
||||
} // namespace benchmark
|
||||
|
||||
namespace testing {
|
||||
struct Benchmark {
|
||||
const char* name;
|
||||
void (*fn)(int);
|
||||
void (*fnr)(int, int);
|
||||
int lo;
|
||||
int hi;
|
||||
int threadlo;
|
||||
int threadhi;
|
||||
|
||||
class Benchmark {
|
||||
public:
|
||||
Benchmark(const char* name, void (*func)(benchmark::State&))
|
||||
: name_(name),
|
||||
func_([func](int iters, int arg) {
|
||||
benchmark::State state(iters);
|
||||
func(state);
|
||||
}),
|
||||
lo_(0),
|
||||
hi_(0),
|
||||
has_arg_(false) {
|
||||
Register();
|
||||
}
|
||||
|
||||
Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi)
|
||||
: name_(name),
|
||||
func_([func](int iters, int arg) {
|
||||
benchmark::State state(iters, arg);
|
||||
func(state);
|
||||
}),
|
||||
lo_(lo),
|
||||
hi_(hi),
|
||||
has_arg_(true) {
|
||||
Register();
|
||||
}
|
||||
|
||||
// Pretend to support multiple threads.
|
||||
Benchmark* ThreadRange(int lo, int hi) { return this; }
|
||||
|
||||
const char* name() const { return name_; }
|
||||
const std::function<void(int, int)>& func() const { return func_; }
|
||||
int lo() const { return lo_; }
|
||||
int hi() const { return hi_; }
|
||||
bool has_arg() const { return has_arg_; }
|
||||
|
||||
private:
|
||||
void Register();
|
||||
Benchmark(const char* name, void (*f)(int)) { Clear(name); fn = f; Register(); }
|
||||
Benchmark(const char* name, void (*f)(int, int), int l, int h) { Clear(name); fnr = f; lo = l; hi = h; Register(); }
|
||||
void Clear(const char* n) { name = n; fn = 0; fnr = 0; lo = 0; hi = 0; threadlo = 0; threadhi = 0; }
|
||||
Benchmark* ThreadRange(int lo, int hi) { threadlo = lo; threadhi = hi; return this; }
|
||||
|
||||
const char* name_;
|
||||
std::function<void(int, int)> func_;
|
||||
int lo_;
|
||||
int hi_;
|
||||
bool has_arg_;
|
||||
|
||||
Benchmark(const Benchmark&) = delete;
|
||||
Benchmark& operator=(const Benchmark&) = delete;
|
||||
};
|
||||
|
||||
} // namespace testing
|
||||
|
||||
void SetBenchmarkBytesProcessed(int64_t);
|
||||
void StopBenchmarkTiming();
|
||||
void StartBenchmarkTiming();
|
||||
void BenchmarkMemoryUsage();
|
||||
void SetBenchmarkItemsProcessed(int);
|
||||
|
||||
int NumCPUs();
|
||||
|
||||
#define BENCHMARK(f) \
|
||||
::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
|
||||
::testing::Benchmark* _benchmark_##f = \
|
||||
(new ::testing::Benchmark(#f, f))
|
||||
|
||||
#define BENCHMARK_RANGE(f, lo, hi) \
|
||||
::testing::Benchmark* _benchmark_##f = \
|
||||
|
19
extern/re2/util/flags.h
vendored
19
extern/re2/util/flags.h
vendored
@ -10,20 +10,17 @@
|
||||
// If you want to do that, see
|
||||
// https://gflags.github.io/gflags/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define DEFINE_flag(type, name, deflt, desc) \
|
||||
#define DEFINE_FLAG(type, name, deflt, desc) \
|
||||
namespace re2 { type FLAGS_##name = deflt; }
|
||||
|
||||
#define DECLARE_flag(type, name) \
|
||||
#define DECLARE_FLAG(type, name) \
|
||||
namespace re2 { extern type FLAGS_##name; }
|
||||
|
||||
#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
|
||||
#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc)
|
||||
#define DEFINE_string(name, deflt, desc) DEFINE_flag(std::string, name, deflt, desc)
|
||||
|
||||
#define DECLARE_bool(name) DECLARE_flag(bool, name)
|
||||
#define DECLARE_int32(name) DECLARE_flag(int32_t, name)
|
||||
#define DECLARE_string(name) DECLARE_flag(std::string, name)
|
||||
namespace re2 {
|
||||
template <typename T>
|
||||
T GetFlag(const T& flag) {
|
||||
return flag;
|
||||
}
|
||||
} // namespace re2
|
||||
|
||||
#endif // UTIL_FLAGS_H_
|
||||
|
19
extern/re2/util/malloc_counter.h
vendored
Normal file
19
extern/re2/util/malloc_counter.h
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef UTIL_MALLOC_COUNTER_H_
|
||||
#define UTIL_MALLOC_COUNTER_H_
|
||||
|
||||
namespace testing {
|
||||
class MallocCounter {
|
||||
public:
|
||||
MallocCounter(int x) {}
|
||||
static const int THIS_THREAD_ONLY = 0;
|
||||
long long HeapGrowth() { return 0; }
|
||||
long long PeakHeapGrowth() { return 0; }
|
||||
void Reset() {}
|
||||
};
|
||||
} // namespace testing
|
||||
|
||||
#endif // UTIL_MALLOC_COUNTER_H_
|
23
extern/re2/util/mutex.h
vendored
23
extern/re2/util/mutex.h
vendored
@ -10,7 +10,13 @@
|
||||
* You should assume the locks are *not* re-entrant.
|
||||
*/
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#ifdef _WIN32
|
||||
// Requires Windows Vista or Windows Server 2008 at minimum.
|
||||
#include <windows.h>
|
||||
#if defined(WINVER) && WINVER >= 0x0600
|
||||
#define MUTEX_IS_WIN32_SRWLOCK
|
||||
#endif
|
||||
#else
|
||||
#ifndef _POSIX_C_SOURCE
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#endif
|
||||
@ -20,7 +26,9 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||
#if defined(MUTEX_IS_WIN32_SRWLOCK)
|
||||
typedef SRWLOCK MutexType;
|
||||
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
typedef pthread_rwlock_t MutexType;
|
||||
@ -56,7 +64,16 @@ class Mutex {
|
||||
Mutex& operator=(const Mutex&) = delete;
|
||||
};
|
||||
|
||||
#if defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||
#if defined(MUTEX_IS_WIN32_SRWLOCK)
|
||||
|
||||
Mutex::Mutex() { InitializeSRWLock(&mutex_); }
|
||||
Mutex::~Mutex() { }
|
||||
void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
|
||||
void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
|
||||
void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
|
||||
void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
|
||||
|
||||
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
|
||||
|
||||
#define SAFE_PTHREAD(fncall) \
|
||||
do { \
|
||||
|
40
extern/re2/util/pcre.cc
vendored
40
extern/re2/util/pcre.cc
vendored
@ -22,9 +22,7 @@
|
||||
#include "util/strutil.h"
|
||||
|
||||
// Silence warnings about the wacky formatting in the operator() functions.
|
||||
// Note that we test for Clang first because it defines __GNUC__ as well.
|
||||
#if defined(__clang__)
|
||||
#elif defined(__GNUC__) && __GNUC__ >= 6
|
||||
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
|
||||
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
||||
#endif
|
||||
|
||||
@ -35,8 +33,9 @@
|
||||
// not exceed main thread stacks. Note that other threads
|
||||
// often have smaller stacks, and therefore tightening
|
||||
// regexp_stack_limit may frequently be necessary.
|
||||
DEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)");
|
||||
DEFINE_int32(regexp_match_limit, 1000000,
|
||||
DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
|
||||
"default PCRE stack limit (bytes)");
|
||||
DEFINE_FLAG(int, regexp_match_limit, 1000000,
|
||||
"default PCRE match limit (function calls)");
|
||||
|
||||
#ifndef USEPCRE
|
||||
@ -523,12 +522,12 @@ int PCRE::TryMatch(const StringPiece& text,
|
||||
|
||||
int match_limit = match_limit_;
|
||||
if (match_limit <= 0) {
|
||||
match_limit = FLAGS_regexp_match_limit;
|
||||
match_limit = GetFlag(FLAGS_regexp_match_limit);
|
||||
}
|
||||
|
||||
int stack_limit = stack_limit_;
|
||||
if (stack_limit <= 0) {
|
||||
stack_limit = FLAGS_regexp_stack_limit;
|
||||
stack_limit = GetFlag(FLAGS_regexp_stack_limit);
|
||||
}
|
||||
|
||||
pcre_extra extra = { 0 };
|
||||
@ -977,32 +976,7 @@ static bool parse_double_float(const char* str, size_t n, bool isfloat,
|
||||
} else {
|
||||
r = strtod(buf, &end);
|
||||
}
|
||||
if (end != buf + n) {
|
||||
#ifdef _WIN32
|
||||
// Microsoft's strtod() doesn't handle inf and nan, so we have to
|
||||
// handle it explicitly. Speed is not important here because this
|
||||
// code is only called in unit tests.
|
||||
bool pos = true;
|
||||
const char* i = buf;
|
||||
if ('-' == *i) {
|
||||
pos = false;
|
||||
++i;
|
||||
} else if ('+' == *i) {
|
||||
++i;
|
||||
}
|
||||
if (0 == _stricmp(i, "inf") || 0 == _stricmp(i, "infinity")) {
|
||||
r = std::numeric_limits<double>::infinity();
|
||||
if (!pos)
|
||||
r = -r;
|
||||
} else if (0 == _stricmp(i, "nan")) {
|
||||
r = std::numeric_limits<double>::quiet_NaN();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false; // Leftover junk
|
||||
#endif
|
||||
}
|
||||
if (end != buf + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
if (dest == NULL) return true;
|
||||
if (isfloat) {
|
||||
|
6
extern/re2/util/pcre.h
vendored
6
extern/re2/util/pcre.h
vendored
@ -555,7 +555,7 @@ class PCRE_Options {
|
||||
// Hex/Octal/Binary?
|
||||
|
||||
// Special class for parsing into objects that define a ParseFrom() method
|
||||
template <class T>
|
||||
template <typename T>
|
||||
class _PCRE_MatchObject {
|
||||
public:
|
||||
static inline bool Parse(const char* str, size_t n, void* dest) {
|
||||
@ -600,9 +600,9 @@ class PCRE::Arg {
|
||||
#undef MAKE_PARSER
|
||||
|
||||
// Generic constructor
|
||||
template <class T> Arg(T*, Parser parser);
|
||||
template <typename T> Arg(T*, Parser parser);
|
||||
// Generic constructor template
|
||||
template <class T> Arg(T* p)
|
||||
template <typename T> Arg(T* p)
|
||||
: arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user