Update re2 to version 2021-04-01.

2025-01-22 16:43:03 +01:00 · 2021-05-26 10:07:49 -03:00 · 2021-05-26 10:07:49 -03:00 · 2e35cc66e7
commit 2e35cc66e7
parent d9d8cc36d4
103 changed files with 3877 additions and 2714 deletions
--- a/extern/re2/.github/bazel.sh
+++ b/extern/re2/.github/bazel.sh
@ -1,8 +1,6 @@
 #!/bin/bash
 set -eux

-cd git/re2
-
 bazel clean
 bazel build --compilation_mode=dbg -- //:all
 bazel test  --compilation_mode=dbg --test_output=errors -- //:all \
--- a/extern/re2/.github/cmake.sh
+++ b/extern/re2/.github/cmake.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+set -eux
+
+cmake -D CMAKE_BUILD_TYPE=Debug
+cmake --build . --config Debug --clean-first
+ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
+
+cmake -D CMAKE_BUILD_TYPE=Release
+cmake --build . --config Release --clean-first
+ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
+
+exit 0
--- a/extern/re2/.github/workflows/ci-bazel.yml
+++ b/extern/re2/.github/workflows/ci-bazel.yml
@ -0,0 +1,17 @@
+name: CI (Bazel)
+on:
+  push:
+    branches: [master]
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, ubuntu-latest, windows-latest]
+    env:
+      BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v2
+      - run: .github/bazel.sh
+        shell: bash
--- a/extern/re2/.github/workflows/ci-cmake.yml
+++ b/extern/re2/.github/workflows/ci-cmake.yml
@ -0,0 +1,15 @@
+name: CI (CMake)
+on:
+  push:
+    branches: [master]
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, ubuntu-latest, windows-latest]
+    steps:
+      - uses: actions/checkout@v2
+      - run: .github/cmake.sh
+        shell: bash
--- a/extern/re2/.github/workflows/ci.yml
+++ b/extern/re2/.github/workflows/ci.yml
@ -0,0 +1,51 @@
+name: CI
+on:
+  push:
+    branches: [master]
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, ubuntu-latest]
+    env:
+      CC: clang
+      CXX: clang++
+    steps:
+      - uses: actions/checkout@v2
+      - run: make && make test
+        shell: bash
+  build-clang:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        tag: [9, 10, 11]
+    env:
+      CC: clang-${{ matrix.tag }}
+      CXX: clang++-${{ matrix.tag }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install Clang ${{ matrix.tag }}
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x ./llvm.sh
+          sudo ./llvm.sh ${{ matrix.tag }}
+        shell: bash
+      - run: make && make test
+        shell: bash
+  build-gcc:
+    runs-on: ubuntu-latest
+    container: gcc:${{ matrix.tag }}
+    strategy:
+      fail-fast: false
+      matrix:
+        tag: [4, 5, 6, 7, 8, 9, 10]
+    env:
+      CC: gcc
+      CXX: g++
+    steps:
+      - uses: actions/checkout@v2
+      - run: make && make test
+        shell: bash
--- a/extern/re2/.gitignore
+++ b/extern/re2/.gitignore
@ -3,4 +3,3 @@
 core
 obj/
 benchlog.*
-builds/
--- a/extern/re2/.travis.yml
+++ b/extern/re2/.travis.yml
@ -1,179 +0,0 @@
-language: cpp
-sudo: false
-dist: trusty
-script:
-  - make
-  - make test
-matrix:
-  include:
-
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-4.8
-      env:
-         - MATRIX_EVAL="CC=gcc-4.8 CXX=g++-4.8"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-4.9
-      env:
-         - MATRIX_EVAL="CC=gcc-4.9 CXX=g++-4.9"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-5
-      env:
-         - MATRIX_EVAL="CC=gcc-5 CXX=g++-5"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-6
-      env:
-        - MATRIX_EVAL="CC=gcc-6 CXX=g++-6"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-7
-      env:
-        - MATRIX_EVAL="CC=gcc-7 CXX=g++-7"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-8
-      env:
-        - MATRIX_EVAL="CC=gcc-8 CXX=g++-8"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-9
-      env:
-        - MATRIX_EVAL="CC=gcc-9 CXX=g++-9"
-
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-precise-3.5
-          packages:
-            - clang-3.5
-      env:
-        - MATRIX_EVAL="CC=clang-3.5 CXX=clang++-3.5"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-precise-3.6
-          packages:
-            - clang-3.6
-      env:
-        - MATRIX_EVAL="CC=clang-3.6 CXX=clang++-3.6"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-precise-3.7
-          packages:
-            - clang-3.7
-      env:
-        - MATRIX_EVAL="CC=clang-3.7 CXX=clang++-3.7"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-precise-3.8
-          packages:
-            - clang-3.8
-      env:
-        - MATRIX_EVAL="CC=clang-3.8 CXX=clang++-3.8"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-precise-3.9
-          packages:
-            - clang-3.9
-      env:
-        - MATRIX_EVAL="CC=clang-3.9 CXX=clang++-3.9"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-trusty-4.0
-          packages:
-            - clang-4.0
-      env:
-        - MATRIX_EVAL="CC=clang-4.0 CXX=clang++-4.0"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - llvm-toolchain-trusty-5.0
-          packages:
-            - clang-5.0
-      env:
-        - MATRIX_EVAL="CC=clang-5.0 CXX=clang++-5.0"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main'
-              key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
-          packages:
-            - clang-6.0
-      env:
-        - MATRIX_EVAL="CC=clang-6.0 CXX=clang++-6.0"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
-              key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
-          packages:
-            - clang-7
-      env:
-        - MATRIX_EVAL="CC=clang-7 CXX=clang++-7"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-            - sourceline: 'deb https://apt.llvm.org/trusty/ llvm-toolchain-trusty-8 main'
-              key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
-          packages:
-            - clang-8
-      env:
-        - MATRIX_EVAL="CC=clang-8 CXX=clang++-8"
-
-before_install:
-  - eval "${MATRIX_EVAL}"
--- a/extern/re2/BUILD
+++ b/extern/re2/BUILD
@ -9,19 +9,21 @@ licenses(["notice"])
 exports_files(["LICENSE"])

 config_setting(
-    name = "darwin",
+    name = "macos",
    values = {"cpu": "darwin"},
 )

+config_setting(
+    name = "wasm",
+    values = {"cpu": "wasm32"},
+)
+
 config_setting(
    name = "windows",
    values = {"cpu": "x64_windows"},
 )

-config_setting(
-    name = "windows_msvc",
-    values = {"cpu": "x64_windows_msvc"},
-)
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")

 cc_library(
    name = "re2",
@ -36,6 +38,7 @@ cc_library(
        "re2/onepass.cc",
        "re2/parse.cc",
        "re2/perl_groups.cc",
+        "re2/pod_array.h",
        "re2/prefilter.cc",
        "re2/prefilter.h",
        "re2/prefilter_tree.cc",
@ -47,6 +50,8 @@ cc_library(
        "re2/regexp.h",
        "re2/set.cc",
        "re2/simplify.cc",
+        "re2/sparse_array.h",
+        "re2/sparse_set.h",
        "re2/stringpiece.cc",
        "re2/tostring.cc",
        "re2/unicode_casefold.cc",
@ -54,14 +59,10 @@ cc_library(
        "re2/unicode_groups.cc",
        "re2/unicode_groups.h",
        "re2/walker-inl.h",
-        "util/flags.h",
        "util/logging.h",
        "util/mix.h",
        "util/mutex.h",
-        "util/pod_array.h",
        "util/rune.cc",
-        "util/sparse_array.h",
-        "util/sparse_set.h",
        "util/strutil.cc",
        "util/strutil.h",
        "util/utf.h",
@ -74,17 +75,17 @@ cc_library(
        "re2/stringpiece.h",
    ],
    copts = select({
+        ":wasm": [],
        ":windows": [],
-        ":windows_msvc": [],
        "//conditions:default": ["-pthread"],
    }),
    linkopts = select({
-        # Darwin doesn't need `-pthread' when linking and it appears that
+        # macOS doesn't need `-pthread' when linking and it appears that
        # older versions of Clang will warn about the unused command line
        # argument, so just don't pass it.
-        ":darwin": [],
+        ":macos": [],
+        ":wasm": [],
        ":windows": [],
-        ":windows_msvc": [],
        "//conditions:default": ["-pthread"],
    }),
    visibility = ["//visibility:public"],
@ -109,6 +110,8 @@ cc_library(
        "re2/testing/string_generator.h",
        "re2/testing/tester.h",
        "util/benchmark.h",
+        "util/flags.h",
+        "util/malloc_counter.h",
        "util/pcre.h",
        "util/test.h",
    ],
@ -122,106 +125,144 @@ cc_library(
    deps = [":testing"],
 )

-load(":re2_test.bzl", "re2_test")
-
-re2_test(
-    "charclass_test",
+cc_test(
+    name = "charclass_test",
    size = "small",
+    srcs = ["re2/testing/charclass_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "compile_test",
+cc_test(
+    name = "compile_test",
    size = "small",
+    srcs = ["re2/testing/compile_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "filtered_re2_test",
+cc_test(
+    name = "filtered_re2_test",
    size = "small",
+    srcs = ["re2/testing/filtered_re2_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "mimics_pcre_test",
+cc_test(
+    name = "mimics_pcre_test",
    size = "small",
+    srcs = ["re2/testing/mimics_pcre_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "parse_test",
+cc_test(
+    name = "parse_test",
    size = "small",
+    srcs = ["re2/testing/parse_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "possible_match_test",
+cc_test(
+    name = "possible_match_test",
    size = "small",
+    srcs = ["re2/testing/possible_match_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "re2_arg_test",
+cc_test(
+    name = "re2_arg_test",
    size = "small",
+    srcs = ["re2/testing/re2_arg_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "re2_test",
+cc_test(
+    name = "re2_test",
    size = "small",
+    srcs = ["re2/testing/re2_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "regexp_test",
+cc_test(
+    name = "regexp_test",
    size = "small",
+    srcs = ["re2/testing/regexp_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "required_prefix_test",
+cc_test(
+    name = "required_prefix_test",
    size = "small",
+    srcs = ["re2/testing/required_prefix_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "search_test",
+cc_test(
+    name = "search_test",
    size = "small",
+    srcs = ["re2/testing/search_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "set_test",
+cc_test(
+    name = "set_test",
    size = "small",
+    srcs = ["re2/testing/set_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "simplify_test",
+cc_test(
+    name = "simplify_test",
    size = "small",
+    srcs = ["re2/testing/simplify_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "string_generator_test",
+cc_test(
+    name = "string_generator_test",
    size = "small",
+    srcs = ["re2/testing/string_generator_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "dfa_test",
+cc_test(
+    name = "dfa_test",
    size = "large",
+    srcs = ["re2/testing/dfa_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "exhaustive1_test",
+cc_test(
+    name = "exhaustive1_test",
    size = "large",
+    srcs = ["re2/testing/exhaustive1_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "exhaustive2_test",
+cc_test(
+    name = "exhaustive2_test",
    size = "large",
+    srcs = ["re2/testing/exhaustive2_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "exhaustive3_test",
+cc_test(
+    name = "exhaustive3_test",
    size = "large",
+    srcs = ["re2/testing/exhaustive3_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "exhaustive_test",
+cc_test(
+    name = "exhaustive_test",
    size = "large",
+    srcs = ["re2/testing/exhaustive_test.cc"],
+    deps = [":test"],
 )

-re2_test(
-    "random_test",
+cc_test(
+    name = "random_test",
    size = "large",
+    srcs = ["re2/testing/random_test.cc"],
+    deps = [":test"],
 )

 cc_library(
--- a/extern/re2/CMakeLists.txt
+++ b/extern/re2/CMakeLists.txt
@ -2,8 +2,8 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.

-# Old enough to support Ubuntu Trusty.
-cmake_minimum_required(VERSION 2.8.12)
+# Old enough to support Ubuntu Xenial.
+cmake_minimum_required(VERSION 3.5.1)

 if(POLICY CMP0048)
  cmake_policy(SET CMP0048 NEW)
@ -11,6 +11,12 @@ endif()

 project(RE2 CXX)
 include(CTest)
+include(GNUInstallDirs)
+
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()

 option(BUILD_SHARED_LIBS "build shared libraries" OFF)
 option(USEPCRE "use PCRE in tests and benchmarks" OFF)
@ -19,6 +25,10 @@ option(USEPCRE "use PCRE in tests and benchmarks" OFF)
 # so we provide an option similar to BUILD_TESTING, but just for RE2.
 option(RE2_BUILD_TESTING "enable testing for RE2" ON)

+# ABI version
+# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
+set(SONAME 9)
+
 set(EXTRA_TARGET_LINK_LIBRARIES)

 if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
@ -27,7 +37,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  endif()
  if(BUILD_SHARED_LIBS)
    # See http://www.kitware.com/blog/home/post/939 for details.
-    cmake_minimum_required(VERSION 3.4)
    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
  endif()
  # CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
@ -36,13 +45,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  # Without a byte order mark (BOM), Visual Studio assumes that the source
  # file is encoded using the current user code page, so we specify UTF-8.
  add_compile_options(/utf-8)
-  # allow multi-processor compilation
-  add_compile_options(/MP)
-elseif(CYGWIN OR MINGW)
-  # See https://stackoverflow.com/questions/38139631 for details.
-  add_compile_options(-std=gnu++11)
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
-  add_compile_options(-std=c++11)
 endif()

 if(WIN32)
@ -58,8 +60,6 @@ if(USEPCRE)
  list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
 endif()

-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
 set(RE2_SOURCES
    re2/bitstate.cc
    re2/compile.cc
@ -86,6 +86,8 @@ set(RE2_SOURCES
    )

 add_library(re2 ${RE2_SOURCES})
+target_include_directories(re2 PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
+set_target_properties(re2 PROPERTIES SOVERSION ${SONAME} VERSION ${SONAME}.0.0)
 add_library(re2::re2 ALIAS re2)

 if(RE2_BUILD_TESTING)
@ -101,6 +103,7 @@ if(RE2_BUILD_TESTING)
      )

  add_library(testing STATIC ${TESTING_SOURCES})
+  target_link_libraries(testing PUBLIC re2)

  set(TEST_TARGETS
      charclass_test
@ -132,13 +135,13 @@ if(RE2_BUILD_TESTING)

  foreach(target ${TEST_TARGETS})
    add_executable(${target} re2/testing/${target}.cc util/test.cc)
-    target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
+    target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
    add_test(NAME ${target} COMMAND ${target})
  endforeach(target)

  foreach(target ${BENCHMARK_TARGETS})
    add_executable(${target} re2/testing/${target}.cc util/benchmark.cc)
-    target_link_libraries(${target} testing re2 ${EXTRA_TARGET_LINK_LIBRARIES})
+    target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
  endforeach(target)
 endif()

@ -149,6 +152,12 @@ set(RE2_HEADERS
    re2/stringpiece.h
    )

-install(FILES ${RE2_HEADERS} DESTINATION include/re2)
-install(TARGETS re2 EXPORT re2Config ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin INCLUDES DESTINATION include)
-install(EXPORT re2Config DESTINATION lib/cmake/re2 NAMESPACE re2::)
+install(FILES ${RE2_HEADERS}
+        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/re2)
+install(TARGETS re2 EXPORT re2Config
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+install(EXPORT re2Config
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/re2 NAMESPACE re2::)
--- a/extern/re2/Makefile
+++ b/extern/re2/Makefile
@ -44,7 +44,7 @@ endif

 # ABI version
 # http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
-SONAME=0
+SONAME=9

 # To rebuild the Tables generated by Perl and Python scripts (requires Internet
 # access for Unicode data), uncomment the following line:
@ -55,7 +55,7 @@ ifeq ($(shell uname),Darwin)
 SOEXT=dylib
 SOEXTVER=$(SONAME).$(SOEXT)
 SOEXTVER00=$(SONAME).0.0.$(SOEXT)
-MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
+MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
 else ifeq ($(shell uname),SunOS)
 SOEXT=so
 SOEXTVER=$(SOEXT).$(SONAME)
@ -68,6 +68,7 @@ SOEXTVER00=$(SOEXT).$(SONAME).0.0
 MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
 endif

+.PHONY: all
 all: obj/libre2.a obj/so/libre2.$(SOEXT)

 INSTALL_HFILES=\
@ -80,24 +81,25 @@ HFILES=\
 	util/benchmark.h\
 	util/flags.h\
 	util/logging.h\
+	util/malloc_counter.h\
 	util/mix.h\
 	util/mutex.h\
 	util/pcre.h\
-	util/pod_array.h\
-	util/sparse_array.h\
-	util/sparse_set.h\
 	util/strutil.h\
 	util/test.h\
 	util/utf.h\
 	util/util.h\
 	re2/bitmap256.h\
 	re2/filtered_re2.h\
+	re2/pod_array.h\
 	re2/prefilter.h\
 	re2/prefilter_tree.h\
 	re2/prog.h\
 	re2/re2.h\
 	re2/regexp.h\
 	re2/set.h\
+	re2/sparse_array.h\
+	re2/sparse_set.h\
 	re2/stringpiece.h\
 	re2/testing/exhaustive_tester.h\
 	re2/testing/regexp_generator.h\
@ -175,117 +177,156 @@ DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
 DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
 DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))

+.PRECIOUS: obj/%.o
 obj/%.o: %.cc $(HFILES)
 	@mkdir -p $$(dirname $@)
 	$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc

+.PRECIOUS: obj/dbg/%.o
 obj/dbg/%.o: %.cc $(HFILES)
 	@mkdir -p $$(dirname $@)
 	$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc

+.PRECIOUS: obj/so/%.o
 obj/so/%.o: %.cc $(HFILES)
 	@mkdir -p $$(dirname $@)
 	$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc

+.PRECIOUS: obj/libre2.a
 obj/libre2.a: $(OFILES)
 	@mkdir -p obj
 	$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)

+.PRECIOUS: obj/dbg/libre2.a
 obj/dbg/libre2.a: $(DOFILES)
 	@mkdir -p obj/dbg
 	$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)

-obj/so/libre2.$(SOEXT): $(SOFILES)
+.PRECIOUS: obj/so/libre2.$(SOEXT)
+obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
 	@mkdir -p obj/so
 	$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
 	ln -sf libre2.$(SOEXTVER) $@

+.PRECIOUS: obj/dbg/test/%
 obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
 	@mkdir -p obj/dbg/test
 	$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)

+.PRECIOUS: obj/test/%
 obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
 	@mkdir -p obj/test
 	$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)

 # Test the shared lib, falling back to the static lib for private symbols
+.PRECIOUS: obj/so/test/%
 obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
 	@mkdir -p obj/so/test
 	$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)

+# Filter out dump.o because testing::TempDir() isn't available for it.
 obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
 	@mkdir -p obj/test
-	$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+	$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)

 # re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
 # is simply a way to check that the target builds and then to run it against a
 # fixed set of inputs. To perform real fuzzing, refer to the documentation for
 # libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
+obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS)
 obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
 	@mkdir -p obj/test
 	$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)

 ifdef REBUILD_TABLES
+.PRECIOUS: re2/perl_groups.cc
 re2/perl_groups.cc: re2/make_perl_groups.pl
 	perl $< > $@

+.PRECIOUS: re2/unicode_%.cc
 re2/unicode_%.cc: re2/make_unicode_%.py
 	python $< > $@
-
-.PRECIOUS: re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
 endif

+.PHONY: distclean
 distclean: clean
 	rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc

+.PHONY: clean
 clean:
 	rm -rf obj
 	rm -f re2/*.pyc

+.PHONY: testofiles
 testofiles: $(TESTOFILES)

+.PHONY: test
 test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test

+.PHONY: debug-test
 debug-test: $(DTESTS)
 	@./runtests $(DTESTS)

+.PHONY: static-test
 static-test: $(TESTS)
 	@./runtests $(TESTS)

+.PHONY: shared-test
 shared-test: $(STESTS)
 	@./runtests -shared-library-path obj/so $(STESTS)

+.PHONY: debug-bigtest
 debug-bigtest: $(DTESTS) $(DBIGTESTS)
 	@./runtests $(DTESTS) $(DBIGTESTS)

+.PHONY: static-bigtest
 static-bigtest: $(TESTS) $(BIGTESTS)
 	@./runtests $(TESTS) $(BIGTESTS)

+.PHONY: shared-bigtest
 shared-bigtest: $(STESTS) $(SBIGTESTS)
 	@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)

+.PHONY: benchmark
 benchmark: obj/test/regexp_benchmark

+.PHONY: fuzz
 fuzz: obj/test/re2_fuzzer

-install: obj/libre2.a obj/so/libre2.$(SOEXT)
-	mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
-	$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
+.PHONY: install
+install: static-install shared-install
+
+.PHONY: static
+static: obj/libre2.a
+
+.PHONY: static-install
+static-install: obj/libre2.a common-install
 	$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
+
+.PHONY: shared
+shared: obj/so/libre2.$(SOEXT)
+
+.PHONY: shared-install
+shared-install: obj/so/libre2.$(SOEXT) common-install
 	$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
 	ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
 	ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
-	$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
-	$(SED_INPLACE) -e "s#@prefix@#${prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
-	$(SED_INPLACE) -e "s#@exec_prefix@#${exec_prefix}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
-	$(SED_INPLACE) -e "s#@includedir@#${includedir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
-	$(SED_INPLACE) -e "s#@libdir@#${libdir}#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc

+.PHONY: common-install
+common-install:
+	mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
+	$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+	$(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+	$(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+
+.PHONY: testinstall
 testinstall: static-testinstall shared-testinstall
 	@echo
 	@echo Install tests passed.
 	@echo

+.PHONY: static-testinstall
 static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
 static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
 static-testinstall:
@ -300,6 +341,7 @@ else
 	obj/testinstall
 endif

+.PHONY: shared-testinstall
 shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
 shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
 shared-testinstall:
@ -312,19 +354,14 @@ else
 	LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
 endif

+.PHONY: benchlog
 benchlog: obj/test/regexp_benchmark
 	(echo '==BENCHMARK==' `hostname` `date`; \
 	  (uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
 	  echo; \
 	  ./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')

-# Keep gmake from deleting intermediate files it creates.
-# This makes repeated builds faster and preserves debug info on OS X.
-
-.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \
-	obj/dbg/libre2.a obj/so/libre2.a \
-	obj/test/% obj/so/test/% obj/dbg/test/%
-
+.PHONY: log
 log:
 	$(MAKE) clean
 	$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
@ -340,6 +377,3 @@ log:
 	echo '#' RE2 basic search tests built by make $@ >re2-search.txt
 	echo '#' $$(date) >>re2-search.txt
 	obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
-
-x: x.cc obj/libre2.a
-	g++ -I. -o x x.cc obj/libre2.a
--- a/extern/re2/README
+++ b/extern/re2/README
@ -27,12 +27,15 @@ under the BSD-style license found in the LICENSE file.

 RE2's native language is C++.

+The Python wrapper is at https://github.com/google/re2/tree/abseil/python
+and on PyPI (https://pypi.org/project/google-re2/).
+
 A C wrapper is at https://github.com/marcomaggi/cre2/.
 An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
 An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
 A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
 An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
 A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
-A Python wrapper is at https://github.com/facebook/pyre2/ and on PyPI (pypi.org).
 An R wrapper is at https://github.com/qinwf/re2r/ and on CRAN (cran.r-project.org).
 A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
+A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
--- a/extern/re2/WORKSPACE
+++ b/extern/re2/WORKSPACE
@ -3,4 +3,13 @@
 # license that can be found in the LICENSE file.

 # Bazel (http://bazel.io/) WORKSPACE file for RE2.
+
 workspace(name = "com_googlesource_code_re2")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "rules_cc",
+    strip_prefix = "rules_cc-master",
+    urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
+)
--- a/extern/re2/benchlog/benchplot.py
+++ b/extern/re2/benchlog/benchplot.py
--- a/extern/re2/benchlog/mktable
+++ b/extern/re2/benchlog/mktable
--- a/extern/re2/doc/mksyntaxgo
+++ b/extern/re2/doc/mksyntaxgo
@ -15,7 +15,7 @@ sam -d $out <<'!'
 ,s/\n\n\n+/\n\n/g
 ,x/(^.*	.*\n)+/ | awk -F'	' '{printf("  %-14s %s\n", $1, $2)}'
 1,2c
-// Copyright 2012 The Go Authors.  All rights reserved.
+// Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

@ -33,6 +33,7 @@ Parts of the syntax can be disabled by passing alternate flags to Parse.

 .
 $a
+Unicode character classes are those in unicode.Categories and unicode.Scripts.
 */
 package syntax
 .
--- a/extern/re2/doc/mksyntaxhtml
+++ b/extern/re2/doc/mksyntaxhtml
--- a/extern/re2/doc/mksyntaxwiki
+++ b/extern/re2/doc/mksyntaxwiki
--- a/extern/re2/doc/syntax.html
+++ b/extern/re2/doc/syntax.html
@ -47,6 +47,10 @@
 <tr><td><code><font color=#808080>x{-n}</font></code></td><td>(≡ <code>x{n}?</code>)  <font size=-2>VIM</font></td></tr>
 <tr><td><code><font color=#808080>x=</font></code></td><td>(≡ <code>x?</code>)  <font size=-2>VIM</font></td></tr>
 <tr><td></td></tr>
+<tr><td colspan=2>Implementation restriction: The counting forms <code>x{n,m}</code>, <code>x{n,}</code>, and <code>x{n}</code></td></tr>
+<tr><td colspan=2>reject forms that create a minimum or maximum repetition count above 1000.</td></tr>
+<tr><td colspan=2>Unlimited repetitions are not subject to this restriction.</td></tr>
+<tr><td></td></tr>
 <tr><td colspan=2><b>Possessive repetitions:</b></td></tr>
 <tr><td><code><font color=#808080>x*+</font></code></td><td>zero or more <code>x</code>, possessive </td></tr>
 <tr><td><code><font color=#808080>x++</font></code></td><td>one or more <code>x</code>, possessive </td></tr>
@ -56,10 +60,10 @@
 <tr><td><code><font color=#808080>x{n}+</font></code></td><td>exactly <code>n</code> <code>x</code>, possessive </td></tr>
 <tr><td></td></tr>
 <tr><td colspan=2><b>Grouping:</b></td></tr>
-<tr><td><code>(re)</code></td><td>numbered capturing group</td></tr>
-<tr><td><code>(?P&lt;name&gt;re)</code></td><td>named &amp; numbered capturing group</td></tr>
-<tr><td><code><font color=#808080>(?&lt;name&gt;re)</font></code></td><td>named &amp; numbered capturing group </td></tr>
-<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named &amp; numbered capturing group </td></tr>
+<tr><td><code>(re)</code></td><td>numbered capturing group (submatch)</td></tr>
+<tr><td><code>(?P&lt;name&gt;re)</code></td><td>named &amp; numbered capturing group (submatch)</td></tr>
+<tr><td><code><font color=#808080>(?&lt;name&gt;re)</font></code></td><td>named &amp; numbered capturing group (submatch) </td></tr>
+<tr><td><code><font color=#808080>(?'name're)</font></code></td><td>named &amp; numbered capturing group (submatch) </td></tr>
 <tr><td><code>(?:re)</code></td><td>non-capturing group</td></tr>
 <tr><td><code>(?flags)</code></td><td>set flags within current group; non-capturing</td></tr>
 <tr><td><code>(?flags:re)</code></td><td>set flags during re; non-capturing</td></tr>
@ -80,8 +84,8 @@
 <tr><td><code>^</code></td><td>at beginning of text or line (<code>m</code>=true)</td></tr>
 <tr><td><code>$</code></td><td>at end of text (like <code>\z</code> not <code>\Z</code>) or line (<code>m</code>=true)</td></tr>
 <tr><td><code>\A</code></td><td>at beginning of text</td></tr>
-<tr><td><code>\b</code></td><td>at word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
-<tr><td><code>\B</code></td><td>not a word boundary</td></tr>
+<tr><td><code>\b</code></td><td>at ASCII word boundary (<code>\w</code> on one side and <code>\W</code>, <code>\A</code>, or <code>\z</code> on the other)</td></tr>
+<tr><td><code>\B</code></td><td>not at ASCII word boundary</td></tr>
 <tr><td><code><font color=#808080>\G</font></code></td><td>at beginning of subtext being searched  <font size=-2>PCRE</font></td></tr>
 <tr><td><code><font color=#808080>\G</font></code></td><td>at end of last match  <font size=-2>PERL</font></td></tr>
 <tr><td><code><font color=#808080>\Z</font></code></td><td>at end of text, or before newline at end of text </td></tr>
@ -166,7 +170,7 @@
 <tr><td><code>[\p{Name}]</code></td><td>named Unicode property inside character class (≡ <code>\p{Name}</code>)</td></tr>
 <tr><td><code>[^\p{Name}]</code></td><td>named Unicode property inside negated character class (≡ <code>\P{Name}</code>)</td></tr>
 <tr><td></td></tr>
-<tr><td colspan=2><b>Perl character classes:</b></td></tr>
+<tr><td colspan=2><b>Perl character classes (all ASCII-only):</b></td></tr>
 <tr><td><code>\d</code></td><td>digits (≡ <code>[0-9]</code>)</td></tr>
 <tr><td><code>\D</code></td><td>not digits (≡ <code>[^0-9]</code>)</td></tr>
 <tr><td><code>\s</code></td><td>whitespace (≡ <code>[\t\n\f\r ]</code>)</td></tr>
@ -237,105 +241,162 @@
 <tr><td><code>Zs</code></td><td>space separator</td></tr>
 <tr><td></td></tr>
 <tr><td colspan=2><b>Unicode character class names--scripts:</b></td></tr>
-<tr><td><code>Arabic</code></td><td>Arabic</td></tr>
-<tr><td><code>Armenian</code></td><td>Armenian</td></tr>
-<tr><td><code>Balinese</code></td><td>Balinese</td></tr>
-<tr><td><code>Bamum</code></td><td>Bamum</td></tr>
-<tr><td><code>Batak</code></td><td>Batak</td></tr>
-<tr><td><code>Bengali</code></td><td>Bengali</td></tr>
-<tr><td><code>Bopomofo</code></td><td>Bopomofo</td></tr>
-<tr><td><code>Brahmi</code></td><td>Brahmi</td></tr>
-<tr><td><code>Braille</code></td><td>Braille</td></tr>
-<tr><td><code>Buginese</code></td><td>Buginese</td></tr>
-<tr><td><code>Buhid</code></td><td>Buhid</td></tr>
-<tr><td><code>Canadian_Aboriginal</code></td><td>Canadian Aboriginal</td></tr>
-<tr><td><code>Carian</code></td><td>Carian</td></tr>
-<tr><td><code>Chakma</code></td><td>Chakma</td></tr>
-<tr><td><code>Cham</code></td><td>Cham</td></tr>
-<tr><td><code>Cherokee</code></td><td>Cherokee</td></tr>
-<tr><td><code>Common</code></td><td>characters not specific to one script</td></tr>
-<tr><td><code>Coptic</code></td><td>Coptic</td></tr>
-<tr><td><code>Cuneiform</code></td><td>Cuneiform</td></tr>
-<tr><td><code>Cypriot</code></td><td>Cypriot</td></tr>
-<tr><td><code>Cyrillic</code></td><td>Cyrillic</td></tr>
-<tr><td><code>Deseret</code></td><td>Deseret</td></tr>
-<tr><td><code>Devanagari</code></td><td>Devanagari</td></tr>
-<tr><td><code>Egyptian_Hieroglyphs</code></td><td>Egyptian Hieroglyphs</td></tr>
-<tr><td><code>Ethiopic</code></td><td>Ethiopic</td></tr>
-<tr><td><code>Georgian</code></td><td>Georgian</td></tr>
-<tr><td><code>Glagolitic</code></td><td>Glagolitic</td></tr>
-<tr><td><code>Gothic</code></td><td>Gothic</td></tr>
-<tr><td><code>Greek</code></td><td>Greek</td></tr>
-<tr><td><code>Gujarati</code></td><td>Gujarati</td></tr>
-<tr><td><code>Gurmukhi</code></td><td>Gurmukhi</td></tr>
-<tr><td><code>Han</code></td><td>Han</td></tr>
-<tr><td><code>Hangul</code></td><td>Hangul</td></tr>
-<tr><td><code>Hanunoo</code></td><td>Hanunoo</td></tr>
-<tr><td><code>Hebrew</code></td><td>Hebrew</td></tr>
-<tr><td><code>Hiragana</code></td><td>Hiragana</td></tr>
-<tr><td><code>Imperial_Aramaic</code></td><td>Imperial Aramaic</td></tr>
-<tr><td><code>Inherited</code></td><td>inherit script from previous character</td></tr>
-<tr><td><code>Inscriptional_Pahlavi</code></td><td>Inscriptional Pahlavi</td></tr>
-<tr><td><code>Inscriptional_Parthian</code></td><td>Inscriptional Parthian</td></tr>
-<tr><td><code>Javanese</code></td><td>Javanese</td></tr>
-<tr><td><code>Kaithi</code></td><td>Kaithi</td></tr>
-<tr><td><code>Kannada</code></td><td>Kannada</td></tr>
-<tr><td><code>Katakana</code></td><td>Katakana</td></tr>
-<tr><td><code>Kayah_Li</code></td><td>Kayah Li</td></tr>
-<tr><td><code>Kharoshthi</code></td><td>Kharoshthi</td></tr>
-<tr><td><code>Khmer</code></td><td>Khmer</td></tr>
-<tr><td><code>Lao</code></td><td>Lao</td></tr>
-<tr><td><code>Latin</code></td><td>Latin</td></tr>
-<tr><td><code>Lepcha</code></td><td>Lepcha</td></tr>
-<tr><td><code>Limbu</code></td><td>Limbu</td></tr>
-<tr><td><code>Linear_B</code></td><td>Linear B</td></tr>
-<tr><td><code>Lycian</code></td><td>Lycian</td></tr>
-<tr><td><code>Lydian</code></td><td>Lydian</td></tr>
-<tr><td><code>Malayalam</code></td><td>Malayalam</td></tr>
-<tr><td><code>Mandaic</code></td><td>Mandaic</td></tr>
-<tr><td><code>Meetei_Mayek</code></td><td>Meetei Mayek</td></tr>
-<tr><td><code>Meroitic_Cursive</code></td><td>Meroitic Cursive</td></tr>
-<tr><td><code>Meroitic_Hieroglyphs</code></td><td>Meroitic Hieroglyphs</td></tr>
-<tr><td><code>Miao</code></td><td>Miao</td></tr>
-<tr><td><code>Mongolian</code></td><td>Mongolian</td></tr>
-<tr><td><code>Myanmar</code></td><td>Myanmar</td></tr>
-<tr><td><code>New_Tai_Lue</code></td><td>New Tai Lue (aka Simplified Tai Lue)</td></tr>
-<tr><td><code>Nko</code></td><td>Nko</td></tr>
-<tr><td><code>Ogham</code></td><td>Ogham</td></tr>
-<tr><td><code>Ol_Chiki</code></td><td>Ol Chiki</td></tr>
-<tr><td><code>Old_Italic</code></td><td>Old Italic</td></tr>
-<tr><td><code>Old_Persian</code></td><td>Old Persian</td></tr>
-<tr><td><code>Old_South_Arabian</code></td><td>Old South Arabian</td></tr>
-<tr><td><code>Old_Turkic</code></td><td>Old Turkic</td></tr>
-<tr><td><code>Oriya</code></td><td>Oriya</td></tr>
-<tr><td><code>Osmanya</code></td><td>Osmanya</td></tr>
-<tr><td><code>Phags_Pa</code></td><td>'Phags Pa</td></tr>
-<tr><td><code>Phoenician</code></td><td>Phoenician</td></tr>
-<tr><td><code>Rejang</code></td><td>Rejang</td></tr>
-<tr><td><code>Runic</code></td><td>Runic</td></tr>
-<tr><td><code>Saurashtra</code></td><td>Saurashtra</td></tr>
-<tr><td><code>Sharada</code></td><td>Sharada</td></tr>
-<tr><td><code>Shavian</code></td><td>Shavian</td></tr>
-<tr><td><code>Sinhala</code></td><td>Sinhala</td></tr>
-<tr><td><code>Sora_Sompeng</code></td><td>Sora Sompeng</td></tr>
-<tr><td><code>Sundanese</code></td><td>Sundanese</td></tr>
-<tr><td><code>Syloti_Nagri</code></td><td>Syloti Nagri</td></tr>
-<tr><td><code>Syriac</code></td><td>Syriac</td></tr>
-<tr><td><code>Tagalog</code></td><td>Tagalog</td></tr>
-<tr><td><code>Tagbanwa</code></td><td>Tagbanwa</td></tr>
-<tr><td><code>Tai_Le</code></td><td>Tai Le</td></tr>
-<tr><td><code>Tai_Tham</code></td><td>Tai Tham</td></tr>
-<tr><td><code>Tai_Viet</code></td><td>Tai Viet</td></tr>
-<tr><td><code>Takri</code></td><td>Takri</td></tr>
-<tr><td><code>Tamil</code></td><td>Tamil</td></tr>
-<tr><td><code>Telugu</code></td><td>Telugu</td></tr>
-<tr><td><code>Thaana</code></td><td>Thaana</td></tr>
-<tr><td><code>Thai</code></td><td>Thai</td></tr>
-<tr><td><code>Tibetan</code></td><td>Tibetan</td></tr>
-<tr><td><code>Tifinagh</code></td><td>Tifinagh</td></tr>
-<tr><td><code>Ugaritic</code></td><td>Ugaritic</td></tr>
-<tr><td><code>Vai</code></td><td>Vai</td></tr>
-<tr><td><code>Yi</code></td><td>Yi</td></tr>
+<tr><td colspan=2>Adlam</td></tr>
+<tr><td colspan=2>Ahom</td></tr>
+<tr><td colspan=2>Anatolian_Hieroglyphs</td></tr>
+<tr><td colspan=2>Arabic</td></tr>
+<tr><td colspan=2>Armenian</td></tr>
+<tr><td colspan=2>Avestan</td></tr>
+<tr><td colspan=2>Balinese</td></tr>
+<tr><td colspan=2>Bamum</td></tr>
+<tr><td colspan=2>Bassa_Vah</td></tr>
+<tr><td colspan=2>Batak</td></tr>
+<tr><td colspan=2>Bengali</td></tr>
+<tr><td colspan=2>Bhaiksuki</td></tr>
+<tr><td colspan=2>Bopomofo</td></tr>
+<tr><td colspan=2>Brahmi</td></tr>
+<tr><td colspan=2>Braille</td></tr>
+<tr><td colspan=2>Buginese</td></tr>
+<tr><td colspan=2>Buhid</td></tr>
+<tr><td colspan=2>Canadian_Aboriginal</td></tr>
+<tr><td colspan=2>Carian</td></tr>
+<tr><td colspan=2>Caucasian_Albanian</td></tr>
+<tr><td colspan=2>Chakma</td></tr>
+<tr><td colspan=2>Cham</td></tr>
+<tr><td colspan=2>Cherokee</td></tr>
+<tr><td colspan=2>Chorasmian</td></tr>
+<tr><td colspan=2>Common</td></tr>
+<tr><td colspan=2>Coptic</td></tr>
+<tr><td colspan=2>Cuneiform</td></tr>
+<tr><td colspan=2>Cypriot</td></tr>
+<tr><td colspan=2>Cyrillic</td></tr>
+<tr><td colspan=2>Deseret</td></tr>
+<tr><td colspan=2>Devanagari</td></tr>
+<tr><td colspan=2>Dives_Akuru</td></tr>
+<tr><td colspan=2>Dogra</td></tr>
+<tr><td colspan=2>Duployan</td></tr>
+<tr><td colspan=2>Egyptian_Hieroglyphs</td></tr>
+<tr><td colspan=2>Elbasan</td></tr>
+<tr><td colspan=2>Elymaic</td></tr>
+<tr><td colspan=2>Ethiopic</td></tr>
+<tr><td colspan=2>Georgian</td></tr>
+<tr><td colspan=2>Glagolitic</td></tr>
+<tr><td colspan=2>Gothic</td></tr>
+<tr><td colspan=2>Grantha</td></tr>
+<tr><td colspan=2>Greek</td></tr>
+<tr><td colspan=2>Gujarati</td></tr>
+<tr><td colspan=2>Gunjala_Gondi</td></tr>
+<tr><td colspan=2>Gurmukhi</td></tr>
+<tr><td colspan=2>Han</td></tr>
+<tr><td colspan=2>Hangul</td></tr>
+<tr><td colspan=2>Hanifi_Rohingya</td></tr>
+<tr><td colspan=2>Hanunoo</td></tr>
+<tr><td colspan=2>Hatran</td></tr>
+<tr><td colspan=2>Hebrew</td></tr>
+<tr><td colspan=2>Hiragana</td></tr>
+<tr><td colspan=2>Imperial_Aramaic</td></tr>
+<tr><td colspan=2>Inherited</td></tr>
+<tr><td colspan=2>Inscriptional_Pahlavi</td></tr>
+<tr><td colspan=2>Inscriptional_Parthian</td></tr>
+<tr><td colspan=2>Javanese</td></tr>
+<tr><td colspan=2>Kaithi</td></tr>
+<tr><td colspan=2>Kannada</td></tr>
+<tr><td colspan=2>Katakana</td></tr>
+<tr><td colspan=2>Kayah_Li</td></tr>
+<tr><td colspan=2>Kharoshthi</td></tr>
+<tr><td colspan=2>Khitan_Small_Script</td></tr>
+<tr><td colspan=2>Khmer</td></tr>
+<tr><td colspan=2>Khojki</td></tr>
+<tr><td colspan=2>Khudawadi</td></tr>
+<tr><td colspan=2>Lao</td></tr>
+<tr><td colspan=2>Latin</td></tr>
+<tr><td colspan=2>Lepcha</td></tr>
+<tr><td colspan=2>Limbu</td></tr>
+<tr><td colspan=2>Linear_A</td></tr>
+<tr><td colspan=2>Linear_B</td></tr>
+<tr><td colspan=2>Lisu</td></tr>
+<tr><td colspan=2>Lycian</td></tr>
+<tr><td colspan=2>Lydian</td></tr>
+<tr><td colspan=2>Mahajani</td></tr>
+<tr><td colspan=2>Makasar</td></tr>
+<tr><td colspan=2>Malayalam</td></tr>
+<tr><td colspan=2>Mandaic</td></tr>
+<tr><td colspan=2>Manichaean</td></tr>
+<tr><td colspan=2>Marchen</td></tr>
+<tr><td colspan=2>Masaram_Gondi</td></tr>
+<tr><td colspan=2>Medefaidrin</td></tr>
+<tr><td colspan=2>Meetei_Mayek</td></tr>
+<tr><td colspan=2>Mende_Kikakui</td></tr>
+<tr><td colspan=2>Meroitic_Cursive</td></tr>
+<tr><td colspan=2>Meroitic_Hieroglyphs</td></tr>
+<tr><td colspan=2>Miao</td></tr>
+<tr><td colspan=2>Modi</td></tr>
+<tr><td colspan=2>Mongolian</td></tr>
+<tr><td colspan=2>Mro</td></tr>
+<tr><td colspan=2>Multani</td></tr>
+<tr><td colspan=2>Myanmar</td></tr>
+<tr><td colspan=2>Nabataean</td></tr>
+<tr><td colspan=2>Nandinagari</td></tr>
+<tr><td colspan=2>New_Tai_Lue</td></tr>
+<tr><td colspan=2>Newa</td></tr>
+<tr><td colspan=2>Nko</td></tr>
+<tr><td colspan=2>Nushu</td></tr>
+<tr><td colspan=2>Nyiakeng_Puachue_Hmong</td></tr>
+<tr><td colspan=2>Ogham</td></tr>
+<tr><td colspan=2>Ol_Chiki</td></tr>
+<tr><td colspan=2>Old_Hungarian</td></tr>
+<tr><td colspan=2>Old_Italic</td></tr>
+<tr><td colspan=2>Old_North_Arabian</td></tr>
+<tr><td colspan=2>Old_Permic</td></tr>
+<tr><td colspan=2>Old_Persian</td></tr>
+<tr><td colspan=2>Old_Sogdian</td></tr>
+<tr><td colspan=2>Old_South_Arabian</td></tr>
+<tr><td colspan=2>Old_Turkic</td></tr>
+<tr><td colspan=2>Oriya</td></tr>
+<tr><td colspan=2>Osage</td></tr>
+<tr><td colspan=2>Osmanya</td></tr>
+<tr><td colspan=2>Pahawh_Hmong</td></tr>
+<tr><td colspan=2>Palmyrene</td></tr>
+<tr><td colspan=2>Pau_Cin_Hau</td></tr>
+<tr><td colspan=2>Phags_Pa</td></tr>
+<tr><td colspan=2>Phoenician</td></tr>
+<tr><td colspan=2>Psalter_Pahlavi</td></tr>
+<tr><td colspan=2>Rejang</td></tr>
+<tr><td colspan=2>Runic</td></tr>
+<tr><td colspan=2>Samaritan</td></tr>
+<tr><td colspan=2>Saurashtra</td></tr>
+<tr><td colspan=2>Sharada</td></tr>
+<tr><td colspan=2>Shavian</td></tr>
+<tr><td colspan=2>Siddham</td></tr>
+<tr><td colspan=2>SignWriting</td></tr>
+<tr><td colspan=2>Sinhala</td></tr>
+<tr><td colspan=2>Sogdian</td></tr>
+<tr><td colspan=2>Sora_Sompeng</td></tr>
+<tr><td colspan=2>Soyombo</td></tr>
+<tr><td colspan=2>Sundanese</td></tr>
+<tr><td colspan=2>Syloti_Nagri</td></tr>
+<tr><td colspan=2>Syriac</td></tr>
+<tr><td colspan=2>Tagalog</td></tr>
+<tr><td colspan=2>Tagbanwa</td></tr>
+<tr><td colspan=2>Tai_Le</td></tr>
+<tr><td colspan=2>Tai_Tham</td></tr>
+<tr><td colspan=2>Tai_Viet</td></tr>
+<tr><td colspan=2>Takri</td></tr>
+<tr><td colspan=2>Tamil</td></tr>
+<tr><td colspan=2>Tangut</td></tr>
+<tr><td colspan=2>Telugu</td></tr>
+<tr><td colspan=2>Thaana</td></tr>
+<tr><td colspan=2>Thai</td></tr>
+<tr><td colspan=2>Tibetan</td></tr>
+<tr><td colspan=2>Tifinagh</td></tr>
+<tr><td colspan=2>Tirhuta</td></tr>
+<tr><td colspan=2>Ugaritic</td></tr>
+<tr><td colspan=2>Vai</td></tr>
+<tr><td colspan=2>Wancho</td></tr>
+<tr><td colspan=2>Warang_Citi</td></tr>
+<tr><td colspan=2>Yezidi</td></tr>
+<tr><td colspan=2>Yi</td></tr>
+<tr><td colspan=2>Zanabazar_Square</td></tr>
 <tr><td></td></tr>
 <tr><td colspan=2><b>Vim character classes:</b></td></tr>
 <tr><td><code><font color=#808080>\i</font></code></td><td>identifier character  <font size=-2>VIM</font></td></tr>
--- a/extern/re2/doc/syntax.txt
+++ b/extern/re2/doc/syntax.txt
@ -253,6 +253,7 @@ Caucasian_Albanian
 Chakma
 Cham
 Cherokee
+Chorasmian
 Common
 Coptic
 Cuneiform
@ -260,6 +261,7 @@ Cypriot
 Cyrillic
 Deseret
 Devanagari
+Dives_Akuru
 Dogra
 Duployan
 Egyptian_Hieroglyphs
@ -291,6 +293,7 @@ Kannada
 Katakana
 Kayah_Li
 Kharoshthi
+Khitan_Small_Script
 Khmer
 Khojki
 Khudawadi
@ -380,6 +383,7 @@ Ugaritic
 Vai
 Wancho
 Warang_Citi
+Yezidi
 Yi
 Zanabazar_Square

--- a/extern/re2/kokoro/cmake.sh
+++ b/extern/re2/kokoro/cmake.sh
@ -1,25 +0,0 @@
-#!/bin/bash
-set -eux
-
-cd git/re2
-
-case "${KOKORO_JOB_NAME}" in
-  */windows-*)
-    CMAKE_G_A_FLAGS=('-G' 'Visual Studio 14 2015' '-A' 'x64')
-    ;;
-  *)
-    CMAKE_G_A_FLAGS=()
-    # Work around a bug in older versions of bash. :/
-    set +u
-    ;;
-esac
-
-cmake -D CMAKE_BUILD_TYPE=Debug "${CMAKE_G_A_FLAGS[@]}" .
-cmake --build . --config Debug --clean-first
-ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
-
-cmake -D CMAKE_BUILD_TYPE=Release "${CMAKE_G_A_FLAGS[@]}" .
-cmake --build . --config Release --clean-first
-ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
-
-exit 0
--- a/extern/re2/kokoro/macos-bazel.cfg
+++ b/extern/re2/kokoro/macos-bazel.cfg
@ -1 +0,0 @@
-build_file: "re2/kokoro/macos-bazel.sh"
--- a/extern/re2/kokoro/macos-bazel.sh
+++ b/extern/re2/kokoro/macos-bazel.sh
@ -1,4 +0,0 @@
-#!/bin/bash
-set -eux
-bash git/re2/kokoro/bazel.sh
-exit $?
--- a/extern/re2/kokoro/macos-cmake.cfg
+++ b/extern/re2/kokoro/macos-cmake.cfg
@ -1 +0,0 @@
-build_file: "re2/kokoro/macos-cmake.sh"
--- a/extern/re2/kokoro/macos-cmake.sh
+++ b/extern/re2/kokoro/macos-cmake.sh
@ -1,4 +0,0 @@
-#!/bin/bash
-set -eux
-bash git/re2/kokoro/cmake.sh
-exit $?
--- a/extern/re2/kokoro/ubuntu-bazel.cfg
+++ b/extern/re2/kokoro/ubuntu-bazel.cfg
@ -1 +0,0 @@
-build_file: "re2/kokoro/ubuntu-bazel.sh"
--- a/extern/re2/kokoro/ubuntu-bazel.sh
+++ b/extern/re2/kokoro/ubuntu-bazel.sh
@ -1,4 +0,0 @@
-#!/bin/bash
-set -eux
-bash git/re2/kokoro/bazel.sh
-exit $?
--- a/extern/re2/kokoro/windows-bazel.bat
+++ b/extern/re2/kokoro/windows-bazel.bat
@ -1,2 +0,0 @@
-bash git/re2/kokoro/bazel.sh
-EXIT /B %ERRORLEVEL%
--- a/extern/re2/kokoro/windows-bazel.cfg
+++ b/extern/re2/kokoro/windows-bazel.cfg
@ -1 +0,0 @@
-build_file: "re2/kokoro/windows-bazel.bat"
--- a/extern/re2/kokoro/windows-cmake.bat
+++ b/extern/re2/kokoro/windows-cmake.bat
@ -1,2 +0,0 @@
-bash git/re2/kokoro/cmake.sh
-EXIT /B %ERRORLEVEL%
--- a/extern/re2/kokoro/windows-cmake.cfg
+++ b/extern/re2/kokoro/windows-cmake.cfg
@ -1 +0,0 @@
-build_file: "re2/kokoro/windows-cmake.bat"
--- a/extern/re2/lib/git/commit-msg.hook
+++ b/extern/re2/lib/git/commit-msg.hook
--- a/extern/re2/libre2.symbols
+++ b/extern/re2/libre2.symbols
@ -11,6 +11,9 @@
 		# re2::FilteredRE2*
 		_ZN3re211FilteredRE2*;
 		_ZNK3re211FilteredRE2*;
+		# re2::re2_internal*
+		_ZN3re212re2_internal*;
+		_ZNK3re212re2_internal*;
 	local:
 		*;
 };
--- a/extern/re2/libre2.symbols.darwin
+++ b/extern/re2/libre2.symbols.darwin
@ -10,3 +10,6 @@ __ZN3re2ls*
 # re2::FilteredRE2*
 __ZN3re211FilteredRE2*
 __ZNK3re211FilteredRE2*
+# re2::re2_internal*
+__ZN3re212re2_internal*
+__ZNK3re212re2_internal*
--- a/extern/re2/re2.pc
+++ b/extern/re2/re2.pc
@ -1,5 +1,3 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
 includedir=@includedir@
 libdir=@libdir@

--- a/extern/re2/re2/bitmap256.h
+++ b/extern/re2/re2/bitmap256.h
@ -32,7 +32,7 @@ class Bitmap256 {
    DCHECK_GE(c, 0);
    DCHECK_LE(c, 255);

-    return (words_[c / 64] & (1ULL << (c % 64))) != 0;
+    return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
  }

  // Sets the bit with index c.
@ -40,7 +40,7 @@ class Bitmap256 {
    DCHECK_GE(c, 0);
    DCHECK_LE(c, 255);

-    words_[c / 64] |= (1ULL << (c % 64));
+    words_[c / 64] |= (uint64_t{1} << (c % 64));
  }

  // Finds the next non-zero bit with index >= c.
@ -51,7 +51,6 @@ class Bitmap256 {
  // Finds the least significant non-zero bit in n.
  static int FindLSBSet(uint64_t n) {
    DCHECK_NE(n, 0);
-
 #if defined(__GNUC__)
    return __builtin_ctzll(n);
 #elif defined(_MSC_VER) && defined(_M_X64)
@ -89,7 +88,7 @@ int Bitmap256::FindNextSetBit(int c) const {

  // Check the word that contains the bit. Mask out any lower bits.
  int i = c / 64;
-  uint64_t word = words_[i] & (~0ULL << (c % 64));
+  uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
  if (word != 0)
    return (i * 64) + FindLSBSet(word);

--- a/extern/re2/re2/bitstate.cc
+++ b/extern/re2/re2/bitstate.cc
@ -7,7 +7,7 @@
 // Prog::SearchBitState is a regular expression search with submatch
 // tracking for small regular expressions and texts.  Similarly to
 // testing/backtrack.cc, it allocates a bitmap with (count of
-// lists) * (length of prog) bits to make sure it never explores the
+// lists) * (length of text) bits to make sure it never explores the
 // same (instruction list, character position) multiple times.  This
 // limits the search to run in time linear in the length of the text.
 //
@ -24,7 +24,7 @@
 #include <utility>

 #include "util/logging.h"
-#include "util/pod_array.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
 #include "re2/regexp.h"

@ -63,11 +63,14 @@ class BitState {
  int nsubmatch_;           //   # of submatches to fill in

  // Search state
-  static const int VisitedBits = 32;
-  PODArray<uint32_t> visited_;  // bitmap: (list ID, char*) pairs visited
+  static constexpr int kVisitedBits = 64;
+  PODArray<uint64_t> visited_;  // bitmap: (list ID, char*) pairs visited
  PODArray<const char*> cap_;   // capture registers
  PODArray<Job> job_;           // stack of text positions to explore
  int njob_;                    // stack size
+
+  BitState(const BitState&) = delete;
+  BitState& operator=(const BitState&) = delete;
 };

 BitState::BitState(Prog* prog)
@ -86,10 +89,10 @@ BitState::BitState(Prog* prog)
 // we don't repeat the visit.
 bool BitState::ShouldVisit(int id, const char* p) {
  int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
-          static_cast<int>(p-text_.begin());
-  if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1))))
+          static_cast<int>(p-text_.data());
+  if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1))))
    return false;
-  visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1));
+  visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1));
  return true;
 }

@ -134,7 +137,7 @@ void BitState::Push(int id, const char* p) {
 // Return whether it succeeded.
 bool BitState::TrySearch(int id0, const char* p0) {
  bool matched = false;
-  const char* end = text_.end();
+  const char* end = text_.data() + text_.size();
  njob_ = 0;
  // Push() no longer checks ShouldVisit(),
  // so we must perform the check ourselves.
@ -251,7 +254,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
        matched = true;
        cap_[1] = p;
        if (submatch_[0].data() == NULL ||
-            (longest_ && p > submatch_[0].end())) {
+            (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
          for (int i = 0; i < nsubmatch_; i++)
            submatch_[i] =
                StringPiece(cap_[2 * i],
@ -288,7 +291,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
  // Search parameters.
  text_ = text;
  context_ = context;
-  if (context_.begin() == NULL)
+  if (context_.data() == NULL)
    context_ = text;
  if (prog_->anchor_start() && context_.begin() != text.begin())
    return false;
@ -304,8 +307,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,

  // Allocate scratch space.
  int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
-  nvisited = (nvisited + VisitedBits-1) / VisitedBits;
-  visited_ = PODArray<uint32_t>(nvisited);
+  nvisited = (nvisited + kVisitedBits-1) / kVisitedBits;
+  visited_ = PODArray<uint64_t>(nvisited);
  memset(visited_.data(), 0, nvisited*sizeof visited_[0]);

  int ncap = 2*nsubmatch;
@ -319,8 +322,8 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,

  // Anchored search must start at text.begin().
  if (anchored_) {
-    cap_[0] = text.begin();
-    return TrySearch(prog_->start(), text.begin());
+    cap_[0] = text.data();
+    return TrySearch(prog_->start(), text.data());
  }

  // Unanchored search, starting from each possible text position.
@ -329,18 +332,22 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
  // This looks like it's quadratic in the size of the text,
  // but we are not clearing visited_ between calls to TrySearch,
  // so no work is duplicated and it ends up still being linear.
-  for (const char* p = text.begin(); p <= text.end(); p++) {
-    // Try to use memchr to find the first byte quickly.
-    int fb = prog_->first_byte();
-    if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
-      p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
+  const char* etext = text.data() + text.size();
+  for (const char* p = text.data(); p <= etext; p++) {
+    // Try to use prefix accel (e.g. memchr) to skip ahead.
+    if (p < etext && prog_->can_prefix_accel()) {
+      p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
      if (p == NULL)
-        p = text.end();
+        p = etext;
    }

    cap_[0] = p;
    if (TrySearch(prog_->start(), p))  // Match must be leftmost; done.
      return true;
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    if (p == NULL)
+      break;
  }
  return false;
 }
--- a/extern/re2/re2/compile.cc
+++ b/extern/re2/re2/compile.cc
@ -14,8 +14,8 @@
 #include <utility>

 #include "util/logging.h"
-#include "util/pod_array.h"
 #include "util/utf.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
 #include "re2/regexp.h"
@ -30,91 +30,57 @@ namespace re2 {
 // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
 //
 // Because the out and out1 fields in Inst are no longer pointers,
-// we can't use pointers directly here either.  Instead, p refers
-// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
-// p == 0 represents the NULL list.  This is okay because instruction #0
+// we can't use pointers directly here either.  Instead, head refers
+// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
+// head == 0 represents the NULL list.  This is okay because instruction #0
 // is always the fail instruction, which never appears on a list.
-
 struct PatchList {
-  uint32_t p;
-
  // Returns patch list containing just p.
-  static PatchList Mk(uint32_t p);
+  static PatchList Mk(uint32_t p) {
+    return {p, p};
+  }

-  // Patches all the entries on l to have value v.
+  // Patches all the entries on l to have value p.
  // Caller must not ever use patch list again.
-  static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
-
-  // Deref returns the next pointer pointed at by p.
-  static PatchList Deref(Prog::Inst *inst0, PatchList l);
-
-  // Appends two patch lists and returns result.
-  static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
-};
-
-static PatchList nullPatchList = { 0 };
-
-// Returns patch list containing just p.
-PatchList PatchList::Mk(uint32_t p) {
-  PatchList l;
-  l.p = p;
-  return l;
-}
-
-// Returns the next pointer pointed at by l.
-PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
-  Prog::Inst* ip = &inst0[l.p>>1];
-  if (l.p&1)
-    l.p = ip->out1();
-  else
-    l.p = ip->out();
-  return l;
-}
-
-// Patches all the entries on l to have value v.
-void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) {
-  while (l.p != 0) {
-    Prog::Inst* ip = &inst0[l.p>>1];
-    if (l.p&1) {
-      l.p = ip->out1();
-      ip->out1_ = val;
-    } else {
-      l.p = ip->out();
-      ip->set_out(val);
+  static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
+    while (l.head != 0) {
+      Prog::Inst* ip = &inst0[l.head>>1];
+      if (l.head&1) {
+        l.head = ip->out1();
+        ip->out1_ = p;
+      } else {
+        l.head = ip->out();
+        ip->set_out(p);
+      }
    }
  }
-}

-// Appends two patch lists and returns result.
-PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
-  if (l1.p == 0)
-    return l2;
-  if (l2.p == 0)
-    return l1;
-
-  PatchList l = l1;
-  for (;;) {
-    PatchList next = PatchList::Deref(inst0, l);
-    if (next.p == 0)
-      break;
-    l = next;
+  // Appends two patch lists and returns result.
+  static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
+    if (l1.head == 0)
+      return l2;
+    if (l2.head == 0)
+      return l1;
+    Prog::Inst* ip = &inst0[l1.tail>>1];
+    if (l1.tail&1)
+      ip->out1_ = l2.head;
+    else
+      ip->set_out(l2.head);
+    return {l1.head, l2.tail};
  }

-  Prog::Inst* ip = &inst0[l.p>>1];
-  if (l.p&1)
-    ip->out1_ = l2.p;
-  else
-    ip->set_out(l2.p);
+  uint32_t head;
+  uint32_t tail;  // for constant-time append
+};

-  return l1;
-}
+static const PatchList kNullPatchList = {0, 0};

 // Compiled program fragment.
 struct Frag {
  uint32_t begin;
  PatchList end;

-  Frag() : begin(0) { end.p = 0; }  // needed so Frag can go in vector
+  Frag() : begin(0) { end.head = 0; }  // needed so Frag can go in vector
  Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
 };

@ -212,8 +178,8 @@ class Compiler : public Regexp::Walker<Frag> {
  int AddSuffixRecursive(int root, int id);

  // Finds the trie node for the given suffix. Returns a Frag in order to
-  // distinguish between pointing at the root node directly (end.p == 0)
-  // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
+  // distinguish between pointing at the root node directly (end.head == 0)
+  // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
  Frag FindByteRange(int root, int id);

  // Compares two ByteRanges and returns true iff they are equal.
@ -225,8 +191,8 @@ class Compiler : public Regexp::Walker<Frag> {
  // Single rune.
  Frag Literal(Rune r, bool foldcase);

-  void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor);
-  Prog* Finish();
+  void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
+  Prog* Finish(Regexp* re);

  // Returns .* where dot = any byte
  Frag DotStar();
@ -298,7 +264,7 @@ int Compiler::AllocInst(int n) {

 // Returns an unmatchable fragment.
 Frag Compiler::NoMatch() {
-  return Frag(0, nullPatchList);
+  return Frag(0, kNullPatchList);
 }

 // Is a an unmatchable fragment?
@ -314,7 +280,7 @@ Frag Compiler::Cat(Frag a, Frag b) {
  // Elide no-op.
  Prog::Inst* begin = &inst_[a.begin];
  if (begin->opcode() == kInstNop &&
-      a.end.p == (a.begin << 1) &&
+      a.end.head == (a.begin << 1) &&
      begin->out() == 0) {
    // in case refs to a somewhere
    PatchList::Patch(inst_.data(), a.end, b.begin);
@ -419,7 +385,7 @@ Frag Compiler::Match(int32_t match_id) {
  if (id < 0)
    return NoMatch();
  inst_[id].InitMatch(match_id);
-  return Frag(id, nullPatchList);
+  return Frag(id, kNullPatchList);
 }

 // Returns a fragment matching a particular empty-width op (like ^ or $)
@ -467,7 +433,7 @@ static int MaxRune(int len) {
 void Compiler::BeginRange() {
  rune_cache_.clear();
  rune_range_.begin = 0;
-  rune_range_.end = nullPatchList;
+  rune_range_.end = kNullPatchList;
 }

 int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
@ -548,9 +514,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
  }

  int br;
-  if (f.end.p == 0)
+  if (f.end.head == 0)
    br = root;
-  else if (f.end.p&1)
+  else if (f.end.head&1)
    br = inst_[f.begin].out1();
  else
    br = inst_[f.begin].out();
@ -566,9 +532,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
    // Ensure that the parent points to the clone, not to the original.
    // Note that this could leave the head unreachable except via the cache.
    br = byterange;
-    if (f.end.p == 0)
+    if (f.end.head == 0)
      root = br;
-    else if (f.end.p&1)
+    else if (f.end.head&1)
      inst_[f.begin].out1_ = br;
    else
      inst_[f.begin].set_out(br);
@ -601,7 +567,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
 Frag Compiler::FindByteRange(int root, int id) {
  if (inst_[root].opcode() == kInstByteRange) {
    if (ByteRangeEqual(root, id))
-      return Frag(root, nullPatchList);
+      return Frag(root, kNullPatchList);
    else
      return NoMatch();
  }
@ -662,48 +628,43 @@ void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
                                   static_cast<uint8_t>(hi), foldcase, 0));
 }

-// Table describing how to make a UTF-8 matching machine
-// for the rune range 80-10FFFF (Runeself-Runemax).
-// This range happens frequently enough (for example /./ and /[^a-z]/)
-// and the rune_cache_ map is slow enough that this is worth
-// special handling.  Makes compilation of a small expression
-// with a dot in it about 10% faster.
-// The * in the comments below mark whole sequences.
-static struct ByteRangeProg {
-  int next;
-  int lo;
-  int hi;
-} prog_80_10ffff[] = {
-  // Two-byte
-  { -1, 0x80, 0xBF, },  // 0:  80-BF
-  {  0, 0xC2, 0xDF, },  // 1:  C2-DF 80-BF*
-
-  // Three-byte
-  {  0, 0xA0, 0xBF, },  // 2:  A0-BF 80-BF
-  {  2, 0xE0, 0xE0, },  // 3:  E0 A0-BF 80-BF*
-  {  0, 0x80, 0xBF, },  // 4:  80-BF 80-BF
-  {  4, 0xE1, 0xEF, },  // 5:  E1-EF 80-BF 80-BF*
-
-  // Four-byte
-  {  4, 0x90, 0xBF, },  // 6:  90-BF 80-BF 80-BF
-  {  6, 0xF0, 0xF0, },  // 7:  F0 90-BF 80-BF 80-BF*
-  {  4, 0x80, 0xBF, },  // 8:  80-BF 80-BF 80-BF
-  {  8, 0xF1, 0xF3, },  // 9: F1-F3 80-BF 80-BF 80-BF*
-  {  4, 0x80, 0x8F, },  // 10: 80-8F 80-BF 80-BF
-  { 10, 0xF4, 0xF4, },  // 11: F4 80-8F 80-BF 80-BF*
-};
-
 void Compiler::Add_80_10ffff() {
-  int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning
-  for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) {
-    const ByteRangeProg& p = prog_80_10ffff[i];
-    int next = 0;
-    if (p.next >= 0)
-      next = inst[p.next];
-    inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo),
-                                     static_cast<uint8_t>(p.hi), false, next);
-    if ((p.lo & 0xC0) != 0x80)
-      AddSuffix(inst[i]);
+  // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
+  // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
+  // permitting overlong encodings in E0 and F0 sequences and code points
+  // over 10FFFF in F4 sequences, the size of the bytecode and the number
+  // of equivalence classes are reduced significantly.
+  int id;
+  if (reversed_) {
+    // Prefix factoring matters, but we don't have to handle it here
+    // because the rune range trie logic takes care of that already.
+    id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+
+    id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+
+    id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+  } else {
+    // Suffix factoring matters - and we do have to handle it here.
+    int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
+    id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
+    AddSuffix(id);
+
+    int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
+    id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
+    AddSuffix(id);
+
+    int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
+    id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
+    AddSuffix(id);
  }
 }

@ -711,9 +672,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
  if (lo > hi)
    return;

-  // Pick off 80-10FFFF as a common special case
-  // that can bypass the slow rune_cache_.
-  if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
+  // Pick off 80-10FFFF as a common special case.
+  if (lo == 0x80 && hi == 0x10ffff) {
    Add_80_10ffff();
    return;
  }
@ -1095,8 +1055,6 @@ static bool IsAnchorEnd(Regexp** pre, int depth) {

 void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
                     RE2::Anchor anchor) {
-  prog_->set_flags(flags);
-
  if (flags & Regexp::Latin1)
    encoding_ = kEncodingLatin1;
  max_mem_ = max_mem;
@ -1117,14 +1075,11 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
    // on the program.)
    if (m >= 1<<24)
      m = 1<<24;
-
    // Inst imposes its own limit (currently bigger than 2^24 but be safe).
    if (m > Prog::Inst::kMaxInst)
      m = Prog::Inst::kMaxInst;
-
    max_ninst_ = static_cast<int>(m);
  }
-
  anchor_ = anchor;
 }

@ -1178,10 +1133,10 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
  c.prog_->set_start_unanchored(all.begin);

  // Hand ownership of prog_ to caller.
-  return c.Finish();
+  return c.Finish(re);
 }

-Prog* Compiler::Finish() {
+Prog* Compiler::Finish(Regexp* re) {
  if (failed_)
    return NULL;

@ -1198,6 +1153,17 @@ Prog* Compiler::Finish() {
  prog_->Flatten();
  prog_->ComputeByteMap();

+  if (!prog_->reversed()) {
+    std::string prefix;
+    bool prefix_foldcase;
+    if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase) &&
+        !prefix_foldcase) {
+      prog_->prefix_size_ = prefix.size();
+      prog_->prefix_front_ = prefix.front();
+      prog_->prefix_back_ = prefix.back();
+    }
+  }
+
  // Record remaining memory for DFA.
  if (max_mem_ <= 0) {
    prog_->set_dfa_mem(1<<20);
@ -1254,7 +1220,7 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
  c.prog_->set_start(all.begin);
  c.prog_->set_start_unanchored(all.begin);

-  Prog* prog = c.Finish();
+  Prog* prog = c.Finish(re);
  if (prog == NULL)
    return NULL;

--- a/extern/re2/re2/dfa.cc
+++ b/extern/re2/re2/dfa.cc
@ -39,10 +39,11 @@
 #include "util/logging.h"
 #include "util/mix.h"
 #include "util/mutex.h"
-#include "util/pod_array.h"
-#include "util/sparse_set.h"
 #include "util/strutil.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/sparse_set.h"
 #include "re2/stringpiece.h"

 // Silence "zero-sized array in struct/union" warning for DFA::State::next_.
@ -52,17 +53,6 @@

 namespace re2 {

-#if !defined(__linux__)  /* only Linux seems to have memrchr */
-static void* memrchr(const void* s, int c, size_t n) {
-  const unsigned char* p = (const unsigned char*)s;
-  for (p += n; n > 0; n--)
-    if (*--p == c)
-      return (void*)p;
-
-  return NULL;
-}
-#endif
-
 // Controls whether the DFA should bail out early if the NFA would be faster.
 static bool dfa_should_bail_when_slow = true;

@ -177,11 +167,8 @@ class DFA {
  typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;

 private:
-  // Special "first_byte" values for a state.  (Values >= 0 denote actual bytes.)
-  enum {
-    kFbUnknown = -1,   // No analysis has been performed.
-    kFbNone = -2,      // The first-byte trick cannot be used.
-  };
+  // Make it easier to swap in a scalable reader-writer mutex.
+  using CacheMutex = Mutex;

  enum {
    // Indices into start_ for unanchored searches.
@ -249,25 +236,26 @@ class DFA {
  struct SearchParams {
    SearchParams(const StringPiece& text, const StringPiece& context,
                 RWLocker* cache_lock)
-      : text(text), context(context),
+      : text(text),
+        context(context),
        anchored(false),
+        can_prefix_accel(false),
        want_earliest_match(false),
        run_forward(false),
        start(NULL),
-        first_byte(kFbUnknown),
        cache_lock(cache_lock),
        failed(false),
        ep(NULL),
-        matches(NULL) { }
+        matches(NULL) {}

    StringPiece text;
    StringPiece context;
    bool anchored;
+    bool can_prefix_accel;
    bool want_earliest_match;
    bool run_forward;
    State* start;
-    int first_byte;
-    RWLocker *cache_lock;
+    RWLocker* cache_lock;
    bool failed;     // "out" parameter: whether search gave up
    const char* ep;  // "out" parameter: end pointer for match
    SparseSet* matches;
@ -278,15 +266,13 @@ class DFA {
  };

  // Before each search, the parameters to Search are analyzed by
-  // AnalyzeSearch to determine the state in which to start and the
-  // "first_byte" for that state, if any.
+  // AnalyzeSearch to determine the state in which to start.
  struct StartInfo {
-    StartInfo() : start(NULL), first_byte(kFbUnknown) {}
-    State* start;
-    std::atomic<int> first_byte;
+    StartInfo() : start(NULL) {}
+    std::atomic<State*> start;
  };

-  // Fills in params->start and params->first_byte using
+  // Fills in params->start and params->can_prefix_accel using
  // the other search parameters.  Returns true on success,
  // false on failure.
  // cache_mutex_.r <= L < mutex_
@ -297,10 +283,10 @@ class DFA {
  // The generic search loop, inlined to create specialized versions.
  // cache_mutex_.r <= L < mutex_
  // Might unlock and relock cache_mutex_ via params->cache_lock.
-  inline bool InlinedSearchLoop(SearchParams* params,
-                                bool have_first_byte,
-                                bool want_earliest_match,
-                                bool run_forward);
+  template <bool can_prefix_accel,
+            bool want_earliest_match,
+            bool run_forward>
+  inline bool InlinedSearchLoop(SearchParams* params);

  // The specialized versions of InlinedSearchLoop.  The three letters
  // at the ends of the name denote the true/false values used as the
@ -322,13 +308,6 @@ class DFA {
  // Might unlock and relock cache_mutex_ via params->cache_lock.
  bool FastSearchLoop(SearchParams* params);

-  // For debugging, a slow search loop that calls InlinedSearchLoop
-  // directly -- because the booleans passed are not constants, the
-  // loop is not specialized like the SearchFFF etc. versions, so it
-  // runs much more slowly.  Useful only for debugging.
-  // cache_mutex_.r <= L < mutex_
-  // Might unlock and relock cache_mutex_ via params->cache_lock.
-  bool SlowSearchLoop(SearchParams* params);

  // Looks up bytes in bytemap_ but handles case c == kByteEndText too.
  int ByteMap(int c) {
@ -355,11 +334,14 @@ class DFA {
  // while holding cache_mutex_ for writing, to avoid interrupting other
  // readers.  Any State* pointers are only valid while cache_mutex_
  // is held.
-  Mutex cache_mutex_;
+  CacheMutex cache_mutex_;
  int64_t mem_budget_;     // Total memory budget for all States.
  int64_t state_budget_;   // Amount of memory remaining for new States.
  StateSet state_cache_;   // All States computed so far.
  StartInfo start_[kMaxStart];
+
+  DFA(const DFA&) = delete;
+  DFA& operator=(const DFA&) = delete;
 };

 // Shorthand for casting to uint8_t*.
@ -442,7 +424,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
    q1_(NULL),
    mem_budget_(max_mem) {
  if (ExtraDebug)
-    fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
+    fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
  int nmark = 0;
  if (kind_ == Prog::kLongestMatch)
    nmark = prog_->size();
@ -613,7 +595,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
  // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
  // those are the only operators with any effect in
  // RunWorkqOnEmptyString or RunWorkqOnByte.
-  int* inst = new int[q->size()];
+  PODArray<int> inst(q->size());
  int n = 0;
  uint32_t needflags = 0;  // flags needed by kInstEmptyWidth instructions
  bool sawmatch = false;   // whether queue contains guaranteed kInstMatch
@ -643,7 +625,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
             (it == q->begin() && ip->greedy(prog_))) &&
            (kind_ != Prog::kLongestMatch || !sawmark) &&
            (flag & kFlagMatch)) {
-          delete[] inst;
          if (ExtraDebug)
            fprintf(stderr, " -> FullMatchState\n");
          return FullMatchState;
@ -690,7 +671,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
  // the execution loop can stop early.  This is only okay
  // if the state is *not* a matching state.
  if (n == 0 && flag == 0) {
-    delete[] inst;
    if (ExtraDebug)
      fprintf(stderr, " -> DeadState\n");
    return DeadState;
@ -700,7 +680,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
  // unordered state sets separated by Marks.  Sort each set
  // to canonicalize, to reduce the number of distinct sets stored.
  if (kind_ == Prog::kLongestMatch) {
-    int* ip = inst;
+    int* ip = inst.data();
    int* ep = ip + n;
    while (ip < ep) {
      int* markp = ip;
@ -717,7 +697,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
  // we have an unordered set of states (i.e. we don't have Marks)
  // and sorting will reduce the number of distinct sets stored.
  if (kind_ == Prog::kManyMatch) {
-    int* ip = inst;
+    int* ip = inst.data();
    int* ep = ip + n;
    std::sort(ip, ep);
  }
@ -736,8 +716,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
  // Save the needed empty-width flags in the top bits for use later.
  flag |= needflags << kFlagNeedShift;

-  State* state = CachedState(inst, n, flag);
-  delete[] inst;
+  State* state = CachedState(inst.data(), n, flag);
  return state;
 }

@ -971,8 +950,21 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
        break;

      case kInstByteRange:   // can follow if c is in range
-        if (ip->Matches(c))
-          AddToQueue(newq, ip->out(), flag);
+        if (!ip->Matches(c))
+          break;
+        AddToQueue(newq, ip->out(), flag);
+        if (ip->hint() != 0) {
+          // We have a hint, but we must cancel out the
+          // increment that will occur after the break.
+          i += ip->hint() - 1;
+        } else {
+          // We have no hint, so we must find the end
+          // of the current list and then skip to it.
+          Prog::Inst* ip0 = ip;
+          while (!ip->last())
+            ++ip;
+          i += ip - ip0;
+        }
        break;

      case kInstMatch:
@ -989,8 +981,8 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
  }

  if (ExtraDebug)
-    fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
-            c, flag, DumpWorkq(newq).c_str(), *ismatch);
+    fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
+            DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
 }

 // Processes input byte c in state, returning new state.
@ -1117,7 +1109,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {

 class DFA::RWLocker {
 public:
-  explicit RWLocker(Mutex* mu);
+  explicit RWLocker(CacheMutex* mu);
  ~RWLocker();

  // If the lock is only held for reading right now,
@ -1127,19 +1119,19 @@ class DFA::RWLocker {
  void LockForWriting();

 private:
-  Mutex* mu_;
+  CacheMutex* mu_;
  bool writing_;

  RWLocker(const RWLocker&) = delete;
  RWLocker& operator=(const RWLocker&) = delete;
 };

-DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
+DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
  mu_->ReaderLock();
 }

-// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations
-// does not support lock upgrade.
+// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
+// the annotations don't support lock upgrade.
 void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
  if (!writing_) {
    mu_->ReaderUnlock();
@ -1171,11 +1163,14 @@ void DFA::ResetCache(RWLocker* cache_lock) {
  // Re-acquire the cache_mutex_ for writing (exclusive use).
  cache_lock->LockForWriting();

+  hooks::GetDFAStateCacheResetHook()({
+      state_budget_,
+      state_cache_.size(),
+  });
+
  // Clear the cache, reset the memory budget.
-  for (int i = 0; i < kMaxStart; i++) {
-    start_[i].start = NULL;
-    start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
-  }
+  for (int i = 0; i < kMaxStart; i++)
+    start_[i].start.store(NULL, std::memory_order_relaxed);
  ClearCache();
  mem_budget_ = state_budget_;
 }
@ -1290,8 +1285,7 @@ DFA::State* DFA::StateSaver::Restore() {
 // situation, the DFA can do better than executing the simple loop.
 // Instead, it can call memchr to search very quickly for the byte c.
 // Whether the start state has this property is determined during a
-// pre-compilation pass, and if so, the byte b is passed to the search
-// loop as the "first_byte" argument, along with a boolean "have_first_byte".
+// pre-compilation pass and the "can_prefix_accel" argument is set.
 //
 // Fourth, the desired behavior is to search for the leftmost-best match
 // (approximately, the same one that Perl would find), which is not
@ -1323,15 +1317,16 @@ DFA::State* DFA::StateSaver::Restore() {
 // The bools are equal to the same-named variables in params, but
 // making them function arguments lets the inliner specialize
 // this function to each combination (see two paragraphs above).
-inline bool DFA::InlinedSearchLoop(SearchParams* params,
-                                   bool have_first_byte,
-                                   bool want_earliest_match,
-                                   bool run_forward) {
+template <bool can_prefix_accel,
+          bool want_earliest_match,
+          bool run_forward>
+inline bool DFA::InlinedSearchLoop(SearchParams* params) {
  State* start = params->start;
-  const uint8_t* bp = BytePtr(params->text.begin());  // start of text
-  const uint8_t* p = bp;                              // text scanning point
-  const uint8_t* ep = BytePtr(params->text.end());    // end of text
-  const uint8_t* resetp = NULL;                       // p at last cache reset
+  const uint8_t* bp = BytePtr(params->text.data());  // start of text
+  const uint8_t* p = bp;                             // text scanning point
+  const uint8_t* ep = BytePtr(params->text.data() +
+                              params->text.size());  // end of text
+  const uint8_t* resetp = NULL;                      // p at last cache reset
  if (!run_forward) {
    using std::swap;
    swap(p, ep);
@ -1366,25 +1361,16 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,

  while (p != ep) {
    if (ExtraDebug)
-      fprintf(stderr, "@%td: %s\n",
-              p - bp, DumpState(s).c_str());
+      fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());

-    if (have_first_byte && s == start) {
-      // In start state, only way out is to find first_byte,
-      // so use optimized assembly in memchr to skip ahead.
-      // If first_byte isn't found, we can skip to the end
-      // of the string.
-      if (run_forward) {
-        if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
-          p = ep;
-          break;
-        }
-      } else {
-        if ((p = BytePtr(memrchr(ep, params->first_byte, p - ep))) == NULL) {
-          p = ep;
-          break;
-        }
-        p++;
+    if (can_prefix_accel && s == start) {
+      // In start state, only way out is to find the prefix,
+      // so we use prefix accel (e.g. memchr) to skip ahead.
+      // If not found, we can skip to the end of the string.
+      p = BytePtr(prog_->PrefixAccel(p, ep - p));
+      if (p == NULL) {
+        p = ep;
+        break;
      }
    }

@ -1475,8 +1461,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
      else
        lastmatch = p + 1;
      if (ExtraDebug)
-        fprintf(stderr, "match @%td! [%s]\n",
-                lastmatch - bp, DumpState(s).c_str());
+        fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
      if (params->matches != NULL && kind_ == Prog::kManyMatch) {
        for (int i = s->ninst_ - 1; i >= 0; i--) {
          int id = s->inst_[i];
@ -1560,36 +1545,28 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,

 // Inline specializations of the general loop.
 bool DFA::SearchFFF(SearchParams* params) {
-  return InlinedSearchLoop(params, 0, 0, 0);
+  return InlinedSearchLoop<false, false, false>(params);
 }
 bool DFA::SearchFFT(SearchParams* params) {
-  return InlinedSearchLoop(params, 0, 0, 1);
+  return InlinedSearchLoop<false, false, true>(params);
 }
 bool DFA::SearchFTF(SearchParams* params) {
-  return InlinedSearchLoop(params, 0, 1, 0);
+  return InlinedSearchLoop<false, true, false>(params);
 }
 bool DFA::SearchFTT(SearchParams* params) {
-  return InlinedSearchLoop(params, 0, 1, 1);
+  return InlinedSearchLoop<false, true, true>(params);
 }
 bool DFA::SearchTFF(SearchParams* params) {
-  return InlinedSearchLoop(params, 1, 0, 0);
+  return InlinedSearchLoop<true, false, false>(params);
 }
 bool DFA::SearchTFT(SearchParams* params) {
-  return InlinedSearchLoop(params, 1, 0, 1);
+  return InlinedSearchLoop<true, false, true>(params);
 }
 bool DFA::SearchTTF(SearchParams* params) {
-  return InlinedSearchLoop(params, 1, 1, 0);
+  return InlinedSearchLoop<true, true, false>(params);
 }
 bool DFA::SearchTTT(SearchParams* params) {
-  return InlinedSearchLoop(params, 1, 1, 1);
-}
-
-// For debugging, calls the general code directly.
-bool DFA::SlowSearchLoop(SearchParams* params) {
-  return InlinedSearchLoop(params,
-                           params->first_byte >= 0,
-                           params->want_earliest_match,
-                           params->run_forward);
+  return InlinedSearchLoop<true, true, true>(params);
 }

 // For performance, calls the appropriate specialized version
@ -1608,8 +1585,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
    &DFA::SearchTTT,
  };

-  bool have_first_byte = params->first_byte >= 0;
-  int index = 4 * have_first_byte +
+  int index = 4 * params->can_prefix_accel +
              2 * params->want_earliest_match +
              1 * params->run_forward;
  return (this->*Searches[index])(params);
@ -1701,13 +1677,22 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
    }
  }

-  if (ExtraDebug)
-    fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
-            params->anchored, params->run_forward, flags,
-            DumpState(info->start).c_str(), info->first_byte.load());
+  params->start = info->start.load(std::memory_order_acquire);

-  params->start = info->start;
-  params->first_byte = info->first_byte.load(std::memory_order_acquire);
+  // Even if we could prefix accel, we cannot do so when anchored and,
+  // less obviously, we cannot do so when we are going to need flags.
+  // This trick works only when there is a single byte that leads to a
+  // different state!
+  if (prog_->can_prefix_accel() &&
+      !params->anchored &&
+      params->start > SpecialStateMax &&
+      params->start->flag_ >> kFlagNeedShift == 0)
+    params->can_prefix_accel = true;
+
+  if (ExtraDebug)
+    fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
+            params->anchored, params->run_forward, flags,
+            DumpState(params->start).c_str(), params->can_prefix_accel);

  return true;
 }
@ -1716,47 +1701,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
                              uint32_t flags) {
  // Quick check.
-  int fb = info->first_byte.load(std::memory_order_acquire);
-  if (fb != kFbUnknown)
+  State* start = info->start.load(std::memory_order_acquire);
+  if (start != NULL)
    return true;

  MutexLock l(&mutex_);
-  fb = info->first_byte.load(std::memory_order_relaxed);
-  if (fb != kFbUnknown)
+  start = info->start.load(std::memory_order_relaxed);
+  if (start != NULL)
    return true;

  q0_->clear();
  AddToQueue(q0_,
             params->anchored ? prog_->start() : prog_->start_unanchored(),
             flags);
-  info->start = WorkqToCachedState(q0_, NULL, flags);
-  if (info->start == NULL)
+  start = WorkqToCachedState(q0_, NULL, flags);
+  if (start == NULL)
    return false;

-  if (info->start == DeadState) {
-    // Synchronize with "quick check" above.
-    info->first_byte.store(kFbNone, std::memory_order_release);
-    return true;
-  }
-
-  if (info->start == FullMatchState) {
-    // Synchronize with "quick check" above.
-    info->first_byte.store(kFbNone, std::memory_order_release);  // will be ignored
-    return true;
-  }
-
-  // Even if we have a first_byte, we cannot use it when anchored and,
-  // less obviously, we cannot use it when we are going to need flags.
-  // This trick works only when there is a single byte that leads to a
-  // different state!
-  int first_byte = prog_->first_byte();
-  if (first_byte == -1 ||
-      params->anchored ||
-      info->start->flag_ >> kFlagNeedShift != 0)
-    first_byte = kFbNone;
-
  // Synchronize with "quick check" above.
-  info->first_byte.store(first_byte, std::memory_order_release);
+  info->start.store(start, std::memory_order_release);
  return true;
 }

@ -1779,8 +1742,7 @@ bool DFA::Search(const StringPiece& text,
  if (ExtraDebug) {
    fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
    fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
-            std::string(text).c_str(), anchored, want_earliest_match,
-            run_forward, kind_);
+            std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
  }

  RWLocker l(&cache_mutex_);
@ -1798,9 +1760,9 @@ bool DFA::Search(const StringPiece& text,
    return false;
  if (params.start == FullMatchState) {
    if (run_forward == want_earliest_match)
-      *epp = text.begin();
+      *epp = text.data();
    else
-      *epp = text.end();
+      *epp = text.data() + text.size();
    return true;
  }
  if (ExtraDebug)
@ -1863,15 +1825,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
  *failed = false;

  StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
    context = text;
-  bool carat = anchor_start();
+  bool caret = anchor_start();
  bool dollar = anchor_end();
  if (reversed_) {
    using std::swap;
-    swap(carat, dollar);
+    swap(caret, dollar);
  }
-  if (carat && context.begin() != text.begin())
+  if (caret && context.begin() != text.begin())
    return false;
  if (dollar && context.end() != text.end())
    return false;
@ -1906,11 +1868,15 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
  bool matched = dfa->Search(text, context, anchored,
                             want_earliest_match, !reversed_,
                             failed, &ep, matches);
-  if (*failed)
+  if (*failed) {
+    hooks::GetDFASearchFailureHook()({
+        // Nothing yet...
+    });
    return false;
+  }
  if (!matched)
    return false;
-  if (endmatch && ep != (reversed_ ? text.begin() : text.end()))
+  if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
    return false;

  // If caller cares, record the boundary of the match.
@ -1918,10 +1884,11 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
  // as the beginning.
  if (match0) {
    if (reversed_)
-      *match0 = StringPiece(ep, static_cast<size_t>(text.end() - ep));
+      *match0 =
+          StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
    else
      *match0 =
-          StringPiece(text.begin(), static_cast<size_t>(ep - text.begin()));
+          StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
  }
  return true;
 }
--- a/extern/re2/re2/filtered_re2.cc
+++ b/extern/re2/re2/filtered_re2.cc
@ -6,6 +6,7 @@

 #include <stddef.h>
 #include <string>
+#include <utility>

 #include "util/util.h"
 #include "util/logging.h"
@ -27,7 +28,22 @@ FilteredRE2::FilteredRE2(int min_atom_len)
 FilteredRE2::~FilteredRE2() {
  for (size_t i = 0; i < re2_vec_.size(); i++)
    delete re2_vec_[i];
-  delete prefilter_tree_;
+}
+
+FilteredRE2::FilteredRE2(FilteredRE2&& other)
+    : re2_vec_(std::move(other.re2_vec_)),
+      compiled_(other.compiled_),
+      prefilter_tree_(std::move(other.prefilter_tree_)) {
+  other.re2_vec_.clear();
+  other.re2_vec_.shrink_to_fit();
+  other.compiled_ = false;
+  other.prefilter_tree_.reset(new PrefilterTree());
+}
+
+FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
+  this->~FilteredRE2();
+  (void) new (this) FilteredRE2(std::move(other));
+  return *this;
 }

 RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
@ -38,7 +54,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
  if (!re->ok()) {
    if (options.log_errors()) {
      LOG(ERROR) << "Couldn't compile regular expression, skipping: "
-                 << re << " due to error " << re->error();
+                 << pattern << " due to error " << re->error();
    }
    delete re;
  } else {
--- a/extern/re2/re2/filtered_re2.h
+++ b/extern/re2/re2/filtered_re2.h
@ -10,17 +10,18 @@
 // number of regexps that need to be actually searched.
 //
 // By design, it does not include a string matching engine. This is to
-// allow the user of the class to use their favorite string match
+// allow the user of the class to use their favorite string matching
 // engine. The overall flow is: Add all the regexps using Add, then
-// Compile the FilteredRE2. The compile returns strings that need to
-// be matched. Note that all returned strings are lowercase. For
-// applying regexps to a search text, the caller does the string
-// matching using the strings returned. When doing the string match,
-// note that the caller has to do that on lower cased version of the
-// search text. Then call FirstMatch or AllMatches with a vector of
-// indices of strings that were found in the text to get the actual
-// regexp matches.
+// Compile the FilteredRE2. Compile returns strings that need to be
+// matched. Note that the returned strings are lowercased and distinct.
+// For applying regexps to a search text, the caller does the string
+// matching using the returned strings. When doing the string match,
+// note that the caller has to do that in a case-insensitive way or
+// on a lowercased version of the search text. Then call FirstMatch
+// or AllMatches with a vector of indices of strings that were found
+// in the text to get the actual regexp matches.

+#include <memory>
 #include <string>
 #include <vector>

@ -36,18 +37,25 @@ class FilteredRE2 {
  explicit FilteredRE2(int min_atom_len);
  ~FilteredRE2();

+  // Not copyable.
+  FilteredRE2(const FilteredRE2&) = delete;
+  FilteredRE2& operator=(const FilteredRE2&) = delete;
+  // Movable.
+  FilteredRE2(FilteredRE2&& other);
+  FilteredRE2& operator=(FilteredRE2&& other);
+
  // Uses RE2 constructor to create a RE2 object (re). Returns
  // re->error_code(). If error_code is other than NoError, then re is
  // deleted and not added to re2_vec_.
  RE2::ErrorCode Add(const StringPiece& pattern,
                     const RE2::Options& options,
-                     int *id);
+                     int* id);

  // Prepares the regexps added by Add for filtering.  Returns a set
  // of strings that the caller should check for in candidate texts.
-  // The returned strings are lowercased. When doing string matching,
-  // the search text should be lowercased first to find matching
-  // strings from the set of strings returned by Compile.  Call after
+  // The returned strings are lowercased and distinct. When doing
+  // string matching, it should be performed in a case-insensitive
+  // way or the search text should be lowercased first.  Call after
  // all Add calls are done.
  void Compile(std::vector<std::string>* strings_to_match);

@ -98,10 +106,7 @@ class FilteredRE2 {
  bool compiled_;

  // An AND-OR tree of string atoms used for filtering regexps.
-  PrefilterTree* prefilter_tree_;
-
-  FilteredRE2(const FilteredRE2&) = delete;
-  FilteredRE2& operator=(const FilteredRE2&) = delete;
+  std::unique_ptr<PrefilterTree> prefilter_tree_;
 };

 }  // namespace re2
--- a/extern/re2/re2/fuzzing/compiler-rt/LICENSE
+++ b/extern/re2/re2/fuzzing/compiler-rt/LICENSE
@ -0,0 +1,219 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
--- a/extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+++ b/extern/re2/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
@ -0,0 +1,305 @@
+//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// A single header library providing a utility class to break up an array of
+// bytes. Whenever run on the same input, provides the same output, as long as
+// its methods are called in the same order, with the same arguments.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+
+#include <algorithm>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <initializer_list>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// In addition to the comments below, the API is also briefly documented at
+// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
+class FuzzedDataProvider {
+ public:
+  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
+  // provide more granular access. |data| must outlive the FuzzedDataProvider.
+  FuzzedDataProvider(const uint8_t *data, size_t size)
+      : data_ptr_(data), remaining_bytes_(size) {}
+  ~FuzzedDataProvider() = default;
+
+  // Returns a std::vector containing |num_bytes| of input data. If fewer than
+  // |num_bytes| of data remain, returns a shorter std::vector containing all
+  // of the data that's left. Can be used with any byte sized type, such as
+  // char, unsigned char, uint8_t, etc.
+  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
+    num_bytes = std::min(num_bytes, remaining_bytes_);
+    return ConsumeBytes<T>(num_bytes, num_bytes);
+  }
+
+  // Similar to |ConsumeBytes|, but also appends the terminator value at the end
+  // of the resulting vector. Useful, when a mutable null-terminated C-string is
+  // needed, for example. But that is a rare case. Better avoid it, if possible,
+  // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
+  template <typename T>
+  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
+                                            T terminator = 0) {
+    num_bytes = std::min(num_bytes, remaining_bytes_);
+    std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
+    result.back() = terminator;
+    return result;
+  }
+
+  // Returns a std::string containing |num_bytes| of input data. Using this and
+  // |.c_str()| on the resulting string is the best way to get an immutable
+  // null-terminated C string. If fewer than |num_bytes| of data remain, returns
+  // a shorter std::string containing all of the data that's left.
+  std::string ConsumeBytesAsString(size_t num_bytes) {
+    static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
+                  "ConsumeBytesAsString cannot convert the data to a string.");
+
+    num_bytes = std::min(num_bytes, remaining_bytes_);
+    std::string result(
+        reinterpret_cast<const std::string::value_type *>(data_ptr_),
+        num_bytes);
+    Advance(num_bytes);
+    return result;
+  }
+
+  // Returns a number in the range [min, max] by consuming bytes from the
+  // input data. The value might not be uniformly distributed in the given
+  // range. If there's no input data left, always returns |min|. |min| must
+  // be less than or equal to |max|.
+  template <typename T> T ConsumeIntegralInRange(T min, T max) {
+    static_assert(std::is_integral<T>::value, "An integral type is required.");
+    static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
+
+    if (min > max)
+      abort();
+
+    // Use the biggest type possible to hold the range and the result.
+    uint64_t range = static_cast<uint64_t>(max) - min;
+    uint64_t result = 0;
+    size_t offset = 0;
+
+    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
+           remaining_bytes_ != 0) {
+      // Pull bytes off the end of the seed data. Experimentally, this seems to
+      // allow the fuzzer to more easily explore the input space. This makes
+      // sense, since it works by modifying inputs that caused new code to run,
+      // and this data is often used to encode length of data read by
+      // |ConsumeBytes|. Separating out read lengths makes it easier to modify
+      // the contents of the data that is actually read.
+      --remaining_bytes_;
+      result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
+      offset += CHAR_BIT;
+    }
+
+    // Avoid division by 0, in case |range + 1| results in overflow.
+    if (range != std::numeric_limits<decltype(range)>::max())
+      result = result % (range + 1);
+
+    return static_cast<T>(min + result);
+  }
+
+  // Returns a std::string of length from 0 to |max_length|. When it runs out of
+  // input data, returns what remains of the input. Designed to be more stable
+  // with respect to a fuzzer inserting characters than just picking a random
+  // length and then consuming that many bytes with |ConsumeBytes|.
+  std::string ConsumeRandomLengthString(size_t max_length) {
+    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
+    // followed by anything else to the end of the string. As a result of this
+    // logic, a fuzzer can insert characters into the string, and the string
+    // will be lengthened to include those new characters, resulting in a more
+    // stable fuzzer than picking the length of a string independently from
+    // picking its contents.
+    std::string result;
+
+    // Reserve the anticipated capacity to prevent several reallocations.
+    result.reserve(std::min(max_length, remaining_bytes_));
+    for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
+      char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+      Advance(1);
+      if (next == '\\' && remaining_bytes_ != 0) {
+        next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+        Advance(1);
+        if (next != '\\')
+          break;
+      }
+      result += next;
+    }
+
+    result.shrink_to_fit();
+    return result;
+  }
+
+  // Returns a std::vector containing all remaining bytes of the input data.
+  template <typename T> std::vector<T> ConsumeRemainingBytes() {
+    return ConsumeBytes<T>(remaining_bytes_);
+  }
+
+  // Returns a std::string containing all remaining bytes of the input data.
+  // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
+  // object.
+  std::string ConsumeRemainingBytesAsString() {
+    return ConsumeBytesAsString(remaining_bytes_);
+  }
+
+  // Returns a number in the range [Type's min, Type's max]. The value might
+  // not be uniformly distributed in the given range. If there's no input data
+  // left, always returns Type's min.
+  template <typename T> T ConsumeIntegral() {
+    return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
+                                  std::numeric_limits<T>::max());
+  }
+
+  // Reads one byte and returns a bool, or false when no data remains.
+  bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
+
+  // Returns a copy of the value selected from the given fixed-size |array|.
+  template <typename T, size_t size>
+  T PickValueInArray(const T (&array)[size]) {
+    static_assert(size > 0, "The array must be non empty.");
+    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
+  }
+
+  template <typename T>
+  T PickValueInArray(std::initializer_list<const T> list) {
+    // TODO(Dor1s): switch to static_assert once C++14 is allowed.
+    if (!list.size())
+      abort();
+
+    return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
+  }
+
+  // Returns an enum value. The enum must start at 0 and be contiguous. It must
+  // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
+  // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
+  template <typename T> T ConsumeEnum() {
+    static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
+    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
+        0, static_cast<uint32_t>(T::kMaxValue)));
+  }
+
+  // Returns a floating point number in the range [0.0, 1.0]. If there's no
+  // input data left, always returns 0.
+  template <typename T> T ConsumeProbability() {
+    static_assert(std::is_floating_point<T>::value,
+                  "A floating point type is required.");
+
+    // Use different integral types for different floating point types in order
+    // to provide better density of the resulting values.
+    using IntegralType =
+        typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
+                                  uint64_t>::type;
+
+    T result = static_cast<T>(ConsumeIntegral<IntegralType>());
+    result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
+    return result;
+  }
+
+  // Returns a floating point value in the range [Type's lowest, Type's max] by
+  // consuming bytes from the input data. If there's no input data left, always
+  // returns Type's lowest.
+  template <typename T> T ConsumeFloatingPoint() {
+    return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
+                                          std::numeric_limits<T>::max());
+  }
+
+  // Returns a floating point value in the given range by consuming bytes from
+  // the input data. If there's no input data left, returns |min|. Note that
+  // |min| must be less than or equal to |max|.
+  template <typename T> T ConsumeFloatingPointInRange(T min, T max) {
+    if (min > max)
+      abort();
+
+    T range = .0;
+    T result = min;
+    constexpr T zero(.0);
+    if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
+      // The diff |max - min| would overflow the given floating point type. Use
+      // the half of the diff as the range and consume a bool to decide whether
+      // the result is in the first or the second part of the diff.
+      range = (max / 2.0) - (min / 2.0);
+      if (ConsumeBool()) {
+        result += range;
+      }
+    } else {
+      range = max - min;
+    }
+
+    return result + range * ConsumeProbability<T>();
+  }
+
+  // Reports the remaining bytes available for fuzzed input.
+  size_t remaining_bytes() { return remaining_bytes_; }
+
+ private:
+  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
+  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
+
+  void Advance(size_t num_bytes) {
+    if (num_bytes > remaining_bytes_)
+      abort();
+
+    data_ptr_ += num_bytes;
+    remaining_bytes_ -= num_bytes;
+  }
+
+  template <typename T>
+  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
+    static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
+
+    // The point of using the size-based constructor below is to increase the
+    // odds of having a vector object with capacity being equal to the length.
+    // That part is always implementation specific, but at least both libc++ and
+    // libstdc++ allocate the requested number of bytes in that constructor,
+    // which seems to be a natural choice for other implementations as well.
+    // To increase the odds even more, we also call |shrink_to_fit| below.
+    std::vector<T> result(size);
+    if (size == 0) {
+      if (num_bytes_to_consume != 0)
+        abort();
+      return result;
+    }
+
+    std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
+    Advance(num_bytes_to_consume);
+
+    // Even though |shrink_to_fit| is also implementation specific, we expect it
+    // to provide an additional assurance in case vector's constructor allocated
+    // a buffer which is larger than the actual amount of data we put inside it.
+    result.shrink_to_fit();
+    return result;
+  }
+
+  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
+    static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
+    static_assert(!std::numeric_limits<TU>::is_signed,
+                  "Source type must be unsigned.");
+
+    // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
+    if (std::numeric_limits<TS>::is_modulo)
+      return static_cast<TS>(value);
+
+    // Avoid using implementation-defined unsigned to signed conversions.
+    // To learn more, see https://stackoverflow.com/questions/13150449.
+    if (value <= std::numeric_limits<TS>::max()) {
+      return static_cast<TS>(value);
+    } else {
+      constexpr auto TS_min = std::numeric_limits<TS>::min();
+      return TS_min + static_cast<char>(value - TS_min);
+    }
+  }
+
+  const uint8_t *data_ptr_;
+  size_t remaining_bytes_;
+};
+
+#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
--- a/extern/re2/re2/fuzzing/re2_fuzzer.cc
+++ b/extern/re2/re2/fuzzing/re2_fuzzer.cc
@ -2,12 +2,13 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+#include <fuzzer/FuzzedDataProvider.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <map>
 #include <memory>
 #include <queue>
 #include <string>
+#include <vector>

 #include "re2/prefilter.h"
 #include "re2/re2.h"
@ -17,7 +18,38 @@ using re2::StringPiece;
 // NOT static, NOT signed.
 uint8_t dummy = 0;

-void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
+void TestOneInput(StringPiece pattern, const RE2::Options& options,
+                  StringPiece text) {
+  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
+  // Otherwise, we will waste time on inputs that have long runs of various
+  // character classes. The fuzzer has shown itself to be easily capable of
+  // generating such patterns that fall within the other limits, but result
+  // in timeouts nonetheless. The marginal cost is high - even more so when
+  // counted repetition is involved - whereas the marginal benefit is zero.
+  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
+  int char_class = 0;
+  int backslash_p = 0;  // very expensive, so handle specially
+  for (size_t i = 0; i < pattern.size(); i++) {
+    if (pattern[i] == '.')
+      char_class++;
+    if (pattern[i] != '\\')
+      continue;
+    i++;
+    if (i >= pattern.size())
+      break;
+    if (pattern[i] == 'p' || pattern[i] == 'P' ||
+        pattern[i] == 'd' || pattern[i] == 'D' ||
+        pattern[i] == 's' || pattern[i] == 'S' ||
+        pattern[i] == 'w' || pattern[i] == 'W')
+      char_class++;
+    if (pattern[i] == 'p' || pattern[i] == 'P')
+      backslash_p++;
+  }
+  if (char_class > 9)
+    return;
+  if (backslash_p > 1)
+    return;
+
  RE2 re(pattern, options);
  if (!re.ok())
    return;
@ -55,7 +87,7 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {

  // Don't waste time fuzzing high-fanout programs.
  // They can cause bug reports due to fuzzer timeouts.
-  std::map<int, int> histogram;
+  std::vector<int> histogram;
  int fanout = re.ProgramFanout(&histogram);
  if (fanout > 9)
    return;
@ -102,72 +134,38 @@ void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {

 // Entry point for libFuzzer.
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
-  if (size == 0 || size > 999)
+  // An input larger than 4 KiB probably isn't interesting. (This limit
+  // allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
+  if (size == 0 || size > 4096)
    return 0;

-  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
-  // Otherwise, we will waste time on inputs that have long runs of various
-  // character classes. The fuzzer has shown itself to be easily capable of
-  // generating such patterns that fall within the other limits, but result
-  // in timeouts nonetheless. The marginal cost is high - even more so when
-  // counted repetition is involved - whereas the marginal benefit is zero.
-  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
-  int char_class = 0;
-  int backslash_p = 0;  // very expensive, so handle specially
-  for (size_t i = 0; i < size; i++) {
-    if (data[i] == '.')
-      char_class++;
-    if (data[i] != '\\')
-      continue;
-    i++;
-    if (i >= size)
-      break;
-    if (data[i] == 'p' || data[i] == 'P' ||
-        data[i] == 'd' || data[i] == 'D' ||
-        data[i] == 's' || data[i] == 'S' ||
-        data[i] == 'w' || data[i] == 'W')
-      char_class++;
-    if (data[i] == 'p' || data[i] == 'P')
-      backslash_p++;
-  }
-  if (char_class > 9)
-    return 0;
-  if (backslash_p > 1)
-    return 0;
-
-  // The one-at-a-time hash by Bob Jenkins.
-  uint32_t hash = 0;
-  for (size_t i = 0; i < size; i++) {
-    hash += data[i];
-    hash += (hash << 10);
-    hash ^= (hash >> 6);
-  }
-  hash += (hash << 3);
-  hash ^= (hash >> 11);
-  hash += (hash << 15);
+  FuzzedDataProvider fdp(data, size);

+  // The convention here is that fdp.ConsumeBool() returning false sets
+  // the default value whereas returning true sets the alternate value:
+  // most options default to false and so can be set directly; encoding
+  // defaults to UTF-8; case_sensitive defaults to true. We do NOT want
+  // to log errors. max_mem is 64 MiB because we can afford to use more
+  // RAM in exchange for (hopefully) faster fuzzing.
  RE2::Options options;
+  options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
+                                         : RE2::Options::EncodingUTF8);
+  options.set_posix_syntax(fdp.ConsumeBool());
+  options.set_longest_match(fdp.ConsumeBool());
  options.set_log_errors(false);
  options.set_max_mem(64 << 20);
-  options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
-                                : RE2::Options::EncodingUTF8);
-  options.set_posix_syntax(hash & 2);
-  options.set_longest_match(hash & 4);
-  options.set_literal(hash & 8);
-  options.set_never_nl(hash & 16);
-  options.set_dot_nl(hash & 32);
-  options.set_never_capture(hash & 64);
-  options.set_case_sensitive(hash & 128);
-  options.set_perl_classes(hash & 256);
-  options.set_word_boundary(hash & 512);
-  options.set_one_line(hash & 1024);
+  options.set_literal(fdp.ConsumeBool());
+  options.set_never_nl(fdp.ConsumeBool());
+  options.set_dot_nl(fdp.ConsumeBool());
+  options.set_never_capture(fdp.ConsumeBool());
+  options.set_case_sensitive(!fdp.ConsumeBool());
+  options.set_perl_classes(fdp.ConsumeBool());
+  options.set_word_boundary(fdp.ConsumeBool());
+  options.set_one_line(fdp.ConsumeBool());

-  const char* ptr = reinterpret_cast<const char*>(data);
-  int len = static_cast<int>(size);
-
-  StringPiece pattern(ptr, len);
-  StringPiece text(ptr, len);
-  Test(pattern, options, text);
+  std::string pattern = fdp.ConsumeRandomLengthString(999);
+  std::string text = fdp.ConsumeRandomLengthString(999);

+  TestOneInput(pattern, options, text);
  return 0;
 }
--- a/extern/re2/re2/make_perl_groups.pl
+++ b/extern/re2/re2/make_perl_groups.pl
@ -76,7 +76,7 @@ sub PrintClass($$@) {
  } else {
    $negname =~ y/a-z/A-Z/;
  }
-  return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
+  return "{ \"$escname\", +1, code$cnum, $n, 0, 0 }", "{ \"$negname\", -1, code$cnum, $n, 0, 0 }";
 }

 my $cnum = 0;
--- a/extern/re2/re2/make_unicode_casefold.py
+++ b/extern/re2/re2/make_unicode_casefold.py
--- a/extern/re2/re2/make_unicode_groups.py
+++ b/extern/re2/re2/make_unicode_groups.py
--- a/extern/re2/re2/mimics_pcre.cc
+++ b/extern/re2/re2/mimics_pcre.cc
@ -38,14 +38,21 @@ static bool CanBeEmptyString(Regexp *re);
 class PCREWalker : public Regexp::Walker<bool> {
 public:
  PCREWalker() {}
-  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args,
-                 int nchild_args);

-  bool ShortVisit(Regexp* re, bool a) {
-    // Should never be called: we use Walk not WalkExponential.
-    LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
+  virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                         bool* child_args, int nchild_args);
+
+  virtual bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    LOG(DFATAL) << "PCREWalker::ShortVisit called";
+#endif
    return a;
  }
+
+ private:
+  PCREWalker(const PCREWalker&) = delete;
+  PCREWalker& operator=(const PCREWalker&) = delete;
 };

 // Called after visiting each of re's children and accumulating
@ -114,13 +121,16 @@ bool Regexp::MimicsPCRE() {

 class EmptyStringWalker : public Regexp::Walker<bool> {
 public:
-  EmptyStringWalker() { }
-  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-                 bool* child_args, int nchild_args);
+  EmptyStringWalker() {}

-  bool ShortVisit(Regexp* re, bool a) {
-    // Should never be called: we use Walk not WalkExponential.
+  virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                         bool* child_args, int nchild_args);
+
+  virtual bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
+#endif
    return a;
  }

--- a/extern/re2/re2/nfa.cc
+++ b/extern/re2/re2/nfa.cc
@ -27,17 +27,18 @@
 #include <stdio.h>
 #include <string.h>
 #include <algorithm>
+#include <deque>
 #include <string>
 #include <utility>
 #include <vector>

+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
 #include "re2/regexp.h"
-#include "util/logging.h"
-#include "util/pod_array.h"
-#include "util/sparse_array.h"
-#include "util/sparse_set.h"
-#include "util/strutil.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"

 namespace re2 {

@ -107,18 +108,21 @@ class NFA {
  // Returns text version of capture information, for debugging.
  std::string FormatCapture(const char** capture);

-  inline void CopyCapture(const char** dst, const char** src);
+  void CopyCapture(const char** dst, const char** src) {
+    memmove(dst, src, ncapture_*sizeof src[0]);
+  }

  Prog* prog_;                // underlying program
  int start_;                 // start instruction in program
  int ncapture_;              // number of submatches to track
  bool longest_;              // whether searching for longest match
  bool endmatch_;             // whether match must end at text.end()
-  const char* btext_;         // beginning of text being matched (for FormatSubmatch)
-  const char* etext_;         // end of text being matched (for endmatch_)
+  const char* btext_;         // beginning of context (for FormatCapture)
+  const char* etext_;         // end of text (for endmatch_)
  Threadq q0_, q1_;           // pre-allocated for Search.
  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
-  Thread* free_threads_;      // free list
+  std::deque<Thread> arena_;  // owns every Thread; deque keeps Thread* stable
+  Thread* freelist_;          // head of recycled Threads (linked via next)
  const char** match_;        // best match so far
  bool matched_;              // any match so far?

@ -141,31 +145,30 @@ NFA::NFA(Prog* prog) {
               prog_->inst_count(kInstEmptyWidth) +
               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
  stack_ = PODArray<AddState>(nstack);
-  free_threads_ = NULL;
+  freelist_ = NULL;
  match_ = NULL;
  matched_ = false;
 }

 NFA::~NFA() {
  delete[] match_;
-  Thread* next;
-  for (Thread* t = free_threads_; t; t = next) {
-    next = t->next;
-    delete[] t->capture;
-    delete t;
-  }
+  for (const Thread& t : arena_)
+    delete[] t.capture;
 }

 NFA::Thread* NFA::AllocThread() {
-  Thread* t = free_threads_;
-  if (t == NULL) {
-    t = new Thread;
+  Thread* t = freelist_;
+  if (t != NULL) {
+    freelist_ = t->next;
    t->ref = 1;
-    t->capture = new const char*[ncapture_];
+    // Keep the recycled t->capture array as is: the
+    // caller immediately overwrites its contents.
    return t;
  }
-  free_threads_ = t->next;
+  arena_.emplace_back();
+  t = &arena_.back();
  t->ref = 1;
+  t->capture = new const char*[ncapture_];
  return t;
 }

@ -176,21 +179,13 @@ NFA::Thread* NFA::Incref(Thread* t) {
 }

 void NFA::Decref(Thread* t) {
-  if (t == NULL)
-    return;
+  DCHECK(t != NULL);
  t->ref--;
  if (t->ref > 0)
    return;
  DCHECK_EQ(t->ref, 0);
-  t->next = free_threads_;
-  free_threads_ = t;
-}
-
-void NFA::CopyCapture(const char** dst, const char** src) {
-  for (int i = 0; i < ncapture_; i+=2) {
-    dst[i] = src[i];
-    dst[i+1] = src[i+1];
-  }
+  t->next = freelist_;
+  freelist_ = t;
 }

 // Follows all empty arrows from id0 and enqueues all the states reached.
@ -372,8 +367,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
          matched_ = true;

          Decref(t);
-          for (++i; i != runq->end(); ++i)
-            Decref(i->value());
+          for (++i; i != runq->end(); ++i) {
+            if (i->value() != NULL)
+              Decref(i->value());
+          }
          runq->clear();
          if (ip->greedy(prog_))
            return ip->out1();
@ -382,10 +379,15 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
        break;

      case kInstMatch: {
-        // Avoid invoking undefined behavior when p happens
-        // to be null - and p-1 would be meaningless anyway.
-        if (p == NULL)
+        // Avoid invoking undefined behavior (arithmetic on a null pointer)
+        // by storing p instead of p-1. (What would the latter even mean?!)
+        // This complements the special case in NFA::Search().
+        if (p == NULL) {
+          CopyCapture(match_, t->capture);
+          match_[1] = p;
+          matched_ = true;
          break;
+        }

        if (endmatch_ && p-1 != etext_)
          break;
@ -411,8 +413,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
          // worse than the one we just found: don't run the
          // rest of the current Threadq.
          Decref(t);
-          for (++i; i != runq->end(); ++i)
-            Decref(i->value());
+          for (++i; i != runq->end(); ++i) {
+            if (i->value() != NULL)
+              Decref(i->value());
+          }
          runq->clear();
          return 0;
        }
@ -431,12 +435,12 @@ std::string NFA::FormatCapture(const char** capture) {
    if (capture[i] == NULL)
      s += "(?,?)";
    else if (capture[i+1] == NULL)
-      s += StringPrintf("(%d,?)",
-                        (int)(capture[i] - btext_));
+      s += StringPrintf("(%td,?)",
+                        capture[i] - btext_);
    else
-      s += StringPrintf("(%d,%d)",
-                        (int)(capture[i] - btext_),
-                        (int)(capture[i+1] - btext_));
+      s += StringPrintf("(%td,%td)",
+                        capture[i] - btext_,
+                        capture[i+1] - btext_);
  }
  return s;
 }
@ -448,7 +452,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
    return false;

  StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
    context = text;

  // Sanity check: make sure that text lies within context.
@ -465,7 +469,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
  if (prog_->anchor_end()) {
    longest = true;
    endmatch_ = true;
-    etext_ = text.end();
  }

  if (nsubmatch < 0) {
@ -485,32 +488,33 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
  }

  match_ = new const char*[ncapture_];
+  memset(match_, 0, ncapture_*sizeof match_[0]);
  matched_ = false;

  // For debugging prints.
-  btext_ = context.begin();
+  btext_ = context.data();
+  // End of text; bounds the loop below and Step()'s endmatch_ test.
+  etext_ = text.data() + text.size();

  if (ExtraDebug)
    fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
-            std::string(text).c_str(), std::string(context).c_str(), anchored,
-            longest);
+            std::string(text).c_str(), std::string(context).c_str(), anchored, longest);

  // Set up search.
  Threadq* runq = &q0_;
  Threadq* nextq = &q1_;
  runq->clear();
  nextq->clear();
-  memset(&match_[0], 0, ncapture_*sizeof match_[0]);

  // Loop over the text, stepping the machine.
-  for (const char* p = text.begin();; p++) {
+  for (const char* p = text.data();; p++) {
    if (ExtraDebug) {
      int c = 0;
-      if (p == context.begin())
+      if (p == btext_)
        c = '^';
-      else if (p > text.end())
+      else if (p > etext_)
        c = '$';
-      else if (p < text.end())
+      else if (p < etext_)
        c = p[0] & 0xFF;

      fprintf(stderr, "%c:", c);
@ -524,14 +528,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
    }

    // This is a no-op the first time around the loop because runq is empty.
-    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
+    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
    DCHECK_EQ(runq->size(), 0);
    using std::swap;
    swap(nextq, runq);
    nextq->clear();
    if (id != 0) {
      // We're done: full match ahead.
-      p = text.end();
+      p = etext_;
      for (;;) {
        Prog::Inst* ip = prog_->inst(id);
        switch (ip->opcode()) {
@ -559,30 +563,28 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
      break;
    }

-    if (p > text.end())
+    if (p > etext_)
      break;

    // Start a new thread if there have not been any matches.
    // (No point in starting a new thread if there have been
    // matches, since it would be to the right of the match
    // we already found.)
-    if (!matched_ && (!anchored || p == text.begin())) {
-      // If there's a required first byte for an unanchored search
-      // and we're not in the middle of any possible matches,
-      // use memchr to search for the byte quickly.
-      int fb = prog_->first_byte();
+    if (!matched_ && (!anchored || p == text.data())) {
+      // Try to use prefix accel (e.g. memchr) to skip ahead.
+      // The search must be unanchored and runq must be empty,
+      // i.e. no match may already be in progress.
      if (!anchored && runq->size() == 0 &&
-          fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
-        p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
-        if (p == NULL) {
-          p = text.end();
-        }
+          p < etext_ && prog_->can_prefix_accel()) {
+        p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
+        if (p == NULL)
+          p = etext_;
      }

      Thread* t = AllocThread();
      CopyCapture(t->capture, match_);
      t->capture[0] = p;
-      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
+      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
                   t);
      Decref(t);
    }
@ -593,10 +595,24 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
        fprintf(stderr, "dead\n");
      break;
    }
+
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    // This complements the special case in NFA::Step().
+    if (p == NULL) {
+      (void) Step(runq, nextq, -1, context, p);
+      DCHECK_EQ(runq->size(), 0);
+      using std::swap;
+      swap(nextq, runq);
+      nextq->clear();
+      break;
+    }
  }

-  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
-    Decref(i->value());
+  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+    if (i->value() != NULL)
+      Decref(i->value());
+  }

  if (matched_) {
    for (int i = 0; i < nsubmatch; i++)
@ -605,73 +621,13 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
                      static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
    if (ExtraDebug)
      fprintf(stderr, "match (%td,%td)\n",
-              match_[0] - btext_, match_[1] - btext_);
+              match_[0] - btext_,
+              match_[1] - btext_);
    return true;
  }
  return false;
 }

-// Computes whether all successful matches have a common first byte,
-// and if so, returns that byte.  If not, returns -1.
-int Prog::ComputeFirstByte() {
-  int b = -1;
-  SparseSet q(size());
-  q.insert(start());
-  for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
-    int id = *it;
-    Prog::Inst* ip = inst(id);
-    switch (ip->opcode()) {
-      default:
-        LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
-        break;
-
-      case kInstMatch:
-        // The empty string matches: no first byte.
-        return -1;
-
-      case kInstByteRange:
-        if (!ip->last())
-          q.insert(id+1);
-
-        // Must match only a single byte
-        if (ip->lo() != ip->hi())
-          return -1;
-        if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
-          return -1;
-        // If we haven't seen any bytes yet, record it;
-        // otherwise must match the one we saw before.
-        if (b == -1)
-          b = ip->lo();
-        else if (b != ip->lo())
-          return -1;
-        break;
-
-      case kInstNop:
-      case kInstCapture:
-      case kInstEmptyWidth:
-        if (!ip->last())
-          q.insert(id+1);
-
-        // Continue on.
-        // Ignore ip->empty() flags for kInstEmptyWidth
-        // in order to be as conservative as possible
-        // (assume all possible empty-width flags are true).
-        if (ip->out())
-          q.insert(ip->out());
-        break;
-
-      case kInstAltMatch:
-        DCHECK(!ip->last());
-        q.insert(id+1);
-        break;
-
-      case kInstFail:
-        break;
-    }
-  }
-  return b;
-}
-
 bool
 Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
                Anchor anchor, MatchKind kind,
--- a/extern/re2/re2/onepass.cc
+++ b/extern/re2/re2/onepass.cc
@ -59,11 +59,11 @@

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/pod_array.h"
-#include "util/sparse_set.h"
 #include "util/strutil.h"
 #include "util/utf.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
+#include "re2/sparse_set.h"
 #include "re2/stringpiece.h"

 // Silence "zero-sized array in struct/union" warning for OneState::action.
@ -235,7 +235,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
    matchcap[i] = NULL;

  StringPiece context = const_context;
-  if (context.begin() == NULL)
+  if (context.data() == NULL)
    context = text;
  if (anchor_start() && context.begin() != text.begin())
    return false;
@ -249,8 +249,8 @@ bool Prog::SearchOnePass(const StringPiece& text,
  // start() is always mapped to the zeroth OneState.
  OneState* state = IndexToNode(nodes, statesize, 0);
  uint8_t* bytemap = bytemap_;
-  const char* bp = text.begin();
-  const char* ep = text.end();
+  const char* bp = text.data();
+  const char* ep = text.data() + text.size();
  const char* p;
  bool matched = false;
  matchcap[0] = bp;
@ -550,7 +550,7 @@ bool Prog::IsOnePass() {
          if (!AddQ(&workq, ip->out())) {
            if (ExtraDebug)
              LOG(ERROR) << StringPrintf(
-                  "Not OnePass: multiple paths %d -> %d\n", *it, ip->out());
+                  "Not OnePass: multiple paths %d -> %d", *it, ip->out());
            goto fail;
          }
          id = ip->out();
@ -561,7 +561,7 @@ bool Prog::IsOnePass() {
            // (3) is violated
            if (ExtraDebug)
              LOG(ERROR) << StringPrintf(
-                  "Not OnePass: multiple matches from %d\n", *it);
+                  "Not OnePass: multiple matches from %d", *it);
            goto fail;
          }
          matched = true;
--- a/extern/re2/re2/parse.cc
+++ b/extern/re2/re2/parse.cc
@ -27,9 +27,9 @@

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/pod_array.h"
 #include "util/strutil.h"
 #include "util/utf.h"
+#include "re2/pod_array.h"
 #include "re2/regexp.h"
 #include "re2/stringpiece.h"
 #include "re2/unicode_casefold.h"
@ -93,7 +93,7 @@ class Regexp::ParseState {
  bool PushSimpleOp(RegexpOp op);

  // Pushes a ^ onto the stack.
-  bool PushCarat();
+  bool PushCaret();

  // Pushes a \b (word == true) or \B (word == false) onto the stack.
  bool PushWordBoundary(bool word);
@ -423,7 +423,7 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
 }

 // Pushes a ^ onto the stack.
-bool Regexp::ParseState::PushCarat() {
+bool Regexp::ParseState::PushCaret() {
  if (flags_ & OneLine) {
    return PushSimpleOp(kRegexpBeginText);
  }
@ -556,9 +556,10 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
 }

 int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
-  // This should never be called, since we use Walk and not
-  // WalkExponential.
+  // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
+#endif
  return 0;
 }

@ -684,7 +685,7 @@ bool Regexp::ParseState::DoRightParen() {
  if ((r1 = stacktop_) == NULL ||
      (r2 = r1->down_) == NULL ||
      r2->op() != kLeftParen) {
-    status_->set_code(kRegexpMissingParen);
+    status_->set_code(kRegexpUnexpectedParen);
    status_->set_error_arg(whole_regexp_);
    return false;
  }
@ -1323,14 +1324,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
 // Parses a decimal integer, storing it in *np.
 // Sets *s to span the remainder of the string.
 static bool ParseInteger(StringPiece* s, int* np) {
-  if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
+  if (s->empty() || !isdigit((*s)[0] & 0xFF))
    return false;
  // Disallow leading zeros.
  if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
    return false;
  int n = 0;
  int c;
-  while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
+  while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
    // Avoid overflow.
    if (n >= 100000000)
      return false;
@ -1352,16 +1353,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
 // s must NOT be edited unless MaybeParseRepetition returns true.
 static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
  StringPiece s = *sp;
-  if (s.size() == 0 || s[0] != '{')
+  if (s.empty() || s[0] != '{')
    return false;
  s.remove_prefix(1);  // '{'
  if (!ParseInteger(&s, lo))
    return false;
-  if (s.size() == 0)
+  if (s.empty())
    return false;
  if (s[0] == ',') {
    s.remove_prefix(1);  // ','
-    if (s.size() == 0)
+    if (s.empty())
      return false;
    if (s[0] == '}') {
      // {2,} means at least 2
@ -1375,7 +1376,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
    // {2} means exactly two
    *hi = *lo;
  }
-  if (s.size() == 0 || s[0] != '}')
+  if (s.empty() || s[0] != '}')
    return false;
  s.remove_prefix(1);  // '}'
  *sp = s;
@ -1416,7 +1417,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
 static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
  StringPiece t = s;
  Rune r;
-  while (t.size() > 0) {
+  while (!t.empty()) {
    if (StringPieceToRune(&r, &t, status) < 0)
      return false;
  }
@ -1447,14 +1448,14 @@ static int UnHex(int c) {
 // Sets *rp to the named character.
 static bool ParseEscape(StringPiece* s, Rune* rp,
                        RegexpStatus* status, int rune_max) {
-  const char* begin = s->begin();
-  if (s->size() < 1 || (*s)[0] != '\\') {
+  const char* begin = s->data();
+  if (s->empty() || (*s)[0] != '\\') {
    // Should not happen - caller always checks.
    status->set_code(kRegexpInternalError);
    status->set_error_arg(StringPiece());
    return false;
  }
-  if (s->size() < 2) {
+  if (s->size() == 1) {
    status->set_code(kRegexpTrailingBackslash);
    status->set_error_arg(StringPiece());
    return false;
@ -1485,16 +1486,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
    case '6':
    case '7':
      // Single non-zero octal digit is a backreference; not supported.
-      if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7')
+      if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
        goto BadEscape;
      FALLTHROUGH_INTENDED;
    case '0':
      // consume up to three octal digits; already have one.
      code = c - '0';
-      if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') {
+      if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
        code = code * 8 + c - '0';
        s->remove_prefix(1);  // digit
-        if (s->size() > 0) {
+        if (!s->empty()) {
          c = (*s)[0];
          if ('0' <= c && c <= '7') {
            code = code * 8 + c - '0';
@ -1509,7 +1510,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,

    // Hexadecimal escapes
    case 'x':
-      if (s->size() == 0)
+      if (s->empty())
        goto BadEscape;
      if (StringPieceToRune(&c, s, status) < 0)
        return false;
@ -1529,7 +1530,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
          code = code * 16 + UnHex(c);
          if (code > rune_max)
            goto BadEscape;
-          if (s->size() == 0)
+          if (s->empty())
            goto BadEscape;
          if (StringPieceToRune(&c, s, status) < 0)
            return false;
@ -1540,7 +1541,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
        return true;
      }
      // Easy case: two hex digits.
-      if (s->size() == 0)
+      if (s->empty())
        goto BadEscape;
      if (StringPieceToRune(&c1, s, status) < 0)
        return false;
@ -1590,7 +1591,7 @@ BadEscape:
  // Unrecognized escape sequence.
  status->set_code(kRegexpBadEscape);
  status->set_error_arg(
-      StringPiece(begin, static_cast<size_t>(s->begin() - begin)));
+      StringPiece(begin, static_cast<size_t>(s->data() - begin)));
  return false;
 }

@ -1710,7 +1711,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
    return NULL;
  // Could use StringPieceToRune, but there aren't
  // any non-ASCII Perl group names.
-  StringPiece name(s->begin(), 2);
+  StringPiece name(s->data(), 2);
  const UGroup *g = LookupPerlGroup(name);
  if (g == NULL)
    return NULL;
@ -1750,8 +1751,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
    return kParseError;
  if (c != '{') {
    // Name is the bit of string we just skipped over for c.
-    const char* p = seq.begin() + 2;
-    name = StringPiece(p, static_cast<size_t>(s->begin() - p));
+    const char* p = seq.data() + 2;
+    name = StringPiece(p, static_cast<size_t>(s->data() - p));
  } else {
    // Name is in braces. Look for closing }
    size_t end = s->find('}', 0);
@ -1762,16 +1763,16 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
      status->set_error_arg(seq);
      return kParseError;
    }
-    name = StringPiece(s->begin(), end);  // without '}'
+    name = StringPiece(s->data(), end);  // without '}'
    s->remove_prefix(end + 1);  // with '}'
    if (!IsValidUTF8(name, status))
      return kParseError;
  }

  // Chop seq where s now begins.
-  seq = StringPiece(seq.begin(), static_cast<size_t>(s->begin() - seq.begin()));
+  seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));

-  if (name.size() > 0 && name[0] == '^') {
+  if (!name.empty() && name[0] == '^') {
    sign = -sign;
    name.remove_prefix(1);  // '^'
  }
@ -1801,14 +1802,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,

  // Convert the UnicodeSet to a URange32 and UGroup that we can add.
  int nr = uset.getRangeCount();
-  URange32* r = new URange32[nr];
+  PODArray<URange32> r(nr);
  for (int i = 0; i < nr; i++) {
    r[i].lo = uset.getRangeStart(i);
    r[i].hi = uset.getRangeEnd(i);
  }
-  UGroup g = {"", +1, 0, 0, r, nr};
+  UGroup g = {"", +1, 0, 0, r.data(), nr};
  AddUGroup(cc, &g, sign, parse_flags);
-  delete[] r;
 #endif

  return kParseOk;
@ -1858,7 +1858,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
 bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
                                          const StringPiece& whole_class,
                                          RegexpStatus* status) {
-  if (s->size() == 0) {
+  if (s->empty()) {
    status->set_code(kRegexpMissingBracket);
    status->set_error_arg(whole_class);
    return false;
@ -1866,7 +1866,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,

  // Allow regular escape sequences even though
  // many need not be escaped in this context.
-  if (s->size() >= 1 && (*s)[0] == '\\')
+  if ((*s)[0] == '\\')
    return ParseEscape(s, rp, status, rune_max_);

  // Otherwise take the next rune.
@ -1908,7 +1908,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
                                        Regexp** out_re,
                                        RegexpStatus* status) {
  StringPiece whole_class = *s;
-  if (s->size() == 0 || (*s)[0] != '[') {
+  if (s->empty() || (*s)[0] != '[') {
    // Caller checked this.
    status->set_code(kRegexpInternalError);
    status->set_error_arg(StringPiece());
@ -1918,7 +1918,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
  Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
  re->ccb_ = new CharClassBuilder;
  s->remove_prefix(1);  // '['
-  if (s->size() > 0 && (*s)[0] == '^') {
+  if (!s->empty() && (*s)[0] == '^') {
    s->remove_prefix(1);  // '^'
    negated = true;
    if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
@ -1928,7 +1928,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
    }
  }
  bool first = true;  // ] is okay as first char in class
-  while (s->size() > 0 && ((*s)[0] != ']' || first)) {
+  while (!s->empty() && ((*s)[0] != ']' || first)) {
    // - is only okay unescaped as first or last in class.
    // Except that Perl allows - anywhere.
    if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
@ -1996,7 +1996,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
    // in the flags.
    re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
  }
-  if (s->size() == 0) {
+  if (s->empty()) {
    status->set_code(kRegexpMissingBracket);
    status->set_error_arg(whole_class);
    re->Decref();
@ -2016,7 +2016,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
 // Python rejects names starting with digits.
 // We don't enforce either of those.
 static bool IsValidCaptureName(const StringPiece& name) {
-  if (name.size() == 0)
+  if (name.empty())
    return false;
  for (size_t i = 0; i < name.size(); i++) {
    int c = name[i];
@ -2074,8 +2074,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
    }

    // t is "P<name>...", t[end] == '>'
-    StringPiece capture(t.begin()-2, end+3);  // "(?P<name>"
-    StringPiece name(t.begin()+2, end-2);     // "name"
+    StringPiece capture(t.data()-2, end+3);  // "(?P<name>"
+    StringPiece name(t.data()+2, end-2);     // "name"
    if (!IsValidUTF8(name, status_))
      return false;
    if (!IsValidCaptureName(name)) {
@ -2089,7 +2089,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
      return false;
    }

-    s->remove_prefix(static_cast<size_t>(capture.end() - s->begin()));
+    s->remove_prefix(
+        static_cast<size_t>(capture.data() + capture.size() - s->data()));
    return true;
  }

@ -2098,7 +2099,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
  int nflags = flags_;
  Rune c;
  for (bool done = false; !done; ) {
-    if (t.size() == 0)
+    if (t.empty())
      goto BadPerlOp;
    if (StringPieceToRune(&c, &t, status_) < 0)
      return false;
@ -2173,7 +2174,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
 BadPerlOp:
  status_->set_code(kRegexpBadPerlOp);
  status_->set_error_arg(
-      StringPiece(s->begin(), static_cast<size_t>(t.begin() - s->begin())));
+      StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
  return false;
 }

@ -2216,7 +2217,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,

  if (global_flags & Literal) {
    // Special parse loop for literal string.
-    while (t.size() > 0) {
+    while (!t.empty()) {
      Rune r;
      if (StringPieceToRune(&r, &t, status) < 0)
        return NULL;
@ -2227,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
  }

  StringPiece lastunary = StringPiece();
-  while (t.size() > 0) {
+  while (!t.empty()) {
    StringPiece isunary = StringPiece();
    switch (t[0]) {
      default: {
@ -2270,7 +2271,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
        break;

      case '^':  // Beginning of line.
-        if (!ps.PushCarat())
+        if (!ps.PushCaret())
          return NULL;
        t.remove_prefix(1);  // '^'
        break;
@ -2311,18 +2312,18 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
        bool nongreedy = false;
        t.remove_prefix(1);  // '*' or '+' or '?'
        if (ps.flags() & PerlX) {
-          if (t.size() > 0 && t[0] == '?') {
+          if (!t.empty() && t[0] == '?') {
            nongreedy = true;
            t.remove_prefix(1);  // '?'
          }
-          if (lastunary.size() > 0) {
+          if (!lastunary.empty()) {
            // In Perl it is not allowed to stack repetition operators:
            //   a** is a syntax error, not a double-star.
            // (and a++ means something else entirely, which we don't support!)
            status->set_code(kRegexpRepeatOp);
            status->set_error_arg(StringPiece(
-                lastunary.begin(),
-                static_cast<size_t>(t.begin() - lastunary.begin())));
+                lastunary.data(),
+                static_cast<size_t>(t.data() - lastunary.data())));
            return NULL;
          }
        }
@ -2346,16 +2347,16 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
        }
        bool nongreedy = false;
        if (ps.flags() & PerlX) {
-          if (t.size() > 0 && t[0] == '?') {
+          if (!t.empty() && t[0] == '?') {
            nongreedy = true;
            t.remove_prefix(1);  // '?'
          }
-          if (lastunary.size() > 0) {
+          if (!lastunary.empty()) {
            // Not allowed to stack repetition operators.
            status->set_code(kRegexpRepeatOp);
            status->set_error_arg(StringPiece(
-                lastunary.begin(),
-                static_cast<size_t>(t.begin() - lastunary.begin())));
+                lastunary.data(),
+                static_cast<size_t>(t.data() - lastunary.data())));
            return NULL;
          }
        }
@ -2404,7 +2405,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,

          if (t[1] == 'Q') {  // \Q ... \E: the ... is always literals
            t.remove_prefix(2);  // '\\', 'Q'
-            while (t.size() > 0) {
+            while (!t.empty()) {
              if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
                t.remove_prefix(2);  // '\\', 'E'
                break;
--- a/extern/re2/re2/perl_groups.cc
+++ b/extern/re2/re2/perl_groups.cc
@ -20,12 +20,12 @@ static const URange16 code3[] = {  /* \w */
 	{ 0x61, 0x7a },
 };
 const UGroup perl_groups[] = {
-	{ "\\d", +1, code1, 1 },
-	{ "\\D", -1, code1, 1 },
-	{ "\\s", +1, code2, 3 },
-	{ "\\S", -1, code2, 3 },
-	{ "\\w", +1, code3, 4 },
-	{ "\\W", -1, code3, 4 },
+	{ "\\d", +1, code1, 1, 0, 0 },
+	{ "\\D", -1, code1, 1, 0, 0 },
+	{ "\\s", +1, code2, 3, 0, 0 },
+	{ "\\S", -1, code2, 3, 0, 0 },
+	{ "\\w", +1, code3, 4, 0, 0 },
+	{ "\\W", -1, code3, 4, 0, 0 },
 };
 const int num_perl_groups = 6;
 static const URange16 code4[] = {  /* [:alnum:] */
@ -85,34 +85,34 @@ static const URange16 code17[] = {  /* [:xdigit:] */
 	{ 0x61, 0x66 },
 };
 const UGroup posix_groups[] = {
-	{ "[:alnum:]", +1, code4, 3 },
-	{ "[:^alnum:]", -1, code4, 3 },
-	{ "[:alpha:]", +1, code5, 2 },
-	{ "[:^alpha:]", -1, code5, 2 },
-	{ "[:ascii:]", +1, code6, 1 },
-	{ "[:^ascii:]", -1, code6, 1 },
-	{ "[:blank:]", +1, code7, 2 },
-	{ "[:^blank:]", -1, code7, 2 },
-	{ "[:cntrl:]", +1, code8, 2 },
-	{ "[:^cntrl:]", -1, code8, 2 },
-	{ "[:digit:]", +1, code9, 1 },
-	{ "[:^digit:]", -1, code9, 1 },
-	{ "[:graph:]", +1, code10, 1 },
-	{ "[:^graph:]", -1, code10, 1 },
-	{ "[:lower:]", +1, code11, 1 },
-	{ "[:^lower:]", -1, code11, 1 },
-	{ "[:print:]", +1, code12, 1 },
-	{ "[:^print:]", -1, code12, 1 },
-	{ "[:punct:]", +1, code13, 4 },
-	{ "[:^punct:]", -1, code13, 4 },
-	{ "[:space:]", +1, code14, 2 },
-	{ "[:^space:]", -1, code14, 2 },
-	{ "[:upper:]", +1, code15, 1 },
-	{ "[:^upper:]", -1, code15, 1 },
-	{ "[:word:]", +1, code16, 4 },
-	{ "[:^word:]", -1, code16, 4 },
-	{ "[:xdigit:]", +1, code17, 3 },
-	{ "[:^xdigit:]", -1, code17, 3 },
+	{ "[:alnum:]", +1, code4, 3, 0, 0 },
+	{ "[:^alnum:]", -1, code4, 3, 0, 0 },
+	{ "[:alpha:]", +1, code5, 2, 0, 0 },
+	{ "[:^alpha:]", -1, code5, 2, 0, 0 },
+	{ "[:ascii:]", +1, code6, 1, 0, 0 },
+	{ "[:^ascii:]", -1, code6, 1, 0, 0 },
+	{ "[:blank:]", +1, code7, 2, 0, 0 },
+	{ "[:^blank:]", -1, code7, 2, 0, 0 },
+	{ "[:cntrl:]", +1, code8, 2, 0, 0 },
+	{ "[:^cntrl:]", -1, code8, 2, 0, 0 },
+	{ "[:digit:]", +1, code9, 1, 0, 0 },
+	{ "[:^digit:]", -1, code9, 1, 0, 0 },
+	{ "[:graph:]", +1, code10, 1, 0, 0 },
+	{ "[:^graph:]", -1, code10, 1, 0, 0 },
+	{ "[:lower:]", +1, code11, 1, 0, 0 },
+	{ "[:^lower:]", -1, code11, 1, 0, 0 },
+	{ "[:print:]", +1, code12, 1, 0, 0 },
+	{ "[:^print:]", -1, code12, 1, 0, 0 },
+	{ "[:punct:]", +1, code13, 4, 0, 0 },
+	{ "[:^punct:]", -1, code13, 4, 0, 0 },
+	{ "[:space:]", +1, code14, 2, 0, 0 },
+	{ "[:^space:]", -1, code14, 2, 0, 0 },
+	{ "[:upper:]", +1, code15, 1, 0, 0 },
+	{ "[:^upper:]", -1, code15, 1, 0, 0 },
+	{ "[:word:]", +1, code16, 4, 0, 0 },
+	{ "[:^word:]", -1, code16, 4, 0, 0 },
+	{ "[:xdigit:]", +1, code17, 3, 0, 0 },
+	{ "[:^xdigit:]", -1, code17, 3, 0, 0 },
 };
 const int num_posix_groups = 28;

--- a/extern/re2/util/pod_array.h
+++ b/extern/re2/util/pod_array.h
@ -2,8 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-#ifndef UTIL_POD_ARRAY_H_
-#define UTIL_POD_ARRAY_H_
+#ifndef RE2_POD_ARRAY_H_
+#define RE2_POD_ARRAY_H_

 #include <memory>
 #include <type_traits>
@ -13,7 +13,7 @@ namespace re2 {
 template <typename T>
 class PODArray {
 public:
-  static_assert(std::is_pod<T>::value,
+  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                "T must be POD");

  PODArray()
@ -52,4 +52,4 @@ class PODArray {

 }  // namespace re2

-#endif  // UTIL_POD_ARRAY_H_
+#endif  // RE2_POD_ARRAY_H_
--- a/extern/re2/re2/prefilter.cc
+++ b/extern/re2/re2/prefilter.cc
@ -648,14 +648,15 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
    return NULL;

  Regexp* simple = re->Simplify();
-  Prefilter::Info *info = BuildInfo(simple);
+  if (simple == NULL)
+    return NULL;

+  Prefilter::Info* info = BuildInfo(simple);
  simple->Decref();
  if (info == NULL)
    return NULL;

  Prefilter* m = info->TakeMatch();
-
  delete info;
  return m;
 }
--- a/extern/re2/re2/prefilter_tree.cc
+++ b/extern/re2/re2/prefilter_tree.cc
@ -107,7 +107,7 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {

 Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
  std::string node_string = NodeString(node);
-  std::map<std::string, Prefilter*>::iterator iter = nodes->find(node_string);
+  NodeMap::iterator iter = nodes->find(node_string);
  if (iter == nodes->end())
    return NULL;
  return (*iter).second;
@ -377,7 +377,7 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
      LOG(ERROR) << it->first;
  }
  LOG(ERROR) << "Map:";
-  for (std::map<std::string, Prefilter*>::const_iterator iter = nodes->begin();
+  for (NodeMap::const_iterator iter = nodes->begin();
       iter != nodes->end(); ++iter)
    LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
               << " Str: " << (*iter).first;
--- a/extern/re2/re2/prefilter_tree.h
+++ b/extern/re2/re2/prefilter_tree.h
@ -21,8 +21,8 @@
 #include <vector>

 #include "util/util.h"
-#include "util/sparse_array.h"
 #include "re2/prefilter.h"
+#include "re2/sparse_array.h"

 namespace re2 {

--- a/extern/re2/re2/prog.cc
+++ b/extern/re2/re2/prog.cc
@ -7,6 +7,12 @@

 #include "re2/prog.h"

+#if defined(__AVX2__)
+#include <immintrin.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#endif
 #include <stdint.h>
 #include <string.h>
 #include <algorithm>
@ -109,8 +115,9 @@ Prog::Prog()
    start_unanchored_(0),
    size_(0),
    bytemap_range_(0),
-    first_byte_(-1),
-    flags_(0),
+    prefix_size_(0),
+    prefix_front_(-1),
+    prefix_back_(-1),
    list_count_(0),
    dfa_mem_(0),
    dfa_first_(NULL),
@ -185,14 +192,31 @@ std::string Prog::DumpByteMap() {
  return map;
 }

-int Prog::first_byte() {
-  std::call_once(first_byte_once_, [](Prog* prog) {
-    prog->first_byte_ = prog->ComputeFirstByte();
-  }, this);
-  return first_byte_;
-}
+// Is ip a guaranteed match at end of text, perhaps after some capturing?
+static bool IsMatch(Prog* prog, Prog::Inst* ip) {
+  for (;;) {
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
+        return false;

-static bool IsMatch(Prog*, Prog::Inst*);
+      case kInstAlt:
+      case kInstAltMatch:
+      case kInstByteRange:
+      case kInstFail:
+      case kInstEmptyWidth:
+        return false;
+
+      case kInstCapture:
+      case kInstNop:
+        ip = prog->inst(ip->out());
+        break;
+
+      case kInstMatch:
+        return true;
+    }
+  }
+}

 // Peep-hole optimizer.
 void Prog::Optimize() {
@ -258,54 +282,28 @@ void Prog::Optimize() {
  }
 }

-// Is ip a guaranteed match at end of text, perhaps after some capturing?
-static bool IsMatch(Prog* prog, Prog::Inst* ip) {
-  for (;;) {
-    switch (ip->opcode()) {
-      default:
-        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
-        return false;
-
-      case kInstAlt:
-      case kInstAltMatch:
-      case kInstByteRange:
-      case kInstFail:
-      case kInstEmptyWidth:
-        return false;
-
-      case kInstCapture:
-      case kInstNop:
-        ip = prog->inst(ip->out());
-        break;
-
-      case kInstMatch:
-        return true;
-    }
-  }
-}
-
 uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
  int flags = 0;

  // ^ and \A
-  if (p == text.begin())
+  if (p == text.data())
    flags |= kEmptyBeginText | kEmptyBeginLine;
  else if (p[-1] == '\n')
    flags |= kEmptyBeginLine;

  // $ and \z
-  if (p == text.end())
+  if (p == text.data() + text.size())
    flags |= kEmptyEndText | kEmptyEndLine;
-  else if (p < text.end() && p[0] == '\n')
+  else if (p < text.data() + text.size() && p[0] == '\n')
    flags |= kEmptyEndLine;

  // \b and \B
-  if (p == text.begin() && p == text.end()) {
+  if (p == text.data() && p == text.data() + text.size()) {
    // no word boundary here
-  } else if (p == text.begin()) {
+  } else if (p == text.data()) {
    if (IsWordChar(p[0]))
      flags |= kEmptyWordBoundary;
-  } else if (p == text.end()) {
+  } else if (p == text.data() + text.size()) {
    if (IsWordChar(p[-1]))
      flags |= kEmptyWordBoundary;
  } else {
@ -918,4 +916,73 @@ void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
  }
 }

+#if defined(__AVX2__)
+// Finds the least significant non-zero bit in n.
+static int FindLSBSet(uint32_t n) {
+  DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+  return __builtin_ctz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+  unsigned long c;
+  _BitScanForward(&c, n);
+  return static_cast<int>(c);
+#else
+  int c = 31;
+  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+    uint32_t word = n << shift;
+    if (word != 0) {
+      n = word;
+      c -= shift;
+    }
+  }
+  return c;
+#endif
+}
+#endif
+
+const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
+  DCHECK_GE(prefix_size_, 2);
+  if (size < prefix_size_)
+    return NULL;
+  // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
+  // This also means that probing for prefix_back_ doesn't go out of bounds.
+  size -= prefix_size_-1;
+
+#if defined(__AVX2__)
+  // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
+  if (size >= sizeof(__m256i)) {
+    const __m256i* fp = reinterpret_cast<const __m256i*>(
+        reinterpret_cast<const char*>(data));
+    const __m256i* bp = reinterpret_cast<const __m256i*>(
+        reinterpret_cast<const char*>(data) + prefix_size_-1);
+    const __m256i* endfp = fp + size/sizeof(__m256i);
+    const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
+    const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
+    while (fp != endfp) {
+      const __m256i f_loadu = _mm256_loadu_si256(fp++);
+      const __m256i b_loadu = _mm256_loadu_si256(bp++);
+      const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
+      const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
+      const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
+      if (fb_testz == 0) {  // ZF: 1 means zero, 0 means non-zero.
+        const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
+        const int fb_movemask = _mm256_movemask_epi8(fb_and);
+        const int fb_ctz = FindLSBSet(fb_movemask);
+        return reinterpret_cast<const char*>(fp-1) + fb_ctz;
+      }
+    }
+    data = fp;
+    size = size%sizeof(__m256i);
+  }
+#endif
+
+  const char* p0 = reinterpret_cast<const char*>(data);
+  for (const char* p = p0;; p++) {
+    DCHECK_GE(size, static_cast<size_t>(p-p0));
+    p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
+    if (p == NULL || p[prefix_size_-1] == prefix_back_)
+      return p;
+  }
+}
+
 }  // namespace re2
--- a/extern/re2/re2/prog.h
+++ b/extern/re2/re2/prog.h
@ -18,10 +18,10 @@

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/pod_array.h"
-#include "util/sparse_array.h"
-#include "util/sparse_set.h"
+#include "re2/pod_array.h"
 #include "re2/re2.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"

 namespace re2 {

@ -198,8 +198,8 @@ class Prog {

  Inst *inst(int id) { return &inst_[id]; }
  int start() { return start_; }
-  int start_unanchored() { return start_unanchored_; }
  void set_start(int start) { start_ = start; }
+  int start_unanchored() { return start_unanchored_; }
  void set_start_unanchored(int start) { start_unanchored_ = start; }
  int size() { return size_; }
  bool reversed() { return reversed_; }
@ -207,19 +207,27 @@ class Prog {
  int list_count() { return list_count_; }
  int inst_count(InstOp op) { return inst_count_[op]; }
  uint16_t* list_heads() { return list_heads_.data(); }
-  void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
  int64_t dfa_mem() { return dfa_mem_; }
-  int flags() { return flags_; }
-  void set_flags(int flags) { flags_ = flags; }
+  void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
  bool anchor_start() { return anchor_start_; }
  void set_anchor_start(bool b) { anchor_start_ = b; }
  bool anchor_end() { return anchor_end_; }
  void set_anchor_end(bool b) { anchor_end_ = b; }
  int bytemap_range() { return bytemap_range_; }
  const uint8_t* bytemap() { return bytemap_; }
+  bool can_prefix_accel() { return prefix_size_ != 0; }

-  // Lazily computed.
-  int first_byte();
+  // Accelerates to the first likely occurrence of the prefix.
+  // Returns a pointer to the first byte or NULL if not found.
+  const void* PrefixAccel(const void* data, size_t size) {
+    DCHECK_GE(prefix_size_, 1);
+    return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
+                             : PrefixAccel_FrontAndBack(data, size);
+  }
+
+  // An implementation of prefix accel that looks for prefix_front_ and
+  // prefix_back_ to return fewer false positives than memchr(3) alone.
+  const void* PrefixAccel_FrontAndBack(const void* data, size_t size);

  // Returns string representation of program for debugging.
  std::string Dump();
@ -297,10 +305,6 @@ class Prog {
  // Compute bytemap.
  void ComputeByteMap();

-  // Computes whether all matches must begin with the same first
-  // byte, and if so, returns that byte.  If not, returns -1.
-  int ComputeFirstByte();
-
  // Run peep-hole optimizer on program.
  void Optimize();

@ -402,8 +406,9 @@ class Prog {
  int start_unanchored_;    // unanchored entry point for program
  int size_;                // number of instructions
  int bytemap_range_;       // bytemap_[x] < bytemap_range_
-  int first_byte_;          // required first byte for match, or -1 if none
-  int flags_;               // regexp parse flags
+  size_t prefix_size_;      // size of prefix (0 if no prefix)
+  int prefix_front_;        // first byte of prefix (-1 if no prefix)
+  int prefix_back_;         // last byte of prefix (-1 if no prefix)

  int list_count_;                 // count of lists (see above)
  int inst_count_[kNumInst];       // count of instructions by opcode
@ -419,7 +424,6 @@ class Prog {

  uint8_t bytemap_[256];    // map from input bytes to byte classes

-  std::once_flag first_byte_once_;
  std::once_flag dfa_first_once_;
  std::once_flag dfa_longest_once_;

--- a/extern/re2/re2/re2.cc
+++ b/extern/re2/re2/re2.cc
@ -12,10 +12,14 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <algorithm>
+#include <atomic>
 #include <iterator>
 #include <mutex>
 #include <string>
@ -24,11 +28,11 @@

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/sparse_array.h"
 #include "util/strutil.h"
 #include "util/utf.h"
 #include "re2/prog.h"
 #include "re2/regexp.h"
+#include "re2/sparse_array.h"

 namespace re2 {

@ -79,6 +83,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
      return RE2::ErrorMissingBracket;
    case re2::kRegexpMissingParen:
      return RE2::ErrorMissingParen;
+    case re2::kRegexpUnexpectedParen:
+      return RE2::ErrorUnexpectedParen;
    case re2::kRegexpTrailingBackslash:
      return RE2::ErrorTrailingBackslash;
    case re2::kRegexpRepeatArgument:
@ -172,15 +178,20 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
    empty_group_names = new std::map<int, std::string>;
  });

-  pattern_ = std::string(pattern);
+  pattern_.assign(pattern.data(), pattern.size());
  options_.Copy(options);
  entire_regexp_ = NULL;
+  error_ = empty_string;
+  error_code_ = NoError;
+  error_arg_.clear();
+  prefix_.clear();
+  prefix_foldcase_ = false;
  suffix_regexp_ = NULL;
  prog_ = NULL;
  num_captures_ = -1;
+  is_one_pass_ = false;
+
  rprog_ = NULL;
-  error_ = empty_string;
-  error_code_ = NoError;
  named_groups_ = NULL;
  group_names_ = NULL;

@ -239,9 +250,11 @@ re2::Prog* RE2::ReverseProg() const {
    if (re->rprog_ == NULL) {
      if (re->options_.log_errors())
        LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
-      re->error_ =
-          new std::string("pattern too large - reverse compile failed");
-      re->error_code_ = RE2::ErrorPatternTooLarge;
+      // We no longer touch error_ and error_code_ because failing to compile
+      // the reverse Prog is not a showstopper: falling back to NFA execution
+      // is fine. More importantly, an RE2 object is supposed to be logically
+      // immutable: whatever ok() would have returned after Init() completed,
+      // it should continue to return that no matter what ReverseProg() does.
    }
  }, this);
  return rprog_;
@ -277,28 +290,54 @@ int RE2::ReverseProgramSize() const {
  return prog->size();
 }

-static int Fanout(Prog* prog, std::map<int, int>* histogram) {
-  SparseArray<int> fanout(prog->size());
-  prog->Fanout(&fanout);
-  histogram->clear();
-  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
-    // TODO(junyer): Optimise this?
-    int bucket = 0;
-    while (1 << bucket < i->value()) {
-      bucket++;
+// Finds the most significant non-zero bit in n.
+static int FindMSBSet(uint32_t n) {
+  DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+  return 31 ^ __builtin_clz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+  unsigned long c;
+  _BitScanReverse(&c, n);
+  return static_cast<int>(c);
+#else
+  int c = 0;
+  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+    uint32_t word = n >> shift;
+    if (word != 0) {
+      n = word;
+      c += shift;
    }
-    (*histogram)[bucket]++;
  }
-  return histogram->rbegin()->first;
+  return c;
+#endif
 }

-int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+static int Fanout(Prog* prog, std::vector<int>* histogram) {
+  SparseArray<int> fanout(prog->size());
+  prog->Fanout(&fanout);
+  int data[32] = {};
+  int size = 0;
+  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
+    if (i->value() == 0)
+      continue;
+    uint32_t value = i->value();
+    int bucket = FindMSBSet(value);
+    bucket += value & (value-1) ? 1 : 0;
+    ++data[bucket];
+    size = std::max(size, bucket+1);
+  }
+  if (histogram != NULL)
+    histogram->assign(data, data+size);
+  return size-1;
+}
+
+int RE2::ProgramFanout(std::vector<int>* histogram) const {
  if (prog_ == NULL)
    return -1;
  return Fanout(prog_, histogram);
 }

-int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
+int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
  if (prog_ == NULL)
    return -1;
  Prog* prog = ReverseProg();
@ -368,6 +407,8 @@ bool RE2::Replace(std::string* str,
                  const StringPiece& rewrite) {
  StringPiece vec[kVecSize];
  int nvec = 1 + MaxSubmatch(rewrite);
+  if (nvec > 1 + re.NumberOfCapturingGroups())
+    return false;
  if (nvec > static_cast<int>(arraysize(vec)))
    return false;
  if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
@ -377,8 +418,8 @@ bool RE2::Replace(std::string* str,
  if (!re.Rewrite(&s, rewrite, vec, nvec))
    return false;

-  assert(vec[0].begin() >= str->data());
-  assert(vec[0].end() <= str->data()+str->size());
+  assert(vec[0].data() >= str->data());
+  assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
  str->replace(vec[0].data() - str->data(), vec[0].size(), s);
  return true;
 }
@ -388,6 +429,8 @@ int RE2::GlobalReplace(std::string* str,
                       const StringPiece& rewrite) {
  StringPiece vec[kVecSize];
  int nvec = 1 + MaxSubmatch(rewrite);
+  if (nvec > 1 + re.NumberOfCapturingGroups())
+    return false;
  if (nvec > static_cast<int>(arraysize(vec)))
    return false;

@ -406,9 +449,9 @@ int RE2::GlobalReplace(std::string* str,
    if (!re.Match(*str, static_cast<size_t>(p - str->data()),
                  str->size(), UNANCHORED, vec, nvec))
      break;
-    if (p < vec[0].begin())
-      out.append(p, vec[0].begin() - p);
-    if (vec[0].begin() == lastend && vec[0].size() == 0) {
+    if (p < vec[0].data())
+      out.append(p, vec[0].data() - p);
+    if (vec[0].data() == lastend && vec[0].empty()) {
      // Disallow empty match at end of last match: skip ahead.
      //
      // fullrune() takes int, not ptrdiff_t. However, it just looks
@ -439,7 +482,7 @@ int RE2::GlobalReplace(std::string* str,
      continue;
    }
    re.Rewrite(&out, rewrite, vec, nvec);
-    p = vec[0].end();
+    p = vec[0].data() + vec[0].size();
    lastend = p;
    count++;
  }
@ -460,9 +503,10 @@ bool RE2::Extract(const StringPiece& text,
                  std::string* out) {
  StringPiece vec[kVecSize];
  int nvec = 1 + MaxSubmatch(rewrite);
+  if (nvec > 1 + re.NumberOfCapturingGroups())
+    return false;
  if (nvec > static_cast<int>(arraysize(vec)))
    return false;
-
  if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
    return false;

@ -610,6 +654,8 @@ bool RE2::Match(const StringPiece& text,
  // If the regexp is anchored explicitly, must not be in middle of text.
  if (prog_->anchor_start() && startpos != 0)
    return false;
+  if (prog_->anchor_end() && endpos != text.size())
+    return false;

  // If the regexp is anchored explicitly, update re_anchor
  // so that we can potentially fall into a faster case below.
@ -643,7 +689,6 @@ bool RE2::Match(const StringPiece& text,
  Prog::MatchKind kind = Prog::kFirstMatch;
  if (options_.longest_match())
    kind = Prog::kLongestMatch;
-  bool skipped_test = false;

  bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);

@ -655,38 +700,82 @@ bool RE2::Match(const StringPiece& text,
  bool can_bit_state = prog_->CanBitState();
  size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();

+#ifdef RE2_HAVE_THREAD_LOCAL
+  hooks::context = this;
+#endif
  bool dfa_failed = false;
+  bool skipped_test = false;
  switch (re_anchor) {
    default:
+      LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
+      return false;
+
    case UNANCHORED: {
+      if (prog_->anchor_end()) {
+        // This is a very special case: we don't need the forward DFA because
+        // we already know where the match must end! Instead, the reverse DFA
+        // can say whether there is a match and (optionally) where it starts.
+        Prog* prog = ReverseProg();
+        if (prog == NULL) {
+          // Fall back to NFA below.
+          skipped_test = true;
+          break;
+        }
+        if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
+                             Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
+          if (dfa_failed) {
+            if (options_.log_errors())
+              LOG(ERROR) << "DFA out of memory: "
+                         << "pattern length " << pattern_.size() << ", "
+                         << "program size " << prog->size() << ", "
+                         << "list count " << prog->list_count() << ", "
+                         << "bytemap range " << prog->bytemap_range();
+            // Fall back to NFA below.
+            skipped_test = true;
+            break;
+          }
+          return false;
+        }
+        if (matchp == NULL)  // Matched.  Don't care where.
+          return true;
+        break;
+      }
+
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
                            matchp, &dfa_failed, NULL)) {
        if (dfa_failed) {
          if (options_.log_errors())
-            LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
-                       << "bytemap range " << prog_->bytemap_range() << ", "
-                       << "list count " << prog_->list_count();
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog_->size() << ", "
+                       << "list count " << prog_->list_count() << ", "
+                       << "bytemap range " << prog_->bytemap_range();
          // Fall back to NFA below.
          skipped_test = true;
          break;
        }
        return false;
      }
-      if (matchp == NULL)  // Matched.  Don't care where
+      if (matchp == NULL)  // Matched.  Don't care where.
        return true;
-      // SearchDFA set match[0].end() but didn't know where the
-      // match started.  Run the regexp backward from match[0].end()
+      // SearchDFA set match.end() but didn't know where the
+      // match started.  Run the regexp backward from match.end()
      // to find the longest possible match -- that's where it started.
      Prog* prog = ReverseProg();
-      if (prog == NULL)
-        return false;
+      if (prog == NULL) {
+        // Fall back to NFA below.
+        skipped_test = true;
+        break;
+      }
      if (!prog->SearchDFA(match, text, Prog::kAnchored,
                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
        if (dfa_failed) {
          if (options_.log_errors())
-            LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", "
-                       << "bytemap range " << prog->bytemap_range() << ", "
-                       << "list count " << prog->list_count();
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog->size() << ", "
+                       << "list count " << prog->list_count() << ", "
+                       << "bytemap range " << prog->bytemap_range();
          // Fall back to NFA below.
          skipped_test = true;
          break;
@ -724,9 +813,11 @@ bool RE2::Match(const StringPiece& text,
                            &match, &dfa_failed, NULL)) {
        if (dfa_failed) {
          if (options_.log_errors())
-            LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
-                       << "bytemap range " << prog_->bytemap_range() << ", "
-                       << "list count " << prog_->list_count();
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog_->size() << ", "
+                       << "list count " << prog_->list_count() << ", "
+                       << "bytemap range " << prog_->bytemap_range();
          // Fall back to NFA below.
          skipped_test = true;
          break;
@ -928,13 +1019,13 @@ bool RE2::Rewrite(std::string* out,
      int n = (c - '0');
      if (n >= veclen) {
        if (options_.log_errors()) {
-          LOG(ERROR) << "requested group " << n
-                     << " in regexp " << rewrite.data();
+          LOG(ERROR) << "invalid substitution \\" << n
+                     << " from " << veclen << " groups";
        }
        return false;
      }
      StringPiece snip = vec[n];
-      if (snip.size() > 0)
+      if (!snip.empty())
        out->append(snip.data(), snip.size());
    } else if (c == '\\') {
      out->push_back('\\');
@ -949,41 +1040,49 @@ bool RE2::Rewrite(std::string* out,

 /***** Parsers for various types *****/

-bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
+namespace re2_internal {
+
+template <>
+bool Parse(const char* str, size_t n, void* dest) {
  // We fail if somebody asked us to store into a non-NULL void* pointer
  return (dest == NULL);
 }

-bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
+template <>
+bool Parse(const char* str, size_t n, std::string* dest) {
  if (dest == NULL) return true;
-  reinterpret_cast<std::string*>(dest)->assign(str, n);
+  dest->assign(str, n);
  return true;
 }

-bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+template <>
+bool Parse(const char* str, size_t n, StringPiece* dest) {
  if (dest == NULL) return true;
-  *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
+  *dest = StringPiece(str, n);
  return true;
 }

-bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
+template <>
+bool Parse(const char* str, size_t n, char* dest) {
  if (n != 1) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<char*>(dest)) = str[0];
+  *dest = str[0];
  return true;
 }

-bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
+template <>
+bool Parse(const char* str, size_t n, signed char* dest) {
  if (n != 1) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<signed char*>(dest)) = str[0];
+  *dest = str[0];
  return true;
 }

-bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
+template <>
+bool Parse(const char* str, size_t n, unsigned char* dest) {
  if (n != 1) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<unsigned char*>(dest)) = str[0];
+  *dest = str[0];
  return true;
 }

@ -1047,10 +1146,40 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
  return buf;
 }

-bool RE2::Arg::parse_long_radix(const char* str,
-                                size_t n,
-                                void* dest,
-                                int radix) {
+template <>
+bool Parse(const char* str, size_t n, float* dest) {
+  if (n == 0) return false;
+  static const int kMaxLength = 200;
+  char buf[kMaxLength+1];
+  str = TerminateNumber(buf, sizeof buf, str, &n, true);
+  char* end;
+  errno = 0;
+  float r = strtof(str, &end);
+  if (end != str + n) return false;   // Leftover junk
+  if (errno) return false;
+  if (dest == NULL) return true;
+  *dest = r;
+  return true;
+}
+
+template <>
+bool Parse(const char* str, size_t n, double* dest) {
+  if (n == 0) return false;
+  static const int kMaxLength = 200;
+  char buf[kMaxLength+1];
+  str = TerminateNumber(buf, sizeof buf, str, &n, true);
+  char* end;
+  errno = 0;
+  double r = strtod(str, &end);
+  if (end != str + n) return false;   // Leftover junk
+  if (errno) return false;
+  if (dest == NULL) return true;
+  *dest = r;
+  return true;
+}
+
+template <>
+bool Parse(const char* str, size_t n, long* dest, int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
@ -1060,14 +1189,12 @@ bool RE2::Arg::parse_long_radix(const char* str,
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<long*>(dest)) = r;
+  *dest = r;
  return true;
 }

-bool RE2::Arg::parse_ulong_radix(const char* str,
-                                 size_t n,
-                                 void* dest,
-                                 int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
@ -1083,62 +1210,52 @@ bool RE2::Arg::parse_ulong_radix(const char* str,
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<unsigned long*>(dest)) = r;
+  *dest = r;
  return true;
 }

-bool RE2::Arg::parse_short_radix(const char* str,
-                                 size_t n,
-                                 void* dest,
-                                 int radix) {
+// Parses a short in the given radix.  The text is first parsed as a long;
+// a value that changes when cast to short is rejected as out of range.
+// A NULL dest validates the text without storing the result.
+template <>
+bool Parse(const char* str, size_t n, short* dest, int radix) {
  long r;
-  if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
-  if ((short)r != r) return false;                         // Out of range
+  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
+  if ((short)r != r) return false;              // Out of range
  if (dest == NULL) return true;
-  *(reinterpret_cast<short*>(dest)) = (short)r;
+  *dest = (short)r;
  return true;
 }

-bool RE2::Arg::parse_ushort_radix(const char* str,
-                                  size_t n,
-                                  void* dest,
-                                  int radix) {
+// Parses an unsigned short in the given radix.  The text is first parsed as
+// an unsigned long; a value that changes when cast to unsigned short is
+// rejected as out of range.  A NULL dest validates the text without storing
+// the result.
+template <>
+bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
  unsigned long r;
-  if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
-  if ((unsigned short)r != r) return false;                 // Out of range
+  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
+  if ((unsigned short)r != r) return false;     // Out of range
  if (dest == NULL) return true;
-  *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
+  *dest = (unsigned short)r;
  return true;
 }

-bool RE2::Arg::parse_int_radix(const char* str,
-                               size_t n,
-                               void* dest,
-                               int radix) {
+// Parses an int in the given radix.  The text is first parsed as a long;
+// a value that changes when cast to int is rejected as out of range.
+// A NULL dest validates the text without storing the result.
+template <>
+bool Parse(const char* str, size_t n, int* dest, int radix) {
  long r;
-  if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
-  if ((int)r != r) return false;                           // Out of range
+  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
+  if ((int)r != r) return false;                // Out of range
  if (dest == NULL) return true;
-  *(reinterpret_cast<int*>(dest)) = (int)r;
+  *dest = (int)r;
  return true;
 }

-bool RE2::Arg::parse_uint_radix(const char* str,
-                                size_t n,
-                                void* dest,
-                                int radix) {
+// Parses an unsigned int in the given radix.  The text is first parsed as
+// an unsigned long; a value that changes when cast to unsigned int is
+// rejected as out of range.  A NULL dest validates the text without storing
+// the result.
+template <>
+bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
  unsigned long r;
-  if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
-  if ((unsigned int)r != r) return false;                   // Out of range
+  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
+  if ((unsigned int)r != r) return false;       // Out of range
  if (dest == NULL) return true;
-  *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
+  *dest = (unsigned int)r;
  return true;
 }

-bool RE2::Arg::parse_longlong_radix(const char* str,
-                                    size_t n,
-                                    void* dest,
-                                    int radix) {
+template <>
+bool Parse(const char* str, size_t n, long long* dest, int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
@ -1148,14 +1265,12 @@ bool RE2::Arg::parse_longlong_radix(const char* str,
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<long long*>(dest)) = r;
+  *dest = r;
  return true;
 }

-bool RE2::Arg::parse_ulonglong_radix(const char* str,
-                                     size_t n,
-                                     void* dest,
-                                     int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
  if (n == 0) return false;
  char buf[kMaxNumberLength+1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
@ -1170,67 +1285,47 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
  if (end != str + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
-  *(reinterpret_cast<unsigned long long*>(dest)) = r;
+  *dest = r;
  return true;
 }

-static bool parse_double_float(const char* str, size_t n, bool isfloat,
-                               void* dest) {
-  if (n == 0) return false;
-  static const int kMaxLength = 200;
-  char buf[kMaxLength+1];
-  str = TerminateNumber(buf, sizeof buf, str, &n, true);
-  char* end;
-  errno = 0;
-  double r;
-  if (isfloat) {
-    r = strtof(str, &end);
-  } else {
-    r = strtod(str, &end);
-  }
-  if (end != str + n) return false;   // Leftover junk
-  if (errno) return false;
-  if (dest == NULL) return true;
-  if (isfloat) {
-    *(reinterpret_cast<float*>(dest)) = (float)r;
-  } else {
-    *(reinterpret_cast<double*>(dest)) = r;
-  }
-  return true;
-}
+}  // namespace re2_internal

-bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
-  return parse_double_float(str, n, false, dest);
-}
+namespace hooks {

-bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
-  return parse_double_float(str, n, true, dest);
-}
+#ifdef RE2_HAVE_THREAD_LOCAL
+thread_local const RE2* context = NULL;
+#endif

-#define DEFINE_INTEGER_PARSER(name)                                            \
-  bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) {         \
-    return parse_##name##_radix(str, n, dest, 10);                             \
-  }                                                                            \
-  bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) {   \
-    return parse_##name##_radix(str, n, dest, 16);                             \
-  }                                                                            \
-  bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
-    return parse_##name##_radix(str, n, dest, 8);                              \
-  }                                                                            \
-  bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n,              \
-                                       void* dest) {                           \
-    return parse_##name##_radix(str, n, dest, 0);                              \
-  }
+// Storage for a single callback pointer of type T*, kept in a std::atomic.
+// Store() publishes a new callback with release ordering and Load() reads
+// the current one with acquire ordering.
+template <typename T>
+union Hook {
+  void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
+  T* Load() const { return cb_.load(std::memory_order_acquire); }

-DEFINE_INTEGER_PARSER(short);
-DEFINE_INTEGER_PARSER(ushort);
-DEFINE_INTEGER_PARSER(int);
-DEFINE_INTEGER_PARSER(uint);
-DEFINE_INTEGER_PARSER(long);
-DEFINE_INTEGER_PARSER(ulong);
-DEFINE_INTEGER_PARSER(longlong);
-DEFINE_INTEGER_PARSER(ulonglong);
+#if !defined(__clang__) && defined(_MSC_VER)
+  // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
+  // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
+  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
+                "std::atomic<T*> must be always lock-free");
+  // Declared ahead of cb_, so in this configuration it is the first member
+  // of the union.
+  T* cb_for_constinit_;
+#endif

-#undef DEFINE_INTEGER_PARSER
+  // The member that Store() and Load() operate on.
+  std::atomic<T*> cb_;
+};
+
+// No-op callback; DEFINE_HOOK installs DoNothing<type> as the initial value
+// of each hook it defines.
+template <typename T>
+static void DoNothing(const T&) {}
+
+// Defines, for hook type `type`, the file-local storage name##_hook
+// (initialized to &DoNothing<type>) plus the Set<type>Hook() and
+// Get<type>Hook() accessors, which store to and load from that storage.
+#define DEFINE_HOOK(type, name)                                       \
+  static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
+  void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
+  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
+
+DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
+DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
+
+#undef DEFINE_HOOK
+
+}  // namespace hooks

 }  // namespace re2
--- a/extern/re2/re2/re2.h
+++ b/extern/re2/re2/re2.h
@ -30,6 +30,19 @@
 //   "(?i)hello"           -- (?i) turns on case-insensitive matching
 //   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
 //
+// The double backslashes are needed when writing C++ string literals.
+// However, they should NOT be used when writing C++11 raw string literals:
+//
+//   R"(hello (\w+) world)"  -- \w matches a "word" character
+//   R"(version (\d+))"      -- \d matches a digit
+//   R"(hello\s+world)"      -- \s matches any whitespace character
+//   R"(\b(\w+)\b)"          -- \b matches non-empty string at word boundary
+//   R"((?i)hello)"          -- (?i) turns on case-insensitive matching
+//   R"(/\*(.*?)\*/)"        -- .*? matches . minimum no. of times possible
+//
+// When using UTF-8 encoding, case-insensitive matching will perform
+// simple case folding, not full case folding.
+//
 // -----------------------------------------------------------------------
 // MATCHING INTERFACE:
 //
@ -195,6 +208,12 @@
 #include <map>
 #include <mutex>
 #include <string>
+#include <type_traits>
+#include <vector>
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif

 #include "re2/stringpiece.h"

@ -229,6 +248,7 @@ class RE2 {
    ErrorBadCharRange,       // bad character class range
    ErrorMissingBracket,     // missing closing ]
    ErrorMissingParen,       // missing closing )
+    ErrorUnexpectedParen,    // unexpected closing )
    ErrorTrailingBackslash,  // trailing \ at end of regexp
    ErrorRepeatArgument,     // repeat argument missing, e.g. "*"
    ErrorRepeatSize,         // bad repetition argument
@ -287,11 +307,11 @@ class RE2 {
  int ProgramSize() const;
  int ReverseProgramSize() const;

-  // EXPERIMENTAL! SUBJECT TO CHANGE!
-  // Outputs the program fanout as a histogram bucketed by powers of 2.
+  // If histogram is not null, outputs the program fanout
+  // as a histogram bucketed by powers of 2.
  // Returns the number of the largest non-empty bucket.
-  int ProgramFanout(std::map<int, int>* histogram) const;
-  int ReverseProgramFanout(std::map<int, int>* histogram) const;
+  int ProgramFanout(std::vector<int>* histogram) const;
+  int ReverseProgramFanout(std::vector<int>* histogram) const;

  // Returns the underlying Regexp; not for general use.
  // Returns entire_regexp_ so that callers don't need
@ -349,12 +369,12 @@ class RE2 {
  //    (void*)NULL     (the corresponding matched sub-pattern is not copied)
  //
  // Returns true iff all of the following conditions are satisfied:
-  //   a. "text" matches "re" exactly
-  //   b. The number of matched sub-patterns is >= number of supplied pointers
+  //   a. "text" matches "re" fully - from the beginning to the end of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
  //   c. The "i"th argument has a suitable type for holding the
  //      string captured as the "i"th sub-pattern.  If you pass in
  //      NULL for the "i"th argument, or pass fewer arguments than
-  //      number of sub-patterns, "i"th captured sub-pattern is
+  //      number of sub-patterns, the "i"th captured sub-pattern is
  //      ignored.
  //
  // CAVEAT: An optional sub-pattern that does not exist in the
@ -368,8 +388,17 @@ class RE2 {
    return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
  }

-  // Exactly like FullMatch(), except that "re" is allowed to match
-  // a substring of "text".
+  // Like FullMatch(), except that "re" is allowed to match a substring
+  // of "text".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "re" partially - for some substring of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
  template <typename... A>
  static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
    return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
@ -378,7 +407,16 @@ class RE2 {
  // Like FullMatch() and PartialMatch(), except that "re" has to match
  // a prefix of the text, and "input" is advanced past the matched
  // text.  Note: "input" is modified iff this routine returns true
-  // and "re" matched a non-empty substring of "text".
+  // and "re" matched a non-empty substring of "input".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some prefix of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
  template <typename... A>
  static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
    return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
@ -388,6 +426,15 @@ class RE2 {
  // the text.  That is, "re" need not start its match at the beginning
  // of "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds
  // the next word in "s" and stores it in "word".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some substring of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
  template <typename... A>
  static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
    return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
@ -443,7 +490,7 @@ class RE2 {

  // Escapes all potentially meaningful regexp characters in
  // 'unquoted'.  The returned string, used as a regular expression,
-  // will exactly match the original string.  For example,
+  // will match exactly the original string.  For example,
  //           1.5-2.0?
  // may become:
  //           1\.5\-2\.0\?
@ -626,17 +673,6 @@ class RE2 {
    Encoding encoding() const { return encoding_; }
    void set_encoding(Encoding encoding) { encoding_ = encoding; }

-    // Legacy interface to encoding.
-    // TODO(rsc): Remove once clients have been converted.
-    bool utf8() const { return encoding_ == EncodingUTF8; }
-    void set_utf8(bool b) {
-      if (b) {
-        encoding_ = EncodingUTF8;
-      } else {
-        encoding_ = EncodingLatin1;
-      }
-    }
-
    bool posix_syntax() const { return posix_syntax_; }
    void set_posix_syntax(bool b) { posix_syntax_ = b; }

@ -699,32 +735,12 @@ class RE2 {
  const Options& options() const { return options_; }

  // Argument converters; see below.
-  static inline Arg CRadix(short* x);
-  static inline Arg CRadix(unsigned short* x);
-  static inline Arg CRadix(int* x);
-  static inline Arg CRadix(unsigned int* x);
-  static inline Arg CRadix(long* x);
-  static inline Arg CRadix(unsigned long* x);
-  static inline Arg CRadix(long long* x);
-  static inline Arg CRadix(unsigned long long* x);
-
-  static inline Arg Hex(short* x);
-  static inline Arg Hex(unsigned short* x);
-  static inline Arg Hex(int* x);
-  static inline Arg Hex(unsigned int* x);
-  static inline Arg Hex(long* x);
-  static inline Arg Hex(unsigned long* x);
-  static inline Arg Hex(long long* x);
-  static inline Arg Hex(unsigned long long* x);
-
-  static inline Arg Octal(short* x);
-  static inline Arg Octal(unsigned short* x);
-  static inline Arg Octal(int* x);
-  static inline Arg Octal(unsigned int* x);
-  static inline Arg Octal(long* x);
-  static inline Arg Octal(unsigned long* x);
-  static inline Arg Octal(long long* x);
-  static inline Arg Octal(unsigned long long* x);
+  template <typename T>
+  static Arg CRadix(T* ptr);
+  template <typename T>
+  static Arg Hex(T* ptr);
+  template <typename T>
+  static Arg Octal(T* ptr);

 private:
  void Init(const StringPiece& pattern, const Options& options);
@ -737,29 +753,26 @@ class RE2 {

  re2::Prog* ReverseProg() const;

-  std::string   pattern_;          // string regular expression
-  Options       options_;          // option flags
-  std::string   prefix_;           // required prefix (before regexp_)
-  bool          prefix_foldcase_;  // prefix is ASCII case-insensitive
-  re2::Regexp*  entire_regexp_;    // parsed regular expression
-  re2::Regexp*  suffix_regexp_;    // parsed regular expression, prefix removed
-  re2::Prog*    prog_;             // compiled program for regexp
-  int           num_captures_;     // Number of capturing groups
-  bool          is_one_pass_;      // can use prog_->SearchOnePass?
-
-  mutable re2::Prog*          rprog_;    // reverse program for regexp
-  mutable const std::string*  error_;    // Error indicator
-                                         // (or points to empty string)
-  mutable ErrorCode      error_code_;    // Error code
-  mutable std::string    error_arg_;     // Fragment of regexp showing error
+  std::string pattern_;         // string regular expression
+  Options options_;             // option flags
+  re2::Regexp* entire_regexp_;  // parsed regular expression
+  const std::string* error_;    // error indicator (or points to empty string)
+  ErrorCode error_code_;        // error code
+  std::string error_arg_;       // fragment of regexp showing error
+  std::string prefix_;          // required prefix (before suffix_regexp_)
+  bool prefix_foldcase_;        // prefix_ is ASCII case-insensitive
+  re2::Regexp* suffix_regexp_;  // parsed regular expression, prefix_ removed
+  re2::Prog* prog_;             // compiled program for regexp
+  int num_captures_;            // number of capturing groups
+  bool is_one_pass_;            // can use prog_->SearchOnePass?

+  // Reverse Prog for DFA execution only
+  mutable re2::Prog* rprog_;
  // Map from capture names to indices
  mutable const std::map<std::string, int>* named_groups_;
-
  // Map from capture indices to names
  mutable const std::map<int, std::string>* group_names_;

-  // Onces for lazy computations.
  mutable std::once_flag rprog_once_;
  mutable std::once_flag named_groups_once_;
  mutable std::once_flag group_names_once_;
@ -770,137 +783,134 @@ class RE2 {

 /***** Implementation details *****/

-// Hex/Octal/Binary?
+namespace re2_internal {

-// Special class for parsing into objects that define a ParseFrom() method
-template <class T>
-class _RE2_MatchObject {
- public:
-  static inline bool Parse(const char* str, size_t n, void* dest) {
-    if (dest == NULL) return true;
-    T* object = reinterpret_cast<T*>(dest);
-    return object->ParseFrom(str, n);
-  }
-};
+// Types for which the 3-ary Parse() function template has specializations.
+// The primary template is false; each listed type opts in with a true
+// specialization.  RE2::Arg tests this trait (via CanParse3ary) to decide
+// whether its DoParse3ary-based constructor applies to a given T*.
+template <typename T> struct Parse3ary : public std::false_type {};
+template <> struct Parse3ary<void> : public std::true_type {};
+template <> struct Parse3ary<std::string> : public std::true_type {};
+template <> struct Parse3ary<StringPiece> : public std::true_type {};
+template <> struct Parse3ary<char> : public std::true_type {};
+template <> struct Parse3ary<signed char> : public std::true_type {};
+template <> struct Parse3ary<unsigned char> : public std::true_type {};
+template <> struct Parse3ary<float> : public std::true_type {};
+template <> struct Parse3ary<double> : public std::true_type {};
+
+// Declaration only: this header provides no generic definition, so only
+// explicitly specialized types can be parsed.
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest);
+
+// Types for which the 4-ary Parse() function template has specializations.
+// The primary template is false; each listed integer type opts in with a
+// true specialization.  RE2::Arg tests this trait (via CanParse4ary) to
+// decide whether its DoParse4ary-based constructor applies to a given T*.
+template <typename T> struct Parse4ary : public std::false_type {};
+template <> struct Parse4ary<long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long> : public std::true_type {};
+template <> struct Parse4ary<short> : public std::true_type {};
+template <> struct Parse4ary<unsigned short> : public std::true_type {};
+template <> struct Parse4ary<int> : public std::true_type {};
+template <> struct Parse4ary<unsigned int> : public std::true_type {};
+template <> struct Parse4ary<long long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long long> : public std::true_type {};
+
+// Declaration only: this header provides no generic definition.  Callers in
+// this header pass radix 10 (RE2::Arg), 0 (CRadix), 16 (Hex) or 8 (Octal).
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest, int radix);
+
+}  // namespace re2_internal

 class RE2::Arg {
- public:
-  // Empty constructor so we can declare arrays of RE2::Arg
-  Arg();
+ private:
+  // Names the type int when re2_internal::Parse3ary<T> is true.  Otherwise
+  // std::enable_if has no ::type, so a template that uses this alias is
+  // dropped from overload resolution for that T.
+  template <typename T>
+  using CanParse3ary = typename std::enable_if<
+      re2_internal::Parse3ary<T>::value,
+      int>::type;

-  // Constructor specially designed for NULL arguments
-  Arg(void*);
-  Arg(std::nullptr_t);
+  // Names the type int when re2_internal::Parse4ary<T> is true.  Otherwise
+  // std::enable_if has no ::type, so a template that uses this alias is
+  // dropped from overload resolution for that T.
+  template <typename T>
+  using CanParse4ary = typename std::enable_if<
+      re2_internal::Parse4ary<T>::value,
+      int>::type;
+
+#if !defined(_MSC_VER)
+  // Names the type int when &T::ParseFrom can be converted by static_cast to
+  // bool (T::*)(const char*, size_t); for any other T the expression is
+  // ill-formed and substitution fails.  Compiled out when _MSC_VER is
+  // defined.
+  template <typename T>
+  using CanParseFrom = typename std::enable_if<
+      std::is_member_function_pointer<
+          decltype(static_cast<bool (T::*)(const char*, size_t)>(
+              &T::ParseFrom))>::value,
+      int>::type;
+#endif
+
+ public:
+  // Default construction is the same as constructing from nullptr.
+  Arg() : Arg(nullptr) {}
+  // Null argument: the parser is DoNothing, which accepts any text.
+  Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
+
+  // Pointer to a type listed by Parse3ary: parsed by the 3-ary Parse().
+  template <typename T, CanParse3ary<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
+
+  // Pointer to a type listed by Parse4ary: parsed by the 4-ary Parse(),
+  // to which DoParse4ary passes radix 10.
+  template <typename T, CanParse4ary<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
+
+#if !defined(_MSC_VER)
+  // Pointer to an object with a suitable ParseFrom() member function:
+  // parsed by calling that member.
+  template <typename T, CanParseFrom<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
+#endif

  typedef bool (*Parser)(const char* str, size_t n, void* dest);

-// Type-specific parsers
-#define MAKE_PARSER(type, name)            \
-  Arg(type* p) : arg_(p), parser_(name) {} \
-  Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
+  // Pairs ptr with a caller-supplied parser.  RE2::CRadix(), RE2::Hex() and
+  // RE2::Octal() construct their results through this overload.
+  template <typename T>
+  Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}

-  MAKE_PARSER(char,               parse_char)
-  MAKE_PARSER(signed char,        parse_schar)
-  MAKE_PARSER(unsigned char,      parse_uchar)
-  MAKE_PARSER(float,              parse_float)
-  MAKE_PARSER(double,             parse_double)
-  MAKE_PARSER(std::string,        parse_string)
-  MAKE_PARSER(StringPiece,        parse_stringpiece)
-
-  MAKE_PARSER(short,              parse_short)
-  MAKE_PARSER(unsigned short,     parse_ushort)
-  MAKE_PARSER(int,                parse_int)
-  MAKE_PARSER(unsigned int,       parse_uint)
-  MAKE_PARSER(long,               parse_long)
-  MAKE_PARSER(unsigned long,      parse_ulong)
-  MAKE_PARSER(long long,          parse_longlong)
-  MAKE_PARSER(unsigned long long, parse_ulonglong)
-
-#undef MAKE_PARSER
-
-  // Generic constructor templates
-  template <class T> Arg(T* p)
-      : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { }
-  template <class T> Arg(T* p, Parser parser)
-      : arg_(p), parser_(parser) { }
-
-  // Parse the data
-  bool Parse(const char* str, size_t n) const;
-
- private:
-  void*         arg_;
-  Parser        parser_;
-
-  static bool parse_null          (const char* str, size_t n, void* dest);
-  static bool parse_char          (const char* str, size_t n, void* dest);
-  static bool parse_schar         (const char* str, size_t n, void* dest);
-  static bool parse_uchar         (const char* str, size_t n, void* dest);
-  static bool parse_float         (const char* str, size_t n, void* dest);
-  static bool parse_double        (const char* str, size_t n, void* dest);
-  static bool parse_string        (const char* str, size_t n, void* dest);
-  static bool parse_stringpiece   (const char* str, size_t n, void* dest);
-
-#define DECLARE_INTEGER_PARSER(name)                                       \
- private:                                                                  \
-  static bool parse_##name(const char* str, size_t n, void* dest);         \
-  static bool parse_##name##_radix(const char* str, size_t n, void* dest,  \
-                                   int radix);                             \
-                                                                           \
- public:                                                                   \
-  static bool parse_##name##_hex(const char* str, size_t n, void* dest);   \
-  static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
-  static bool parse_##name##_cradix(const char* str, size_t n, void* dest);
-
-  DECLARE_INTEGER_PARSER(short)
-  DECLARE_INTEGER_PARSER(ushort)
-  DECLARE_INTEGER_PARSER(int)
-  DECLARE_INTEGER_PARSER(uint)
-  DECLARE_INTEGER_PARSER(long)
-  DECLARE_INTEGER_PARSER(ulong)
-  DECLARE_INTEGER_PARSER(longlong)
-  DECLARE_INTEGER_PARSER(ulonglong)
-
-#undef DECLARE_INTEGER_PARSER
-
-};
-
-inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
-inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
-inline RE2::Arg::Arg(std::nullptr_t p) : arg_(p), parser_(parse_null) { }
-
-inline bool RE2::Arg::Parse(const char* str, size_t n) const {
-  return (*parser_)(str, n, arg_);
-}
-
-// This part of the parser, appropriate only for ints, deals with bases
-#define MAKE_INTEGER_PARSER(type, name)                    \
-  inline RE2::Arg RE2::Hex(type* ptr) {                    \
-    return RE2::Arg(ptr, RE2::Arg::parse_##name##_hex);    \
-  }                                                        \
-  inline RE2::Arg RE2::Octal(type* ptr) {                  \
-    return RE2::Arg(ptr, RE2::Arg::parse_##name##_octal);  \
-  }                                                        \
-  inline RE2::Arg RE2::CRadix(type* ptr) {                 \
-    return RE2::Arg(ptr, RE2::Arg::parse_##name##_cradix); \
+  // Runs the stored parser on the n bytes at str, passing the stored
+  // pointer as the destination, and returns the parser's result.
+  bool Parse(const char* str, size_t n) const {
+    return (*parser_)(str, n, arg_);
  }

-MAKE_INTEGER_PARSER(short,              short)
-MAKE_INTEGER_PARSER(unsigned short,     ushort)
-MAKE_INTEGER_PARSER(int,                int)
-MAKE_INTEGER_PARSER(unsigned int,       uint)
-MAKE_INTEGER_PARSER(long,               long)
-MAKE_INTEGER_PARSER(unsigned long,      ulong)
-MAKE_INTEGER_PARSER(long long,          longlong)
-MAKE_INTEGER_PARSER(unsigned long long, ulonglong)
+ private:
+  // Parser installed by the nullptr constructor: ignores all of its
+  // arguments and reports success.
+  static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
+    return true;
+  }

-#undef MAKE_INTEGER_PARSER
+  // Parser for the types listed by Parse3ary: views dest as a T* and hands
+  // the text to the 3-ary re2_internal::Parse().
+  template <typename T>
+  static bool DoParse3ary(const char* str, size_t n, void* dest) {
+    T* const out = reinterpret_cast<T*>(dest);
+    return re2_internal::Parse(str, n, out);
+  }
+
+  // Parser for the types listed by Parse4ary: views dest as a T* and hands
+  // the text to the 4-ary re2_internal::Parse() with radix 10.
+  template <typename T>
+  static bool DoParse4ary(const char* str, size_t n, void* dest) {
+    const int kRadix = 10;
+    T* const out = reinterpret_cast<T*>(dest);
+    return re2_internal::Parse(str, n, out, kRadix);
+  }
+
+#if !defined(_MSC_VER)
+  // Parser for objects that provide ParseFrom(): a NULL dest succeeds
+  // without parsing; otherwise the result of dest->ParseFrom(str, n) is
+  // returned.
+  template <typename T>
+  static bool DoParseFrom(const char* str, size_t n, void* dest) {
+    T* const object = reinterpret_cast<T*>(dest);
+    return object == NULL || object->ParseFrom(str, n);
+  }
+#endif
+
+  void*         arg_;
+  Parser        parser_;
+};
+
+// Wraps ptr in an Arg whose parser calls the 4-ary re2_internal::Parse()
+// with radix 0.
+template <typename T>
+inline RE2::Arg RE2::CRadix(T* ptr) {
+  RE2::Arg::Parser parser = [](const char* str, size_t n, void* dest) -> bool {
+    return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
+  };
+  return RE2::Arg(ptr, parser);
+}
+
+// Wraps ptr in an Arg whose parser calls the 4-ary re2_internal::Parse()
+// with radix 16.
+template <typename T>
+inline RE2::Arg RE2::Hex(T* ptr) {
+  RE2::Arg::Parser parser = [](const char* str, size_t n, void* dest) -> bool {
+    return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
+  };
+  return RE2::Arg(ptr, parser);
+}
+
+// Wraps ptr in an Arg whose parser calls the 4-ary re2_internal::Parse()
+// with radix 8.
+template <typename T>
+inline RE2::Arg RE2::Octal(T* ptr) {
+  RE2::Arg::Parser parser = [](const char* str, size_t n, void* dest) -> bool {
+    return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
+  };
+  return RE2::Arg(ptr, parser);
+}

 #ifndef SWIG
-
 // Silence warnings about missing initializers for members of LazyRE2.
-// Note that we test for Clang first because it defines __GNUC__ as well.
-#if defined(__clang__)
-#elif defined(__GNUC__) && __GNUC__ >= 6
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 #endif

@ -949,7 +959,52 @@ class LazyRE2 {

  void operator=(const LazyRE2&);  // disallowed
 };
-#endif  // SWIG
+#endif
+
+namespace hooks {
+
+// Most platforms support thread_local. Older versions of iOS don't support
+// thread_local, but for the sake of brevity, we lump together all versions
+// of Apple platforms that aren't macOS. If an iOS application really needs
+// the context pointee someday, we can get more specific then...
+#define RE2_HAVE_THREAD_LOCAL
+#if defined(__APPLE__) && !TARGET_OS_OSX
+#undef RE2_HAVE_THREAD_LOCAL
+#endif
+
+// A hook must not make any assumptions regarding the lifetime of the context
+// pointee beyond the current invocation of the hook. Pointers and references
+// obtained via the context pointee should be considered invalidated when the
+// hook returns. Hence, any data about the context pointee (e.g. its pattern)
+// would have to be copied in order for it to be kept for an indefinite time.
+//
+// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
+// could result in infinite mutual recursion. To discourage that possibility,
+// RE2 will not maintain the context pointer correctly when used in that way.
+#ifdef RE2_HAVE_THREAD_LOCAL
+extern thread_local const RE2* context;
+#endif
+
+// Argument type of the DFAStateCacheReset hook: DECLARE_HOOK below makes
+// the callback signature void(const DFAStateCacheReset&).
+// NOTE(review): the field descriptions are inferred from the names only;
+// confirm against the code that invokes the hook.
+struct DFAStateCacheReset {
+  int64_t state_budget;     // presumably the DFA's state budget -- confirm
+  size_t state_cache_size;  // presumably the state cache's size -- confirm
+};
+
+// Argument type of the DFASearchFailure hook: DECLARE_HOOK below makes the
+// callback signature void(const DFASearchFailure&).  It has no fields yet.
+struct DFASearchFailure {
+  // Nothing yet...
+};
+
+// For hook type `type`, declares the callback function type type##Callback
+// (void(const type&)) and the Set<type>Hook() / Get<type>Hook() functions
+// that take and return a pointer to such a callback.
+#define DECLARE_HOOK(type)                  \
+  using type##Callback = void(const type&); \
+  void Set##type##Hook(type##Callback* cb); \
+  type##Callback* Get##type##Hook();
+
+DECLARE_HOOK(DFAStateCacheReset)
+DECLARE_HOOK(DFASearchFailure)
+
+#undef DECLARE_HOOK
+
+}  // namespace hooks

 }  // namespace re2

--- a/extern/re2/re2/regexp.cc
+++ b/extern/re2/re2/regexp.cc
@ -20,6 +20,7 @@
 #include "util/logging.h"
 #include "util/mutex.h"
 #include "util/utf.h"
+#include "re2/pod_array.h"
 #include "re2/stringpiece.h"
 #include "re2/walker-inl.h"

@ -243,16 +244,15 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
      return new Regexp(kRegexpEmptyMatch, flags);
  }

-  Regexp** subcopy = NULL;
+  PODArray<Regexp*> subcopy;
  if (op == kRegexpAlternate && can_factor) {
    // Going to edit sub; make a copy so we don't step on caller.
-    subcopy = new Regexp*[nsub];
-    memmove(subcopy, sub, nsub * sizeof sub[0]);
-    sub = subcopy;
+    subcopy = PODArray<Regexp*>(nsub);
+    memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
+    sub = subcopy.data();
    nsub = FactorAlternation(sub, nsub, flags);
    if (nsub == 1) {
      Regexp* re = sub[0];
-      delete[] subcopy;
      return re;
    }
  }
@ -269,7 +269,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
    subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
                                          nsub - (nbigsub-1)*kMaxNsub, flags,
                                          false);
-    delete[] subcopy;
    return re;
  }

@ -278,8 +277,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
  Regexp** subs = re->sub();
  for (int i = 0; i < nsub; i++)
    subs[i] = sub[i];
-
-  delete[] subcopy;
  return re;
 }

@ -501,6 +498,7 @@ static const char *kErrorStrings[] = {
  "invalid character class range",
  "missing ]",
  "missing )",
+  "unexpected )",
  "trailing \\",
  "no argument for repetition operator",
  "invalid repetition size",
@ -544,9 +542,12 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
      ncapture_++;
    return ignored;
  }
+
  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-    // Should never be called: we use Walk not WalkExponential.
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
+#endif
    return ignored;
  }

@ -575,7 +576,7 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
    return m;
  }

-  Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+  virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
    if (re->op() == kRegexpCapture && re->name() != NULL) {
      // Allocate map once we find a name.
      if (map_ == NULL)
@ -591,8 +592,10 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
  }

  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-    // Should never be called: we use Walk not WalkExponential.
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
+#endif
    return ignored;
  }

@ -621,7 +624,7 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
    return m;
  }

-  Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+  virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
    if (re->op() == kRegexpCapture && re->name() != NULL) {
      // Allocate map once we find a name.
      if (map_ == NULL)
@ -633,8 +636,10 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
  }

  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
-    // Should never be called: we use Walk not WalkExponential.
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
+#endif
    return ignored;
  }

@ -651,78 +656,89 @@ std::map<int, std::string>* Regexp::CaptureNames() {
  return w.TakeMap();
 }

+void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
+                         std::string* bytes) {
+  if (latin1) {
+    bytes->resize(nrunes);
+    for (int i = 0; i < nrunes; i++)
+      (*bytes)[i] = static_cast<char>(runes[i]);
+  } else {
+    bytes->resize(nrunes * UTFmax);  // worst case
+    char* p = &(*bytes)[0];
+    for (int i = 0; i < nrunes; i++)
+      p += runetochar(p, &runes[i]);
+    bytes->resize(p - &(*bytes)[0]);
+    bytes->shrink_to_fit();
+  }
+}
+
 // Determines whether regexp matches must be anchored
 // with a fixed string prefix.  If so, returns the prefix and
 // the regexp that remains after the prefix.  The prefix might
 // be ASCII case-insensitive.
 bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
                            Regexp** suffix) {
+  prefix->clear();
+  *foldcase = false;
+  *suffix = NULL;
+
  // No need for a walker: the regexp must be of the form
  // 1. some number of ^ anchors
  // 2. a literal char or string
  // 3. the rest
-  prefix->clear();
-  *foldcase = false;
-  *suffix = NULL;
  if (op_ != kRegexpConcat)
    return false;
-
-  // Some number of anchors, then a literal or concatenation.
  int i = 0;
-  Regexp** sub = this->sub();
-  while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
+  while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
    i++;
  if (i == 0 || i >= nsub_)
    return false;
-
-  Regexp* re = sub[i];
-  switch (re->op_) {
-    default:
-      return false;
-
-    case kRegexpLiteralString:
-      // Convert to string in proper encoding.
-      if (re->parse_flags() & Latin1) {
-        prefix->resize(re->nrunes_);
-        for (int j = 0; j < re->nrunes_; j++)
-          (*prefix)[j] = static_cast<char>(re->runes_[j]);
-      } else {
-        // Convert to UTF-8 in place.
-        // Assume worst-case space and then trim.
-        prefix->resize(re->nrunes_ * UTFmax);
-        char *p = &(*prefix)[0];
-        for (int j = 0; j < re->nrunes_; j++) {
-          Rune r = re->runes_[j];
-          if (r < Runeself)
-            *p++ = static_cast<char>(r);
-          else
-            p += runetochar(p, &r);
-        }
-        prefix->resize(p - &(*prefix)[0]);
-      }
-      break;
-
-    case kRegexpLiteral:
-      if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
-        prefix->append(1, static_cast<char>(re->rune_));
-      } else {
-        char buf[UTFmax];
-        prefix->append(buf, runetochar(buf, &re->rune_));
-      }
-      break;
-  }
-  *foldcase = (sub[i]->parse_flags() & FoldCase) != 0;
+  Regexp* re = sub()[i];
+  if (re->op_ != kRegexpLiteral &&
+      re->op_ != kRegexpLiteralString)
+    return false;
  i++;
-
-  // The rest.
  if (i < nsub_) {
    for (int j = i; j < nsub_; j++)
-      sub[j]->Incref();
-    re = Concat(sub + i, nsub_ - i, parse_flags());
+      sub()[j]->Incref();
+    *suffix = Concat(sub() + i, nsub_ - i, parse_flags());
  } else {
-    re = new Regexp(kRegexpEmptyMatch, parse_flags());
+    *suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
  }
-  *suffix = re;
+
+  bool latin1 = (re->parse_flags() & Latin1) != 0;
+  Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+  int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+  ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+  *foldcase = (re->parse_flags() & FoldCase) != 0;
+  return true;
+}
+
+// Determines whether regexp matches must be unanchored
+// with a fixed string prefix.  If so, returns the prefix.
+// The prefix might be ASCII case-insensitive.
+bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
+  prefix->clear();
+  *foldcase = false;
+
+  // No need for a walker: the regexp must either begin with or be
+  // a literal char or string. We "see through" capturing groups,
+  // but make no effort to glue multiple prefix fragments together.
+  Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
+  while (re->op_ == kRegexpCapture) {
+    re = re->sub()[0];
+    if (re->op_ == kRegexpConcat && re->nsub_ > 0)
+      re = re->sub()[0];
+  }
+  if (re->op_ != kRegexpLiteral &&
+      re->op_ != kRegexpLiteralString)
+    return false;
+
+  bool latin1 = (re->parse_flags() & Latin1) != 0;
+  Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+  int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+  ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+  *foldcase = (re->parse_flags() & FoldCase) != 0;
  return true;
 }

@ -903,7 +919,7 @@ void CharClassBuilder::Negate() {
 // The ranges are allocated in the same block as the header,
 // necessitating a special allocator and Delete method.

-CharClass* CharClass::New(int maxranges) {
+CharClass* CharClass::New(size_t maxranges) {
  CharClass* cc;
  uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
  cc = reinterpret_cast<CharClass*>(data);
@ -920,7 +936,7 @@ void CharClass::Delete() {
 }

 CharClass* CharClass::Negate() {
-  CharClass* cc = CharClass::New(nranges_+1);
+  CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1));
  cc->folds_ascii_ = folds_ascii_;
  cc->nrunes_ = Runemax + 1 - nrunes_;
  int n = 0;
@ -957,7 +973,7 @@ bool CharClass::Contains(Rune r) {
 }

 CharClass* CharClassBuilder::GetCharClass() {
-  CharClass* cc = CharClass::New(static_cast<int>(ranges_.size()));
+  CharClass* cc = CharClass::New(ranges_.size());
  int n = 0;
  for (iterator it = begin(); it != end(); ++it)
    cc->ranges_[n++] = *it;
--- a/extern/re2/re2/regexp.h
+++ b/extern/re2/re2/regexp.h
@ -86,6 +86,7 @@
 // form accessible to clients, so that client code can analyze the
 // parsed regular expressions.

+#include <stddef.h>
 #include <stdint.h>
 #include <map>
 #include <set>
@ -177,6 +178,7 @@ enum RegexpStatusCode {
  kRegexpBadCharRange,       // bad character class range
  kRegexpMissingBracket,     // missing closing ]
  kRegexpMissingParen,       // missing closing )
+  kRegexpUnexpectedParen,    // unexpected closing )
  kRegexpTrailingBackslash,  // at end of regexp
  kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"
  kRegexpRepeatSize,         // bad repetition argument
@ -258,7 +260,7 @@ class CharClass {
 private:
  CharClass();  // not implemented
  ~CharClass();  // not implemented
-  static CharClass* New(int maxranges);
+  static CharClass* New(size_t maxranges);

  friend class CharClassBuilder;

@ -440,6 +442,13 @@ class Regexp {
  bool RequiredPrefix(std::string* prefix, bool* foldcase,
                      Regexp** suffix);

+  // Whether every match of this regexp must be unanchored and
+  // begin with a non-empty fixed string (perhaps after ASCII
+  // case-folding).  If so, returns the prefix.
+  // Callers should expect *prefix and *foldcase to be "zeroed"
+  // regardless of the return value.
+  bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
+
 private:
  // Constructor allocates vectors as appropriate for operator.
  explicit Regexp(RegexpOp op, ParseFlags parse_flags);
--- a/extern/re2/re2/set.cc
+++ b/extern/re2/re2/set.cc
@ -7,30 +7,49 @@
 #include <stddef.h>
 #include <algorithm>
 #include <memory>
+#include <utility>

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/pod_array.h"
-#include "re2/stringpiece.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
 #include "re2/regexp.h"
+#include "re2/stringpiece.h"

 namespace re2 {

-RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
-  options_.Copy(options);
+RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
+    : options_(options),
+      anchor_(anchor),
+      compiled_(false),
+      size_(0) {
  options_.set_never_capture(true);  // might unblock some optimisations
-  anchor_ = anchor;
-  prog_ = NULL;
-  compiled_ = false;
-  size_ = 0;
 }

 RE2::Set::~Set() {
  for (size_t i = 0; i < elem_.size(); i++)
    elem_[i].second->Decref();
-  delete prog_;
+}
+
+RE2::Set::Set(Set&& other)
+    : options_(other.options_),
+      anchor_(other.anchor_),
+      elem_(std::move(other.elem_)),
+      compiled_(other.compiled_),
+      size_(other.size_),
+      prog_(std::move(other.prog_)) {
+  other.elem_.clear();
+  other.elem_.shrink_to_fit();
+  other.compiled_ = false;
+  other.size_ = 0;
+  other.prog_.reset();
+}
+
+RE2::Set& RE2::Set::operator=(Set&& other) {
+  this->~Set();
+  (void) new (this) Set(std::move(other));
+  return *this;
 }

 int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
@ -97,9 +116,9 @@ bool RE2::Set::Compile() {
    options_.ParseFlags());
  re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);

-  prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
+  prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
  re->Decref();
-  return prog_ != NULL;
+  return prog_ != nullptr;
 }

 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
@ -124,9 +143,10 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
                              NULL, &dfa_failed, matches.get());
  if (dfa_failed) {
    if (options_.log_errors())
-      LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
-                 << "bytemap range " << prog_->bytemap_range() << ", "
-                 << "list count " << prog_->list_count();
+      LOG(ERROR) << "DFA out of memory: "
+                 << "program size " << prog_->size() << ", "
+                 << "list count " << prog_->list_count() << ", "
+                 << "bytemap range " << prog_->bytemap_range();
    if (error_info != NULL)
      error_info->kind = kOutOfMemory;
    return false;
--- a/extern/re2/re2/set.h
+++ b/extern/re2/re2/set.h
@ -5,6 +5,7 @@
 #ifndef RE2_SET_H_
 #define RE2_SET_H_

+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@ -36,6 +37,13 @@ class RE2::Set {
  Set(const RE2::Options& options, RE2::Anchor anchor);
  ~Set();

+  // Not copyable.
+  Set(const Set&) = delete;
+  Set& operator=(const Set&) = delete;
+  // Movable.
+  Set(Set&& other);
+  Set& operator=(Set&& other);
+
  // Adds pattern to the set using the options passed to the constructor.
  // Returns the index that will identify the regexp in the output of Match(),
  // or -1 if the regexp cannot be parsed.
@ -67,12 +75,9 @@ class RE2::Set {
  RE2::Options options_;
  RE2::Anchor anchor_;
  std::vector<Elem> elem_;
-  re2::Prog* prog_;
  bool compiled_;
  int size_;
-
-  Set(const Set&) = delete;
-  Set& operator=(const Set&) = delete;
+  std::unique_ptr<re2::Prog> prog_;
 };

 }  // namespace re2
--- a/extern/re2/re2/simplify.cc
+++ b/extern/re2/re2/simplify.cc
@ -10,8 +10,8 @@

 #include "util/util.h"
 #include "util/logging.h"
-#include "util/pod_array.h"
 #include "util/utf.h"
+#include "re2/pod_array.h"
 #include "re2/regexp.h"
 #include "re2/walker-inl.h"

@ -28,8 +28,6 @@ bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
  Regexp* sre = re->Simplify();
  re->Decref();
  if (sre == NULL) {
-    // Should not happen, since Simplify never fails.
-    LOG(ERROR) << "Simplify failed on " << src;
    if (status) {
      status->set_code(kRegexpInternalError);
      status->set_error_arg(src);
@ -180,10 +178,20 @@ Regexp* Regexp::Simplify() {
  CoalesceWalker cw;
  Regexp* cre = cw.Walk(this, NULL);
  if (cre == NULL)
-    return cre;
+    return NULL;
+  if (cw.stopped_early()) {
+    cre->Decref();
+    return NULL;
+  }
  SimplifyWalker sw;
  Regexp* sre = sw.Walk(cre, NULL);
  cre->Decref();
+  if (sre == NULL)
+    return NULL;
+  if (sw.stopped_early()) {
+    sre->Decref();
+    return NULL;
+  }
  return sre;
 }

@ -212,9 +220,10 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
 }

 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
-  // This should never be called, since we use Walk and not
-  // WalkExponential.
+  // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
+#endif
  return re->Incref();
 }

@ -437,9 +446,10 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
 }

 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
-  // This should never be called, since we use Walk and not
-  // WalkExponential.
+  // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
+#endif
  return re->Incref();
 }

--- a/extern/re2/util/sparse_array.h
+++ b/extern/re2/util/sparse_array.h
@ -2,8 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-#ifndef UTIL_SPARSE_ARRAY_H_
-#define UTIL_SPARSE_ARRAY_H_
+#ifndef RE2_SPARSE_ARRAY_H_
+#define RE2_SPARSE_ARRAY_H_

 // DESCRIPTION
 //
@ -102,7 +102,7 @@
 #include <memory>
 #include <utility>

-#include "util/pod_array.h"
+#include "re2/pod_array.h"

 namespace re2 {

@ -389,4 +389,4 @@ template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,

 }  // namespace re2

-#endif  // UTIL_SPARSE_ARRAY_H_
+#endif  // RE2_SPARSE_ARRAY_H_
--- a/extern/re2/util/sparse_set.h
+++ b/extern/re2/util/sparse_set.h
@ -2,8 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-#ifndef UTIL_SPARSE_SET_H_
-#define UTIL_SPARSE_SET_H_
+#ifndef RE2_SPARSE_SET_H_
+#define RE2_SPARSE_SET_H_

 // DESCRIPTION
 //
@ -61,7 +61,7 @@
 #include <memory>
 #include <utility>

-#include "util/pod_array.h"
+#include "re2/pod_array.h"

 namespace re2 {

@ -261,4 +261,4 @@ typedef SparseSetT<void> SparseSet;

 }  // namespace re2

-#endif  // UTIL_SPARSE_SET_H_
+#endif  // RE2_SPARSE_SET_H_
--- a/extern/re2/re2/testing/backtrack.cc
+++ b/extern/re2/re2/testing/backtrack.cc
@ -29,6 +29,7 @@

 #include "util/util.h"
 #include "util/logging.h"
+#include "re2/pod_array.h"
 #include "re2/prog.h"
 #include "re2/regexp.h"

@ -53,7 +54,6 @@ namespace re2 {
 class Backtracker {
 public:
  explicit Backtracker(Prog* prog);
-  ~Backtracker();

  bool Search(const StringPiece& text, const StringPiece& context,
              bool anchored, bool longest,
@ -79,9 +79,11 @@ class Backtracker {
  int nsubmatch_;           //   # of submatches to fill in

  // Search state
-  const char* cap_[64];     // capture registers
-  uint32_t *visited_;       // bitmap: (Inst*, char*) pairs already backtracked
-  size_t nvisited_;         //   # of words in bitmap
+  const char* cap_[64];         // capture registers
+  PODArray<uint32_t> visited_;  // bitmap: (Inst*, char*) pairs visited
+
+  Backtracker(const Backtracker&) = delete;
+  Backtracker& operator=(const Backtracker&) = delete;
 };

 Backtracker::Backtracker(Prog* prog)
@ -90,13 +92,7 @@ Backtracker::Backtracker(Prog* prog)
    longest_(false),
    endmatch_(false),
    submatch_(NULL),
-    nsubmatch_(0),
-    visited_(NULL),
-    nvisited_(0) {
-}
-
-Backtracker::~Backtracker() {
-  delete[] visited_;
+    nsubmatch_(0) {
 }

 // Runs a backtracking search.
@ -105,7 +101,7 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
                         StringPiece* submatch, int nsubmatch) {
  text_ = text;
  context_ = context;
-  if (context_.begin() == NULL)
+  if (context_.data() == NULL)
    context_ = text;
  if (prog_->anchor_start() && text.begin() > context_.begin())
    return false;
@ -130,24 +126,28 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,

  // Allocate new visited_ bitmap -- size is proportional
  // to text, so have to reallocate on each call to Search.
-  delete[] visited_;
-  nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
-  visited_ = new uint32_t[nvisited_];
-  memset(visited_, 0, nvisited_*sizeof visited_[0]);
+  int nvisited = prog_->size() * static_cast<int>(text.size()+1);
+  nvisited = (nvisited + 31) / 32;
+  visited_ = PODArray<uint32_t>(nvisited);
+  memset(visited_.data(), 0, nvisited*sizeof visited_[0]);

  // Anchored search must start at text.begin().
  if (anchored_) {
-    cap_[0] = text.begin();
-    return Visit(prog_->start(), text.begin());
+    cap_[0] = text.data();
+    return Visit(prog_->start(), text.data());
  }

  // Unanchored search, starting from each possible text position.
  // Notice that we have to try the empty string at the end of
  // the text, so the loop condition is p <= text.end(), not p < text.end().
-  for (const char* p = text.begin(); p <= text.end(); p++) {
+  for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
    cap_[0] = p;
    if (Visit(prog_->start(), p))  // Match must be leftmost; done.
      return true;
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    if (p == NULL)
+      break;
  }
  return false;
 }
@ -158,9 +158,10 @@ bool Backtracker::Visit(int id, const char* p) {
  // Check bitmap.  If we've already explored from here,
  // either it didn't match or it did but we're hoping for a better match.
  // Either way, don't go down that road again.
-  CHECK(p <= text_.end());
-  size_t n = id*(text_.size()+1) + (p - text_.begin());
-  CHECK_LT(n/32, nvisited_);
+  CHECK(p <= text_.data() + text_.size());
+  int n = id * static_cast<int>(text_.size()+1) +
+          static_cast<int>(p-text_.data());
+  CHECK_LT(n/32, visited_.size());
  if (visited_[n/32] & (1 << (n&31)))
    return false;
  visited_[n/32] |= 1 << (n&31);
@ -182,7 +183,7 @@ bool Backtracker::Try(int id, const char* p) {
  // Pick out byte at current position.  If at end of string,
  // have to explore in hope of finishing a match.  Use impossible byte -1.
  int c = -1;
-  if (p < text_.end())
+  if (p < text_.data() + text_.size())
    c = *p & 0xFF;

  Prog::Inst* ip = prog_->inst(id);
@ -224,11 +225,12 @@ bool Backtracker::Try(int id, const char* p) {
    case kInstMatch:
      // We found a match.  If it's the best so far, record the
      // parameters in the caller's submatch_ array.
-      if (endmatch_ && p != context_.end())
+      if (endmatch_ && p != context_.data() + context_.size())
        return false;
      cap_[1] = p;
-      if (submatch_[0].data() == NULL ||           // First match so far ...
-          (longest_ && p > submatch_[0].end())) {  // ... or better match
+      if (submatch_[0].data() == NULL ||
+          (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
+        // First match so far - or better match.
        for (int i = 0; i < nsubmatch_; i++)
          submatch_[i] = StringPiece(
              cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
--- a/extern/re2/re2/testing/charclass_test.cc
+++ b/extern/re2/re2/testing/charclass_test.cc
@ -85,7 +85,7 @@ static CCTest tests[] = {
    { {-1} } },
 };

-template<class CharClass>
+template <typename CharClass>
 static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
  if (t == NULL) {
    printf("\t%s:", desc);
@ -136,7 +136,7 @@ void Delete(CharClassBuilder* cc) {
  delete cc;
 }

-template<class CharClass>
+template <typename CharClass>
 bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
  typename CharClass::iterator it = cc->begin();
  int size = 0;
--- a/extern/re2/re2/testing/compile_test.cc
+++ b/extern/re2/re2/testing/compile_test.cc
@ -147,10 +147,19 @@ static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
  Regexp* re = Regexp::Parse(pattern, flags, NULL);
  EXPECT_TRUE(re != NULL);

-  Prog* prog = re->CompileToProg(0);
-  EXPECT_TRUE(prog != NULL);
-  *bytemap = prog->DumpByteMap();
-  delete prog;
+  {
+    Prog* prog = re->CompileToProg(0);
+    EXPECT_TRUE(prog != NULL);
+    *bytemap = prog->DumpByteMap();
+    delete prog;
+  }
+
+  {
+    Prog* prog = re->CompileToReverseProg(0);
+    EXPECT_TRUE(prog != NULL);
+    EXPECT_EQ(*bytemap, prog->DumpByteMap());
+    delete prog;
+  }

  re->Decref();
 }
@ -213,16 +222,11 @@ TEST(TestCompile, UTF8Ranges) {
  EXPECT_EQ("[00-09] -> 0\n"
            "[0a-0a] -> 1\n"
            "[0b-7f] -> 0\n"
-            "[80-8f] -> 2\n"
-            "[90-9f] -> 3\n"
-            "[a0-bf] -> 4\n"
+            "[80-bf] -> 2\n"
            "[c0-c1] -> 1\n"
-            "[c2-df] -> 5\n"
-            "[e0-e0] -> 6\n"
-            "[e1-ef] -> 7\n"
-            "[f0-f0] -> 8\n"
-            "[f1-f3] -> 9\n"
-            "[f4-f4] -> 10\n"
+            "[c2-df] -> 3\n"
+            "[e0-ef] -> 4\n"
+            "[f0-f4] -> 5\n"
            "[f5-ff] -> 1\n",
            bytemap);
 }
@ -232,7 +236,7 @@ TEST(TestCompile, InsufficientMemory) {
      "^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
      Regexp::LikePerl, NULL);
  EXPECT_TRUE(re != NULL);
-  Prog* prog = re->CompileToProg(920);
+  Prog* prog = re->CompileToProg(850);
  // If the memory budget has been exhausted, compilation should fail
  // and return NULL instead of trying to do anything with NoMatch().
  EXPECT_TRUE(prog == NULL);
@ -299,20 +303,22 @@ TEST(TestCompile, Bug26705922) {
            "8. byte [f0-f0] 0 -> 7\n",
            reverse);

-  Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
-  EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
-            "4+ byte [c2-df] 0 -> 7\n"
-            "5+ byte [a0-bf] 1 -> 8\n"
-            "6. byte [80-bf] 0 -> 9\n"
+  Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
+  EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
+            "4+ byte [e0-ef] 0 -> 8\n"
+            "5. byte [f0-f4] 0 -> 9\n"
+            "6. byte [80-bf] 0 -> 7\n"
            "7. match! 0\n"
-            "8. byte [e0-e0] 0 -> 7\n"
-            "9+ byte [e1-ef] 0 -> 7\n"
-            "10+ byte [90-bf] 1 -> 13\n"
-            "11+ byte [80-bf] 1 -> 14\n"
-            "12. byte [80-8f] 0 -> 15\n"
-            "13. byte [f0-f0] 0 -> 7\n"
-            "14. byte [f1-f3] 0 -> 7\n"
-            "15. byte [f4-f4] 0 -> 7\n",
+            "8. byte [80-bf] 0 -> 6\n"
+            "9. byte [80-bf] 0 -> 8\n",
+            forward);
+  EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+            "4+ byte [c2-df] 0 -> 6\n"
+            "5. byte [80-bf] 0 -> 7\n"
+            "6. match! 0\n"
+            "7+ byte [e0-ef] 0 -> 6\n"
+            "8. byte [80-bf] 0 -> 9\n"
+            "9. byte [f0-f4] 0 -> 6\n",
            reverse);
 }

--- a/extern/re2/re2/testing/dfa_test.cc
+++ b/extern/re2/re2/testing/dfa_test.cc
@ -8,7 +8,9 @@
 #include <vector>

 #include "util/test.h"
+#include "util/flags.h"
 #include "util/logging.h"
+#include "util/malloc_counter.h"
 #include "util/strutil.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
@ -18,12 +20,26 @@

 static const bool UsingMallocCounter = false;

-DEFINE_int32(size, 8, "log2(number of DFA nodes)");
-DEFINE_int32(repeat, 2, "Repetition count.");
-DEFINE_int32(threads, 4, "number of threads");
+DEFINE_FLAG(int, size, 8, "log2(number of DFA nodes)");
+DEFINE_FLAG(int, repeat, 2, "Repetition count.");
+DEFINE_FLAG(int, threads, 4, "number of threads");

 namespace re2 {

+static int state_cache_resets = 0;
+static int search_failures = 0;
+
+struct SetHooks {
+  SetHooks() {
+    hooks::SetDFAStateCacheResetHook([](const hooks::DFAStateCacheReset&) {
+      ++state_cache_resets;
+    });
+    hooks::SetDFASearchFailureHook([](const hooks::DFASearchFailure&) {
+      ++search_failures;
+    });
+  }
+} set_hooks;
+
 // Check that multithreaded access to DFA class works.

 // Helper function: builds entire DFA for prog.
@ -34,7 +50,7 @@ static void DoBuild(Prog* prog) {
 TEST(Multithreaded, BuildEntireDFA) {
  // Create regexp with 2^FLAGS_size states in DFA.
  std::string s = "a";
-  for (int i = 0; i < FLAGS_size; i++)
+  for (int i = 0; i < GetFlag(FLAGS_size); i++)
    s += "[ab]";
  s += "b";
  Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
@ -52,14 +68,14 @@ TEST(Multithreaded, BuildEntireDFA) {
  }

  // Build the DFA simultaneously in a bunch of threads.
-  for (int i = 0; i < FLAGS_repeat; i++) {
+  for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
    Prog* prog = re->CompileToProg(0);
    ASSERT_TRUE(prog != NULL);

    std::vector<std::thread> threads;
-    for (int j = 0; j < FLAGS_threads; j++)
+    for (int j = 0; j < GetFlag(FLAGS_threads); j++)
      threads.emplace_back(DoBuild, prog);
-    for (int j = 0; j < FLAGS_threads; j++)
+    for (int j = 0; j < GetFlag(FLAGS_threads); j++)
      threads[j].join();

    // One more compile, to make sure everything is okay.
@ -106,44 +122,6 @@ TEST(SingleThreaded, BuildEntireDFA) {
  re->Decref();
 }

-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences.  The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-// Such a string is useful for testing a DFA.  If you have a DFA
-// where distinct last n bytes implies distinct states, then running on a
-// DeBruijn string causes the DFA to need to create a new state at every
-// position in the input, never reusing any states until it gets to the
-// end of the string.  This is the worst possible case for DFA execution.
-static std::string DeBruijnString(int n) {
-  CHECK_LT(n, static_cast<int>(8*sizeof(int)));
-  CHECK_GT(n, 0);
-
-  std::vector<bool> did(size_t{1}<<n);
-  for (int i = 0; i < 1<<n; i++)
-    did[i] = false;
-
-  std::string s;
-  for (int i = 0; i < n-1; i++)
-    s.append("0");
-  int bits = 0;
-  int mask = (1<<n) - 1;
-  for (int i = 0; i < (1<<n); i++) {
-    bits <<= 1;
-    bits &= mask;
-    if (!did[bits|1]) {
-      bits |= 1;
-      s.append("1");
-    } else {
-      s.append("0");
-    }
-    CHECK(!did[bits]);
-    did[bits] = true;
-  }
-  return s;
-}
-
 // Test that the DFA gets the right result even if it runs
 // out of memory during a search.  The regular expression
 // 0[01]{n}$ matches a binary string of 0s and 1s only if
@ -166,6 +144,8 @@ TEST(SingleThreaded, SearchDFA) {
  // if it can't get a good cache hit rate.)
  // Tell the DFA to trudge along instead.
  Prog::TEST_dfa_should_bail_when_slow(false);
+  state_cache_resets = 0;
+  search_failures = 0;

  // Choice of n is mostly arbitrary, except that:
  //   * making n too big makes the test run for too long.
@ -215,6 +195,8 @@ TEST(SingleThreaded, SearchDFA) {

  // Reset to original behaviour.
  Prog::TEST_dfa_should_bail_when_slow(true);
+  ASSERT_GT(state_cache_resets, 0);
+  ASSERT_EQ(search_failures, 0);
 }

 // Helper function: searches for match, which should match,
@ -237,6 +219,8 @@ static void DoSearch(Prog* prog, const StringPiece& match,

 TEST(Multithreaded, SearchDFA) {
  Prog::TEST_dfa_should_bail_when_slow(false);
+  state_cache_resets = 0;
+  search_failures = 0;

  // Same as single-threaded test above.
  const int n = 18;
@ -259,14 +243,14 @@ TEST(Multithreaded, SearchDFA) {

  // Run the search simultaneously in a bunch of threads.
  // Reuse same flags for Multithreaded.BuildDFA above.
-  for (int i = 0; i < FLAGS_repeat; i++) {
+  for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
    Prog* prog = re->CompileToProg(1<<n);
    ASSERT_TRUE(prog != NULL);

    std::vector<std::thread> threads;
-    for (int j = 0; j < FLAGS_threads; j++)
+    for (int j = 0; j < GetFlag(FLAGS_threads); j++)
      threads.emplace_back(DoSearch, prog, match, no_match);
-    for (int j = 0; j < FLAGS_threads; j++)
+    for (int j = 0; j < GetFlag(FLAGS_threads); j++)
      threads[j].join();

    delete prog;
@ -276,6 +260,8 @@ TEST(Multithreaded, SearchDFA) {

  // Reset to original behaviour.
  Prog::TEST_dfa_should_bail_when_slow(true);
+  ASSERT_GT(state_cache_resets, 0);
+  ASSERT_EQ(search_failures, 0);
 }

 struct ReverseTest {
--- a/extern/re2/re2/testing/dump.cc
+++ b/extern/re2/re2/testing/dump.cc
@ -25,9 +25,6 @@
 #include "re2/stringpiece.h"
 #include "re2/regexp.h"

-// Cause a link error if this file is used outside of testing.
-DECLARE_string(test_tmpdir);
-
 namespace re2 {

 static const char* kOpcodeNames[] = {
@ -154,14 +151,11 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
 }

 std::string Regexp::Dump() {
+  // Make sure that we are being called from a unit test.
+  // Should cause a link error if used outside of testing.
+  CHECK(!::testing::TempDir().empty());
+
  std::string s;
-
-  // Make sure being called from a unit test.
-  if (FLAGS_test_tmpdir.empty()) {
-    LOG(ERROR) << "Cannot use except for testing.";
-    return s;
-  }
-
  DumpRegexpAppending(this, &s);
  return s;
 }
--- a/extern/re2/re2/testing/exhaustive1_test.cc
+++ b/extern/re2/re2/testing/exhaustive1_test.cc
@ -10,8 +10,6 @@
 #include "util/test.h"
 #include "re2/testing/exhaustive_tester.h"

-DECLARE_string(regexp_engines);
-
 namespace re2 {

 // Test simple repetition operators
@ -34,11 +32,8 @@ TEST(Repetition, Capturing) {
    "%s* %s+ %s? %s*? %s+? %s??");
  ExhaustiveTest(3, 2, Split(" ", "a (a) b"), ops,
                 7, Explode("ab"), "(?:%s)", "");
-
-  // This would be a great test, but it runs forever when PCRE is enabled.
-  if (FLAGS_regexp_engines.find("PCRE") == std::string::npos)
-    ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
-                   50, Explode("a"), "(?:%s)", "");
+  ExhaustiveTest(3, 2, Split(" ", "a (a)"), ops,
+                 50, Explode("a"), "(?:%s)", "");
 }

 }  // namespace re2
--- a/extern/re2/re2/testing/exhaustive2_test.cc
+++ b/extern/re2/re2/testing/exhaustive2_test.cc
@ -10,7 +10,6 @@
 #include <vector>

 #include "util/test.h"
-#include "re2/re2.h"
 #include "re2/testing/exhaustive_tester.h"

 namespace re2 {
--- a/extern/re2/re2/testing/exhaustive_tester.cc
+++ b/extern/re2/re2/testing/exhaustive_tester.cc
@ -14,6 +14,7 @@
 #include <stdio.h>

 #include "util/test.h"
+#include "util/flags.h"
 #include "util/logging.h"
 #include "util/strutil.h"
 #include "re2/testing/exhaustive_tester.h"
@ -24,11 +25,11 @@
 #define LOGGING 0
 #endif

-DEFINE_bool(show_regexps, false, "show regexps during testing");
+DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");

-DEFINE_int32(max_bad_regexp_inputs, 1,
-             "Stop testing a regular expression after finding this many "
-             "strings that break it.");
+DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
+            "Stop testing a regular expression after finding this many "
+            "strings that break it.");

 namespace re2 {

@ -62,11 +63,12 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
  for (int i = 0; i < n; i++) {
    if (i > 0)
      printf(" ");
-    if (m[i].begin() == NULL)
+    if (m[i].data() == NULL)
      printf("-");
    else
      printf("%td-%td",
-             m[i].begin() - input.begin(), m[i].end() - input.begin());
+             m[i].begin() - input.begin(),
+             m[i].end() - input.begin());
  }
 }

@ -76,10 +78,11 @@ static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
 void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
  regexps_++;
  std::string regexp = const_regexp;
-  if (!topwrapper_.empty())
+  if (!topwrapper_.empty()) {
    regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
+  }

-  if (FLAGS_show_regexps) {
+  if (GetFlag(FLAGS_show_regexps)) {
    printf("\r%s", regexp.c_str());
    fflush(stdout);
  }
@ -134,7 +137,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
    tests_++;
    if (!tester.TestInput(strgen_.Next())) {
      failures_++;
-      if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
+      if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
        break;
    }
  }
--- a/extern/re2/re2/testing/filtered_re2_test.cc
+++ b/extern/re2/re2/testing/filtered_re2_test.cc
@ -7,6 +7,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <utility>

 #include "util/test.h"
 #include "util/logging.h"
@ -291,4 +292,49 @@ TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
                                 "EmptyStringInStringSetBug", &v));
 }

+TEST(FilteredRE2Test, MoveSemantics) {  // exercises move assignment of v1.f / v2.f
+  FilterTestVars v1;
+  int id;
+  v1.f.Add("foo\\d+", v1.opts, &id);
+  EXPECT_EQ(0, id);  // the first regexp added is expected to get id 0
+  v1.f.Compile(&v1.atoms);
+  EXPECT_EQ(1, v1.atoms.size());
+  EXPECT_EQ("foo", v1.atoms[0]);
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);  // NOTE(review): {0} is presumably the matched-atom list; confirm
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());  // baseline: "foo\\d+" does not match the "bar2" text
+
+  // The moved-to object should do what the moved-from object did.
+  FilterTestVars v2;
+  v2.f = std::move(v1.f);
+  v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
+  EXPECT_EQ(1, v2.matches.size());
+  EXPECT_EQ(0, v2.matches[0]);
+  v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
+  EXPECT_EQ(0, v2.matches.size());
+
+  // The moved-from object should have been reset and be reusable.
+  v1.f.Add("bar\\d+", v1.opts, &id);
+  EXPECT_EQ(0, id);  // ids are expected to start again from 0 after the move
+  v1.f.Compile(&v1.atoms);
+  EXPECT_EQ(1, v1.atoms.size());
+  EXPECT_EQ("bar", v1.atoms[0]);
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+
+  // Verify that "overwriting" works and also doesn't leak memory.
+  // (The latter will need a leak detector such as LeakSanitizer.)
+  v1.f = std::move(v2.f);  // v1.f currently holds the "bar\\d+" filter, which gets replaced
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());
+}
+
 }  //  namespace re2
--- a/extern/re2/re2/testing/null_walker.cc
+++ b/extern/re2/re2/testing/null_walker.cc
@ -13,13 +13,16 @@ namespace re2 {

 class NullWalker : public Regexp::Walker<bool> {
 public:
-  NullWalker() { }
-  bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
-                 bool* child_args, int nchild_args);
+  NullWalker() {}

-  bool ShortVisit(Regexp* re, bool a) {
-    // Should never be called: we use Walk not WalkExponential.
+  virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                         bool* child_args, int nchild_args);
+
+  virtual bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    LOG(DFATAL) << "NullWalker::ShortVisit called";
+#endif
    return a;
  }

--- a/extern/re2/re2/testing/random_test.cc
+++ b/extern/re2/re2/testing/random_test.cc
@ -9,12 +9,13 @@
 #include <vector>

 #include "util/test.h"
+#include "util/flags.h"
 #include "re2/testing/exhaustive_tester.h"

-DEFINE_int32(regexpseed, 404, "Random regexp seed.");
-DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
-DEFINE_int32(stringseed, 200, "Random string seed.");
-DEFINE_int32(stringcount, 100, "How many random strings to generate.");
+DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed.");
+DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
+DEFINE_FLAG(int, stringseed, 200, "Random string seed.");
+DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate.");

 namespace re2 {

@ -37,8 +38,10 @@ static void RandomTest(int maxatoms, int maxops,

  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
                     maxstrlen, stralphabet, wrapper, "");
-  t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
-  t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
+  t.RandomStrings(GetFlag(FLAGS_stringseed),
+                  GetFlag(FLAGS_stringcount));
+  t.GenerateRandom(GetFlag(FLAGS_regexpseed),
+                   GetFlag(FLAGS_regexpcount));
  printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
         t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
  EXPECT_EQ(0, t.failures());
@ -96,4 +99,3 @@ TEST(Random, Complicated) {
 }

 }  // namespace re2
-
--- a/extern/re2/re2/testing/re2_arg_test.cc
+++ b/extern/re2/re2/testing/re2_arg_test.cc
@ -11,6 +11,7 @@
 #include <string.h>

 #include "util/test.h"
+#include "util/logging.h"
 #include "re2/re2.h"

 namespace re2 {
@ -132,4 +133,28 @@ TEST(RE2ArgTest, Uint64Test) {
  PARSE_FOR_TYPE(uint64_t, 5);
 }

+TEST(RE2ArgTest, ParseFromTest) {  // RE2::Arg built from objects exposing ParseFrom(const char*, size_t)
+#if !defined(_MSC_VER)  // NOTE(review): body is compiled out under MSVC; the reason is not visible here
+  struct {
+    bool ParseFrom(const char* str, size_t n) {
+      LOG(INFO) << "str = " << str << ", n = " << n;
+      return true;
+    }
+  } obj1;
+  RE2::Arg arg1(&obj1);
+  EXPECT_TRUE(arg1.Parse("one", 3));  // expected to report obj1.ParseFrom()'s result (true)
+
+  struct {
+    bool ParseFrom(const char* str, size_t n) {
+      LOG(INFO) << "str = " << str << ", n = " << n;
+      return false;
+    }
+    // Ensure that RE2::Arg works even with overloaded ParseFrom().
+    void ParseFrom(const char* str) {}
+  } obj2;
+  RE2::Arg arg2(&obj2);
+  EXPECT_FALSE(arg2.Parse("two", 3));  // expected to report the two-argument overload's result (false)
+#endif  // !defined(_MSC_VER)
+}
+
 }  // namespace re2
--- a/extern/re2/re2/testing/re2_test.cc
+++ b/extern/re2/re2/testing/re2_test.cc
@ -12,6 +12,7 @@
 #include <map>
 #include <string>
 #include <utility>
+#include <vector>
 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
 #include <sys/mman.h>
 #include <unistd.h>  /* for sysconf */
@ -223,6 +224,15 @@ TEST(RE2, Extract) {
  ASSERT_EQ(s, "'foo'");
 }

+TEST(RE2, MaxSubmatchTooLarge) {  // the rewrite string references group 2, but "f(o+)" has one capturing group
+  std::string s;
+  ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));  // each call below is expected to fail
+  s = "foo";  // input text for Replace()
+  ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
+  s = "foo";  // reassigned so GlobalReplace() starts from the same text
+  ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
+}
+
 TEST(RE2, Consume) {
  RE2 r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
  std::string word;
@ -473,28 +483,27 @@ TEST(ProgramFanout, BigProgram) {
  RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
  RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");

-  std::map<int, int> histogram;
+  std::vector<int> histogram;

  // 3 is the largest non-empty bucket and has 1 element.
  ASSERT_EQ(3, re1.ProgramFanout(&histogram));
  ASSERT_EQ(1, histogram[3]);

-  // 7 is the largest non-empty bucket and has 10 elements.
-  ASSERT_EQ(7, re10.ProgramFanout(&histogram));
-  ASSERT_EQ(10, histogram[7]);
+  // 6 is the largest non-empty bucket and has 10 elements.
+  ASSERT_EQ(6, re10.ProgramFanout(&histogram));
+  ASSERT_EQ(10, histogram[6]);

-  // 10 is the largest non-empty bucket and has 100 elements.
-  ASSERT_EQ(10, re100.ProgramFanout(&histogram));
-  ASSERT_EQ(100, histogram[10]);
+  // 9 is the largest non-empty bucket and has 100 elements.
+  ASSERT_EQ(9, re100.ProgramFanout(&histogram));
+  ASSERT_EQ(100, histogram[9]);

  // 13 is the largest non-empty bucket and has 1000 elements.
  ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
  ASSERT_EQ(1000, histogram[13]);

-  // 2 is the largest non-empty bucket and has 3 elements.
-  // This differs from the others due to how reverse `.' works.
+  // 2 is the largest non-empty bucket and has 1 element.
  ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
-  ASSERT_EQ(3, histogram[2]);
+  ASSERT_EQ(1, histogram[2]);

  // 5 is the largest non-empty bucket and has 10 elements.
  ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
@ -1232,11 +1241,10 @@ TEST(RE2, DeepRecursion) {
 // Suggested by Josh Hyman.  Failed when SearchOnePass was
 // not implementing case-folding.
 TEST(CaseInsensitive, MatchAndConsume) {
-  std::string result;
  std::string text = "A fish named *Wanda*";
  StringPiece sp(text);
-
-  EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
+  StringPiece result;
+  EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
  EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
 }

@ -1269,38 +1277,43 @@ TEST(RE2, CL8622304) {
  EXPECT_EQ(val, "1,0x2F,030,4,5");
 }

-
 // Check that RE2 returns correct regexp pieces on error.
 // In particular, make sure it returns whole runes
 // and that it always reports invalid UTF-8.
 // Also check that Perl error flag piece is big enough.
 static struct ErrorTest {
  const char *regexp;
-  const char *error;
+  RE2::ErrorCode error_code;
+  const char *error_arg;
 } error_tests[] = {
-  { "ab\\αcd", "\\α" },
-  { "ef\\x☺01", "\\x☺0" },
-  { "gh\\x1☺01", "\\x1☺" },
-  { "ij\\x1", "\\x1" },
-  { "kl\\x", "\\x" },
-  { "uv\\x{0000☺}", "\\x{0000☺" },
-  { "wx\\p{ABC", "\\p{ABC" },
-  { "yz(?smiUX:abc)", "(?smiUX" },   // used to return (?s but the error is X
-  { "aa(?sm☺i", "(?sm☺" },
-  { "bb[abc", "[abc" },
+  { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
+  { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
+  { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
+  { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
+  { "kl\\x", RE2::ErrorBadEscape, "\\x" },
+  { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
+  { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
+  // used to return (?s but the error is X
+  { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
+  { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
+  { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
+  { "abc(def", RE2::ErrorMissingParen, "abc(def" },
+  { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },

-  { "mn\\x1\377", "" },  // no argument string returned for invalid UTF-8
-  { "op\377qr", "" },
-  { "st\\x{00000\377", "" },
-  { "zz\\p{\377}", "" },
-  { "zz\\x{00\377}", "" },
-  { "zz(?P<name\377>abc)", "" },
+  // no argument string returned for invalid UTF-8
+  { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
+  { "op\377qr", RE2::ErrorBadUTF8, "" },
+  { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
+  { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
+  { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
+  { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
 };
-TEST(RE2, ErrorArgs) {
+TEST(RE2, ErrorCodeAndArg) {
  for (size_t i = 0; i < arraysize(error_tests); i++) {
    RE2 re(error_tests[i].regexp, RE2::Quiet);
    EXPECT_FALSE(re.ok());
-    EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
+    EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
+    EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
  }
 }

--- a/extern/re2/re2/testing/regexp_benchmark.cc
+++ b/extern/re2/re2/testing/regexp_benchmark.cc
--- a/extern/re2/re2/testing/regexp_generator.cc
+++ b/extern/re2/re2/testing/regexp_generator.cc
@ -241,7 +241,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
 std::vector<std::string> Explode(const StringPiece& s) {
  std::vector<std::string> v;

-  for (const char *q = s.begin(); q < s.end(); ) {
+  for (const char *q = s.data(); q < s.data() + s.size(); ) {
    const char* p = q;
    Rune r;
    q += chartorune(&r, q);
@ -256,11 +256,11 @@ std::vector<std::string> Explode(const StringPiece& s) {
 std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
  std::vector<std::string> v;

-  if (sep.size() == 0)
+  if (sep.empty())
    return Explode(s);

-  const char *p = s.begin();
-  for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
+  const char *p = s.data();
+  for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
    if (StringPiece(q, sep.size()) == sep) {
      v.push_back(std::string(p, q - p));
      p = q + sep.size();
@ -268,8 +268,8 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
      continue;
    }
  }
-  if (p < s.end())
-    v.push_back(std::string(p, s.end() - p));
+  if (p < s.data() + s.size())
+    v.push_back(std::string(p, s.data() + s.size() - p));
  return v;
 }

--- a/extern/re2/re2/testing/required_prefix_test.cc
+++ b/extern/re2/re2/testing/required_prefix_test.cc
@ -6,6 +6,7 @@

 #include "util/test.h"
 #include "util/logging.h"
+#include "re2/prog.h"
 #include "re2/regexp.h"

 namespace re2 {
@ -19,15 +20,18 @@ struct PrefixTest {
 };

 static PrefixTest tests[] = {
-  // If the regexp is missing a ^, there's no required prefix.
-  { "abc", false },
+  // Empty cases.
  { "", false },
  { "(?m)^", false },
+  { "(?-m)^", false },
+
+  // If the regexp has no ^, there's no required prefix.
+  { "abc", false },

  // If the regexp immediately goes into
  // something not a literal match, there's no required prefix.
-  { "^(abc)", false },
  { "^a*",  false },
+  { "^(abc)", false },

  // Otherwise, it should work.
  { "^abc$", true, "abc", false, "(?-m:$)" },
@ -53,15 +57,15 @@ TEST(RequiredPrefix, SimpleTests) {
      bool f;
      Regexp* s;
      ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
-        << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf")
+        << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
        << " " << re->Dump();
      if (t.return_value) {
        ASSERT_EQ(p, std::string(t.prefix))
-          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
        ASSERT_EQ(f, t.foldcase)
-          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
        ASSERT_EQ(s->ToString(), std::string(t.suffix))
-          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
        s->Decref();
      }
      re->Decref();
@ -69,4 +73,81 @@ TEST(RequiredPrefix, SimpleTests) {
  }
 }

+static PrefixTest for_accel_tests[] = {  // per entry: regexp, expected return value, then expected prefix and foldcase
+  // Empty cases.
+  { "", false },
+  { "(?m)^", false },
+  { "(?-m)^", false },
+
+  // If the regexp has a ^, there's no required prefix.
+  { "^abc", false },
+
+  // If the regexp immediately goes into
+  // something not a literal match, there's no required prefix.
+  { "a*",  false },
+
+  // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
+  // capturing groups, but doesn't try to glue prefix fragments together.
+  { "(a?)def", false },
+  { "(ab?)def", true, "a", false },
+  { "(abc?)def", true, "ab", false },
+  { "(()a)def", false },
+  { "((a)b)def", true, "a", false },
+  { "((ab)c)def", true, "ab", false },
+
+  // Otherwise, it should work.
+  { "abc$", true, "abc", false },
+  { "abc", true, "abc", false },
+  { "(?i)abc", true, "abc", true },
+  { "abcd*", true, "abc", false },
+  { "[Aa][Bb]cd*", true, "ab", true },
+  { "ab[Cc]d*", true, "ab", false },
+  { "☺abc", true, "☺abc", false },
+};
+
+TEST(RequiredPrefixForAccel, SimpleTests) {  // runs every for_accel_tests entry under two parse-flag settings
+  for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
+    const PrefixTest& t = for_accel_tests[i];
+    for (size_t j = 0; j < 2; j++) {  // j == 0 adds Regexp::Latin1; j == 1 is reported as "utf8"
+      Regexp::ParseFlags flags = Regexp::LikePerl;
+      if (j == 0)
+        flags = flags | Regexp::Latin1;
+      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
+      ASSERT_TRUE(re != NULL) << " " << t.regexp;
+
+      std::string p;  // receives the prefix
+      bool f;  // receives the fold-case flag
+      ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
+        << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
+        << " " << re->Dump();
+      if (t.return_value) {  // p and f are only compared when a prefix is expected
+        ASSERT_EQ(p, std::string(t.prefix))
+          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+        ASSERT_EQ(f, t.foldcase)
+          << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+      }
+      re->Decref();  // drop the reference obtained from Regexp::Parse()
+    }
+  }
+}
+
+TEST(PrefixAccel, BasicTest) {  // checks Prog::PrefixAccel() on inputs with and without "abc"
+  Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
+  ASSERT_TRUE(re != NULL);
+  Prog* prog = re->CompileToProg(0);
+  ASSERT_TRUE(prog != NULL);
+  for (int i = 0; i < 100; i++) {
+    std::string text(i, 'a');  // i copies of 'a': contains no "abc"
+    const char* p = reinterpret_cast<const char*>(
+        prog->PrefixAccel(text.data(), text.size()));
+    EXPECT_TRUE(p == NULL);  // nothing should be found yet
+    text.append("abc");  // the first "abc" now starts at offset i
+    p = reinterpret_cast<const char*>(
+        prog->PrefixAccel(text.data(), text.size()));
+    EXPECT_EQ(i, p-text.data());  // the result should point at that first "abc"
+  }
+  delete prog;
+  re->Decref();
+}
+
 }  // namespace re2
--- a/extern/re2/re2/testing/set_test.cc
+++ b/extern/re2/re2/testing/set_test.cc
@ -5,6 +5,7 @@
 #include <stddef.h>
 #include <string>
 #include <vector>
+#include <utility>

 #include "util/test.h"
 #include "util/logging.h"
@ -201,4 +202,29 @@ TEST(Set, Prefix) {
  ASSERT_EQ(v[0], 0);
 }

+TEST(Set, MoveSemantics) {  // covers move construction (s2) and move assignment (s1 = ...)
+  RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
+  ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);  // the first pattern added is expected to get index 0
+  ASSERT_EQ(s1.Compile(), true);
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+
+  // The moved-to object should do what the moved-from object did.
+  RE2::Set s2 = std::move(s1);
+  ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
+
+  // The moved-from object should have been reset and be reusable.
+  ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);  // indices are expected to restart at 0
+  ASSERT_EQ(s1.Compile(), true);
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
+
+  // Verify that "overwriting" works and also doesn't leak memory.
+  // (The latter will need a leak detector such as LeakSanitizer.)
+  s1 = std::move(s2);  // s1 currently holds the "bar\\d+" set, which gets replaced
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+}
+
 }  // namespace re2
--- a/extern/re2/re2/testing/string_generator.cc
+++ b/extern/re2/re2/testing/string_generator.cc
@ -111,4 +111,31 @@ void StringGenerator::GenerateNULL() {
  hasnext_ = true;
 }

+std::string DeBruijnString(int n) {  // builds a string of '0'/'1' characters of length n-1 + 2^n
+  CHECK_GE(n, 1);
+  CHECK_LE(n, 29);  // caps size (2^n), which sizes both did and s below
+  const size_t size = size_t{1} << static_cast<size_t>(n);  // 2^n
+  const size_t mask = size - 1;  // selects the low n bits
+  std::vector<bool> did(size, false);  // did[w]: the n-bit window w was already produced
+  std::string s;
+  s.reserve(static_cast<size_t>(n) + size);
+  for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
+    s += '0';  // n-1 leading zeros, consistent with the all-zero starting window
+  size_t bits = 0;  // window over the most recently emitted bits
+  for (size_t i = 0; i < size; i++) {
+    bits <<= 1;  // slide the window left by one bit...
+    bits &= mask;  // ...and drop whatever moved above the low n bits
+    if (!did[bits | 1]) {  // emit '1' when that gives a window not seen before
+      bits |= 1;
+      s += '1';
+    } else {  // otherwise emit '0'
+      s += '0';
+    }
+    CHECK(!did[bits]);  // each step must yield a window that has not occurred yet
+    did[bits] = true;
+  }
+  CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);  // n-1 zeros plus one character per window
+  return s;
+}
+
 }  // namespace re2
--- a/extern/re2/re2/testing/string_generator.h
+++ b/extern/re2/re2/testing/string_generator.h
@ -58,6 +58,19 @@ class StringGenerator {
  StringGenerator& operator=(const StringGenerator&) = delete;
 };

+// Generates and returns a string over the binary alphabet {0,1} that contains
+// every binary sequence of length n as a contiguous substring.  The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n-1 + 2^n called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+//
+// Such a string is useful for testing a DFA.  If you have a DFA
+// where distinct last n bytes imply distinct states, then running on a
+// De Bruijn string causes the DFA to need to create a new state at every
+// position in the input, never reusing any states until it gets to the
+// end of the string.  This is the worst possible case for DFA execution.
+std::string DeBruijnString(int n);
+
 }  // namespace re2

 #endif  // RE2_TESTING_STRING_GENERATOR_H_
--- a/extern/re2/re2/testing/tester.cc
+++ b/extern/re2/re2/testing/tester.cc
@ -18,14 +18,15 @@
 #include "re2/re2.h"
 #include "re2/regexp.h"

-DEFINE_bool(dump_prog, false, "dump regexp program");
-DEFINE_bool(log_okay, false, "log successful runs");
-DEFINE_bool(dump_rprog, false, "dump reversed regexp program");
+DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
+DEFINE_FLAG(bool, log_okay, false, "log successful runs");
+DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");

-DEFINE_int32(max_regexp_failures, 100,
-             "maximum number of regexp test failures (-1 = unlimited)");
+DEFINE_FLAG(int, max_regexp_failures, 100,
+            "maximum number of regexp test failures (-1 = unlimited)");

-DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
+DEFINE_FLAG(std::string, regexp_engines, "",
+            "pattern to select regexp engines to test");

 namespace re2 {

@ -62,11 +63,11 @@ static uint32_t Engines() {
  if (did_parse)
    return cached_engines;

-  if (FLAGS_regexp_engines.empty()) {
+  if (GetFlag(FLAGS_regexp_engines).empty()) {
    cached_engines = ~0;
  } else {
    for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
-      if (FLAGS_regexp_engines.find(EngineName(i)) != std::string::npos)
+      if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
        cached_engines |= 1<<i;
  }

@ -85,6 +86,20 @@ static uint32_t Engines() {

 // The result of running a match.
 struct TestInstance::Result {
+  Result()
+      : skipped(false),
+        matched(false),
+        untrusted(false),
+        have_submatch(false),
+        have_submatch0(false) {
+    ClearSubmatch();
+  }
+
+  void ClearSubmatch() {
+    for (int i = 0; i < kMaxSubmatch; i++)
+      submatch[i] = StringPiece();
+  }
+
  bool skipped;         // test skipped: wasn't applicable
  bool matched;         // found a match
  bool untrusted;       // don't really trust the answer
@ -99,10 +114,11 @@ typedef TestInstance::Result Result;
 // where a and b are the starting and ending offsets of s in text.
 static std::string FormatCapture(const StringPiece& text,
                                 const StringPiece& s) {
-  if (s.begin() == NULL)
+  if (s.data() == NULL)
    return "(?,?)";
  return StringPrintf("(%td,%td)",
-                      s.begin() - text.begin(), s.end() - text.begin());
+                      s.begin() - text.begin(),
+                      s.end() - text.begin());
 }

 // Returns whether text contains non-ASCII (>= 0x80) bytes.
@ -198,7 +214,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
    error_ = true;
    return;
  }
-  if (FLAGS_dump_prog) {
+  if (GetFlag(FLAGS_dump_prog)) {
    LOG(INFO) << "Prog for "
              << " regexp "
              << CEscape(regexp_str_)
@ -216,7 +232,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
      error_ = true;
      return;
    }
-    if (FLAGS_dump_rprog)
+    if (GetFlag(FLAGS_dump_rprog))
      LOG(INFO) << rprog_->Dump();
  }

@ -290,9 +306,6 @@ void TestInstance::RunSearch(Engine type,
                             const StringPiece& orig_context,
                             Prog::Anchor anchor,
                             Result* result) {
-  // Result is not trivial, so we cannot freely clear it with memset(3),
-  // but zeroing objects like so is safe and expedient for our purposes.
-  memset(reinterpret_cast<void*>(result), 0, sizeof *result);
  if (regexp_ == NULL) {
    result->skipped = true;
    return;
@ -476,7 +489,7 @@ void TestInstance::RunSearch(Engine type,
  }

  if (!result->matched)
-    memset(result->submatch, 0, sizeof result->submatch);
+    result->ClearSubmatch();
 }

 // Checks whether r is okay given that correct is the right answer.
@ -489,7 +502,7 @@ static bool ResultOkay(const Result& r, const Result& correct) {
    return false;
  if (r.have_submatch || r.have_submatch0) {
    for (int i = 0; i < kMaxSubmatch; i++) {
-      if (correct.submatch[i].begin() != r.submatch[i].begin() ||
+      if (correct.submatch[i].data() != r.submatch[i].data() ||
          correct.submatch[i].size() != r.submatch[i].size())
        return false;
      if (!r.have_submatch)
@ -528,7 +541,7 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
    Result r;
    RunSearch(i, text, context, anchor, &r);
    if (ResultOkay(r, correct)) {
-      if (FLAGS_log_okay)
+      if (GetFlag(FLAGS_log_okay))
        LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
      continue;
    }
@ -555,8 +568,8 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
      }
    }
    for (int i = 0; i < 1+num_captures_; i++) {
-      if (r.submatch[i].begin() != correct.submatch[i].begin() ||
-          r.submatch[i].end() != correct.submatch[i].end()) {
+      if (r.submatch[i].data() != correct.submatch[i].data() ||
+          r.submatch[i].size() != correct.submatch[i].size()) {
        LOG(INFO) <<
          StringPrintf("   $%d: should be %s is %s",
                       i,
@ -571,7 +584,10 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
  }

  if (!all_okay) {
-    if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
+    // This will be initialised once (after flags have been initialised)
+    // and that is desirable because we want to enforce a global limit.
+    static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
+    if (max_regexp_failures > 0 && --max_regexp_failures == 0)
      LOG(QFATAL) << "Too many regexp failures.";
  }

@ -640,7 +656,7 @@ static Prog::Anchor anchors[] = {

 bool Tester::TestInput(const StringPiece& text) {
  bool okay = TestInputInContext(text, text);
-  if (text.size() > 0) {
+  if (!text.empty()) {
    StringPiece sp;
    sp = text;
    sp.remove_prefix(1);
--- a/extern/re2/re2/unicode.py
+++ b/extern/re2/re2/unicode.py
@ -13,7 +13,7 @@ import re
 from six.moves import urllib

 # Directory or URL where Unicode tables reside.
-_UNICODE_DIR = "https://www.unicode.org/Public/12.1.0/ucd"
+_UNICODE_DIR = "https://www.unicode.org/Public/13.0.0/ucd"

 # Largest valid Unicode code value.
 _RUNE_MAX = 0x10FFFF
--- a/extern/re2/re2/unicode_casefold.cc
+++ b/extern/re2/re2/unicode_casefold.cc
@ -7,7 +7,7 @@
 namespace re2 {


-// 1381 groups, 2792 pairs, 356 ranges
+// 1384 groups, 2798 pairs, 358 ranges
 const CaseFold unicode_casefold[] = {
 	{ 65, 90, 32 },
 	{ 97, 106, -32 },
@ -349,6 +349,8 @@ const CaseFold unicode_casefold[] = {
 	{ 42948, 42948, -48 },
 	{ 42949, 42949, -42307 },
 	{ 42950, 42950, -35384 },
+	{ 42951, 42954, OddEven },
+	{ 42997, 42998, OddEven },
 	{ 43859, 43859, -928 },
 	{ 43888, 43967, -38864 },
 	{ 65313, 65338, 32 },
@ -366,9 +368,9 @@ const CaseFold unicode_casefold[] = {
 	{ 125184, 125217, 34 },
 	{ 125218, 125251, -34 },
 };
-const int num_unicode_casefold = 356;
+const int num_unicode_casefold = 358;

-// 1381 groups, 1411 pairs, 198 ranges
+// 1384 groups, 1414 pairs, 200 ranges
 const CaseFold unicode_tolower[] = {
 	{ 65, 90, 32 },
 	{ 181, 181, 775 },
@ -560,6 +562,8 @@ const CaseFold unicode_tolower[] = {
 	{ 42948, 42948, -48 },
 	{ 42949, 42949, -42307 },
 	{ 42950, 42950, -35384 },
+	{ 42951, 42953, OddEvenSkip },
+	{ 42997, 42997, OddEven },
 	{ 43888, 43967, -38864 },
 	{ 65313, 65338, 32 },
 	{ 66560, 66599, 40 },
@ -569,7 +573,7 @@ const CaseFold unicode_tolower[] = {
 	{ 93760, 93791, 32 },
 	{ 125184, 125217, 34 },
 };
-const int num_unicode_tolower = 198;
+const int num_unicode_tolower = 200;



--- a/extern/re2/re2/unicode_groups.cc
+++ b/extern/re2/re2/unicode_groups.cc
--- a/extern/re2/re2/walker-inl.h
+++ b/extern/re2/re2/walker-inl.h
@ -89,7 +89,7 @@ template<typename T> class Regexp::Walker {

 private:
  // Walk state for the entire traversal.
-  std::stack<WalkState<T> >* stack_;
+  std::stack<WalkState<T>> stack_;
  bool stopped_early_;
  int max_visits_;

@ -119,7 +119,7 @@ template<typename T> T Regexp::Walker<T>::Copy(T arg) {

 // State about a single level in the traversal.
 template<typename T> struct WalkState {
-  WalkState<T>(Regexp* re, T parent)
+  WalkState(Regexp* re, T parent)
    : re(re),
      n(-1),
      parent_arg(parent),
@ -134,24 +134,22 @@ template<typename T> struct WalkState {
 };

 template<typename T> Regexp::Walker<T>::Walker() {
-  stack_ = new std::stack<WalkState<T> >;
  stopped_early_ = false;
 }

 template<typename T> Regexp::Walker<T>::~Walker() {
  Reset();
-  delete stack_;
 }

 // Clears the stack.  Should never be necessary, since
 // Walk always enters and exits with an empty stack.
 // Logs DFATAL if stack is not already clear.
 template<typename T> void Regexp::Walker<T>::Reset() {
-  if (stack_ && stack_->size() > 0) {
+  if (!stack_.empty()) {
    LOG(DFATAL) << "Stack not empty.";
-    while (stack_->size() > 0) {
-      delete stack_->top().child_args;
-      stack_->pop();
+    while (!stack_.empty()) {
+      delete[] stack_.top().child_args;
+      stack_.pop();
    }
  }
 }
@ -165,12 +163,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
    return top_arg;
  }

-  stack_->push(WalkState<T>(re, top_arg));
+  stack_.push(WalkState<T>(re, top_arg));

  WalkState<T>* s;
  for (;;) {
    T t;
-    s = &stack_->top();
+    s = &stack_.top();
    Regexp* re = s->re;
    switch (s->n) {
      case -1: {
@ -201,7 +199,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
              s->n++;
            } else {
-              stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
+              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
            }
            continue;
          }
@ -214,12 +212,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
      }
    }

-    // We've finished stack_->top().
+    // We've finished stack_.top().
    // Update next guy down.
-    stack_->pop();
-    if (stack_->size() == 0)
+    stack_.pop();
+    if (stack_.empty())
      return t;
-    s = &stack_->top();
+    s = &stack_.top();
    if (s->child_args != NULL)
      s->child_args[s->n] = t;
    else
--- a/extern/re2/re2_test.bzl
+++ b/extern/re2/re2_test.bzl
@ -1,12 +0,0 @@
-# Copyright 2009 The RE2 Authors.  All Rights Reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# Defines a Bazel macro that instantiates a native cc_test rule for an RE2 test.
-def re2_test(name, deps=[], size="medium"):
-  native.cc_test(
-      name=name,
-      srcs=["re2/testing/%s.cc" % (name)],
-      deps=[":test"] + deps,
-      size=size,
-  )
--- a/extern/re2/runtests
+++ b/extern/re2/runtests
--- a/extern/re2/testinstall.cc
+++ b/extern/re2/testinstall.cc
@ -2,23 +2,26 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-#include <re2/re2.h>
-#include <re2/filtered_re2.h>
 #include <stdio.h>
+#include <re2/filtered_re2.h>
+#include <re2/re2.h>

-int main(void) {
-	re2::FilteredRE2 f;
-	int id;
-	f.Add("a.*b.*c", RE2::DefaultOptions, &id);
-	std::vector<std::string> v;
-	f.Compile(&v);
-	std::vector<int> ids;
-	f.FirstMatch("abbccc", ids);
+int main() {  // Install smoke test: prints PASS and returns 0, or prints FAIL and returns 2.
+  re2::FilteredRE2 f;
+  int id;
+  f.Add("a.*b.*c", RE2::DefaultOptions, &id);
+  std::vector<std::string> v;
+  f.Compile(&v);
+  std::vector<int> ids;
+  f.FirstMatch("abbccc", ids);  // Result is not checked; only the two matches below decide PASS/FAIL.

-	if(RE2::FullMatch("axbyc", "a.*b.*c")) {
-		printf("PASS\n");
-		return 0;
-	}
-	printf("FAIL\n");
-	return 2;
+  int n;  // Left uninitialized; compared below only after PartialMatch() has returned true.
+  if (RE2::FullMatch("axbyc", "a.*b.*c") &&
+      RE2::PartialMatch("foo123bar", "(\\d+)", &n) && n == 123) {
+    printf("PASS\n");
+    return 0;
+  }
+
+  printf("FAIL\n");
+  return 2;
 }
--- a/extern/re2/util/benchmark.cc
+++ b/extern/re2/util/benchmark.cc
@ -7,155 +7,125 @@
 #include <stdlib.h>
 #include <algorithm>
 #include <chrono>
-#include <thread>

-#include "util/util.h"
-#include "util/flags.h"
 #include "util/benchmark.h"
+#include "util/flags.h"
 #include "re2/re2.h"

-DEFINE_string(test_tmpdir, "/var/tmp", "temp directory");
-
 #ifdef _WIN32
 #define snprintf _snprintf
 #endif

-using testing::Benchmark;
+using ::testing::Benchmark;

 static Benchmark* benchmarks[10000];
 static int nbenchmarks;

 void Benchmark::Register() {
-	benchmarks[nbenchmarks] = this;
-	if(lo < 1)
-		lo = 1;
-	if(hi < lo)
-		hi = lo;
-	nbenchmarks++;
+  lo_ = std::max(1, lo_);  // Clamp the range so that 1 <= lo_ ...
+  hi_ = std::max(lo_, hi_);  // ... and lo_ <= hi_.
+  benchmarks[nbenchmarks++] = this;  // Append to the global table; its 10000-slot capacity is not checked.
 }

 static int64_t nsec() {
-	return std::chrono::duration_cast<std::chrono::nanoseconds>(
-		std::chrono::steady_clock::now().time_since_epoch()).count();
+  return std::chrono::duration_cast<std::chrono::nanoseconds>(  // Current steady_clock reading, in nanoseconds.
+             std::chrono::steady_clock::now().time_since_epoch())
+      .count();
 }

-static int64_t bytes;
-static int64_t ns;
 static int64_t t0;
+static int64_t ns;  // Nanoseconds accumulated by StopBenchmarkTiming() during the current run.
+static int64_t bytes;  // Last value given to SetBenchmarkBytesProcessed(); RunBench() derives MB/s from it.
 static int64_t items;

-void SetBenchmarkBytesProcessed(int64_t x) {
-	bytes = x;
+void StartBenchmarkTiming() {  // Starts the clock unless it is already running.
+  if (t0 == 0) {  // t0 == 0 is the "clock stopped" state.
+    t0 = nsec();
+  }
 }

 void StopBenchmarkTiming() {
-	if(t0 != 0)
-		ns += nsec() - t0;
-	t0 = 0;
+  if (t0 != 0) {  // Only acts while the clock is running, so repeated calls are harmless.
+    ns += nsec() - t0;  // Bank the time elapsed since the clock was started.
+    t0 = 0;  // Mark the clock as stopped.
+  }
 }

-void StartBenchmarkTiming() {
-	if(t0 == 0)
-		t0 = nsec();
-}
+void SetBenchmarkBytesProcessed(int64_t b) { bytes = b; }  // RunBench() turns this into the MB/s column.

-void SetBenchmarkItemsProcessed(int n) {
-	items = n;
-}
+void SetBenchmarkItemsProcessed(int64_t i) { items = i; }  // Stored only; RunBench() does not report it.

-void BenchmarkMemoryUsage() {
-	// TODO(rsc): Implement.
-}
-
-int NumCPUs() {
-	return static_cast<int>(std::thread::hardware_concurrency());
-}
-
-static void runN(Benchmark *b, int n, int siz) {
-	bytes = 0;
-	items = 0;
-	ns = 0;
-	t0 = nsec();
-	if(b->fn)
-		b->fn(n);
-	else if(b->fnr)
-		b->fnr(n, siz);
-	else {
-		fprintf(stderr, "%s: missing function\n", b->name);
-		abort();
-	}
-	if(t0 != 0)
-		ns += nsec() - t0;
+static void RunFunc(Benchmark* b, int iters, int arg) {  // One timed run of b; elapsed time ends up in ns.
+  t0 = nsec();  // The clock is already running when the benchmark function is entered.
+  ns = 0;
+  bytes = 0;
+  items = 0;
+  b->func()(iters, arg);
+  StopBenchmarkTiming();  // Banks a still-running interval; does nothing if the clock was already stopped.
 }

 static int round(int n) {
-	int base = 1;
-	
-	while(base*10 < n)
-		base *= 10;
-	if(n < 2*base)
-		return 2*base;
-	if(n < 5*base)
-		return 5*base;
-	return 10*base;
+  int base = 1;  // Result is 2, 5 or 10 times a power of ten and is never below n.
+  while (base * 10 < n) base *= 10;  // NOTE: int arithmetic; with 32-bit int, base * 10 overflows once n exceeds 1e9.
+  if (n < 2 * base) return 2 * base;  // Strict comparison: n == 2 * base moves up to 5 * base.
+  if (n < 5 * base) return 5 * base;
+  return 10 * base;
 }

-void RunBench(Benchmark* b, int nthread, int siz) {
-	int n, last;
+// Runs benchmark `b` with argument `arg` and prints one result line.
+//
+// The benchmark is first run for a single iteration (in case it is
+// expensive) and then rerun with growing iteration counts until a run takes
+// at least one second or uses 1e9 iterations.  The line written to stdout
+// holds the name (suffixed with "/<arg>" when the benchmark takes an
+// argument), the final iteration count, ns/op and, when bytes were
+// reported, MB/s.
+static void RunBench(Benchmark* b, int arg) {
+  int iters, last;

-	// TODO(rsc): Threaded benchmarks.
-	if(nthread != 1)
-		return;
-	
-	// run once in case it's expensive
-	n = 1;
-	runN(b, n, siz);
-	while(ns < (int)1e9 && n < (int)1e9) {
-		last = n;
-		if(ns/n == 0)
-			n = (int)1e9;
-		else
-			n = (int)1e9 / static_cast<int>(ns/n);
-		
-		n = std::max(last+1, std::min(n+n/2, 100*last));
-		n = round(n);
-		runN(b, n, siz);
-	}
-	
-	char mb[100];
-	char suf[100];
-	mb[0] = '\0';
-	suf[0] = '\0';
-	if(ns > 0 && bytes > 0)
-		snprintf(mb, sizeof mb, "\t%7.2f MB/s", ((double)bytes/1e6)/((double)ns/1e9));
-	if(b->fnr || b->lo != b->hi) {
-		if(siz >= (1<<20))
-			snprintf(suf, sizeof suf, "/%dM", siz/(1<<20));
-		else if(siz >= (1<<10))
-			snprintf(suf, sizeof suf, "/%dK", siz/(1<<10));
-		else
-			snprintf(suf, sizeof suf, "/%d", siz);
-	}
-	printf("%s%s\t%8lld\t%10lld ns/op%s\n", b->name, suf, (long long)n, (long long)ns/n, mb);
-	fflush(stdout);
+  // Run once just in case it's expensive.
+  iters = 1;
+  RunFunc(b, iters, arg);
+  while (ns < (int)1e9 && iters < (int)1e9) {
+    last = iters;
+    // Estimate how many iterations would fill one second at the rate just
+    // measured.  The arithmetic is done in 64 bits: in int, `100 * last`
+    // overflows as soon as last exceeds about 21 million, which a benchmark
+    // costing a few ns/op reaches while ns is still below one second.
+    int64_t want;
+    if (ns / iters == 0) {
+      want = 1000000000;
+    } else {
+      want = 1000000000 / (ns / iters);
+    }
+    // Grow by 1.5x, but by at most 100x and by at least one iteration.
+    want = std::max<int64_t>(
+        last + 1, std::min<int64_t>(want + want / 2, int64_t{100} * last));
+    // Cap at 1e9 before rounding: round() works in int and its `base * 10`
+    // overflows for larger inputs.  Reaching 1e9 also ends this loop.
+    want = std::min<int64_t>(want, 1000000000);
+    iters = round(static_cast<int>(want));
+    RunFunc(b, iters, arg);
+  }
+
+  char mb[100];
+  char suf[100];
+  mb[0] = '\0';
+  suf[0] = '\0';
+  // Throughput column, only when the benchmark reported bytes.
+  if (ns > 0 && bytes > 0)
+    snprintf(mb, sizeof mb, "\t%7.2f MB/s",
+             ((double)bytes / 1e6) / ((double)ns / 1e9));
+  // Name suffix: the argument, abbreviated in units of 2^20 (M) or 2^10 (K).
+  if (b->has_arg()) {
+    if (arg >= (1 << 20)) {
+      snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20));
+    } else if (arg >= (1 << 10)) {
+      snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10));
+    } else {
+      snprintf(suf, sizeof suf, "/%d", arg);
+    }
+  }
+  printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters,
+         (long long)ns / iters, mb);
+  fflush(stdout);
 }

-static int match(const char* name, int argc, const char** argv) {
-	if(argc == 1)
-		return 1;
-	for(int i = 1; i < argc; i++)
-		if(RE2::PartialMatch(name, argv[i]))
-			return 1;
-	return 0;
+static bool WantBench(const char* name, int argc, const char** argv) {  // True if benchmark `name` should run.
+  if (argc == 1) return true;  // No command-line arguments: run every benchmark.
+  for (int i = 1; i < argc; i++) {
+    if (RE2::PartialMatch(name, argv[i]))  // Each argument is used as a regexp matched against the name.
+      return true;
+  }
+  return false;
 }

 int main(int argc, const char** argv) {
-	for(int i = 0; i < nbenchmarks; i++) {
-		Benchmark* b = benchmarks[i];
-		if(match(b->name, argc, argv))
-			for(int j = b->threadlo; j <= b->threadhi; j++)
-				for(int k = std::max(b->lo, 1); k <= std::max(b->hi, 1); k<<=1)
-					RunBench(b, j, k);
-	}
+  for (int i = 0; i < nbenchmarks; i++) {  // Visit benchmarks in registration order.
+    Benchmark* b = benchmarks[i];
+    if (!WantBench(b->name(), argc, argv))
+      continue;
+    for (int arg = b->lo(); arg <= b->hi(); arg <<= 1)  // Doubling sweep. NOTE(review): overflows int if hi() >= 2^30 - confirm no caller does that.
+      RunBench(b, arg);
+  }
 }
-
--- a/extern/re2/util/benchmark.h
+++ b/extern/re2/util/benchmark.h
@ -6,38 +6,151 @@
 #define UTIL_BENCHMARK_H_

 #include <stdint.h>
+#include <functional>
+
+#include "util/logging.h"
+#include "util/util.h"
+
+// Globals for the old benchmark API.
+void StartBenchmarkTiming();
+void StopBenchmarkTiming();
+void SetBenchmarkBytesProcessed(int64_t b);
+void SetBenchmarkItemsProcessed(int64_t i);
+
+namespace benchmark {
+
+// The new benchmark API implemented as a layer over the old benchmark API.
+// (Please refer to https://github.com/google/benchmark for documentation.)
+class State {  // Handle passed to a benchmark function: iteration count plus an optional argument.
+ private:
+  class Iterator {
+   public:
+    // Benchmark code looks like this:
+    //
+    //   for (auto _ : state) {
+    //     // ...
+    //   }
+    //
+    // We try to avoid compiler warnings about such variables being unused.
+    struct ATTRIBUTE_UNUSED Value {};
+
+    explicit Iterator(int64_t iters) : iters_(iters) {}  // iters: iterations still to run.
+
+    bool operator!=(const Iterator& that) const {  // Loop test; also stops the clock when the loop ends.
+      if (iters_ != that.iters_) {
+        return true;
+      } else {
+        // We are about to stop the loop, so stop timing.
+        StopBenchmarkTiming();
+        return false;
+      }
+    }
+
+    Value operator*() const {
+      return Value();
+    }
+
+    Iterator& operator++() {
+      --iters_;  // Counts down toward 0, the count held by end().
+      return *this;
+    }
+
+   private:
+    int64_t iters_;
+  };
+
+ public:
+  explicit State(int64_t iters)  // Run without an argument; range() would fail its CHECK.
+      : iters_(iters), arg_(0), has_arg_(false) {}
+
+  State(int64_t iters, int64_t arg)  // Run with the single argument `arg`.
+      : iters_(iters), arg_(arg), has_arg_(true) {}
+
+  Iterator begin() {
+    // We are about to start the loop, so start timing.
+    StartBenchmarkTiming();
+    return Iterator(iters_);
+  }
+
+  Iterator end() {
+    return Iterator(0);  // Sentinel: a begin() iterator compares equal once it has counted down to 0.
+  }
+
+  void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); }
+  void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); }
+  int64_t iterations() const { return iters_; }
+  // Pretend to support multiple arguments.
+  int64_t range(int pos) const { CHECK(has_arg_); return arg_; }  // pos is ignored.
+
+ private:
+  int64_t iters_;
+  int64_t arg_;
+  bool has_arg_;
+
+  State(const State&) = delete;
+  State& operator=(const State&) = delete;
+};
+
+}  // namespace benchmark

 namespace testing {
-struct Benchmark {
-  const char* name;
-  void (*fn)(int);
-  void (*fnr)(int, int);
-  int lo;
-  int hi;
-  int threadlo;
-  int threadhi;

+class Benchmark {  // A registered benchmark: its name, adapted function and argument range.
+ public:
+  Benchmark(const char* name, void (*func)(benchmark::State&))  // Benchmark that takes no argument.
+      : name_(name),
+        func_([func](int iters, int arg) {  // Adapter to the (iters, arg) form; arg is not used here.
+          benchmark::State state(iters);
+          func(state);
+        }),
+        lo_(0),
+        hi_(0),
+        has_arg_(false) {
+    Register();
+  }
+
+  Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi)  // Benchmark with an argument range lo..hi.
+      : name_(name),
+        func_([func](int iters, int arg) {  // Adapter that hands arg to the State.
+          benchmark::State state(iters, arg);
+          func(state);
+        }),
+        lo_(lo),
+        hi_(hi),
+        has_arg_(true) {
+    Register();
+  }
+
+  // Pretend to support multiple threads.
+  Benchmark* ThreadRange(int lo, int hi) { return this; }  // Both arguments are ignored.
+
+  const char* name() const { return name_; }
+  const std::function<void(int, int)>& func() const { return func_; }
+  int lo() const { return lo_; }
+  int hi() const { return hi_; }
+  bool has_arg() const { return has_arg_; }
+
+ private:
  void Register();
-  Benchmark(const char* name, void (*f)(int)) { Clear(name); fn = f; Register(); }
-  Benchmark(const char* name, void (*f)(int, int), int l, int h) { Clear(name); fnr = f; lo = l; hi = h; Register(); }
-  void Clear(const char* n) { name = n; fn = 0; fnr = 0; lo = 0; hi = 0; threadlo = 0; threadhi = 0; }
-  Benchmark* ThreadRange(int lo, int hi) { threadlo = lo; threadhi = hi; return this; }
+
+  const char* name_;  // Stored as a pointer, not copied.
+  std::function<void(int, int)> func_;  // Called as func_(iters, arg).
+  int lo_;
+  int hi_;
+  bool has_arg_;
+
+  Benchmark(const Benchmark&) = delete;
+  Benchmark& operator=(const Benchmark&) = delete;
 };
+
 }  // namespace testing

-void SetBenchmarkBytesProcessed(int64_t);
-void StopBenchmarkTiming();
-void StartBenchmarkTiming();
-void BenchmarkMemoryUsage();
-void SetBenchmarkItemsProcessed(int);
+#define BENCHMARK(f)                     \
+  ::testing::Benchmark* _benchmark_##f = \
+      (new ::testing::Benchmark(#f, f))  // Heap object, never deleted; the constructor registers it.

-int NumCPUs();
-
-#define BENCHMARK(f) \
-	::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f))
-
-#define BENCHMARK_RANGE(f, lo, hi) \
-	::testing::Benchmark* _benchmark_##f = \
-	(new ::testing::Benchmark(#f, f, lo, hi))
+#define BENCHMARK_RANGE(f, lo, hi)       \
+  ::testing::Benchmark* _benchmark_##f = \
+      (new ::testing::Benchmark(#f, f, lo, hi))  // As BENCHMARK, using the constructor that takes a lo..hi range.

 #endif  // UTIL_BENCHMARK_H_
--- a/extern/re2/util/flags.h
+++ b/extern/re2/util/flags.h
@ -10,20 +10,17 @@
 // If you want to do that, see
 // https://gflags.github.io/gflags/

-#include <stdint.h>
-
-#define DEFINE_flag(type, name, deflt, desc) \
+#define DEFINE_FLAG(type, name, deflt, desc) \
 	namespace re2 { type FLAGS_##name = deflt; }

-#define DECLARE_flag(type, name) \
+#define DECLARE_FLAG(type, name) \
 	namespace re2 { extern type FLAGS_##name; }

-#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc)
-#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32_t, name, deflt, desc)
-#define DEFINE_string(name, deflt, desc) DEFINE_flag(std::string, name, deflt, desc)
-
-#define DECLARE_bool(name) DECLARE_flag(bool, name)
-#define DECLARE_int32(name) DECLARE_flag(int32_t, name)
-#define DECLARE_string(name) DECLARE_flag(std::string, name)
+namespace re2 {
+template <typename T>
+T GetFlag(const T& flag) {  // Returns a copy of the flag's current value.
+  return flag;
+}
+}  // namespace re2

 #endif  // UTIL_FLAGS_H_
--- a/extern/re2/util/malloc_counter.h
+++ b/extern/re2/util/malloc_counter.h
@ -0,0 +1,19 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MALLOC_COUNTER_H_
+#define UTIL_MALLOC_COUNTER_H_
+
+namespace testing {
+class MallocCounter {  // Stub: both growth queries always return 0.
+ public:
+  MallocCounter(int x) {}  // x is ignored.
+  static const int THIS_THREAD_ONLY = 0;
+  long long HeapGrowth() { return 0; }
+  long long PeakHeapGrowth() { return 0; }
+  void Reset() {}  // Nothing to reset.
+};
+}  // namespace testing
+
+#endif  // UTIL_MALLOC_COUNTER_H_
--- a/extern/re2/util/mutex.h
+++ b/extern/re2/util/mutex.h
@ -10,7 +10,13 @@
 * You should assume the locks are *not* re-entrant.
 */

-#if !defined(_WIN32)
+#ifdef _WIN32
+// Requires Windows Vista or Windows Server 2008 at minimum.
+#include <windows.h>
+#if defined(WINVER) && WINVER >= 0x0600
+#define MUTEX_IS_WIN32_SRWLOCK
+#endif
+#else
 #ifndef _POSIX_C_SOURCE
 #define _POSIX_C_SOURCE 200809L
 #endif
@ -20,7 +26,9 @@
 #endif
 #endif

-#if defined(MUTEX_IS_PTHREAD_RWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+typedef SRWLOCK MutexType;
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
 #include <pthread.h>
 #include <stdlib.h>
 typedef pthread_rwlock_t MutexType;
@ -56,7 +64,16 @@ class Mutex {
  Mutex& operator=(const Mutex&) = delete;
 };

-#if defined(MUTEX_IS_PTHREAD_RWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+
+Mutex::Mutex()             { InitializeSRWLock(&mutex_); }
+Mutex::~Mutex()            { }  // Empty: the SRW lock API has no destroy call.
+void Mutex::Lock()         { AcquireSRWLockExclusive(&mutex_); }  // Writer (exclusive) mode.
+void Mutex::Unlock()       { ReleaseSRWLockExclusive(&mutex_); }
+void Mutex::ReaderLock()   { AcquireSRWLockShared(&mutex_); }  // Reader (shared) mode.
+void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)

 #define SAFE_PTHREAD(fncall)    \
  do {                          \
--- a/extern/re2/util/pcre.cc
+++ b/extern/re2/util/pcre.cc
@ -22,9 +22,7 @@
 #include "util/strutil.h"

 // Silence warnings about the wacky formatting in the operator() functions.
-// Note that we test for Clang first because it defines __GNUC__ as well.
-#if defined(__clang__)
-#elif defined(__GNUC__) && __GNUC__ >= 6
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
 #pragma GCC diagnostic ignored "-Wmisleading-indentation"
 #endif

@ -35,9 +33,10 @@
 // not exceed main thread stacks.  Note that other threads
 // often have smaller stacks, and therefore tightening
 // regexp_stack_limit may frequently be necessary.
-DEFINE_int32(regexp_stack_limit, 256<<10, "default PCRE stack limit (bytes)");
-DEFINE_int32(regexp_match_limit, 1000000,
-             "default PCRE match limit (function calls)");
+DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
+            "default PCRE stack limit (bytes)");
+DEFINE_FLAG(int, regexp_match_limit, 1000000,
+            "default PCRE match limit (function calls)");

 #ifndef USEPCRE

@ -523,12 +522,12 @@ int PCRE::TryMatch(const StringPiece& text,

  int match_limit = match_limit_;
  if (match_limit <= 0) {
-    match_limit = FLAGS_regexp_match_limit;
+    match_limit = GetFlag(FLAGS_regexp_match_limit);
  }

  int stack_limit = stack_limit_;
  if (stack_limit <= 0) {
-    stack_limit = FLAGS_regexp_stack_limit;
+    stack_limit = GetFlag(FLAGS_regexp_stack_limit);
  }

  pcre_extra extra = { 0 };
@ -977,32 +976,7 @@ static bool parse_double_float(const char* str, size_t n, bool isfloat,
  } else {
    r = strtod(buf, &end);
  }
-  if (end != buf + n) {
-#ifdef _WIN32
-    // Microsoft's strtod() doesn't handle inf and nan, so we have to
-    // handle it explicitly.  Speed is not important here because this
-    // code is only called in unit tests.
-    bool pos = true;
-    const char* i = buf;
-    if ('-' == *i) {
-      pos = false;
-      ++i;
-    } else if ('+' == *i) {
-      ++i;
-    }
-    if (0 == _stricmp(i, "inf") || 0 == _stricmp(i, "infinity")) {
-      r = std::numeric_limits<double>::infinity();
-      if (!pos)
-        r = -r;
-    } else if (0 == _stricmp(i, "nan")) {
-      r = std::numeric_limits<double>::quiet_NaN();
-    } else {
-      return false;
-    }
-#else
-    return false;   // Leftover junk
-#endif
-  }
+  if (end != buf + n) return false;   // Leftover junk
  if (errno) return false;
  if (dest == NULL) return true;
  if (isfloat) {
--- a/extern/re2/util/pcre.h
+++ b/extern/re2/util/pcre.h
@ -555,7 +555,7 @@ class PCRE_Options {
 // Hex/Octal/Binary?

 // Special class for parsing into objects that define a ParseFrom() method
-template <class T>
+template <typename T>
 class _PCRE_MatchObject {
 public:
  static inline bool Parse(const char* str, size_t n, void* dest) {
@ -600,9 +600,9 @@ class PCRE::Arg {
 #undef MAKE_PARSER

  // Generic constructor
-  template <class T> Arg(T*, Parser parser);
+  template <typename T> Arg(T*, Parser parser);
  // Generic constructor template
-  template <class T> Arg(T* p)
+  template <typename T> Arg(T* p)
    : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
  }

--- a/Show More
+++ b/Show More