should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions
--- a/external/duckdb/third_party/re2/AUTHORS
+++ b/external/duckdb/third_party/re2/AUTHORS
@@ -0,0 +1,13 @@
+# This is the official list of RE2 authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+#	Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Google Inc.
+Samsung Electronics
+Stefano Rivera <stefano.rivera@gmail.com>
--- a/external/duckdb/third_party/re2/CMakeLists.txt
+++ b/external/duckdb/third_party/re2/CMakeLists.txt
@@ -0,0 +1,104 @@
+# Copyright 2015 The RE2 Authors.  All Rights Reserved. Use of this source code
+# is governed by a BSD-style license that can be found in the LICENSE file.
+
+# Require at least CMake 3.5 and request NEW policy behaviour up to 3.29.
+cmake_minimum_required(VERSION 3.5...3.29)
+
+# Explicitly select NEW behaviour for the policies this file relies on,
+# skipping any that the running CMake does not know about.
+foreach(re2_policy CMP0048 CMP0063)
+  if(POLICY ${re2_policy})
+    cmake_policy(SET ${re2_policy} NEW)
+  endif()
+endforeach()
+
+project(RE2 CXX)
+
+# Default symbol visibility for targets created below.
+set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+
+include(CTest)
+
+# BUILD_TESTING cannot be toggled per subproject, so RE2 exposes its own
+# switch with the same meaning. It defaults to OFF.
+option(RE2_BUILD_TESTING "enable testing for RE2" OFF)
+
+# Start with no extra link libraries.
+unset(EXTRA_TARGET_LINK_LIBRARIES)
+
+# Compiler-specific options, applied to every target in this directory.
+if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+  if(MSVC_VERSION LESS 1900)
+    message(FATAL_ERROR "you need Visual Studio 2015 or later")
+  endif()
+  if(BUILD_SHARED_LIBS)
+    # See http://www.kitware.com/blog/home/post/939 for details.
+    # The cmake_minimum_required(VERSION 3.5...3.29) call at the top of this
+    # file already guarantees a CMake that supports this variable. Calling
+    # cmake_minimum_required() again here with an older version would re-run
+    # policy selection at that older version, so it is deliberately omitted.
+    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+  endif()
+  # CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX, so we
+  # disable various warnings that aren't particularly helpful.
+  add_compile_options(/wd4100
+                      /wd4201
+                      /wd4456
+                      /wd4457
+                      /wd4702
+                      /wd4815)
+  # Without a byte order mark (BOM), Visual Studio assumes that the source file
+  # is encoded using the current user code page, so we specify UTF-8.
+  add_compile_options(/utf-8)
+elseif(CYGWIN OR MINGW)
+  # See https://stackoverflow.com/questions/38139631 for details.
+  add_compile_options(-std=gnu++11)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+  add_compile_options(-std=c++11)
+endif()
+
+# Preprocessor definitions for every target in this directory.
+add_definitions(-DRE2_ON_VALGRIND)
+
+if(WIN32)
+  # Windows API configuration macros, followed by the macros that silence the
+  # CRT/SCL "secure" deprecation warnings.
+  add_definitions(-DUNICODE
+                  -D_UNICODE
+                  -DSTRICT
+                  -DNOMINMAX
+                  -D_CRT_SECURE_NO_WARNINGS
+                  -D_SCL_SECURE_NO_WARNINGS)
+elseif(UNIX)
+  # Nothing is added here. The -pthread compile option and the matching
+  # EXTRA_TARGET_LINK_LIBRARIES entry are commented out in this copy:
+  #   add_compile_options(-pthread)
+  #   list(APPEND EXTRA_TARGET_LINK_LIBRARIES -pthread)
+endif()
+
+# Translation units compiled into the duckdb_re2 static library. Paths are
+# relative to this directory.
+set(RE2_SOURCES
+    re2/bitmap256.cc
+    re2/compile.cc
+    re2/bitstate.cc
+    re2/dfa.cc
+    re2/filtered_re2.cc
+    re2/mimics_pcre.cc
+    re2/nfa.cc
+    re2/onepass.cc
+    re2/parse.cc
+    re2/perl_groups.cc
+    re2/prefilter.cc
+    re2/prefilter_tree.cc
+    re2/prog.cc
+    re2/re2.cc
+    re2/regexp.cc
+    re2/set.cc
+    re2/simplify.cc
+    re2/stringpiece.cc
+    re2/tostring.cc
+    re2/unicode_casefold.cc
+    re2/unicode_groups.cc
+    util/rune.cc
+    util/strutil.cc)
+
+# Always built as a static archive, regardless of BUILD_SHARED_LIBS.
+add_library(duckdb_re2 STATIC ${RE2_SOURCES})
+
+# Expose this directory as an include path to the library and to anything
+# that links against it from the build tree (no install-time include path).
+target_include_directories(duckdb_re2
+                           PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
+
+# Install the library as part of the export set named by DUCKDB_EXPORT_SET.
+# That variable and INSTALL_LIB_DIR are not set in this file.
+install(
+  TARGETS duckdb_re2
+  EXPORT "${DUCKDB_EXPORT_SET}"
+  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
+  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
+
+# NOTE(review): disable_target_warnings() is not defined in this file;
+# presumably the enclosing DuckDB build provides it. Confirm before building
+# this directory standalone.
+disable_target_warnings(duckdb_re2)
--- a/external/duckdb/third_party/re2/LICENSE
+++ b/external/duckdb/third_party/re2/LICENSE
@@ -0,0 +1,27 @@
+// Copyright (c) 2009 The RE2 Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/external/duckdb/third_party/re2/re2/bitmap256.cc
+++ b/external/duckdb/third_party/re2/re2/bitmap256.cc
@@ -0,0 +1,44 @@
+// Copyright 2023 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/bitmap256.h"
+
+#include <stdint.h>
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace duckdb_re2 {
+
+// Returns the index of the lowest set bit at position >= c,
+// or -1 when no such bit is set.
+int Bitmap256::FindNextSetBit(int c) const {
+  DCHECK_GE(c, 0);
+  DCHECK_LE(c, 255);
+
+  // Start with the word that holds bit c, ignoring every bit below c.
+  const int first = c / 64;
+  const uint64_t masked = words_[first] & (~uint64_t{0} << (c % 64));
+  if (masked != 0)
+    return (first * 64) + FindLSBSet(masked);
+
+  // Then scan the remaining words in increasing order.
+  for (int w = first + 1; w < 4; w++) {
+    if (words_[w] != 0)
+      return (w * 64) + FindLSBSet(words_[w]);
+  }
+  return -1;
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/bitmap256.h
+++ b/external/duckdb/third_party/re2/re2/bitmap256.h
@@ -0,0 +1,86 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_BITMAP256_H_
+#define RE2_BITMAP256_H_
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <stdint.h>
+#include <string.h>
+
+#include "util/logging.h"
+
+namespace duckdb_re2 {
+
+// A fixed-size set of 256 bits, stored as four 64-bit words.
+// Bit c lives in words_[c / 64] at bit position c % 64.
+class Bitmap256 {
+ public:
+  // Constructs a bitmap with every bit cleared.
+  Bitmap256() {
+    Clear();
+  }
+
+  // Clears all of the bits.
+  void Clear() {
+    memset(words_, 0, sizeof words_);
+  }
+
+  // Tests the bit with index c.
+  // c must be in [0, 255]; this is checked only via DCHECK.
+  bool Test(int c) const {
+    DCHECK_GE(c, 0);
+    DCHECK_LE(c, 255);
+
+    return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
+  }
+
+  // Sets the bit with index c.
+  // c must be in [0, 255]; this is checked only via DCHECK.
+  void Set(int c) {
+    DCHECK_GE(c, 0);
+    DCHECK_LE(c, 255);
+
+    words_[c / 64] |= (uint64_t{1} << (c % 64));
+  }
+
+  // Finds the next non-zero bit with index >= c.
+  // Returns -1 if no such bit exists.
+  int FindNextSetBit(int c) const;
+
+ private:
+  // Finds the least significant non-zero bit in n.
+  // n must be non-zero (checked only via DCHECK).
+  static int FindLSBSet(uint64_t n) {
+    DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+    return __builtin_ctzll(n);
+#elif defined(_MSC_VER) && defined(_M_X64)
+    unsigned long c;
+    _BitScanForward64(&c, n);
+    return static_cast<int>(c);
+#elif defined(_MSC_VER) && defined(_M_IX86)
+    // No 64-bit scan here: try the low 32 bits first, then the high 32 bits.
+    unsigned long c;
+    if (static_cast<uint32_t>(n) != 0) {
+      _BitScanForward(&c, static_cast<uint32_t>(n));
+      return static_cast<int>(c);
+    } else {
+      _BitScanForward(&c, static_cast<uint32_t>(n >> 32));
+      return static_cast<int>(c) + 32;
+    }
+#else
+    // Portable fallback. Try left shifts of 32, 16, 8, 4, 2 and 1; whenever
+    // a shift leaves some bit set, keep the shifted value and subtract the
+    // shift from c. c ends up as the index of the lowest set bit.
+    int c = 63;
+    for (int shift = 1 << 5; shift != 0; shift >>= 1) {
+      uint64_t word = n << shift;
+      if (word != 0) {
+        n = word;
+        c -= shift;
+      }
+    }
+    return c;
+#endif
+  }
+
+  // Bit storage: words_[0] holds bits 0-63, words_[3] holds bits 192-255.
+  uint64_t words_[4];
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_BITMAP256_H_
--- a/external/duckdb/third_party/re2/re2/bitstate.cc
+++ b/external/duckdb/third_party/re2/re2/bitstate.cc
@@ -0,0 +1,385 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc, exhaustive_test.cc, tester.cc
+
+// Prog::SearchBitState is a regular expression search with submatch
+// tracking for small regular expressions and texts.  Similarly to
+// testing/backtrack.cc, it allocates a bitmap with (count of
+// lists) * (length of text) bits to make sure it never explores the
+// same (instruction list, character position) multiple times.  This
+// limits the search to run in time linear in the length of the text.
+//
+// Unlike testing/backtrack.cc, SearchBitState is not recursive
+// on the text.
+//
+// SearchBitState is a fast replacement for the NFA code on small
+// regexps and texts when SearchOnePass cannot be used.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits>
+#include <utility>
+
+#include "util/logging.h"
+#include "re2/pod_array.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace duckdb_re2 {
+
+// One entry on BitState's explicit stack.
+struct Job {
+  // Instruction id to run at p. BitState::Push() is also called with a
+  // negated id to record "restore this capture register to p".
+  int id;
+  // Run length encoding: BitState::Push() increments this instead of adding
+  // a new entry when the same id is pushed for the position p + rle + 1.
+  int rle;  // run length encoding
+  // Text position for the job, or the saved capture value when id < 0.
+  const char* p;
+};
+
+// State for one bit-state search of a Prog over a text.
+// Tracks visited (list ID, text position) pairs in a bitmap and keeps an
+// explicit stack of jobs instead of recursing.
+class BitState {
+ public:
+  // Creates a searcher for prog; Search() supplies the text later.
+  explicit BitState(Prog* prog);
+
+  // The usual Search prototype.
+  // Can only call Search once per BitState.
+  bool Search(const StringPiece& text, const StringPiece& context,
+              bool anchored, bool longest,
+              StringPiece* submatch, int nsubmatch);
+
+ private:
+  // Marks (list of id, p) as visited; returns false if it already was.
+  inline bool ShouldVisit(int id, const char* p);
+  // Pushes a job onto the stack, growing the stack when it is full.
+  void Push(int id, const char* p);
+  // Doubles the capacity of the job stack.
+  void GrowStack();
+  // Runs the search from instruction id at text position p.
+  bool TrySearch(int id, const char* p);
+
+  // Search parameters
+  Prog* prog_;              // program being run
+  StringPiece text_;        // text being searched
+  StringPiece context_;     // greater context of text being searched
+  bool anchored_;           // whether search is anchored at text.begin()
+  bool longest_;            // whether search wants leftmost-longest match
+  bool endmatch_;           // whether match must end at text.end()
+  StringPiece* submatch_;   // submatches to fill in
+  int nsubmatch_;           //   # of submatches to fill in
+
+  // Search state
+  static constexpr int kVisitedBits = 64;  // bits per visited_ word
+  PODArray<uint64_t> visited_;  // bitmap: (list ID, char*) pairs visited
+  PODArray<const char*> cap_;   // capture registers
+  PODArray<Job> job_;           // stack of text positions to explore
+  int njob_;                    // stack size
+
+  // Not copyable.
+  BitState(const BitState&) = delete;
+  BitState& operator=(const BitState&) = delete;
+};
+
+// Stores prog and zero-initializes the scalar search parameters.
+// The text, context and scratch arrays are default-constructed here and
+// filled in by Search().
+BitState::BitState(Prog* prog)
+  : prog_(prog),
+    anchored_(false),
+    longest_(false),
+    endmatch_(false),
+    submatch_(NULL),
+    nsubmatch_(0),
+    njob_(0) {
+}
+
+// id *must* be a list head, so that its list ID can be looked up.
+// Reports whether the (list ID, p) pair is being seen for the first time,
+// and marks it as visited so that later calls for the same pair say no.
+bool BitState::ShouldVisit(int id, const char* p) {
+  // Flat bit index: one row of text_.size()+1 positions per list ID.
+  const int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
+                static_cast<int>(p-text_.data());
+  uint64_t& word = visited_[n/kVisitedBits];
+  const uint64_t bit = uint64_t{1} << (n & (kVisitedBits-1));
+  if (word & bit)
+    return false;
+  word |= bit;
+  return true;
+}
+
+// Replaces the job stack with one of twice the capacity,
+// carrying over the njob_ entries currently in use.
+void BitState::GrowStack() {
+  PODArray<Job> bigger(2*job_.size());
+  memmove(bigger.data(), job_.data(), njob_*sizeof(Job));
+  job_ = std::move(bigger);
+}
+
+// Adds the job (id, p) to the stack, enlarging the stack first if it is full.
+// A job that continues the run on top of the stack is folded into that entry.
+void BitState::Push(int id, const char* p) {
+  if (njob_ >= job_.size()) {
+    GrowStack();
+    if (njob_ >= job_.size()) {
+      LOG(DFATAL) << "GrowStack() failed: "
+                  << "njob_ = " << njob_ << ", "
+                  << "job_.size() = " << job_.size();
+      return;
+    }
+  }
+
+  // A negative id undoes a Capture and must stay a separate entry.
+  // Otherwise, the same id at the position right after the top entry's run
+  // simply extends that run.
+  if (id >= 0 && njob_ > 0) {
+    Job& last = job_[njob_-1];
+    if (id == last.id &&
+        p == last.p + last.rle + 1 &&
+        last.rle < std::numeric_limits<int>::max()) {
+      ++last.rle;
+      return;
+    }
+  }
+
+  // Start a fresh entry with an empty run.
+  Job& slot = job_[njob_++];
+  slot.id = id;
+  slot.rle = 0;
+  slot.p = p;
+}
+
+// Try a search from instruction id0 in state p0.
+// Return whether it succeeded.
+//
+// Runs an explicit-stack search: jobs are popped from job_ and executed
+// until the stack is empty or a match ends the search early. Returns true
+// immediately on a match when no submatches were requested, when the first
+// match is wanted, or when the match reaches the end of the text; otherwise
+// returns whether any match was recorded. Returns false on an unexpected
+// opcode.
+bool BitState::TrySearch(int id0, const char* p0) {
+  bool matched = false;
+  const char* end = text_.data() + text_.size();
+  njob_ = 0;
+  // Push() no longer checks ShouldVisit(),
+  // so we must perform the check ourselves.
+  if (ShouldVisit(id0, p0))
+    Push(id0, p0);
+  while (njob_ > 0) {
+    // Pop job off stack.
+    --njob_;
+    int id = job_[njob_].id;
+    int& rle = job_[njob_].rle;
+    const char* p = job_[njob_].p;
+
+    if (id < 0) {
+      // Undo the Capture.
+      // Here p is the register value saved when the Capture was executed.
+      cap_[prog_->inst(-id)->cap()] = p;
+      continue;
+    }
+
+    if (rle > 0) {
+      // The job stands for a run of positions: take the last one (p + rle)
+      // now and leave the rest on the stack with a shorter run.
+      p += rle;
+      // Revivify job on stack.
+      --rle;
+      ++njob_;
+    }
+
+  Loop:
+    // Visit id, p.
+    Prog::Inst* ip = prog_->inst(id);
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
+        return false;
+
+      case kInstFail:
+        break;
+
+      case kInstAltMatch:
+        if (ip->greedy(prog_)) {
+          // out1 is the Match instruction.
+          id = ip->out1();
+          p = end;
+          goto Loop;
+        }
+        if (longest_) {
+          // ip must be non-greedy...
+          // out is the Match instruction.
+          id = ip->out();
+          p = end;
+          goto Loop;
+        }
+        goto Next;
+
+      case kInstByteRange: {
+        // c stays -1 when p is at the end of the text.
+        int c = -1;
+        if (p < end)
+          c = *p & 0xFF;
+        if (!ip->Matches(c))
+          goto Next;
+
+        if (ip->hint() != 0)
+          Push(id+ip->hint(), p);  // try the next when we're done
+        id = ip->out();
+        p++;
+        goto CheckAndLoop;
+      }
+
+      case kInstCapture:
+        if (!ip->last())
+          Push(id+1, p);  // try the next when we're done
+
+        // Capture registers outside cap_ are not recorded.
+        if (0 <= ip->cap() && ip->cap() < cap_.size()) {
+          // Capture p to register, but save old value first.
+          Push(-id, cap_[ip->cap()]);  // undo when we're done
+          cap_[ip->cap()] = p;
+        }
+
+        id = ip->out();
+        goto CheckAndLoop;
+
+      case kInstEmptyWidth:
+        // Give up on this instruction if it requires an empty-width flag
+        // that does not hold at p.
+        if (ip->empty() & ~Prog::EmptyFlags(context_, p))
+          goto Next;
+
+        if (!ip->last())
+          Push(id+1, p);  // try the next when we're done
+        id = ip->out();
+        goto CheckAndLoop;
+
+      case kInstNop:
+        if (!ip->last())
+          Push(id+1, p);  // try the next when we're done
+        id = ip->out();
+        // Falls into CheckAndLoop below.
+
+      CheckAndLoop:
+        // Sanity check: id is the head of its list, which must
+        // be the case if id-1 is the last of *its* list. :)
+        DCHECK(id == 0 || prog_->inst(id-1)->last());
+        if (ShouldVisit(id, p))
+          goto Loop;
+        break;
+
+      case kInstMatch: {
+        if (endmatch_ && p != end)
+          goto Next;
+
+        // We found a match.  If the caller doesn't care
+        // where the match is, no point going further.
+        if (nsubmatch_ == 0)
+          return true;
+
+        // Record best match so far.
+        // Only need to check end point, because this entire
+        // call is only considering one start position.
+        matched = true;
+        cap_[1] = p;
+        // Copy the registers out if this is the first match recorded, or
+        // (for longest match) if it ends later than the recorded one.
+        if (submatch_[0].data() == NULL ||
+            (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
+          for (int i = 0; i < nsubmatch_; i++)
+            submatch_[i] =
+                StringPiece(cap_[2 * i],
+                            static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
+        }
+
+        // If going for first match, we're done.
+        if (!longest_)
+          return true;
+
+        // If we used the entire text, no longer match is possible.
+        if (p == end)
+          return true;
+
+        // Otherwise, continue on in hope of a longer match.
+        // Note the absence of the ShouldVisit() check here
+        // due to execution remaining in the same list.
+      Next:
+        // Move on to the following instruction of the same list, if any.
+        if (!ip->last()) {
+          id++;
+          goto Loop;
+        }
+        break;
+      }
+    }
+  }
+  return matched;
+}
+
+// Runs prog_ over text, which is interpreted within context.
+// Fills submatch[0..nsubmatch) on success and returns whether a match exists.
+bool BitState::Search(const StringPiece& text, const StringPiece& context,
+                      bool anchored, bool longest,
+                      StringPiece* submatch, int nsubmatch) {
+  // Remember the inputs. A context without data means "the text itself".
+  text_ = text;
+  context_ = (context.data() == NULL) ? text : context;
+
+  // An anchored program cannot match unless text touches the corresponding
+  // edge of the context.
+  if ((prog_->anchor_start() && BeginPtr(context_) != BeginPtr(text)) ||
+      (prog_->anchor_end() && EndPtr(context_) != EndPtr(text))) {
+    return false;
+  }
+
+  anchored_ = anchored || prog_->anchor_start();
+  longest_ = longest || prog_->anchor_end();
+  endmatch_ = prog_->anchor_end();
+  submatch_ = submatch;
+  nsubmatch_ = nsubmatch;
+  for (int i = 0; i < nsubmatch_; i++) {
+    submatch_[i] = StringPiece();
+  }
+
+  // Visited bitmap: one bit per (list, text position) pair, rounded up to
+  // whole words and zeroed.
+  const int nbits = prog_->list_count() * static_cast<int>(text.size()+1);
+  const int nwords = (nbits + kVisitedBits-1) / kVisitedBits;
+  visited_ = PODArray<uint64_t>(nwords);
+  memset(visited_.data(), 0, nwords*sizeof visited_[0]);
+
+  // Capture registers: two per submatch, and never fewer than two.
+  const int ncap = (2*nsubmatch < 2) ? 2 : 2*nsubmatch;
+  cap_ = PODArray<const char*>(ncap);
+  memset(cap_.data(), 0, ncap*sizeof cap_[0]);
+
+  // With sizeof(Job) == 16 this initial stack is exactly 1KiB.
+  job_ = PODArray<Job>(64);
+
+  // An anchored search has a single candidate start: text.begin().
+  if (anchored_) {
+    cap_[0] = text.data();
+    return TrySearch(prog_->start(), text.data());
+  }
+
+  // Unanchored: try every start position, including the empty string at
+  // the very end of the text (hence <=, not <). visited_ is not cleared
+  // between attempts, so no (list, position) pair is explored twice and
+  // the total work stays linear in the text size.
+  const char* const etext = text.data() + text.size();
+  const char* p = text.data();
+  while (p <= etext) {
+    // Let prefix acceleration (e.g. memchr) skip ahead when available.
+    if (p < etext && prog_->can_prefix_accel()) {
+      p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
+      if (p == NULL)
+        p = etext;
+    }
+
+    cap_[0] = p;
+    if (TrySearch(prog_->start(), p)) {
+      // The match must be leftmost, so the first success ends the search.
+      return true;
+    }
+
+    // Incrementing a null pointer is undefined behavior; stop instead.
+    if (p == NULL)
+      break;
+    p++;
+  }
+  return false;
+}
+
+// Entry point for the bit-state search engine.
+// A full match is implemented as an anchored longest match whose end must
+// coincide with the end of text.
+bool Prog::SearchBitState(const StringPiece& text,
+                          const StringPiece& context,
+                          Anchor anchor,
+                          MatchKind kind,
+                          StringPiece* match,
+                          int nmatch) {
+  const bool full_match = (kind == kFullMatch);
+
+  // The full-match check below reads match[0], so make sure it exists.
+  StringPiece sp0;
+  if (full_match) {
+    anchor = kAnchored;
+    if (nmatch < 1) {
+      match = &sp0;
+      nmatch = 1;
+    }
+  }
+
+  // Run the search.
+  BitState b(this);
+  const bool found = b.Search(text, context,
+                              /*anchored=*/anchor == kAnchored,
+                              /*longest=*/kind != kFirstMatch,
+                              match, nmatch);
+  if (!found)
+    return false;
+
+  // A full match must consume the text all the way to its end.
+  return !(full_match && EndPtr(match[0]) != EndPtr(text));
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/compile.cc
+++ b/external/duckdb/third_party/re2/re2/compile.cc
--- a/external/duckdb/third_party/re2/re2/dfa.cc
+++ b/external/duckdb/third_party/re2/re2/dfa.cc
--- a/external/duckdb/third_party/re2/re2/filtered_re2.cc
+++ b/external/duckdb/third_party/re2/re2/filtered_re2.cc
@@ -0,0 +1,137 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/filtered_re2.h"
+
+#include <stddef.h>
+#include <string>
+#include <utility>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/prefilter.h"
+#include "re2/prefilter_tree.h"
+
+namespace duckdb_re2 {
+
+// Creates an empty, not-yet-compiled filter that owns a
+// default-constructed PrefilterTree.
+FilteredRE2::FilteredRE2()
+    : compiled_(false),
+      prefilter_tree_(new PrefilterTree()) {
+}
+
+// Creates an empty, not-yet-compiled filter whose PrefilterTree is
+// constructed with min_atom_len.
+FilteredRE2::FilteredRE2(int min_atom_len)
+    : compiled_(false),
+      prefilter_tree_(new PrefilterTree(min_atom_len)) {
+}
+
+// Frees every RE2 object that Add() stored in re2_vec_.
+FilteredRE2::~FilteredRE2() {
+  for (RE2* re : re2_vec_) {
+    delete re;
+  }
+}
+
+// Move constructor: takes over other's regexps, compiled flag and prefilter
+// tree, then resets other to an empty, uncompiled filter with a fresh
+// default-constructed PrefilterTree.
+FilteredRE2::FilteredRE2(FilteredRE2&& other)
+    : re2_vec_(std::move(other.re2_vec_)),
+      compiled_(other.compiled_),
+      prefilter_tree_(std::move(other.prefilter_tree_)) {
+  other.re2_vec_.clear();
+  other.re2_vec_.shrink_to_fit();
+  other.compiled_ = false;
+  other.prefilter_tree_.reset(new PrefilterTree());
+}
+
+// Move assignment: destroys the current contents and move-constructs from
+// other in place. other is left as an empty, uncompiled filter.
+// Returns *this.
+FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
+  // Self-move must be a no-op: destroying *this and then move-constructing
+  // from the same, already-destroyed object would be undefined behavior.
+  if (this != &other) {
+    this->~FilteredRE2();
+    (void) new (this) FilteredRE2(std::move(other));
+  }
+  return *this;
+}
+
+// Compiles pattern with options and, on success, stores the resulting RE2
+// and writes its index to *id. On failure the RE2 is discarded and *id is
+// left untouched. Returns the RE2's error code either way.
+RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
+                                const RE2::Options& options, int* id) {
+  RE2* compiled = new RE2(pattern, options);
+  const RE2::ErrorCode status = compiled->error_code();
+
+  if (compiled->ok()) {
+    // The new regexp's id is its position in re2_vec_.
+    *id = static_cast<int>(re2_vec_.size());
+    re2_vec_.push_back(compiled);
+    return status;
+  }
+
+  // Report the failure (if requested) before the RE2 is destroyed.
+  if (options.log_errors()) {
+    LOG(ERROR) << "Couldn't compile regular expression, skipping: "
+               << pattern << " due to error " << compiled->error();
+  }
+  delete compiled;
+  return status;
+}
+
+// Builds the prefilter tree from all added regexps and writes the atoms
+// produced by the tree into *atoms (which is cleared first). Logs an error
+// and does nothing when called a second time or before any Add().
+void FilteredRE2::Compile(std::vector<std::string>* atoms) {
+  if (compiled_) {
+    LOG(ERROR) << "Compile called already.";
+    return;
+  }
+
+  if (re2_vec_.empty()) {
+    LOG(ERROR) << "Compile called before Add.";
+    return;
+  }
+
+  // Register one prefilter per regexp, in the order they were added.
+  for (RE2* re : re2_vec_) {
+    prefilter_tree_->Add(Prefilter::FromRE2(re));
+  }
+
+  atoms->clear();
+  prefilter_tree_->Compile(atoms);
+  compiled_ = true;
+}
+
+// Tries every regexp in insertion order, with no prefiltering.
+// Returns the index of the first one that partially matches text, or -1.
+int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
+  const size_t count = re2_vec_.size();
+  for (size_t idx = 0; idx < count; idx++) {
+    if (RE2::PartialMatch(text, *re2_vec_[idx])) {
+      return static_cast<int>(idx);
+    }
+  }
+  return -1;
+}
+
+// Asks the prefilter tree which regexps pass for the given atoms, then
+// returns the id of the first of those that partially matches text.
+// Returns -1 when none matches or when Compile() has not been called.
+int FilteredRE2::FirstMatch(const StringPiece& text,
+                            const std::vector<int>& atoms) const {
+  if (!compiled_) {
+    LOG(DFATAL) << "FirstMatch called before Compile.";
+    return -1;
+  }
+
+  std::vector<int> candidates;
+  prefilter_tree_->RegexpsGivenStrings(atoms, &candidates);
+  for (int id : candidates) {
+    if (RE2::PartialMatch(text, *re2_vec_[id])) {
+      return id;
+    }
+  }
+  return -1;
+}
+
+// Clears *matching_regexps, then appends the id of every regexp that passes
+// the prefilter for atoms and partially matches text.
+// Returns whether at least one regexp matched.
+bool FilteredRE2::AllMatches(
+    const StringPiece& text,
+    const std::vector<int>& atoms,
+    std::vector<int>* matching_regexps) const {
+  matching_regexps->clear();
+
+  std::vector<int> candidates;
+  prefilter_tree_->RegexpsGivenStrings(atoms, &candidates);
+  for (int id : candidates) {
+    if (RE2::PartialMatch(text, *re2_vec_[id])) {
+      matching_regexps->push_back(id);
+    }
+  }
+  return !matching_regexps->empty();
+}
+
+// Forwards to PrefilterTree::RegexpsGivenStrings(), which fills
+// *potential_regexps for the given atoms. No regexp is run here.
+void FilteredRE2::AllPotentials(
+    const std::vector<int>& atoms,
+    std::vector<int>* potential_regexps) const {
+  prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);
+}
+
+// Forwards to PrefilterTree::RegexpsGivenStrings() with the same arguments.
+void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+                                      std::vector<int>* passed_regexps) {
+  prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
+}
+
+// Forwards to PrefilterTree::PrintPrefilter() for the regexp with id regexpid.
+void FilteredRE2::PrintPrefilter(int regexpid) {
+  prefilter_tree_->PrintPrefilter(regexpid);
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/filtered_re2.h
+++ b/external/duckdb/third_party/re2/re2/filtered_re2.h
@@ -0,0 +1,120 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_FILTERED_RE2_H_
+#define RE2_FILTERED_RE2_H_
+
+// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
+// It provides a prefilter mechanism that helps in cutting down the
+// number of regexps that need to be actually searched.
+//
+// By design, it does not include a string matching engine. This is to
+// allow the user of the class to use their favorite string matching
+// engine. The overall flow is: Add all the regexps using Add, then
+// Compile the FilteredRE2. Compile returns strings that need to be
+// matched. Note that the returned strings are lowercased and distinct.
+// For applying regexps to a search text, the caller does the string
+// matching using the returned strings. When doing the string match,
+// note that the caller has to do that in a case-insensitive way or
+// on a lowercased version of the search text. Then call FirstMatch
+// or AllMatches with a vector of indices of strings that were found
+// in the text to get the actual regexp matches.
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "re2/re2.h"
+
+// When DUCKDB_BASE_STD is not defined, provide a minimal duckdb_base_std
+// namespace that simply re-exports std::unique_ptr, so that
+// duckdb_base_std::unique_ptr can be used below.
+// NOTE(review): presumably DuckDB defines DUCKDB_BASE_STD where it supplies
+// its own duckdb_base_std; this file never defines the macro itself.
+#ifndef DUCKDB_BASE_STD
+namespace duckdb_base_std {
+	using ::std::unique_ptr;
+} // namespace duckdb_base_std
+#endif
+
+
+namespace duckdb_re2 {
+
+class PrefilterTree;
+
+// Owns a set of RE2 objects plus a PrefilterTree used to narrow down which
+// of them need to be run against a text.
+class FilteredRE2 {
+ public:
+  FilteredRE2();
+  // min_atom_len is passed through to the PrefilterTree constructor.
+  explicit FilteredRE2(int min_atom_len);
+  // Deletes every RE2 stored by Add().
+  ~FilteredRE2();
+
+  // Not copyable.
+  FilteredRE2(const FilteredRE2&) = delete;
+  FilteredRE2& operator=(const FilteredRE2&) = delete;
+  // Movable.
+  FilteredRE2(FilteredRE2&& other);
+  FilteredRE2& operator=(FilteredRE2&& other);
+
+  // Uses RE2 constructor to create a RE2 object (re). Returns
+  // re->error_code(). If error_code is other than NoError, then re is
+  // deleted and not added to re2_vec_. On success, *id receives the index
+  // of the new regexp.
+  RE2::ErrorCode Add(const StringPiece& pattern,
+                     const RE2::Options& options,
+                     int* id);
+
+  // Prepares the regexps added by Add for filtering.  Returns a set
+  // of strings that the caller should check for in candidate texts.
+  // The returned strings are lowercased and distinct. When doing
+  // string matching, it should be performed in a case-insensitive
+  // way or the search text should be lowercased first.  Call after
+  // all Add calls are done.
+  void Compile(std::vector<std::string>* strings_to_match);
+
+  // Returns the index of the first matching regexp.
+  // Returns -1 on no match. Can be called prior to Compile.
+  // Does not do any filtering: simply tries to Match the
+  // regexps in a loop.
+  int SlowFirstMatch(const StringPiece& text) const;
+
+  // Returns the index of the first matching regexp.
+  // Returns -1 on no match. Compile has to be called before
+  // calling this.
+  int FirstMatch(const StringPiece& text,
+                 const std::vector<int>& atoms) const;
+
+  // Returns the indices of all matching regexps, after first clearing
+  // matching_regexps. The return value says whether any regexp matched.
+  bool AllMatches(const StringPiece& text,
+                  const std::vector<int>& atoms,
+                  std::vector<int>* matching_regexps) const;
+
+  // Returns the indices of all potentially matching regexps after first
+  // clearing potential_regexps.
+  // A regexp is potentially matching if it passes the filter.
+  // If a regexp passes the filter it may still not match.
+  // A regexp that does not pass the filter is guaranteed to not match.
+  void AllPotentials(const std::vector<int>& atoms,
+                     std::vector<int>* potential_regexps) const;
+
+  // The number of regexps added.
+  int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
+
+  // Get the individual RE2 objects.
+  // regexpid is used as an index into re2_vec_ without a bounds check.
+  const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
+
+ private:
+  // Print prefilter.
+  void PrintPrefilter(int regexpid);
+
+  // Useful for testing and debugging.
+  void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+                           std::vector<int>* passed_regexps);
+
+  // All the regexps in the FilteredRE2.
+  // The pointers are owned by this object.
+  std::vector<RE2*> re2_vec_;
+
+  // Has the FilteredRE2 been compiled using Compile()
+  bool compiled_;
+
+  // An AND-OR tree of string atoms used for filtering regexps.
+  duckdb_base_std::unique_ptr<PrefilterTree> prefilter_tree_;
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_FILTERED_RE2_H_
--- a/external/duckdb/third_party/re2/re2/mimics_pcre.cc
+++ b/external/duckdb/third_party/re2/re2/mimics_pcre.cc
@@ -0,0 +1,197 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Determine whether this library should match PCRE exactly
+// for a particular Regexp.  (If so, the testing framework can
+// check that it does.)
+//
+// This library matches PCRE except in these cases:
+//   * the regexp contains a repetition of an empty string,
+//     like (a*)* or (a*)+.  In this case, PCRE will treat
+//     the repetition sequence as ending with an empty string,
+//     while this library does not.
+//   * Perl and PCRE differ on whether \v matches \n.
+//     For historical reasons, this library implements the Perl behavior.
+//   * Perl and PCRE allow $ in one-line mode to match either the very
+//     end of the text or just before a \n at the end of the text.
+//     This library requires it to match only the end of the text.
+//   * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
+//     match the end of the text if the last character is a \n.
+//     This library does allow it.
+//
+// Regexp::MimicsPCRE checks for any of these conditions.
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace duckdb_re2 {
+
+// Returns whether re might match an empty string.
+static bool CanBeEmptyString(Regexp *re);
+
+// Walker class to compute whether library handles a regexp
+// exactly as PCRE would.  See comment at top for conditions.
+
+class PCREWalker : public Regexp::Walker<bool> {
+ public:
+  PCREWalker() {}
+
+  virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                         bool* child_args, int nchild_args);
+
+  virtual bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: MimicsPCRE() uses Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    LOG(DFATAL) << "PCREWalker::ShortVisit called";
+#endif
+    return a;  // Hand the incoming value straight back.
+  }
+
+ private:
+  PCREWalker(const PCREWalker&) = delete;  // Not copy-constructible.
+  PCREWalker& operator=(const PCREWalker&) = delete;  // Not copy-assignable.
+};
+
+// Called after visiting each of re's children and accumulating
+// the return values in child_args.  So child_args[0..nchild_args-1] say whether
+// this library mimics PCRE for those subexpressions.  parent_arg/pre_arg are unused.
+bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                           bool* child_args, int nchild_args) {
+  // If children failed, so do we.
+  for (int i = 0; i < nchild_args; i++)
+    if (!child_args[i])
+      return false;
+
+  // Otherwise look for other reasons to fail.
+  switch (re->op()) {
+    // Look for repeated empty string.
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+      if (CanBeEmptyString(re->sub()[0]))
+        return false;
+      break;
+    case kRegexpRepeat:
+      if (re->max() == -1 && CanBeEmptyString(re->sub()[0]))  // NOTE(review): max() == -1 presumably means "no upper bound" -- confirm in regexp.h
+        return false;
+      break;
+
+    // Look for \v
+    case kRegexpLiteral:
+      if (re->rune() == '\v')
+        return false;
+      break;
+
+    // Look for $ in single-line mode.
+    case kRegexpEndText:
+    case kRegexpEmptyMatch:
+      if (re->parse_flags() & Regexp::WasDollar)
+        return false;
+      break;
+
+    // Look for ^ in multi-line mode.
+    case kRegexpBeginLine:
+      // No condition: in single-line mode ^ becomes kRegexpBeginText.
+      return false;
+
+    default:
+      break;
+  }
+
+  // Not proven guilty.
+  return true;
+}
+
+// Returns whether this regexp's behavior will mimic PCRE's exactly (decided by PCREWalker).
+bool Regexp::MimicsPCRE() {
+  PCREWalker w;
+  return w.Walk(this, true);  // The walk starts at this node and is handed true as its argument.
+}
+
+
+// Walker class to compute whether a Regexp can match an empty string.
+// It is okay to overestimate.  For example, \b\B cannot match an empty
+// string, because \b and \B are mutually exclusive, but this isn't
+// that smart and will say it can.  Spurious empty strings
+// will reduce the number of regexps we sanity check against PCRE,
+// but they won't break anything.
+
+class EmptyStringWalker : public Regexp::Walker<bool> {
+ public:
+  EmptyStringWalker() {}
+
+  virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                         bool* child_args, int nchild_args);
+
+  virtual bool ShortVisit(Regexp* re, bool a) {
+    // Should never be called: CanBeEmptyString() uses Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
+#endif
+    return a;  // Hand the incoming value straight back.
+  }
+
+ private:
+  EmptyStringWalker(const EmptyStringWalker&) = delete;  // Not copy-constructible.
+  EmptyStringWalker& operator=(const EmptyStringWalker&) = delete;  // Not copy-assignable.
+};
+
+// Called after visiting re's children.  child_args contains the return
+// value from each of the children's PostVisits (i.e., whether each child
+// can match an empty string).  Returns whether this clause can match an
+// empty string.  parent_arg and pre_arg are not consulted.
+bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+                                  bool* child_args, int nchild_args) {
+  switch (re->op()) {
+    case kRegexpNoMatch:               // never empty
+    case kRegexpLiteral:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpCharClass:
+    case kRegexpLiteralString:
+      return false;
+
+    case kRegexpEmptyMatch:            // always empty
+    case kRegexpBeginLine:             // always empty, when they match
+    case kRegexpEndLine:
+    case kRegexpNoWordBoundary:
+    case kRegexpWordBoundary:
+    case kRegexpBeginText:
+    case kRegexpEndText:
+    case kRegexpStar:                  // can always be empty
+    case kRegexpQuest:
+    case kRegexpHaveMatch:
+      return true;
+
+    case kRegexpConcat:                // can be empty if all children can
+      for (int i = 0; i < nchild_args; i++)
+        if (!child_args[i])
+          return false;
+      return true;
+
+    case kRegexpAlternate:             // can be empty if any child can
+      for (int i = 0; i < nchild_args; i++)
+        if (child_args[i])
+          return true;
+      return false;
+
+    case kRegexpPlus:                  // can be empty if the child can
+    case kRegexpCapture:
+      return child_args[0];
+
+    case kRegexpRepeat:                // can be empty if child can or min is 0 (x{0}, x{0,n}, ...)
+      return child_args[0] || re->min() == 0;
+  }
+  return false;  // Any op not listed above is reported as unable to match empty.
+}
+
+// Returns whether re can match an empty string, as computed by EmptyStringWalker.
+static bool CanBeEmptyString(Regexp* re) {
+  EmptyStringWalker w;
+  return w.Walk(re, true);  // The walk starts at re and is handed true as its argument.
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/nfa.cc
+++ b/external/duckdb/third_party/re2/re2/nfa.cc
@@ -0,0 +1,674 @@
+// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc.
+//
+// Prog::SearchNFA, an NFA search.
+// This is an actual NFA like the theorists talk about,
+// not the pseudo-NFA found in backtracking regexp implementations.
+//
+// IMPLEMENTATION
+//
+// This algorithm is a variant of one that appeared in Rob Pike's sam editor,
+// which is a variant of the one described in Thompson's 1968 CACM paper.
+// See http://swtch.com/~rsc/regexp/ for various history.  The main feature
+// over the DFA implementation is that it tracks submatch boundaries.
+//
+// When the choice of submatch boundaries is ambiguous, this particular
+// implementation makes the same choices that traditional backtracking
+// implementations (in particular, Perl and PCRE) do.
+// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential
+// time in the length of the input.
+//
+// Like Thompson's original machine and like the DFA implementation, this
+// implementation notices a match only once it is one byte past it.
+
+#include <stdio.h>
+#include <string.h>
+#include <algorithm>
+#include <deque>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/pod_array.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"
+
+namespace duckdb_re2 {
+
+class NFA {
+ public:
+  NFA(Prog* prog);
+  ~NFA();
+
+  // Searches for a matching string.
+  //   * If anchored is true, only considers matches starting at the beginning
+  //     of text.  Otherwise finds leftmost match at or after the beginning.
+  //   * If longest is true, returns the longest match starting
+  //     at the chosen start point.  Otherwise returns the so-called
+  //     left-biased match, the one traditional backtracking engines
+  //     (like Perl and PCRE) find.
+  // Records submatch boundaries in submatch[1..nsubmatch-1].
+  // Submatch[0] is the entire match.  When there is a choice in
+  // which text matches each subexpression, the submatch boundaries
+  // are chosen to match what a backtracking implementation would choose.
+  bool Search(const StringPiece& text, const StringPiece& context,
+              bool anchored, bool longest,
+              StringPiece* submatch, int nsubmatch);
+
+ private:
+  struct Thread {
+    union {
+      int ref;       // reference count while in use (see Incref/Decref)
+      Thread* next;  // when on free list
+    };
+    const char** capture;  // array of ncapture_ text pointers (allocated in AllocThread)
+  };
+
+  // State for explicit stack in AddToThreadq.
+  struct AddState {
+    int id;     // Inst to process
+    Thread* t;  // if not null, set t0 = t before processing id
+  };
+
+  // Threadq is a list of threads.  The list is sorted by the order
+  // in which Perl would explore that particular state -- the earlier
+  // choices appear earlier in the list.
+  typedef SparseArray<Thread*> Threadq;
+
+  inline Thread* AllocThread();
+  inline Thread* Incref(Thread* t);
+  inline void Decref(Thread* t);
+
+  // Follows all empty arrows from id0 and enqueues all the states reached.
+  // Enqueues only the ByteRange instructions that match byte c.
+  // context is used (with p) for evaluating empty-width specials.
+  // p is the current input position, and t0 is the current thread.
+  void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+                    const char* p, Thread* t0);
+
+  // Run runq on byte c, appending new states to nextq.
+  // Updates matched_ and match_ as new, better matches are found.
+  // context is used (with p) for evaluating empty-width specials.
+  // p is the position of byte c in the input string for AddToThreadq;
+  // p-1 will be used when processing Match instructions.
+  // Frees all the threads on runq.
+  // If there is a shortcut to the end, returns that shortcut.
+  int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+           const char* p);
+
+  // Returns text version of capture information, for debugging.
+  std::string FormatCapture(const char** capture);
+
+  void CopyCapture(const char** dst, const char** src) {
+    memmove(dst, src, ncapture_*sizeof src[0]);  // copies all ncapture_ pointers
+  }
+
+  Prog* prog_;                // underlying program
+  int start_;                 // start instruction in program
+  int ncapture_;              // number of submatches to track
+  bool longest_;              // whether searching for longest match
+  bool endmatch_;             // whether match must end at text.end()
+  const char* btext_;         // beginning of text (for FormatCapture)
+  const char* etext_;         // end of text (for endmatch_)
+  Threadq q0_, q1_;           // pre-allocated for Search.
+  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
+  std::deque<Thread> arena_;  // thread arena
+  Thread* freelist_;          // thread freelist
+  const char** match_;        // best match so far
+  bool matched_;              // any match so far?
+
+  NFA(const NFA&) = delete;
+  NFA& operator=(const NFA&) = delete;
+};
+
+NFA::NFA(Prog* prog) {
+  prog_ = prog;
+  start_ = prog_->start();
+  ncapture_ = 0;  // the real value is set at the top of Search()
+  longest_ = false;
+  endmatch_ = false;
+  btext_ = NULL;
+  etext_ = NULL;
+  q0_.resize(prog_->size());  // both queues are indexed by instruction id
+  q1_.resize(prog_->size());
+  // See NFA::AddToThreadq() for why this is so.
+  int nstack = 2*prog_->inst_count(kInstCapture) +
+               prog_->inst_count(kInstEmptyWidth) +
+               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
+  stack_ = PODArray<AddState>(nstack);
+  freelist_ = NULL;
+  match_ = NULL;  // allocated by Search()
+  matched_ = false;
+}
+
+NFA::~NFA() {
+  delete[] match_;  // allocated in Search(); still NULL if Search() never got that far
+  for (const Thread& t : arena_)
+    delete[] t.capture;  // allocated in AllocThread(), one array per arena entry
+}
+
+NFA::Thread* NFA::AllocThread() {
+  Thread* t = freelist_;  // Prefer a recycled Thread from the free list.
+  if (t != NULL) {
+    freelist_ = t->next;
+    t->ref = 1;  // ref shares storage with next, so this overwrites the link just read
+    // We don't need to touch t->capture because
+    // the caller will immediately overwrite it.
+    return t;
+  }
+  arena_.emplace_back();  // Free list is empty: create a new Thread at the back of the arena.
+  t = &arena_.back();
+  t->ref = 1;
+  t->capture = new const char*[ncapture_];  // released in ~NFA()
+  return t;
+}
+
+NFA::Thread* NFA::Incref(Thread* t) {
+  DCHECK(t != NULL);
+  t->ref++;
+  return t;  // returns its argument so callers can write t = Incref(t0)
+}
+
+void NFA::Decref(Thread* t) {
+  DCHECK(t != NULL);
+  t->ref--;
+  if (t->ref > 0)
+    return;  // still referenced elsewhere
+  DCHECK_EQ(t->ref, 0);
+  t->next = freelist_;  // last reference dropped: push t onto the free list (capture array is kept for reuse)
+  freelist_ = t;
+}
+
+// Follows all empty arrows from id0 and enqueues all the states reached.
+// Enqueues only the ByteRange instructions that match byte c.
+// context is used (with p) for evaluating empty-width specials.
+// p is the current input position, and t0 is the current thread.
+void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+                       const char* p, Thread* t0) {
+  if (id0 == 0)
+    return;
+
+  // Use stack_ to hold our stack of instructions yet to process.
+  // It was preallocated as follows:
+  //   two entries per Capture;
+  //   one entry per EmptyWidth;
+  //   one entry per Nop; and one more for the initial push of id0.
+  // This reflects the maximum number of stack pushes that each can
+  // perform. (Each instruction can be processed at most once.)
+  AddState* stk = stack_.data();
+  int nstk = 0;
+
+  stk[nstk++] = {id0, NULL};
+  while (nstk > 0) {
+    DCHECK_LE(nstk, stack_.size());
+    AddState a = stk[--nstk];
+
+  Loop:
+    if (a.t != NULL) {
+      // t0 was a thread that we allocated and copied in order to
+      // record the capture, so we must now decref it.
+      Decref(t0);
+      t0 = a.t;
+    }
+
+    int id = a.id;
+    if (id == 0)
+      continue;
+    if (q->has_index(id)) {
+      continue;
+    }
+
+    // Create entry in q no matter what.  We might fill it in below,
+    // or we might not.  Even if not, it is necessary to have it,
+    // so that we don't revisit id later in this traversal.
+    q->set_new(id, NULL);
+    Thread** tp = &q->get_existing(id);
+    int j;
+    Thread* t;
+    Prog::Inst* ip = prog_->inst(id);
+    switch (ip->opcode()) {
+    default:
+      LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
+      break;
+
+    case kInstFail:
+      break;
+
+    case kInstAltMatch:
+      // Save state; will pick up at next byte.
+      t = Incref(t0);
+      *tp = t;
+
+      DCHECK(!ip->last());
+      a = {id+1, NULL};
+      goto Loop;
+
+    case kInstNop:
+      if (!ip->last())
+        stk[nstk++] = {id+1, NULL};
+
+      // Continue on.
+      a = {ip->out(), NULL};
+      goto Loop;
+
+    case kInstCapture:
+      if (!ip->last())
+        stk[nstk++] = {id+1, NULL};
+
+      if ((j=ip->cap()) < ncapture_) {
+        // Push a dummy whose only job is to restore t0
+        // once we finish exploring this possibility.
+        stk[nstk++] = {0, t0};
+
+        // Record capture.
+        t = AllocThread();
+        CopyCapture(t->capture, t0->capture);
+        t->capture[j] = p;
+        t0 = t;
+      }
+      a = {ip->out(), NULL};
+      goto Loop;
+
+    case kInstByteRange:
+      if (!ip->Matches(c))
+        goto Next;
+
+      // Save state; will pick up at next byte.
+      t = Incref(t0);
+      *tp = t;
+
+      if (ip->hint() == 0)
+        break;
+      a = {id+ip->hint(), NULL};
+      goto Loop;
+
+    case kInstMatch:
+      // Save state; will pick up at next byte.
+      t = Incref(t0);
+      *tp = t;
+
+    Next:
+      if (ip->last())
+        break;
+      a = {id+1, NULL};
+      goto Loop;
+
+    case kInstEmptyWidth:
+      if (!ip->last())
+        stk[nstk++] = {id+1, NULL};
+
+      // Continue on if we have all the right flag bits.
+      if (ip->empty() & ~Prog::EmptyFlags(context, p))
+        break;
+      a = {ip->out(), NULL};
+      goto Loop;
+    }
+  }
+}
+
+// Run runq on byte c, appending new states to nextq.
+// Updates matched_ and match_ as new, better matches are found.
+// context is used (with p) for evaluating empty-width specials.
+// p is the position of byte c in the input string for AddToThreadq;
+// p-1 will be used when processing Match instructions.
+// Frees all the threads on runq.
+// If there is a shortcut to the end, returns that shortcut (an instruction id); otherwise 0.
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+              const char* p) {
+  nextq->clear();
+
+  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+    Thread* t = i->value();
+    if (t == NULL)
+      continue;
+
+    if (longest_) {
+      // Can skip any threads started after our current best match.
+      if (matched_ && match_[0] < t->capture[0]) {
+        Decref(t);
+        continue;
+      }
+    }
+
+    int id = i->index();
+    Prog::Inst* ip = prog_->inst(id);
+
+    switch (ip->opcode()) {
+      default:
+        // Should only see the values handled below.
+        LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
+        break;
+
+      case kInstByteRange:
+        AddToThreadq(nextq, ip->out(), c, context, p, t);
+        break;
+
+      case kInstAltMatch:
+        if (i != runq->begin())  // the shortcut is only considered for the first entry of runq
+          break;
+        // The match is ours if we want it.
+        if (ip->greedy(prog_) || longest_) {
+          CopyCapture(match_, t->capture);
+          matched_ = true;
+
+          Decref(t);
+          for (++i; i != runq->end(); ++i) {
+            if (i->value() != NULL)
+              Decref(i->value());
+          }
+          runq->clear();
+          if (ip->greedy(prog_))
+            return ip->out1();
+          return ip->out();
+        }
+        break;
+
+      case kInstMatch: {
+        // Avoid invoking undefined behavior (arithmetic on a null pointer)
+        // by storing p instead of p-1. (What would the latter even mean?!)
+        // This complements the special case in NFA::Search().
+        if (p == NULL) {
+          CopyCapture(match_, t->capture);
+          match_[1] = p;
+          matched_ = true;
+          break;
+        }
+
+        if (endmatch_ && p-1 != etext_)
+          break;
+
+        if (longest_) {
+          // Leftmost-longest mode: save this match only if
+          // it is either farther to the left or at the same
+          // point but longer than an existing match.
+          if (!matched_ || t->capture[0] < match_[0] ||
+              (t->capture[0] == match_[0] && p-1 > match_[1])) {
+            CopyCapture(match_, t->capture);
+            match_[1] = p-1;
+            matched_ = true;
+          }
+        } else {
+          // Leftmost-biased mode: this match is by definition
+          // better than what we've already found (see next line).
+          CopyCapture(match_, t->capture);
+          match_[1] = p-1;
+          matched_ = true;
+
+          // Cut off the threads that can only find matches
+          // worse than the one we just found: don't run the
+          // rest of the current Threadq.
+          Decref(t);
+          for (++i; i != runq->end(); ++i) {
+            if (i->value() != NULL)
+              Decref(i->value());
+          }
+          runq->clear();
+          return 0;
+        }
+        break;
+      }
+    }
+    Decref(t);  // every path that breaks out of the switch releases runq's reference here
+  }
+  runq->clear();
+  return 0;
+}
+
+std::string NFA::FormatCapture(const char** capture) {
+  std::string s;
+  for (int i = 0; i < ncapture_; i+=2) {  // entries are read in pairs: capture[i], capture[i+1]
+    if (capture[i] == NULL)
+      s += "(?,?)";
+    else if (capture[i+1] == NULL)
+      s += StringPrintf("(%td,?)",
+                        capture[i] - btext_);  // printed as an offset from btext_
+    else
+      s += StringPrintf("(%td,%td)",
+                        capture[i] - btext_,
+                        capture[i+1] - btext_);
+  }
+  return s;
+}
+
+bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
+            bool anchored, bool longest,
+            StringPiece* submatch, int nsubmatch) {
+  if (start_ == 0)
+    return false;
+
+  StringPiece context = const_context;
+  if (context.data() == NULL)
+    context = text;  // no context supplied: the text is its own context
+
+  // Sanity check: make sure that text lies within context.
+  if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
+    LOG(DFATAL) << "context does not contain text";
+    return false;
+  }
+
+  if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text))
+    return false;
+  if (prog_->anchor_end() && EndPtr(context) != EndPtr(text))
+    return false;
+  anchored |= prog_->anchor_start();
+  if (prog_->anchor_end()) {
+    longest = true;
+    endmatch_ = true;
+  }
+
+  if (nsubmatch < 0) {
+    LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
+    return false;
+  }
+
+  // Save search parameters.
+  ncapture_ = 2*nsubmatch;
+  longest_ = longest;
+
+  if (nsubmatch == 0) {
+    // We need to maintain match[0], both to distinguish the
+    // longest match (if longest is true) and also to tell
+    // whether we've seen any matches at all.
+    ncapture_ = 2;
+  }
+
+  match_ = new const char*[ncapture_];  // NOTE(review): overwrites any earlier match_; Prog::SearchNFA builds a fresh NFA per search
+  memset(match_, 0, ncapture_*sizeof match_[0]);
+  matched_ = false;
+
+  // For debugging prints.
+  btext_ = context.data();
+  // For convenience.
+  etext_ = text.data() + text.size();
+
+  // Set up search.
+  Threadq* runq = &q0_;
+  Threadq* nextq = &q1_;
+  runq->clear();
+  nextq->clear();
+
+  // Loop over the text, stepping the machine.
+  for (const char* p = text.data();; p++) {
+    // This is a no-op the first time around the loop because runq is empty.
+    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
+    DCHECK_EQ(runq->size(), 0);
+    using std::swap;
+    swap(nextq, runq);
+    nextq->clear();
+    if (id != 0) {
+      // We're done: full match ahead.
+      p = etext_;
+      for (;;) {
+        Prog::Inst* ip = prog_->inst(id);
+        switch (ip->opcode()) {
+          default:
+            LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
+            break;
+
+          case kInstCapture:
+            if (ip->cap() < ncapture_)
+              match_[ip->cap()] = p;
+            id = ip->out();
+            continue;
+
+          case kInstNop:
+            id = ip->out();
+            continue;
+
+          case kInstMatch:
+            match_[1] = p;
+            matched_ = true;
+            break;
+        }
+        break;
+      }
+      break;
+    }
+
+    if (p > etext_)
+      break;
+
+    // Start a new thread if there have not been any matches.
+    // (No point in starting a new thread if there have been
+    // matches, since it would be to the right of the match
+    // we already found.)
+    if (!matched_ && (!anchored || p == text.data())) {
+      // Try to use prefix accel (e.g. memchr) to skip ahead.
+      // The search must be unanchored and there must be zero
+      // possible matches already.
+      if (!anchored && runq->size() == 0 &&
+          p < etext_ && prog_->can_prefix_accel()) {
+        p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
+        if (p == NULL)
+          p = etext_;
+      }
+
+      Thread* t = AllocThread();
+      CopyCapture(t->capture, match_);
+      t->capture[0] = p;
+      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
+                   t);
+      Decref(t);  // drop the local reference; runq holds its own via Incref in AddToThreadq
+    }
+
+    // If all the threads have died, stop early.
+    if (runq->size() == 0) {
+      break;
+    }
+
+    // Avoid invoking undefined behavior (arithmetic on a null pointer)
+    // by simply not continuing the loop.
+    // This complements the special case in NFA::Step().
+    if (p == NULL) {
+      (void) Step(runq, nextq, -1, context, p);
+      DCHECK_EQ(runq->size(), 0);
+      using std::swap;
+      swap(nextq, runq);
+      nextq->clear();
+      break;
+    }
+  }
+
+  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {  // release threads still queued
+    if (i->value() != NULL)
+      Decref(i->value());
+  }
+
+  if (matched_) {
+    for (int i = 0; i < nsubmatch; i++)  // match_ holds (begin, end) pointer pairs
+      submatch[i] =
+          StringPiece(match_[2 * i],
+                      static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
+    return true;
+  }
+  return false;
+}
+
+bool
+Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
+                Anchor anchor, MatchKind kind,
+                StringPiece* match, int nmatch) {
+
+  NFA nfa(this);  // a fresh NFA is built for every search
+  StringPiece sp;
+  if (kind == kFullMatch) {
+    anchor = kAnchored;
+    if (nmatch == 0) {
+      match = &sp;  // match[0] is needed for the end-of-text check below
+      nmatch = 1;
+    }
+  }
+  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
+    return false;
+  if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))  // a full match must end where text ends
+    return false;
+  return true;
+}
+
+// For each instruction i in the program reachable from the start, compute the
+// number of ByteRange instructions reachable from i by following only empty
+// transitions and record that count as fanout[i].
+//
+// fanout holds the results and is also the work queue for the outer iteration.
+// reachable holds the reached nodes for the inner iteration.
+void Prog::Fanout(SparseArray<int>* fanout) {
+  DCHECK_EQ(fanout->max_size(), size());
+  SparseSet reachable(size());
+  fanout->clear();
+  fanout->set_new(start(), 0);
+  for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
+    int* count = &i->value();
+    reachable.clear();
+    reachable.insert(i->index());
+    for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
+      int id = *j;
+      Prog::Inst* ip = inst(id);
+      switch (ip->opcode()) {
+        default:
+          LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
+          break;
+
+        case kInstByteRange:
+          if (!ip->last())
+            reachable.insert(id+1);
+
+          (*count)++;  // only ByteRange instructions contribute to the count
+          if (!fanout->has_index(ip->out())) {
+            fanout->set_new(ip->out(), 0);  // queue the target for the outer loop
+          }
+          break;
+
+        case kInstAltMatch:
+          DCHECK(!ip->last());
+          reachable.insert(id+1);
+          break;
+
+        case kInstCapture:
+        case kInstEmptyWidth:
+        case kInstNop:
+          if (!ip->last())
+            reachable.insert(id+1);
+
+          reachable.insert(ip->out());
+          break;
+
+        case kInstMatch:
+          if (!ip->last())
+            reachable.insert(id+1);
+          break;
+
+        case kInstFail:
+          break;
+      }
+    }
+  }
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/onepass.cc
+++ b/external/duckdb/third_party/re2/re2/onepass.cc
@@ -0,0 +1,577 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc.
+//
+// Prog::SearchOnePass is an efficient implementation of
+// regular expression search with submatch tracking for
+// what I call "one-pass regular expressions".  (An alternate
+// name might be "backtracking-free regular expressions".)
+//
+// One-pass regular expressions have the property that
+// at each input byte during an anchored match, there may be
+// multiple alternatives but only one can proceed for any
+// given input byte.
+//
+// For example, the regexp /x*yx*/ is one-pass: you read
+// x's until a y, then you read the y, then you keep reading x's.
+// At no point do you have to guess what to do or back up
+// and try a different guess.
+//
+// On the other hand, /x*x/ is not one-pass: when you're
+// looking at an input "x", it's not clear whether you should
+// use it to extend the x* or as the final x.
+//
+// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not.
+// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not.
+//
+// A simple intuition for identifying one-pass regular expressions
+// is that it's always immediately obvious when a repetition ends.
+// It must also be immediately obvious which branch of an | to take:
+//
+// /x(y|z)/ is one-pass, but /(xy|xz)/ is not.
+//
+// The NFA-based search in nfa.cc does some bookkeeping to
+// avoid the need for backtracking and its associated exponential blowup.
+// But if we have a one-pass regular expression, there is no
+// possibility of backtracking, so there is no need for the
+// extra bookkeeping.  Hence, this code.
+//
+// On a one-pass regular expression, the NFA code in nfa.cc
+// runs at about 1/20 of the backtracking-based PCRE speed.
+// In contrast, the code in this file runs at about the same
+// speed as PCRE.
+//
+// One-pass regular expressions get used a lot when RE is
+// used for parsing simple strings, so it pays off to
+// notice them and handle them efficiently.
+//
+// See also Anne Brüggemann-Klein and Derick Wood,
+// "One-unambiguous regular languages", Information and Computation 142(2).
+
+#include <stdint.h>
+#include <string.h>
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/pod_array.h"
+#include "re2/prog.h"
+#include "re2/sparse_set.h"
+#include "re2/stringpiece.h"
+
+// Silence "zero-sized array in struct/union" warning for OneState::action.
+#ifdef _MSC_VER
+#pragma warning(disable: 4200)
+#endif
+
+namespace duckdb_re2 {
+
+// The key insight behind this implementation is that the
+// non-determinism in an NFA for a one-pass regular expression
+// is contained.  To explain what that means, first a
+// refresher about what regular expression programs look like
+// and how the usual NFA execution runs.
+//
+// In a regular expression program, only the kInstByteRange
+// instruction processes an input byte c and moves on to the
+// next byte in the string (it does so if c is in the given range).
+// The kInstByteRange instructions correspond to literal characters
+// and character classes in the regular expression.
+//
+// The kInstAlt instructions are used as wiring to connect the
+// kInstByteRange instructions together in interesting ways when
+// implementing | + and *.
+// The kInstAlt instruction forks execution, like a goto that
+// jumps to ip->out() and ip->out1() in parallel.  Each of the
+// resulting computation paths is called a thread.
+//
+// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture --
+// are interesting in their own right but like kInstAlt they don't
+// advance the input pointer.  Only kInstByteRange does.
+//
+// The automaton execution in nfa.cc runs all the possible
+// threads of execution in lock-step over the input.  To process
+// a particular byte, each thread gets run until it either dies
+// or finds a kInstByteRange instruction matching the byte.
+// If the latter happens, the thread stops just past the
+// kInstByteRange instruction (at ip->out()) and waits for
+// the other threads to finish processing the input byte.
+// Then, once all the threads have processed that input byte,
+// the whole process repeats.  The kInstAlt state instruction
+// might create new threads during input processing, but no
+// matter what, all the threads stop after a kInstByteRange
+// and wait for the other threads to "catch up".
+// Running in lock step like this ensures that the NFA reads
+// the input string only once.
+//
+// Each thread maintains its own set of capture registers
+// (the string positions at which it executed the kInstCapture
+// instructions corresponding to capturing parentheses in the
+// regular expression).  Repeated copying of the capture registers
+// is the main performance bottleneck in the NFA implementation.
+//
+// A regular expression program is "one-pass" if, no matter what
+// the input string, there is only one thread that makes it
+// past a kInstByteRange instruction at each input byte.  This means
+// that there is in some sense only one active thread throughout
+// the execution.  Other threads might be created during the
+// processing of an input byte, but they are ephemeral: only one
+// thread is left to start processing the next input byte.
+// This is what I meant above when I said the non-determinism
+// was "contained".
+//
+// To execute a one-pass regular expression program, we can build
+// a DFA (no non-determinism) that has at most as many states as
+// the NFA (compare this to the possibly exponential number of states
+// in the general case).  Each state records, for each possible
+// input byte, the next state along with the conditions required
+// before entering that state -- empty-width flags that must be true
+// and capture operations that must be performed.  It also records
+// the set of conditions required to finish a match at that
+// point in the input rather than process the next byte.
+
+// A state in the one-pass NFA: a match condition plus an array of actions
+// indexed by the bytemap_[] class of the next input byte.  (The bytemap
+// maps next input bytes into equivalence classes, to reduce
+// the memory footprint.)
+struct OneState {
+  uint32_t matchcond;   // conditions to match right now.
+  uint32_t action[256];  // per-class action word; the search and build code stride nodes by statesize, not sizeof(OneState)
+};
+
+// The uint32_t conditions in the action are a combination of
+// condition and capture bits and the next state.  The bottom 16 bits
+// are the condition and capture bits, and the top 16 are the index of
+// the next state.
+//
+// Bits 0-5 are the empty-width flags from prog.h.
+// Bit 6 is kMatchWins, which means the match takes
+// priority over moving to next in a first-match search.
+// The remaining bits mark capture registers that should
+// be set to the current input position.  The capture bits
+// start at index 2, since the search loop can take care of
+// cap[0], cap[1] (the overall match position).
+// That means we can handle up to 5 capturing parens: $1 through $4, plus $0.
+// No input position can satisfy both kEmptyWordBoundary
+// and kEmptyNonWordBoundary, so we can use that as a sentinel
+// instead of needing an extra bit.
+
+static const int    kIndexShift   = 16;  // number of bits below index
+static const int    kEmptyShift   = 6;   // number of empty flags in prog.h
+static const int    kRealCapShift = kEmptyShift + 1;  // 7: first bit used for capture registers
+static const int    kRealMaxCap   = (kIndexShift - kRealCapShift) / 2 * 2;  // 8: capture bits, rounded down to whole pairs
+
+// Parameters used to skip over cap[0], cap[1], which have no bits of their own.
+static const int    kCapShift     = kRealCapShift - 2;  // 5: so that register i >= 2 maps to bit kCapShift + i
+static const int    kMaxCap       = kRealMaxCap + 2;  // 10: registers including cap[0], cap[1]
+
+static const uint32_t kMatchWins  = 1 << kEmptyShift;  // bit 6 (0x40)
+static const uint32_t kCapMask    = ((1 << kRealMaxCap) - 1) << kRealCapShift;  // bits 7-14 (0x7F80)
+
+static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;  // sentinel: both flags can never hold at once
+
+// Compile-time cross-check of the bit-layout constants above against the
+// definitions in prog.h.  Exists only for its static_asserts; never called.
+void OnePass_Checks() {
+  // kMaxCap counts individual pointers, kMaxOnePassCapture counts pairs.
+  static_assert(Prog::kMaxOnePassCapture*2 == kMaxCap,
+                "kMaxCap disagrees with kMaxOnePassCapture");
+  // The empty-width flags must occupy exactly the low kEmptyShift bits.
+  static_assert(kEmptyAllFlags == (1<<kEmptyShift)-1,
+                "kEmptyShift disagrees with kEmptyAllFlags");
+}
+
+// Reports whether every empty-width flag requested in cond holds at
+// position p within context.  Bits of cond outside kEmptyAllFlags are ignored.
+static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
+  const uint32_t holds = Prog::EmptyFlags(context, p);
+  const uint32_t unmet = cond & kEmptyAllFlags & ~holds;
+  return unmet == 0;
+}
+
+// For every capture register i in [2, ncap) whose bit is set in cond,
+// records p in cap[i].  cap[0] and cap[1] are never touched here.
+static void ApplyCaptures(uint32_t cond, const char* p,
+                          const char** cap, int ncap) {
+  for (int reg = 2; reg < ncap; ++reg) {
+    const bool requested = (cond & (1 << kCapShift << reg)) != 0;
+    if (requested)
+      cap[reg] = p;
+  }
+}
+
+// Returns the OneState stored at position nodeindex of the packed node
+// buffer, where consecutive nodes are statesize bytes apart.
+static inline OneState* IndexToNode(uint8_t* nodes, int statesize,
+                                    int nodeindex) {
+  const int byte_offset = statesize*nodeindex;
+  uint8_t* where = nodes + byte_offset;
+  return reinterpret_cast<OneState*>(where);
+}
+
+bool Prog::SearchOnePass(const StringPiece& text,  // bytes to run the one-pass automaton over
+                         const StringPiece& const_context,  // surrounding text for empty-width checks; text is used if its data() is NULL
+                         Anchor anchor, MatchKind kind,  // anchor must be kAnchored unless kind is kFullMatch
+                         StringPiece* match, int nmatch) {  // on success match[0..nmatch) are filled in; returns whether a match was found
+  if (anchor != kAnchored && kind != kFullMatch) {
+    LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
+    return false;
+  }
+
+  // Make sure we have at least cap[0] and cap[1],
+  // because the overall match position is always recorded there.
+  int ncap = 2*nmatch;
+  if (ncap < 2)
+    ncap = 2;
+
+  const char* cap[kMaxCap];  // capture registers of the running thread
+  for (int i = 0; i < ncap; i++)
+    cap[i] = NULL;
+
+  const char* matchcap[kMaxCap];  // capture registers of the best match recorded so far
+  for (int i = 0; i < ncap; i++)
+    matchcap[i] = NULL;
+
+  StringPiece context = const_context;
+  if (context.data() == NULL)
+    context = text;
+  if (anchor_start() && BeginPtr(context) != BeginPtr(text))
+    return false;
+  if (anchor_end() && EndPtr(context) != EndPtr(text))
+    return false;
+  if (anchor_end())
+    kind = kFullMatch;  // an end-anchored program can only match at the end of text
+
+  uint8_t* nodes = onepass_nodes_.data();
+  int statesize = sizeof(uint32_t) + bytemap_range()*sizeof(uint32_t);  // matchcond word plus one action word per byte class
+
+  // start() is always mapped to the zeroth OneState.
+  OneState* state = IndexToNode(nodes, statesize, 0);
+  uint8_t* bytemap = bytemap_;
+  const char* bp = text.data();
+  const char* ep = text.data() + text.size();
+  const char* p;
+  bool matched = false;
+  matchcap[0] = bp;  // the search is anchored, so any match starts at bp
+  cap[0] = bp;
+  uint32_t nextmatchcond = state->matchcond;
+  for (p = bp; p < ep; p++) {
+    int c = bytemap[*p & 0xFF];
+    uint32_t matchcond = nextmatchcond;  // match condition of the state we are leaving
+    uint32_t cond = state->action[c];  // action word for this byte class
+
+    // Determine whether we can take the transition stored in cond.
+    // If so, advance state and nextmatchcond.
+    if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
+      uint32_t nextindex = cond >> kIndexShift;
+      state = IndexToNode(nodes, statesize, nextindex);
+      nextmatchcond = state->matchcond;
+    } else {
+      state = NULL;
+      nextmatchcond = kImpossible;
+    }
+
+    // This code section is carefully tuned.
+    // The goto sequence is about 10% faster than the
+    // obvious rewrite as a large if statement in the
+    // ASCIIMatchRE2 and DotMatchRE2 benchmarks.
+
+    // Saving the match capture registers is expensive.
+    // Is this intermediate match worth thinking about?
+
+    // Not if we want a full match.
+    if (kind == kFullMatch)
+      goto skipmatch;
+
+    // Not if it's impossible.
+    if (matchcond == kImpossible)
+      goto skipmatch;
+
+    // Not if the possible match is beaten by the certain
+    // match at the next byte.  When this test is useless
+    // (e.g., HTTPPartialMatchRE2) it slows the loop by
+    // about 10%, but when it avoids work (e.g., DotMatchRE2),
+    // it cuts the loop execution by about 45%.
+    if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
+      goto skipmatch;
+
+    // Finally, the match conditions must be satisfied.
+    if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
+      for (int i = 2; i < 2*nmatch; i++)
+        matchcap[i] = cap[i];
+      if (nmatch > 1 && (matchcond & kCapMask))
+        ApplyCaptures(matchcond, p, matchcap, ncap);
+      matchcap[1] = p;  // the match ends just before the current byte
+      matched = true;
+
+      // If we're in longest match mode, we have to keep
+      // going and see if we find a longer match.
+      // In first match mode, we can stop if the match
+      // takes priority over the next state for this input byte.
+      // That bit is per-input byte and thus in cond, not matchcond.
+      if (kind == kFirstMatch && (cond & kMatchWins))
+        goto done;
+    }
+
+  skipmatch:
+    if (state == NULL)  // no transition on this byte: stop with whatever match was recorded
+      goto done;
+    if ((cond & kCapMask) && nmatch > 1)
+      ApplyCaptures(cond, p, cap, ncap);
+  }
+
+  // Look for match at end of input (only reached when every byte was consumed, so state is non-NULL).
+  {
+    uint32_t matchcond = state->matchcond;
+    if (matchcond != kImpossible &&
+        ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
+      if (nmatch > 1 && (matchcond & kCapMask))
+        ApplyCaptures(matchcond, p, cap, ncap);
+      for (int i = 2; i < ncap; i++)
+        matchcap[i] = cap[i];
+      matchcap[1] = p;
+      matched = true;
+    }
+  }
+
+done:
+  if (!matched)
+    return false;
+  for (int i = 0; i < nmatch; i++)  // convert pointer pairs into StringPieces for the caller
+    match[i] =
+        StringPiece(matchcap[2 * i],
+                    static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
+  return true;
+}
+
+
+// Analysis to determine whether a given regexp program is one-pass.
+
+// Queues instruction id on q.
+// Returns false only when id is already on the queue (nothing is changed).
+// An id of 0 is never queued, but the call still reports success.
+typedef SparseSet Instq;
+static bool AddQ(Instq *q, int id) {
+  if (id != 0) {
+    if (q->contains(id))
+      return false;
+    q->insert(id);
+  }
+  return true;
+}
+
+struct InstCond {  // entry of the manual stack used by Prog::IsOnePass
+  int id;  // instruction id still to be visited
+  uint32_t cond;  // condition and capture bits accumulated on the path to id
+};
+
+// Returns whether this is a one-pass program; that is,
+// returns whether it is safe to use SearchOnePass on this program.
+// These conditions must be true for any instruction ip:
+//
+//   (1) for any other Inst nip, there is at most one input-free
+//       path from ip to nip.
+//   (2) there is at most one kInstByteRange instruction reachable from
+//       ip that matches any particular byte c.
+//   (3) there is at most one input-free path from ip to a kInstMatch
+//       instruction.
+//
+// This is actually just a conservative approximation: it might
+// return false when the answer is true, when kInstEmptyWidth
+// instructions are involved.
+// On success, builds the one-pass node table and saves it in onepass_nodes_; the analysis runs once (guarded by did_onepass_).
+bool Prog::IsOnePass() {
+  if (did_onepass_)
+    return onepass_nodes_.data() != NULL;  // already analyzed: report the cached verdict
+  did_onepass_ = true;
+
+  if (start() == 0)  // no match
+    return false;
+
+  // Steal memory for the one-pass NFA from the overall DFA budget.
+  // Willing to use at most 1/4 of the DFA budget (heuristic).
+  // Limit max node count to 65000 as a conservative estimate to
+  // avoid overflowing 16-bit node index in encoding.
+  int maxnodes = 2 + inst_count(kInstByteRange);
+  int statesize = sizeof(uint32_t) + bytemap_range()*sizeof(uint32_t);  // matchcond word plus one action word per byte class
+  if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
+    return false;
+
+  // Flood the graph starting at the start state, and check
+  // that in each reachable state, each possible byte leads
+  // to a unique next state.
+  int stacksize = inst_count(kInstCapture) +
+                  inst_count(kInstEmptyWidth) +
+                  inst_count(kInstNop) + 1;  // + 1 for start inst
+  PODArray<InstCond> stack(stacksize);
+
+  int size = this->size();
+  PODArray<int> nodebyid(size);  // indexed by ip
+  memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);  // all-ones bytes: every entry reads as -1, i.e. no node assigned yet
+
+  // Originally, nodes was a uint8_t[maxnodes*statesize], but that was
+  // unnecessarily optimistic: why allocate a large amount of memory
+  // upfront for a large program when it is unlikely to be one-pass?
+  std::vector<uint8_t> nodes;
+
+  Instq tovisit(size), workq(size);
+  AddQ(&tovisit, start());
+  nodebyid[start()] = 0;
+  int nalloc = 1;  // number of nodes allocated so far
+  nodes.insert(nodes.end(), statesize, 0);  // storage for node 0 (the start state)
+  for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
+    int id = *it;
+    int nodeindex = nodebyid[id];
+    OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
+
+    // Flood graph using manual stack, filling in actions as found.
+    // Default is none.
+    for (int b = 0; b < bytemap_range_; b++)
+      node->action[b] = kImpossible;
+    node->matchcond = kImpossible;
+
+    workq.clear();
+    bool matched = false;  // whether a kInstMatch has already been reached from this node
+    int nstack = 0;
+    stack[nstack].id = id;
+    stack[nstack++].cond = 0;
+    while (nstack > 0) {
+      int id = stack[--nstack].id;
+      uint32_t cond = stack[nstack].cond;
+
+    Loop:
+      Prog::Inst* ip = inst(id);
+      switch (ip->opcode()) {
+        default:
+          LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+          break;
+
+        case kInstAltMatch:
+          // TODO(rsc): Ignoring kInstAltMatch optimization.
+          // Should implement it in this engine, but it's subtle.
+          DCHECK(!ip->last());
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, id+1))
+            goto fail;
+          id = id+1;
+          goto Loop;
+
+        case kInstByteRange: {
+          int nextindex = nodebyid[ip->out()];
+          if (nextindex == -1) {  // target has no node yet: allocate one and schedule it
+            if (nalloc >= maxnodes) {
+              goto fail;
+            }
+            nextindex = nalloc;
+            AddQ(&tovisit, ip->out());
+            nodebyid[ip->out()] = nalloc;
+            nalloc++;
+            nodes.insert(nodes.end(), statesize, 0);
+            // Update node because it might have been invalidated.
+            node = IndexToNode(nodes.data(), statesize, nodeindex);
+          }
+          for (int c = ip->lo(); c <= ip->hi(); c++) {
+            int b = bytemap_[c];
+            // Skip any bytes immediately after c that are also in b.
+            while (c < 256-1 && bytemap_[c+1] == b)
+              c++;
+            uint32_t act = node->action[b];
+            uint32_t newact = (nextindex << kIndexShift) | cond;  // NOTE(review): nextindex is a signed int here; an unsigned cast before the shift may be cleaner - TODO confirm
+            if (matched)
+              newact |= kMatchWins;
+            if ((act & kImpossible) == kImpossible) {  // slot still unset: claim it
+              node->action[b] = newact;
+            } else if (act != newact) {  // a different action already owns this byte class: (2) is violated
+              goto fail;
+            }
+          }
+          if (ip->foldcase()) {
+            Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a';
+            Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a';
+            for (int c = lo; c <= hi; c++) {  // repeat for the upper-case counterparts of the range
+              int b = bytemap_[c];
+              // Skip any bytes immediately after c that are also in b.
+              while (c < 256-1 && bytemap_[c+1] == b)
+                c++;
+              uint32_t act = node->action[b];
+              uint32_t newact = (nextindex << kIndexShift) | cond;
+              if (matched)
+                newact |= kMatchWins;
+              if ((act & kImpossible) == kImpossible) {
+                node->action[b] = newact;
+              } else if (act != newact) {
+                goto fail;
+              }
+            }
+          }
+
+          if (ip->last())
+            break;
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, id+1))
+            goto fail;
+          id = id+1;
+          goto Loop;
+        }
+
+        case kInstCapture:
+        case kInstEmptyWidth:
+        case kInstNop:
+          if (!ip->last()) {
+            // If already on work queue, (1) is violated: bail out.
+            if (!AddQ(&workq, id+1))
+              goto fail;
+            stack[nstack].id = id+1;  // visit the sibling later, with the condition bits as they are now
+            stack[nstack++].cond = cond;
+          }
+
+          if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap)
+            cond |= (1 << kCapShift) << ip->cap();
+          if (ip->opcode() == kInstEmptyWidth)
+            cond |= ip->empty();
+
+          // kInstCapture and kInstNop always proceed to ip->out().
+          // kInstEmptyWidth only sometimes proceeds to ip->out(),
+          // but as a conservative approximation we assume it always does.
+          // We could be a little more precise by looking at what c
+          // is, but that seems like overkill.
+
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, ip->out())) {
+            goto fail;
+          }
+          id = ip->out();
+          goto Loop;
+
+        case kInstMatch:
+          if (matched) {
+            // (3) is violated
+            goto fail;
+          }
+          matched = true;
+          node->matchcond = cond;
+
+          if (ip->last())
+            break;
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, id+1))
+            goto fail;
+          id = id+1;
+          goto Loop;
+
+        case kInstFail:
+          break;
+      }
+    }
+  }
+
+  dfa_mem_ -= nalloc*statesize;  // charge the node table against the DFA memory budget
+  onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize);
+  memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize);
+  return true;
+
+fail:
+  return false;
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/parse.cc
+++ b/external/duckdb/third_party/re2/re2/parse.cc
--- a/external/duckdb/third_party/re2/re2/perl_groups.cc
+++ b/external/duckdb/third_party/re2/re2/perl_groups.cc
@@ -0,0 +1,119 @@
+// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
+// make_perl_groups.pl >perl_groups.cc
+
+#include "re2/unicode_groups.h"
+
+namespace duckdb_re2 {
+
+static const URange16 code1[] = {  /* \d */
+	{ 0x30, 0x39 },
+};
+static const URange16 code2[] = {  /* \s */
+	{ 0x9, 0xa },
+	{ 0xc, 0xd },
+	{ 0x20, 0x20 },
+};
+static const URange16 code3[] = {  /* \w */
+	{ 0x30, 0x39 },
+	{ 0x41, 0x5a },
+	{ 0x5f, 0x5f },
+	{ 0x61, 0x7a },
+};
+const UGroup perl_groups[] = {
+	{ "\\d", +1, code1, 1, 0, 0 },
+	{ "\\D", -1, code1, 1, 0, 0 },
+	{ "\\s", +1, code2, 3, 0, 0 },
+	{ "\\S", -1, code2, 3, 0, 0 },
+	{ "\\w", +1, code3, 4, 0, 0 },
+	{ "\\W", -1, code3, 4, 0, 0 },
+};
+const int num_perl_groups = 6;
+static const URange16 code4[] = {  /* [:alnum:] */
+	{ 0x30, 0x39 },
+	{ 0x41, 0x5a },
+	{ 0x61, 0x7a },
+};
+static const URange16 code5[] = {  /* [:alpha:] */
+	{ 0x41, 0x5a },
+	{ 0x61, 0x7a },
+};
+static const URange16 code6[] = {  /* [:ascii:] */
+	{ 0x0, 0x7f },
+};
+static const URange16 code7[] = {  /* [:blank:] */
+	{ 0x9, 0x9 },
+	{ 0x20, 0x20 },
+};
+static const URange16 code8[] = {  /* [:cntrl:] */
+	{ 0x0, 0x1f },
+	{ 0x7f, 0x7f },
+};
+static const URange16 code9[] = {  /* [:digit:] */
+	{ 0x30, 0x39 },
+};
+static const URange16 code10[] = {  /* [:graph:] */
+	{ 0x21, 0x7e },
+};
+static const URange16 code11[] = {  /* [:lower:] */
+	{ 0x61, 0x7a },
+};
+static const URange16 code12[] = {  /* [:print:] */
+	{ 0x20, 0x7e },
+};
+static const URange16 code13[] = {  /* [:punct:] */
+	{ 0x21, 0x2f },
+	{ 0x3a, 0x40 },
+	{ 0x5b, 0x60 },
+	{ 0x7b, 0x7e },
+};
+static const URange16 code14[] = {  /* [:space:] */
+	{ 0x9, 0xd },
+	{ 0x20, 0x20 },
+};
+static const URange16 code15[] = {  /* [:upper:] */
+	{ 0x41, 0x5a },
+};
+static const URange16 code16[] = {  /* [:word:] */
+	{ 0x30, 0x39 },
+	{ 0x41, 0x5a },
+	{ 0x5f, 0x5f },
+	{ 0x61, 0x7a },
+};
+static const URange16 code17[] = {  /* [:xdigit:] */
+	{ 0x30, 0x39 },
+	{ 0x41, 0x46 },
+	{ 0x61, 0x66 },
+};
+const UGroup posix_groups[] = {
+	{ "[:alnum:]", +1, code4, 3, 0, 0 },
+	{ "[:^alnum:]", -1, code4, 3, 0, 0 },
+	{ "[:alpha:]", +1, code5, 2, 0, 0 },
+	{ "[:^alpha:]", -1, code5, 2, 0, 0 },
+	{ "[:ascii:]", +1, code6, 1, 0, 0 },
+	{ "[:^ascii:]", -1, code6, 1, 0, 0 },
+	{ "[:blank:]", +1, code7, 2, 0, 0 },
+	{ "[:^blank:]", -1, code7, 2, 0, 0 },
+	{ "[:cntrl:]", +1, code8, 2, 0, 0 },
+	{ "[:^cntrl:]", -1, code8, 2, 0, 0 },
+	{ "[:digit:]", +1, code9, 1, 0, 0 },
+	{ "[:^digit:]", -1, code9, 1, 0, 0 },
+	{ "[:graph:]", +1, code10, 1, 0, 0 },
+	{ "[:^graph:]", -1, code10, 1, 0, 0 },
+	{ "[:lower:]", +1, code11, 1, 0, 0 },
+	{ "[:^lower:]", -1, code11, 1, 0, 0 },
+	{ "[:print:]", +1, code12, 1, 0, 0 },
+	{ "[:^print:]", -1, code12, 1, 0, 0 },
+	{ "[:punct:]", +1, code13, 4, 0, 0 },
+	{ "[:^punct:]", -1, code13, 4, 0, 0 },
+	{ "[:space:]", +1, code14, 2, 0, 0 },
+	{ "[:^space:]", -1, code14, 2, 0, 0 },
+	{ "[:upper:]", +1, code15, 1, 0, 0 },
+	{ "[:^upper:]", -1, code15, 1, 0, 0 },
+	{ "[:word:]", +1, code16, 4, 0, 0 },
+	{ "[:^word:]", -1, code16, 4, 0, 0 },
+	{ "[:xdigit:]", +1, code17, 3, 0, 0 },
+	{ "[:^xdigit:]", -1, code17, 3, 0, 0 },
+};
+const int num_posix_groups = 28;
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/pod_array.h
+++ b/external/duckdb/third_party/re2/re2/pod_array.h
@@ -0,0 +1,61 @@
+// Copyright 2018 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_POD_ARRAY_H_
+#define RE2_POD_ARRAY_H_
+
+#include <memory>
+#include <type_traits>
+
+#ifndef DUCKDB_BASE_STD
+namespace duckdb_base_std {
+	using ::std::unique_ptr;
+} // namespace duckdb_base_std
+#endif
+
+namespace duckdb_re2 {
+
+// Fixed-length, uninitialized array of POD elements.  The storage comes from
+// std::allocator<T> and is released by a deleter that remembers the length.
+template <typename T>
+class PODArray {
+ public:
+  static_assert(std::is_standard_layout<T>::value && std::is_trivial<T>::value,
+                "T must be POD");
+
+  // An empty array: data() is null and size() is 0.
+  PODArray() : ptr_() {}
+
+  // Allocates room for count elements without initializing them.
+  explicit PODArray(int count)
+      : ptr_(std::allocator<T>().allocate(count), Deleter(count)) {}
+
+  // Pointer to the first element.
+  T* data() const { return ptr_.get(); }
+
+  // Number of elements requested at construction.
+  int size() const { return ptr_.get_deleter().len_; }
+
+  // Unchecked element access.
+  T& operator[](int index) const { return ptr_[index]; }
+
+ private:
+  // Hands the block back to std::allocator<T>; len_ also serves as size().
+  struct Deleter {
+    Deleter() : len_(0) {}
+    explicit Deleter(int len) : len_(len) {}
+
+    void operator()(T* block) const {
+      std::allocator<T> alloc;
+      alloc.deallocate(block, len_);
+    }
+
+    int len_;
+  };
+
+  duckdb_base_std::unique_ptr<T[], Deleter> ptr_;
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_POD_ARRAY_H_
--- a/external/duckdb/third_party/re2/re2/prefilter.cc
+++ b/external/duckdb/third_party/re2/re2/prefilter.cc
@@ -0,0 +1,692 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/prefilter.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/re2.h"
+#include "re2/unicode_casefold.h"
+#include "re2/walker-inl.h"
+
+namespace duckdb_re2 {
+
+// Creates a Prefilter node of kind op.  Only AND and OR nodes get a
+// sub-filter vector; every other kind leaves subs_ as NULL.
+Prefilter::Prefilter(Op op) {
+  op_ = op;
+  const bool has_children = (op_ == AND || op_ == OR);
+  if (has_children) {
+    subs_ = new std::vector<Prefilter*>;
+  } else {
+    subs_ = NULL;
+  }
+}
+
+// Destroys a Prefilter together with every sub-filter it still owns.
+Prefilter::~Prefilter() {
+  if (subs_ == NULL)
+    return;
+  for (std::vector<Prefilter*>::iterator it = subs_->begin();
+       it != subs_->end(); ++it) {
+    delete *it;
+  }
+  delete subs_;
+  subs_ = NULL;
+}
+
+// Collapses a degenerate AND/OR node.  Nodes of any other kind, and AND/OR
+// nodes with two or more children, are returned unchanged.
+Prefilter* Prefilter::Simplify() {
+  const bool is_and = (op_ == AND);
+  if (!is_and && op_ != OR)
+    return this;
+
+  switch (subs_->size()) {
+    case 0:
+      // No children left: AND of nothing is true, OR of nothing is false.
+      op_ = is_and ? ALL : NONE;
+      return this;
+
+    case 1: {
+      // A single child: discard this wrapper and simplify the child instead.
+      Prefilter* only_child = (*subs_)[0];
+      subs_->clear();
+      delete this;
+      return only_child->Simplify();
+    }
+
+    default:
+      return this;
+  }
+}
+
+// Builds the "op" (AND or OR) combination of a and b.
+// Both arguments are consumed: each one either becomes part of the returned
+// Prefilter or is deleted.  Takes care to keep the resulting tree flat.
+Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
+  // Collapse degenerate AND/OR wrappers first.
+  a = a->Simplify();
+  b = b->Simplify();
+
+  // Canonical order: the operand with the smaller opcode goes in a.
+  if (a->op() > b->op())
+    std::swap(a, b);
+
+  // ALL and NONE are the smallest opcodes, so after the swap it is enough
+  // to inspect a:
+  //    ALL AND b = b        NONE OR b  = b
+  //    ALL OR b  = ALL      NONE AND b = NONE
+  if (a->op() == ALL || a->op() == NONE) {
+    const bool a_is_identity =
+        (op == AND && a->op() == ALL) || (op == OR && a->op() == NONE);
+    Prefilter* keep = a_is_identity ? b : a;
+    Prefilter* drop = a_is_identity ? a : b;
+    delete drop;
+    return keep;
+  }
+
+  // Both operands already have the requested op: move b's children into a.
+  if (a->op() == op && b->op() == op) {
+    a->subs()->insert(a->subs()->end(),
+                      b->subs()->begin(), b->subs()->end());
+    b->subs()->clear();
+    delete b;
+    return a;
+  }
+
+  // Exactly one operand has the requested op: make it a and append the other.
+  if (b->op() == op)
+    std::swap(a, b);
+  if (a->op() == op) {
+    a->subs()->push_back(b);
+    return a;
+  }
+
+  // Neither operand can absorb the other: wrap both in a fresh node.
+  Prefilter* parent = new Prefilter(op);
+  parent->subs()->push_back(a);
+  parent->subs()->push_back(b);
+  return parent;
+}
+
+// Conjunction of a and b; both arguments are handed over to AndOr.
+Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
+  Prefilter* both = AndOr(AND, a, b);
+  return both;
+}
+
+// Disjunction of a and b; both arguments are handed over to AndOr.
+Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
+  Prefilter* either = AndOr(OR, a, b);
+  return either;
+}
+
+// Removes redundant strings from ss.  If "ab" is a required string, then
+// knowing that "abc" is also required adds nothing: a string search that
+// finds "ab" already makes the regexp a candidate, so "abc" is erased.
+//
+// The empty string is skipped as a needle, because find() would locate it
+// at the start of every string and everything would be erased.
+//
+// The SSet orders strings by length and then lexicographically, and holds
+// no duplicates, so only entries after the needle have to be examined.
+void Prefilter::SimplifyStringSet(SSet* ss) {
+  SSIter needle = ss->begin();
+  if (needle != ss->end() && needle->empty())
+    ++needle;
+
+  while (needle != ss->end()) {
+    SSIter candidate = needle;
+    ++candidate;
+    while (candidate != ss->end()) {
+      const bool redundant =
+          candidate->size() > needle->size() &&
+          candidate->find(*needle) != std::string::npos;
+      if (redundant) {
+        candidate = ss->erase(candidate);
+      } else {
+        ++candidate;
+      }
+    }
+    ++needle;
+  }
+}
+
+// Returns the OR of one ATOM per string left in ss after it has been
+// simplified in place.  An empty set yields a NONE prefilter.
+Prefilter* Prefilter::OrStrings(SSet* ss) {
+  Prefilter* result = new Prefilter(NONE);
+  SimplifyStringSet(ss);
+  SSIter it = ss->begin();
+  while (it != ss->end()) {
+    result = Or(result, FromString(*it));
+    ++it;
+  }
+  return result;
+}
+
+// Lower-cases r.  Runes below Runeself only have 'A'..'Z' shifted; larger
+// runes go through the unicode_tolower fold table and are returned
+// unchanged when no applicable fold entry is found.
+static Rune ToLowerRune(Rune r) {
+  if (r >= Runeself) {
+    const CaseFold* fold =
+        LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
+    const bool applies = (fold != NULL && r >= fold->lo);
+    if (applies)
+      return ApplyFold(fold, r);
+    return r;
+  }
+
+  const bool is_upper = ('A' <= r && r <= 'Z');
+  if (is_upper)
+    r += 'a' - 'A';
+  return r;
+}
+
+// Lower-cases r when it is one of 'A'..'Z'; any other value is returned
+// unchanged.
+static Rune ToLowerRuneLatin1(Rune r) {
+  const bool is_upper = (r >= 'A' && r <= 'Z');
+  if (!is_upper)
+    return r;
+  return r + ('a' - 'A');
+}
+
+// Returns a newly allocated ATOM prefilter whose atom is a copy of str.
+Prefilter* Prefilter::FromString(const std::string& str) {
+  Prefilter* atom = new Prefilter(Prefilter::ATOM);
+  atom->atom_ = str;
+  return atom;
+}
+
+// Information about a regexp used during computation of Prefilter.
+// Can be thought of as information about the set of strings matching
+// the given regular expression.
+class Prefilter::Info {
+ public:
+  Info();  // starts with is_exact_ false and no match_
+  ~Info();  // deletes match_ if it is still owned
+
+  // Factory functions.  They delete their Info* arguments.
+  static Info* Alt(Info* a, Info* b);
+  static Info* Concat(Info* a, Info* b);
+  static Info* And(Info* a, Info* b);
+  static Info* Star(Info* a);
+  static Info* Plus(Info* a);
+  static Info* Quest(Info* a);
+  static Info* EmptyString();
+  static Info* NoMatch();
+  static Info* AnyCharOrAnyByte();
+  static Info* CClass(CharClass* cc, bool latin1);
+  static Info* Literal(Rune r);
+  static Info* LiteralLatin1(Rune r);
+  static Info* AnyMatch();
+
+  // Format Info as a string: the comma-joined exact set, or match_'s debug string.
+  std::string ToString();
+
+  // Caller takes ownership of the returned Prefilter; match_ is left NULL.
+  Prefilter* TakeMatch();
+
+  SSet& exact() { return exact_; }  // mutable access to the exact string set
+
+  bool is_exact() const { return is_exact_; }
+
+  class Walker;
+
+ private:
+  SSet exact_;  // the exact set of matching strings, meaningful while is_exact_ is true
+
+  // When is_exact_ is true, the strings that match
+  // are placed in exact_. When it is no longer an exact
+  // set of strings that match this RE, then is_exact_
+  // is false and match_ contains the required match
+  // criteria.
+  bool is_exact_;
+
+  // Accumulated Prefilter query that any
+  // match for this regexp is guaranteed to match.
+  Prefilter* match_;
+};
+
+
+// A freshly constructed Info is inexact and has no match prefilter yet.
+Prefilter::Info::Info() : is_exact_(false), match_(NULL) {}
+
+// Frees the match prefilter unless TakeMatch() already handed it off
+// (in which case match_ is NULL and the delete is a no-op).
+Prefilter::Info::~Info() {
+  delete match_;
+}
+
+// Hands the match prefilter to the caller and leaves match_ NULL.
+// An exact Info is converted first: its exact_ set becomes an OR of
+// atoms (via OrStrings) and the Info is marked inexact.
+Prefilter* Prefilter::Info::TakeMatch() {
+  if (is_exact_) {
+    // Convert the exact string set into an OR-of-atoms prefilter.
+    match_ = Prefilter::OrStrings(&exact_);
+    is_exact_ = false;
+  }
+
+  Prefilter* const released = match_;
+  match_ = NULL;
+  return released;
+}
+
+// Formats an Info as a string: an exact Info is rendered as its
+// strings joined by commas; otherwise the match prefilter's
+// DebugString() is returned, or "" when there is no match prefilter.
+std::string Prefilter::Info::ToString() {
+  if (!is_exact_)
+    return match_ ? match_->DebugString() : "";
+
+  std::string joined;
+  const char* separator = "";
+  for (const std::string& str : exact_) {
+    joined += separator;
+    joined += str;
+    separator = ",";
+  }
+  return joined;
+}
+
+// Inserts into *dst the concatenation x + y for every x in a and every
+// y in b. Strings already present in *dst are left in place.
+void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
+  for (const std::string& left : a) {
+    for (const std::string& right : b) {
+      dst->insert(left + right);
+    }
+  }
+}
+
+// Concatenates a and b, both of which must be exact.
+// The result is an exact Info holding the cross product of the two
+// string sets. a and b are deleted; when a is NULL, b is returned
+// untouched instead.
+Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
+  if (a == NULL)
+    return b;
+  DCHECK(a->is_exact_);
+  DCHECK(b && b->is_exact_);
+
+  Info* product = new Info();
+  product->is_exact_ = true;
+  CrossProduct(a->exact_, b->exact_, &product->exact_);
+
+  delete a;
+  delete b;
+  return product;
+}
+
+// Builds an inexact Info for the concatenation ab from a and b.
+// Used when a or b is not exact, or when the exact cross product
+// would likely be too large. If either argument is NULL the other one
+// is returned as is; otherwise both are consumed and deleted.
+Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
+  if (a == NULL || b == NULL)
+    return a == NULL ? b : a;
+
+  Info* conjunction = new Info();
+  conjunction->is_exact_ = false;
+  conjunction->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
+
+  delete a;
+  delete b;
+  return conjunction;
+}
+
+// Builds the Info for the alternation a|b. Both arguments are deleted.
+Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
+  Info* result = new Info();
+
+  const bool both_exact = a->is_exact_ && b->is_exact_;
+  if (!both_exact) {
+    // At least one side is inexact. TakeMatch() converts an exact
+    // side into a prefilter, and the result is the OR of both
+    // prefilters with is_exact_ = false.
+    result->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
+    result->is_exact_ = false;
+  } else {
+    // Both sides are exact: the result is the union of the two sets.
+    // To avoid string copies, steal the larger set wholesale and
+    // merge the smaller one into it.
+    Info* larger = a;
+    Info* smaller = b;
+    if (a->exact_.size() < b->exact_.size()) {
+      larger = b;
+      smaller = a;
+    }
+    result->exact_ = std::move(larger->exact_);
+    result->exact_.insert(smaller->exact_.begin(), smaller->exact_.end());
+    result->is_exact_ = true;
+  }
+
+  delete a;
+  delete b;
+  return result;
+}
+
+// Constructs Info for a? given a. Nothing learned about a is kept:
+// the result is inexact with an ALL prefilter. Deletes a.
+Prefilter::Info* Prefilter::Info::Quest(Info *a) {
+  Info* optional = new Info();
+  optional->match_ = new Prefilter(ALL);
+  optional->is_exact_ = false;
+
+  delete a;
+  return optional;
+}
+
+// Constructs Info for a* given a.
+// Handled exactly like a? -- see Quest().
+Prefilter::Info* Prefilter::Info::Star(Info *a) {
+  return Quest(a);
+}
+
+// Constructs Info for a+ given a. The result keeps a's match
+// prefilter but is never exact, even if a was. Deletes a.
+Prefilter::Info* Prefilter::Info::Plus(Info *a) {
+  Info* repeated = new Info();
+  repeated->is_exact_ = false;
+  repeated->match_ = a->TakeMatch();
+
+  delete a;
+  return repeated;
+}
+
+// Encodes r with runetochar() and returns the resulting bytes.
+static std::string RuneToString(Rune r) {
+  char encoded[UTFmax];
+  const int length = runetochar(encoded, &r);
+  return std::string(encoded, length);
+}
+
+// Returns a one-byte string holding the low 8 bits of r.
+static std::string RuneToStringLatin1(Rune r) {
+  const char byte = r & 0xff;
+  return std::string(1, byte);
+}
+
+// Constructs the exact Info for a single literal rune: its one string
+// is the lowercased rune as produced by RuneToString().
+Prefilter::Info* Prefilter::Info::Literal(Rune r) {
+  const Rune lowered = ToLowerRune(r);
+  Info* literal = new Info();
+  literal->is_exact_ = true;
+  literal->exact_.insert(RuneToString(lowered));
+  return literal;
+}
+
+// Constructs the exact Info for a single literal rune of a Latin-1
+// encoded string: its one string is the lowercased rune as a single
+// byte.
+Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
+  const Rune lowered = ToLowerRuneLatin1(r);
+  Info* literal = new Info();
+  literal->is_exact_ = true;
+  literal->exact_.insert(RuneToStringLatin1(lowered));
+  return literal;
+}
+
+// Constructs Info for dot (any character) or \C (any byte):
+// an inexact Info whose prefilter is ALL.
+Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
+  Prefilter::Info* any = new Prefilter::Info();
+  any->match_ = new Prefilter(ALL);
+  return any;
+}
+
+// Constructs Prefilter::Info for no possible match:
+// an inexact Info whose prefilter is NONE.
+Prefilter::Info* Prefilter::Info::NoMatch() {
+  Prefilter::Info* none = new Prefilter::Info();
+  none->match_ = new Prefilter(NONE);
+  return none;
+}
+
+// Constructs Prefilter::Info for any possible match.
+// Such an Info is valid for every regular expression because it
+// asserts nothing at all about the strings being matched: it is
+// inexact and its prefilter is ALL.
+Prefilter::Info* Prefilter::Info::AnyMatch() {
+  Prefilter::Info* anything = new Prefilter::Info();
+  anything->match_ = new Prefilter(ALL);
+  return anything;
+}
+
+// Constructs Prefilter::Info for just the empty string:
+// an exact Info whose only string is "".
+Prefilter::Info* Prefilter::Info::EmptyString() {
+  Prefilter::Info* empty = new Prefilter::Info();
+  empty->exact_.insert("");
+  empty->is_exact_ = true;
+  return empty;
+}
+
+// Constructs Prefilter::Info for a character class.
+// A class with more than 10 members is treated like "any character";
+// otherwise the result is an exact Info containing the lowercased
+// one-rune string of every member (Latin-1 encoded when latin1 is set).
+typedef CharClass::iterator CCIter;
+Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
+                                         bool latin1) {
+  // If the class is too large, it's okay to overestimate.
+  if (cc->size() > 10)
+    return AnyCharOrAnyByte();
+
+  Prefilter::Info* members = new Prefilter::Info();
+  for (CCIter range = cc->begin(); range != cc->end(); ++range) {
+    for (Rune r = range->lo; r <= range->hi; r++) {
+      members->exact_.insert(latin1
+                                 ? RuneToStringLatin1(ToLowerRuneLatin1(r))
+                                 : RuneToString(ToLowerRune(r)));
+    }
+  }
+
+  members->is_exact_ = true;
+  return members;
+}
+
+// Regexp walker that produces a Prefilter::Info for each visited node;
+// PostVisit() combines the Infos of a node's children into the node's
+// own Info.
+class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
+ public:
+  // latin1 selects the Latin-1 literal and character-class handling
+  // (see the latin1() checks in PostVisit).
+  Walker(bool latin1) : latin1_(latin1) {}
+
+  // Computes the Info for re from the Infos of its children.
+  virtual Info* PostVisit(
+      Regexp* re, Info* parent_arg,
+      Info* pre_arg,
+      Info** child_args, int nchild_args);
+
+  // Returns the Info to use for re without visiting it.
+  virtual Info* ShortVisit(
+      Regexp* re,
+      Info* parent_arg);
+
+  // Whether this walker was constructed for Latin-1 input.
+  bool latin1() { return latin1_; }
+ private:
+  bool latin1_;
+
+  // Not copyable.
+  Walker(const Walker&) = delete;
+  Walker& operator=(const Walker&) = delete;
+};
+
+// Walks re and returns the Info computed for it. Returns NULL (after
+// deleting whatever partial Info was produced) if the walk reports
+// stopped_early().
+Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
+  const bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
+  Prefilter::Info::Walker walker(latin1);
+  Prefilter::Info* result = walker.WalkExponential(re, NULL, 100000);
+
+  if (!walker.stopped_early())
+    return result;
+
+  delete result;
+  return NULL;
+}
+
+// Result for a node that is not fully visited: AnyMatch(), which makes
+// no claim about the matching strings. Neither argument is used.
+// NOTE(review): presumably invoked by the base Walker once the
+// WalkExponential budget runs out -- confirm against the walker.
+Prefilter::Info* Prefilter::Info::Walker::ShortVisit(
+    Regexp* /*re*/, Prefilter::Info* /*parent_arg*/) {
+  return AnyMatch();
+}
+
+// Constructs the Prefilter::Info for the given regular expression.
+// Assumes re is simplified.
+//
+// child_args holds the nchild_args Infos already computed for re's
+// sub-expressions. Every child Info is either consumed (deleted) by
+// the combinator it is passed to or returned as the result.
+// parent_arg and pre_arg are not used here.
+Prefilter::Info* Prefilter::Info::Walker::PostVisit(
+    Regexp* re, Prefilter::Info* parent_arg,
+    Prefilter::Info* pre_arg, Prefilter::Info** child_args,
+    int nchild_args) {
+  Prefilter::Info *info;
+  switch (re->op()) {
+    // kRegexpRepeat shares the error path with unknown ops: the code
+    // treats it as a bad op here (see "Assumes re is simplified" above).
+    default:
+    case kRegexpRepeat:
+      info = EmptyString();
+      LOG(DFATAL) << "Bad regexp op " << re->op();
+      break;
+
+    case kRegexpNoMatch:
+      info = NoMatch();
+      break;
+
+    // These ops match the empty string:
+    case kRegexpEmptyMatch:      // anywhere
+    case kRegexpBeginLine:       // at beginning of line
+    case kRegexpEndLine:         // at end of line
+    case kRegexpBeginText:       // at beginning of text
+    case kRegexpEndText:         // at end of text
+    case kRegexpWordBoundary:    // at word boundary
+    case kRegexpNoWordBoundary:  // not at word boundary
+      info = EmptyString();
+      break;
+
+    case kRegexpLiteral:
+      if (latin1()) {
+        info = LiteralLatin1(re->rune());
+      }
+      else {
+        info = Literal(re->rune());
+      }
+      break;
+
+    case kRegexpLiteralString:
+      if (re->nrunes() == 0) {
+        info = NoMatch();
+        break;
+      }
+      // Build the exact Info rune by rune; Concat() consumes both of
+      // its arguments and returns the combined exact Info.
+      if (latin1()) {
+        info = LiteralLatin1(re->runes()[0]);
+        for (int i = 1; i < re->nrunes(); i++) {
+          info = Concat(info, LiteralLatin1(re->runes()[i]));
+        }
+      } else {
+        info = Literal(re->runes()[0]);
+        for (int i = 1; i < re->nrunes(); i++) {
+          info = Concat(info, Literal(re->runes()[i]));
+        }
+      }
+      break;
+
+    case kRegexpConcat: {
+      // Accumulate in info.
+      // Exact is concat of recent contiguous exact nodes.
+      info = NULL;
+      Info* exact = NULL;
+      for (int i = 0; i < nchild_args; i++) {
+        Info* ci = child_args[i];  // child info
+        // End the exact run if this child is inexact, or if extending
+        // the run would produce a cross product of more than 16 strings.
+        if (!ci->is_exact() ||
+            (exact && ci->exact().size() * exact->exact().size() > 16)) {
+          // Exact run is over.
+          info = And(info, exact);
+          exact = NULL;
+          // Add this child's info.
+          info = And(info, ci);
+        } else {
+          // Append to exact run.
+          exact = Concat(exact, ci);
+        }
+      }
+      // Fold in the trailing exact run (And() tolerates NULL arguments).
+      info = And(info, exact);
+    }
+      break;
+
+    case kRegexpAlternate:
+      // Fold the children left to right; Alt() consumes both arguments.
+      info = child_args[0];
+      for (int i = 1; i < nchild_args; i++)
+        info = Alt(info, child_args[i]);
+      break;
+
+    case kRegexpStar:
+      info = Star(child_args[0]);
+      break;
+
+    case kRegexpQuest:
+      info = Quest(child_args[0]);
+      break;
+
+    case kRegexpPlus:
+      info = Plus(child_args[0]);
+      break;
+
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+      // Claim nothing, except that it's not empty.
+      info = AnyCharOrAnyByte();
+      break;
+
+    case kRegexpCharClass:
+      info = CClass(re->cc(), latin1());
+      break;
+
+    case kRegexpCapture:
+      // These don't affect the set of matching strings.
+      info = child_args[0];
+      break;
+  }
+
+  return info;
+}
+
+
+// Builds the prefilter for re: simplifies it, computes its Info and
+// takes the Info's match prefilter. Returns NULL when re is NULL,
+// when Simplify() returns NULL, or when BuildInfo() returns NULL.
+Prefilter* Prefilter::FromRegexp(Regexp* re) {
+  Regexp* const simplified = (re != NULL) ? re->Simplify() : NULL;
+  if (simplified == NULL)
+    return NULL;
+
+  // The simplified regexp is only needed while building the Info.
+  Prefilter::Info* const info = BuildInfo(simplified);
+  simplified->Decref();
+  if (info == NULL)
+    return NULL;
+
+  Prefilter* const result = info->TakeMatch();
+  delete info;
+  return result;
+}
+
+// Renders the prefilter for debugging:
+//   NONE -> "*no-matches*", ATOM -> the atom, ALL -> "",
+//   AND  -> children separated by spaces,
+//   OR   -> children separated by '|' and wrapped in parentheses.
+// A NULL child is rendered as "<nil>".
+std::string Prefilter::DebugString() const {
+  if (op_ == NONE)
+    return "*no-matches*";
+  if (op_ == ATOM)
+    return atom_;
+  if (op_ == ALL)
+    return "";
+  if (op_ != AND && op_ != OR) {
+    LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
+    return StringPrintf("op%d", op_);
+  }
+
+  // AND and OR differ only in the separator and the parentheses.
+  const bool is_or = (op_ == OR);
+  const char* separator = is_or ? "|" : " ";
+  std::string out = is_or ? "(" : "";
+  for (size_t k = 0; k < subs_->size(); k++) {
+    if (k > 0)
+      out += separator;
+    Prefilter* child = (*subs_)[k];
+    out += child ? child->DebugString() : "<nil>";
+  }
+  if (is_or)
+    out += ")";
+  return out;
+}
+
+// Returns the prefilter for re2's parsed regexp, or NULL when re2 is
+// NULL or has no parsed regexp.
+Prefilter* Prefilter::FromRE2(const RE2* re2) {
+  if (re2 != NULL) {
+    Regexp* parsed = re2->Regexp();
+    if (parsed != NULL)
+      return FromRegexp(parsed);
+  }
+  return NULL;
+}
+
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/prefilter.h
+++ b/external/duckdb/third_party/re2/re2/prefilter.h
@@ -0,0 +1,130 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_PREFILTER_H_
+#define RE2_PREFILTER_H_
+
+// Prefilter is the class used to extract string guards from regexps.
+// Rather than using Prefilter class directly, use FilteredRE2.
+// See filtered_re2.h
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace duckdb_re2 {
+
+class RE2;
+
+class Regexp;
+
+// A node in an AND/OR tree of string "atoms" that must appear in any
+// text matched by a regexp.
+class Prefilter {
+  // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
+ public:
+  enum Op {
+    ALL = 0,  // Everything matches
+    NONE,  // Nothing matches
+    ATOM,  // The string atom() must match
+    AND,   // All in subs() must match
+    OR,   // One of subs() must match
+  };
+
+  explicit Prefilter(Op op);
+  ~Prefilter();
+
+  // Kind of this node. Const-qualified like the other read accessors
+  // so that it can be called through a const Prefilter.
+  Op op() const { return op_; }
+  // The string held by this node.
+  const std::string& atom() const { return atom_; }
+  // Id shared by all structurally identical nodes; see unique_id_.
+  void set_unique_id(int id) { unique_id_ = id; }
+  int unique_id() const { return unique_id_; }
+
+  // The children of the Prefilter node.
+  std::vector<Prefilter*>* subs() {
+    DCHECK(op_ == AND || op_ == OR);
+    return subs_;
+  }
+
+  // Set the children vector. Prefilter takes ownership of subs and
+  // subs_ will be deleted when Prefilter is deleted.
+  void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
+
+  // Given a RE2, return a Prefilter. The caller takes ownership of
+  // the Prefilter and should deallocate it. Returns NULL if Prefilter
+  // cannot be formed.
+  static Prefilter* FromRE2(const RE2* re2);
+
+  // Returns a readable debug string of the prefilter.
+  std::string DebugString() const;
+
+ private:
+  // A comparator used to store exact strings. We compare by length,
+  // then lexicographically. This ordering makes it easier to reduce the
+  // set of strings in SimplifyStringSet.
+  struct LengthThenLex {
+    bool operator()(const std::string& a, const std::string& b) const {
+      return (a.size() < b.size()) || (a.size() == b.size() && a < b);
+    }
+  };
+
+  class Info;
+
+  using SSet = std::set<std::string, LengthThenLex>;
+  using SSIter = SSet::iterator;
+  using ConstSSIter = SSet::const_iterator;
+
+  // Combines two prefilters together to create an AND. The passed
+  // Prefilters will be part of the returned Prefilter or deleted.
+  static Prefilter* And(Prefilter* a, Prefilter* b);
+
+  // Combines two prefilters together to create an OR. The passed
+  // Prefilters will be part of the returned Prefilter or deleted.
+  static Prefilter* Or(Prefilter* a, Prefilter* b);
+
+  // Generalized And/Or
+  static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
+
+  // Builds the prefilter for a parsed regexp; returns NULL on failure.
+  static Prefilter* FromRegexp(Regexp* a);
+
+  // Creates an ATOM node holding a copy of str.
+  static Prefilter* FromString(const std::string& str);
+
+  // Returns the OR of one ATOM per string in *ss; *ss is simplified
+  // in place first.
+  static Prefilter* OrStrings(SSet* ss);
+
+  // Walks re and returns its Info, or NULL if the walk stopped early.
+  static Info* BuildInfo(Regexp* re);
+
+  Prefilter* Simplify();
+
+  // Removes redundant strings from the set. A string is redundant if
+  // any of the other strings appear as a substring. The empty string
+  // is a special case, which is ignored.
+  static void SimplifyStringSet(SSet* ss);
+
+  // Adds the cross-product of a and b to dst.
+  // (For each string i in a and j in b, add i+j.)
+  static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
+
+  // Kind of Prefilter.
+  Op op_;
+
+  // Sub-matches for AND or OR Prefilter.
+  std::vector<Prefilter*>* subs_;
+
+  // Actual string to match in leaf node.
+  std::string atom_;
+
+  // If different prefilters have the same string atom, or if they are
+  // structurally the same (e.g., OR of same atom strings) they are
+  // considered the same unique nodes. This is the id for each unique
+  // node. This field is populated with a unique id for every node,
+  // and -1 for duplicate nodes.
+  int unique_id_;
+
+  // Not copyable.
+  Prefilter(const Prefilter&) = delete;
+  Prefilter& operator=(const Prefilter&) = delete;
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_PREFILTER_H_
--- a/external/duckdb/third_party/re2/re2/prefilter_tree.cc
+++ b/external/duckdb/third_party/re2/re2/prefilter_tree.cc
@@ -0,0 +1,388 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/prefilter_tree.h"
+
+#include <stddef.h>
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/prefilter.h"
+#include "re2/re2.h"
+
+namespace duckdb_re2 {
+
+// Default tree: uses a minimum atom length of 3.
+PrefilterTree::PrefilterTree() : PrefilterTree(3) {}
+
+// Creates an uncompiled tree. min_atom_len is the length threshold
+// that KeepNode() applies to ATOM nodes.
+PrefilterTree::PrefilterTree(int min_atom_len)
+    : compiled_(false), min_atom_len_(min_atom_len) {}
+
+// Deletes every prefilter that was handed to Add(). Entries that
+// Add() stored as NULL are harmless to delete.
+PrefilterTree::~PrefilterTree() {
+  for (Prefilter* prefilter : prefilter_vec_)
+    delete prefilter;
+}
+
+// Registers the prefilter of the next regexp. A prefilter rejected by
+// KeepNode() is deleted and recorded as NULL, so that the position in
+// prefilter_vec_ still corresponds to the regexp. Must not be called
+// after Compile().
+void PrefilterTree::Add(Prefilter* prefilter) {
+  if (compiled_) {
+    LOG(DFATAL) << "Add called after Compile.";
+    return;
+  }
+
+  const bool discard = prefilter != NULL && !KeepNode(prefilter);
+  if (discard) {
+    delete prefilter;
+    prefilter = NULL;
+  }
+  prefilter_vec_.push_back(prefilter);
+}
+
+// Finalizes the tree and fills *atom_vec with the atoms to search for.
+// A second call is an error. A call made before any prefilter was
+// added does nothing and leaves the tree uncompiled.
+void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
+  if (compiled_) {
+    LOG(DFATAL) << "Compile called already.";
+    return;
+  }
+
+  // Some legacy users of PrefilterTree call Compile() before
+  // adding any regexps and expect Compile() to have no effect.
+  if (!prefilter_vec_.empty()) {
+    compiled_ = true;
+
+    NodeMap canonical_nodes;
+    AssignUniqueIds(&canonical_nodes, atom_vec);
+  }
+}
+
+// Looks up the node registered in *nodes under node's NodeString().
+// Returns NULL when no node with that string has been registered.
+Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
+  NodeMap::iterator found = nodes->find(NodeString(node));
+  if (found != nodes->end())
+    return found->second;
+  return NULL;
+}
+
+// Returns the key identifying a node's structure: "<op>:" followed by
+// the atom for ATOM nodes, or by the comma-separated unique ids of
+// the children for all other nodes.
+std::string PrefilterTree::NodeString(Prefilter* node) const {
+  // The leading operation number disambiguates AND/OR/atom nodes.
+  std::string key = StringPrintf("%d", node->op()) + ":";
+  if (node->op() == Prefilter::ATOM) {
+    key += node->atom();
+    return key;
+  }
+
+  for (size_t k = 0; k < node->subs()->size(); k++) {
+    if (k > 0)
+      key += ',';
+    Prefilter* child = (*node->subs())[k];
+    key += StringPrintf("%d", child->unique_id());
+  }
+  return key;
+}
+
+// Decides whether node is useful as a filter.
+//   ALL / NONE: never kept.
+//   ATOM: kept when the atom has at least min_atom_len_ bytes.
+//   AND: children that are not kept are deleted and removed from the
+//        node; the node is kept if at least one child remains.
+//   OR: kept only if every child is kept (checking stops at the first
+//       child that is not).
+bool PrefilterTree::KeepNode(Prefilter* node) const {
+  if (node == NULL)
+    return false;
+
+  switch (node->op()) {
+    default:
+      LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
+      return false;
+
+    case Prefilter::ALL:
+    case Prefilter::NONE:
+      return false;
+
+    case Prefilter::ATOM: {
+      const size_t min_len = static_cast<size_t>(min_atom_len_);
+      return node->atom().size() >= min_len;
+    }
+
+    case Prefilter::AND: {
+      std::vector<Prefilter*>& children = *node->subs();
+      // Compact the surviving children to the front of the vector.
+      size_t kept = 0;
+      for (size_t i = 0; i < children.size(); i++) {
+        Prefilter* child = children[i];
+        if (KeepNode(child)) {
+          children[kept++] = child;
+        } else {
+          delete child;
+        }
+      }
+      children.resize(kept);
+      return kept != 0;
+    }
+
+    case Prefilter::OR: {
+      for (size_t i = 0; i < node->subs()->size(); i++) {
+        Prefilter* child = (*node->subs())[i];
+        if (!KeepNode(child))
+          return false;
+      }
+      return true;
+    }
+  }
+}
+
+// Assigns a unique id to every structurally distinct node reachable
+// from prefilter_vec_ and builds entries_ (parent links, trigger
+// counts and regexp ids) from those ids.
+//
+// *nodes receives a map from NodeString() key to the canonical node
+// for that key. *atom_vec is cleared and then receives one string per
+// distinct atom; atom_index_to_id_ records, in the same order, the
+// unique id of each atom's node. Indices of NULL prefilters are
+// appended to unfiltered_.
+void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
+                                    std::vector<std::string>* atom_vec) {
+  atom_vec->clear();
+
+  // Build vector of all filter nodes, sorted topologically
+  // from top to bottom in v.
+  std::vector<Prefilter*> v;
+
+  // Add the top level nodes of each regexp prefilter.
+  for (size_t i = 0; i < prefilter_vec_.size(); i++) {
+    Prefilter* f = prefilter_vec_[i];
+    if (f == NULL)
+      unfiltered_.push_back(static_cast<int>(i));
+
+    // We push NULL also on to v, so that we maintain the
+    // mapping of index==regexpid for level=0 prefilter nodes.
+    v.push_back(f);
+  }
+
+  // Now add all the descendant nodes.
+  // v grows while it is being scanned, so children appended here are
+  // themselves expanded later in the same loop.
+  for (size_t i = 0; i < v.size(); i++) {
+    Prefilter* f = v[i];
+    if (f == NULL)
+      continue;
+    if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) {
+      const std::vector<Prefilter*>& subs = *f->subs();
+      for (size_t j = 0; j < subs.size(); j++)
+        v.push_back(subs[j]);
+    }
+  }
+
+  // Identify unique nodes.
+  // v is walked backwards so that children get their ids before their
+  // parents; NodeString() of a parent reads its children's ids.
+  int unique_id = 0;
+  for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+    Prefilter *node = v[i];
+    if (node == NULL)
+      continue;
+    node->set_unique_id(-1);
+    Prefilter* canonical = CanonicalNode(nodes, node);
+    if (canonical == NULL) {
+      // Any further nodes that have the same node string
+      // will find this node as the canonical node.
+      nodes->emplace(NodeString(node), node);
+      if (node->op() == Prefilter::ATOM) {
+        atom_vec->push_back(node->atom());
+        atom_index_to_id_.push_back(unique_id);
+      }
+      node->set_unique_id(unique_id++);
+    } else {
+      node->set_unique_id(canonical->unique_id());
+    }
+  }
+  entries_.resize(unique_id);
+
+  // Fill the entries.
+  // Only canonical nodes contribute; duplicates are skipped.
+  for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+    Prefilter* prefilter = v[i];
+    if (prefilter == NULL)
+      continue;
+    if (CanonicalNode(nodes, prefilter) != prefilter)
+      continue;
+    int id = prefilter->unique_id();
+    switch (prefilter->op()) {
+      default:
+        LOG(DFATAL) << "Unexpected op: " << prefilter->op();
+        return;
+
+      case Prefilter::ATOM:
+        entries_[id].propagate_up_at_count = 1;
+        break;
+
+      case Prefilter::OR:
+      case Prefilter::AND: {
+        // For each child, we append our id to the child's list of
+        // parent ids... unless we happen to have done so already.
+        // The number of appends is the number of unique children,
+        // which allows correct upward propagation from AND nodes.
+        int up_count = 0;
+        for (size_t j = 0; j < prefilter->subs()->size(); j++) {
+          int child_id = (*prefilter->subs())[j]->unique_id();
+          std::vector<int>& parents = entries_[child_id].parents;
+          if (parents.empty() || parents.back() != id) {
+            parents.push_back(id);
+            up_count++;
+          }
+        }
+        // An AND node triggers once all unique children have matched;
+        // an OR node triggers on the first matching child.
+        entries_[id].propagate_up_at_count =
+            prefilter->op() == Prefilter::AND ? up_count : 1;
+        break;
+      }
+    }
+  }
+
+  // For top level nodes, populate regexp id.
+  for (size_t i = 0; i < prefilter_vec_.size(); i++) {
+    if (prefilter_vec_[i] == NULL)
+      continue;
+    int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
+    DCHECK_LE(0, id);
+    Entry* entry = &entries_[id];
+    entry->regexps.push_back(static_cast<int>(i));
+  }
+
+  // Lastly, using probability-based heuristics, we identify nodes
+  // that trigger too many parents and then we try to prune edges.
+  // We use logarithms below to avoid the likelihood of underflow.
+  double log_num_regexps = std::log(prefilter_vec_.size() - unfiltered_.size());
+  // Hoisted this above the loop so that we don't thrash the heap.
+  std::vector<std::pair<size_t, int>> entries_by_num_edges;
+  for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+    Prefilter* prefilter = v[i];
+    // Pruning applies only to AND nodes because it "just" reduces
+    // precision; applied to OR nodes, it would break correctness.
+    if (prefilter == NULL || prefilter->op() != Prefilter::AND)
+      continue;
+    if (CanonicalNode(nodes, prefilter) != prefilter)
+      continue;
+    int id = prefilter->unique_id();
+
+    // Sort the current node's children by the numbers of parents.
+    entries_by_num_edges.clear();
+    for (size_t j = 0; j < prefilter->subs()->size(); j++) {
+      int child_id = (*prefilter->subs())[j]->unique_id();
+      const std::vector<int>& parents = entries_[child_id].parents;
+      entries_by_num_edges.emplace_back(parents.size(), child_id);
+    }
+    std::stable_sort(entries_by_num_edges.begin(), entries_by_num_edges.end());
+
+    // A running estimate of how many regexps will be triggered by
+    // pruning the remaining children's edges to the current node.
+    // Our nominal target is one, so the threshold is log(1) == 0;
+    // pruning occurs iff the child has more than nine edges left.
+    double log_num_triggered = log_num_regexps;
+    for (const auto& pair : entries_by_num_edges) {
+      int child_id = pair.second;
+      std::vector<int>& parents = entries_[child_id].parents;
+      if (log_num_triggered > 0.) {
+        log_num_triggered += std::log(parents.size());
+        log_num_triggered -= log_num_regexps;
+      } else if (parents.size() > 9) {
+        // Drop the edge child -> this node, and lower this node's
+        // trigger count to match the removed child.
+        auto it = std::find(parents.begin(), parents.end(), id);
+        if (it != parents.end()) {
+          parents.erase(it);
+          entries_[id].propagate_up_at_count--;
+        }
+      }
+    }
+  }
+}
+
+// Functions for triggering during search.
+
+// Given the indices (into the atom vector returned by Compile) of the
+// atoms that matched, fills *regexps with the sorted indices of the
+// regexps that have to be run. *regexps is cleared first.
+//
+// Before Compile(), every regexp is returned (or none, if no prefilter
+// was ever added). After Compile(), the result is the set of regexps
+// triggered by the matched atoms plus all unfiltered regexps.
+// Atom indices outside the range of known atoms are reported and
+// ignored rather than used as an out-of-bounds subscript.
+void PrefilterTree::RegexpsGivenStrings(
+    const std::vector<int>& matched_atoms,
+    std::vector<int>* regexps) const {
+  regexps->clear();
+  if (!compiled_) {
+    // Some legacy users of PrefilterTree call Compile() before
+    // adding any regexps and expect Compile() to have no effect.
+    // This kludge is a counterpart to that kludge.
+    if (prefilter_vec_.empty())
+      return;
+
+    LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
+    for (size_t i = 0; i < prefilter_vec_.size(); i++)
+      regexps->push_back(static_cast<int>(i));
+  } else {
+    IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
+    std::vector<int> matched_atom_ids;
+    matched_atom_ids.reserve(matched_atoms.size());
+    for (size_t j = 0; j < matched_atoms.size(); j++) {
+      const int atom_index = matched_atoms[j];
+      // The index comes from the caller; never trust it as a subscript.
+      if (atom_index < 0 ||
+          static_cast<size_t>(atom_index) >= atom_index_to_id_.size()) {
+        LOG(ERROR) << "RegexpsGivenStrings: bad atom index " << atom_index;
+        continue;
+      }
+      matched_atom_ids.push_back(atom_index_to_id_[atom_index]);
+    }
+    PropagateMatch(matched_atom_ids, &regexps_map);
+    for (IntMap::iterator it = regexps_map.begin();
+         it != regexps_map.end();
+         ++it)
+      regexps->push_back(it->index());
+
+    regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
+  }
+  std::sort(regexps->begin(), regexps->end());
+}
+
+// Propagates matches upward from the nodes in atom_ids and records in
+// *regexps every regexp attached to a node that gets triggered.
+//
+// `work` holds the triggered node ids; `count` tracks, per parent, how
+// many of its children have triggered so far.
+// NOTE(review): nodes are added to `work` while it is being iterated;
+// this relies on IntMap iteration picking up entries added during the
+// loop -- confirm against the sparse array implementation.
+void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
+                                   IntMap* regexps) const {
+  IntMap count(static_cast<int>(entries_.size()));
+  IntMap work(static_cast<int>(entries_.size()));
+  // Seed the work set with the matched atoms.
+  for (size_t i = 0; i < atom_ids.size(); i++)
+    work.set(atom_ids[i], 1);
+  for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
+    const Entry& entry = entries_[it->index()];
+    // Record regexps triggered.
+    for (size_t i = 0; i < entry.regexps.size(); i++)
+      regexps->set(entry.regexps[i], 1);
+    int c;
+    // Pass trigger up to parents.
+    for (int j : entry.parents) {
+      const Entry& parent = entries_[j];
+      // Delay until all the children have succeeded.
+      // Parents with a threshold of 1 trigger immediately and are not
+      // counted.
+      if (parent.propagate_up_at_count > 1) {
+        if (count.has_index(j)) {
+          c = count.get_existing(j) + 1;
+          count.set_existing(j, c);
+        } else {
+          c = 1;
+          count.set_new(j, c);
+        }
+        if (c < parent.propagate_up_at_count)
+          continue;
+      }
+      // Trigger the parent.
+      work.set(j, 1);
+    }
+  }
+}
+
+// Debugging help.
+
+// Logs the prefilter stored for regexp number regexpid.
+// An out-of-range id, or a regexp whose prefilter was recorded as NULL
+// (Add() stores NULL for prefilters it does not keep), is reported
+// instead of being dereferenced.
+void PrefilterTree::PrintPrefilter(int regexpid) {
+  if (regexpid < 0 ||
+      static_cast<size_t>(regexpid) >= prefilter_vec_.size()) {
+    LOG(ERROR) << "PrintPrefilter: bad regexp id " << regexpid;
+    return;
+  }
+
+  Prefilter* prefilter = prefilter_vec_[regexpid];
+  if (prefilter == NULL) {
+    LOG(ERROR) << "PrintPrefilter: regexp " << regexpid
+               << " has no prefilter (unfiltered)";
+    return;
+  }
+  LOG(ERROR) << DebugNodeString(prefilter);
+}
+
+// Logs a summary of the compiled tree: the number of unique atoms and
+// nodes, the parent ids of every entry, and the NodeString -> node id
+// map passed in as nodes.
+void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
+  LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
+  LOG(ERROR) << "#Unique Nodes: " << entries_.size();
+
+  for (size_t id = 0; id < entries_.size(); id++) {
+    const Entry& entry = entries_[id];
+    LOG(ERROR) << "EntryId: " << id
+               << " N: " << entry.parents.size()
+               << " R: " << entry.regexps.size();
+    for (int parent_id : entry.parents)
+      LOG(ERROR) << parent_id;
+  }
+
+  LOG(ERROR) << "Map:";
+  for (const auto& key_and_node : *nodes) {
+    LOG(ERROR) << "NodeId: " << key_and_node.second->unique_id()
+               << " Str: " << key_and_node.first;
+  }
+}
+
+// Renders node recursively for debugging. An ATOM is rendered as its
+// atom; any other node as "AND(...)" or "OR(...)" with each child
+// shown as "<unique id>:<child rendering>", separated by commas.
+std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
+  if (node->op() == Prefilter::ATOM) {
+    DCHECK(!node->atom().empty());
+    return node->atom();
+  }
+
+  // Adding the operation disambiguates AND and OR nodes.
+  std::string out = (node->op() == Prefilter::AND) ? "AND" : "OR";
+  out += "(";
+  for (size_t k = 0; k < node->subs()->size(); k++) {
+    if (k > 0)
+      out += ',';
+    Prefilter* child = (*node->subs())[k];
+    out += StringPrintf("%d", child->unique_id());
+    out += ":";
+    out += DebugNodeString(child);
+  }
+  out += ")";
+  return out;
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/prefilter_tree.h
+++ b/external/duckdb/third_party/re2/re2/prefilter_tree.h
@@ -0,0 +1,140 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_PREFILTER_TREE_H_
+#define RE2_PREFILTER_TREE_H_
+
+// The PrefilterTree class is used to form an AND-OR tree of strings
+// that would trigger each regexp. The 'prefilter' of each regexp is
+// added to PrefilterTree, and then PrefilterTree is used to find all
+// the unique strings across the prefilters. During search, by using
+// matches from a string matching engine, PrefilterTree deduces the
+// set of regexps that are to be triggered. The 'string matching
+// engine' itself is outside of this class, and the caller can use any
+// favorite engine. PrefilterTree provides a set of strings (called
+// atoms) that the user of this class should use to do the string
+// matching.
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "re2/prefilter.h"
+#include "re2/sparse_array.h"
+
+namespace duckdb_re2 {
+
+class PrefilterTree {
+ public:
+  PrefilterTree();
+  explicit PrefilterTree(int min_atom_len);
+  ~PrefilterTree();
+
+  // Registers the prefilter for the next regexp. Add is assumed to be
+  // called sequentially for all regexps, and every Add call must come
+  // before Compile.
+  void Add(Prefilter* prefilter);
+
+  // Fills atom_vec with the atom strings.
+  // Call this once all prefilters have been added through Add; calling
+  // Add after Compile is not allowed.
+  // The caller should match the returned strings against its content.
+  // Each time a string matches, its index has to be recorded and later
+  // passed to RegexpsGivenStrings below.
+  void Compile(std::vector<std::string>* atom_vec);
+
+  // Takes the indices of the atoms that matched and returns, in regexps,
+  // the indices of the regexps that should be searched. matched_atoms
+  // should hold the ids of all string atoms that were found to match the
+  // content; any string matching engine may be used to produce them.
+  // This function is thread safe.
+  void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+                           std::vector<int>* regexps) const;
+
+  // Prints the prefilter of the given regexp for debugging, including
+  // the unique ids associated with its nodes.
+  void PrintPrefilter(int regexpid);
+
+ private:
+  using IntMap = SparseArray<int>;
+  // TODO(junyer): Use std::unordered_set<Prefilter*> instead?
+  // It should be trivial to get rid of the stringification...
+  using NodeMap = std::map<std::string, Prefilter*>;
+
+  // Per-node bookkeeping: every unique node has one Entry, which helps
+  // pass the matching trigger information along the tree.
+  struct Entry {
+    // Number of children that must match before this node triggers its
+    // parent: 1 for an atom or an OR node, the number of unique children
+    // for an AND node.
+    int propagate_up_at_count;
+
+    // Indices of the parent nodes to trigger once this node is ready.
+    // Sharing can give a node several parents: (abc | def) and
+    // (xyz | def) are two different OR nodes that share the atom 'def',
+    // so a match of 'def' triggers both of them.
+    std::vector<int> parents;
+
+    // Regexps that are triggered once this node is ready to trigger its
+    // parent.
+    std::vector<int> regexps;
+  };
+
+  // Tells whether the prefilter node should be kept (true) or not.
+  bool KeepNode(Prefilter* node) const;
+
+  // Assigns unique ids to the various parts of the prefilter, checking
+  // whether those nodes are already present in the PrefilterTree.
+  void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
+
+  // From the matching atoms, finds the regexps to be triggered.
+  void PropagateMatch(const std::vector<int>& atom_ids,
+                      IntMap* regexps) const;
+
+  // Looks up the prefilter node whose NodeString equals that of node.
+  // When node is itself the canonical node, node is returned.
+  Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node);
+
+  // Produces a string that uniquely identifies node. The children of
+  // node are assumed to have been assigned unique ids already.
+  std::string NodeString(Prefilter* node) const;
+
+  // Recursively builds a readable string for the prefilter.
+  std::string DebugNodeString(Prefilter* node) const;
+
+  // Debugging helper.
+  void PrintDebugInfo(NodeMap* nodes);
+
+  // All the nodes formed by Compile: essentially one node per unique
+  // atom and one per unique AND/OR node.
+  std::vector<Entry> entries_;
+
+  // Indices of the regexps that always pass through the filter (no
+  // required literals were found in them).
+  std::vector<int> unfiltered_;
+
+  // The Prefilter of every regexp.
+  std::vector<Prefilter*> prefilter_vec_;
+
+  // Maps an atom's index in the returned strings to its entry id.
+  std::vector<int> atom_index_to_id_;
+
+  // Whether the prefilter tree has been compiled.
+  bool compiled_;
+
+  // Strings shorter than this length are not stored as atoms.
+  const int min_atom_len_;
+
+  // Not copyable.
+  PrefilterTree(const PrefilterTree&) = delete;
+  PrefilterTree& operator=(const PrefilterTree&) = delete;
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_PREFILTER_TREE_H_
--- a/external/duckdb/third_party/re2/re2/prog.cc
+++ b/external/duckdb/third_party/re2/re2/prog.cc
--- a/external/duckdb/third_party/re2/re2/prog.h
+++ b/external/duckdb/third_party/re2/re2/prog.h
@@ -0,0 +1,467 @@
+// Copyright 2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_PROG_H_
+#define RE2_PROG_H_
+
+// Compiled representation of regular expressions.
+// See regexp.h for the Regexp class, which represents a regular
+// expression symbolically.
+
+#include <stdint.h>
+#include <functional>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <type_traits>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/pod_array.h"
+#include "re2/re2.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"
+
+namespace duckdb_re2 {
+
+// Opcodes for Inst
+enum InstOp {
+  kInstAlt        = 0,  // choose between out_ and out1_
+  kInstAltMatch   = 1,  // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
+  kInstByteRange  = 2,  // next (possible case-folded) byte must be in [lo_, hi_]
+  kInstCapture    = 3,  // capturing parenthesis number cap_
+  kInstEmptyWidth = 4,  // empty-width special (^ $ ...); bit(s) set in empty_
+  kInstMatch      = 5,  // found a match!
+  kInstNop        = 6,  // no-op; occasionally unavoidable
+  kInstFail       = 7,  // never match; occasionally unavoidable
+  kNumInst,             // one past the last opcode, i.e. the opcode count
+};
+
+// Bit flags for empty-width specials
+enum EmptyOp {
+  kEmptyBeginLine        = 0x01,  // ^ - beginning of line
+  kEmptyEndLine          = 0x02,  // $ - end of line
+  kEmptyBeginText        = 0x04,  // \A - beginning of text
+  kEmptyEndText          = 0x08,  // \z - end of text
+  kEmptyWordBoundary     = 0x10,  // \b - word boundary
+  kEmptyNonWordBoundary  = 0x20,  // \B - not \b
+  kEmptyAllFlags         = 0x3F,  // all six flags above OR-ed together
+};
+
+class DFA;
+class Regexp;
+
+// Compiled form of regexp program.
+class Prog {
+ public:
+  Prog();
+  ~Prog();
+
+  // Single instruction in regexp program.
+  // Layout: out_opcode_ packs the next-instruction index (out), the "last"
+  // bit and the opcode into one 32-bit word; an anonymous union holds the
+  // single extra argument that the current opcode needs.
+  class Inst {
+   public:
+    // See the assertion below for why this is so.
+    Inst() = default;
+
+    // Copyable.
+    Inst(const Inst&) = default;
+    Inst& operator=(const Inst&) = default;
+
+    // Constructors per opcode
+    void InitAlt(uint32_t out, uint32_t out1);
+    void InitByteRange(int lo, int hi, int foldcase, uint32_t out);
+    void InitCapture(int cap, uint32_t out);
+    void InitEmptyWidth(EmptyOp empty, uint32_t out);
+    void InitMatch(int id);
+    void InitNop(uint32_t out);
+    void InitFail();
+
+    // Getters
+    // id() is this instruction's offset from the start of p's inst_ array.
+    // opcode(), last() and out() unpack the three fields of out_opcode_
+    // (low 3 bits, bit 3, and the remaining high bits respectively).
+    // The other getters read a union member and DCHECK that the opcode is
+    // the one that member belongs to.
+    int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
+    InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
+    int last()      { return (out_opcode_>>3)&1; }
+    int out()       { return out_opcode_>>4; }
+    int out1()      { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
+    int cap()       { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
+    int lo()        { DCHECK_EQ(opcode(), kInstByteRange); return byte_range.lo_; }
+    int hi()        { DCHECK_EQ(opcode(), kInstByteRange); return byte_range.hi_; }
+    int foldcase()  { DCHECK_EQ(opcode(), kInstByteRange); return byte_range.hint_foldcase_&1; }
+    int hint()      { DCHECK_EQ(opcode(), kInstByteRange); return byte_range.hint_foldcase_>>1; }
+    int match_id()  { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
+    EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
+
+    // For a kInstAltMatch instruction: returns true when the instruction
+    // at out() is a kInstByteRange, or is a kInstNop whose own out() is a
+    // kInstByteRange; returns false otherwise.
+    bool greedy(Prog* p) {
+      DCHECK_EQ(opcode(), kInstAltMatch);
+      return p->inst(out())->opcode() == kInstByteRange ||
+             (p->inst(out())->opcode() == kInstNop &&
+              p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
+    }
+
+    // Does this inst (an kInstByteRange) match c?
+    inline bool Matches(int c) {
+      DCHECK_EQ(opcode(), kInstByteRange);
+      // With foldcase set, ASCII 'A'-'Z' is lowered before the range check.
+      if (foldcase() && 'A' <= c && c <= 'Z')
+        c += 'a' - 'A';
+      return byte_range.lo_ <= c && c <= byte_range.hi_;
+    }
+
+    // Returns string representation for debugging.
+    std::string Dump();
+
+    // Maximum instruction id.
+    // (Must fit in out_opcode_. PatchList/last steal another bit.)
+    static const int kMaxInst = (1<<28) - 1;
+
+   private:
+    // Each setter below rebuilds out_opcode_ from its three fields,
+    // replacing only the field(s) named and re-reading the others.
+    void set_opcode(InstOp opcode) {
+      out_opcode_ = (out()<<4) | (last()<<3) | opcode;
+    }
+
+    // Sets the "last" bit; out and opcode are preserved.
+    void set_last() {
+      out_opcode_ = (out()<<4) | (1<<3) | opcode();
+    }
+
+    void set_out(int out) {
+      out_opcode_ = (out<<4) | (last()<<3) | opcode();
+    }
+
+    // Replaces out and opcode together; only the "last" bit is preserved.
+    void set_out_opcode(int out, InstOp opcode) {
+      out_opcode_ = (out<<4) | (last()<<3) | opcode;
+    }
+
+    uint32_t out_opcode_;  // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
+    union {                // additional instruction arguments:
+      uint32_t out1_;      // opcode == kInstAlt
+                           //   alternate next instruction
+
+      int32_t cap_;        // opcode == kInstCapture
+                           //   Index of capture register (holds text
+                           //   position recorded by capturing parentheses).
+                           //   For \n (the submatch for the nth parentheses),
+                           //   the left parenthesis captures into register 2*n
+                           //   and the right one captures into register 2*n+1.
+
+      int32_t match_id_;   // opcode == kInstMatch
+                           //   Match ID to identify this match (for duckdb_re2::Set).
+
+      struct {             // opcode == kInstByteRange
+        uint8_t lo_;       //   byte range is lo_-hi_ inclusive
+        uint8_t hi_;       //
+        uint16_t hint_foldcase_;  // 15 bits: hint, 1 (low) bit: foldcase
+                           //   hint to execution engines: the delta to the
+                           //   next instruction (in the current list) worth
+                           //   exploring iff this instruction matched; 0
+                           //   means there are no remaining possibilities,
+                           //   which is most likely for character classes.
+                           //   foldcase: A-Z -> a-z before checking range.
+      } byte_range;
+
+      EmptyOp empty_;       // opcode == kInstEmptyWidth
+                            //   empty_ is bitwise OR of kEmpty* flags above.
+    };
+
+    friend class Compiler;
+    friend struct PatchList;
+    friend class Prog;
+  };
+
+  // Inst must be trivial so that we can freely clear it with memset(3).
+  // Arrays of Inst are initialised by copying the initial elements with
+  // memmove(3) and then clearing any remaining elements with memset(3).
+  static_assert(std::is_trivial<Inst>::value, "Inst must be trivial");
+
+  // Whether to anchor the search.
+  enum Anchor {
+    kUnanchored,  // match anywhere
+    kAnchored,    // match only starting at beginning of text
+  };
+
+  // Kind of match to look for (for anchor != kFullMatch)
+  //
+  // kLongestMatch mode finds the overall longest
+  // match but still makes its submatch choices the way
+  // Perl would, not in the way prescribed by POSIX.
+  // The POSIX rules are much more expensive to implement,
+  // and no one has needed them.
+  //
+  // kFullMatch is not strictly necessary -- we could use
+  // kLongestMatch and then check the length of the match -- but
+  // the matching code can run faster if it knows to consider only
+  // full matches.
+  enum MatchKind {
+    kFirstMatch,     // like Perl, PCRE
+    kLongestMatch,   // like egrep or POSIX
+    kFullMatch,      // match only entire text; implies anchor==kAnchored
+    kManyMatch       // for SearchDFA, records set of matches
+  };
+
+  // Trivial accessors and mutators for the private members declared at the
+  // bottom of this class; see the comments on those members for meanings.
+
+  // Address of element id of the instruction array.
+  Inst *inst(int id) { return &inst_[id]; }
+  // Anchored and unanchored entry points of the program.
+  int start() { return start_; }
+  void set_start(int start) { start_ = start; }
+  int start_unanchored() { return start_unanchored_; }
+  void set_start_unanchored(int start) { start_unanchored_ = start; }
+  // Number of instructions.
+  int size() { return size_; }
+  // Whether the program runs backward over the input.
+  bool reversed() { return reversed_; }
+  void set_reversed(bool reversed) { reversed_ = reversed; }
+  // List and per-opcode instruction counts.
+  int list_count() { return list_count_; }
+  int inst_count(InstOp op) { return inst_count_[op]; }
+  // Raw storage of list_heads_.
+  uint16_t* list_heads() { return list_heads_.data(); }
+  // Inclusive upper bound on text.size() (see bit_state_text_max_size_).
+  size_t bit_state_text_max_size() { return bit_state_text_max_size_; }
+  // Maximum memory for DFAs.
+  int64_t dfa_mem() { return dfa_mem_; }
+  void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
+  // Whether the regexp has an explicit start / end anchor.
+  bool anchor_start() { return anchor_start_; }
+  void set_anchor_start(bool b) { anchor_start_ = b; }
+  bool anchor_end() { return anchor_end_; }
+  void set_anchor_end(bool b) { anchor_end_ = b; }
+  // Byte-class map: bytemap()[x] < bytemap_range() for every input byte x.
+  int bytemap_range() { return bytemap_range_; }
+  const uint8_t* bytemap() { return bytemap_; }
+  // True when a prefix has been configured (prefix_size_ is non-zero).
+  bool can_prefix_accel() { return prefix_size_ != 0; }
+
+  // Skips ahead to the first likely occurrence of the prefix within the
+  // size bytes starting at data, and returns a pointer to its first byte
+  // or NULL if not found. The strategy follows the configured prefix:
+  // the shift DFA when the prefix is case-insensitive, memchr(3) when the
+  // prefix is a single byte, and front-and-back scanning otherwise.
+  const void* PrefixAccel(const void* data, size_t size) {
+    DCHECK(can_prefix_accel());
+    if (prefix_foldcase_)
+      return PrefixAccel_ShiftDFA(data, size);
+    if (prefix_size_ == 1)
+      return memchr(data, prefix_front_back.prefix_front_, size);
+    return PrefixAccel_FrontAndBack(data, size);
+  }
+
+  // Configures prefix accel using the analysis performed during compilation.
+  void ConfigurePrefixAccel(const std::string& prefix, bool prefix_foldcase);
+
+  // An implementation of prefix accel that uses prefix_dfa_ to perform
+  // case-insensitive search.
+  const void* PrefixAccel_ShiftDFA(const void* data, size_t size);
+
+  // An implementation of prefix accel that looks for prefix_front_ and
+  // prefix_back_ to return fewer false positives than memchr(3) alone.
+  const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
+
+  // Returns string representation of program for debugging.
+  std::string Dump();
+  std::string DumpUnanchored();
+  std::string DumpByteMap();
+
+  // Returns the set of kEmpty flags that are in effect at
+  // position p within context.
+  static uint32_t EmptyFlags(const StringPiece& context, const char* p);
+
+  // Reports whether byte c is a word character, considering ASCII only:
+  // a letter, a digit or an underscore.
+  // Used by the implementation of \b and \B.
+  // This is not right for Unicode, but:
+  //   - it's hard to get right in a byte-at-a-time matching world
+  //     (the DFA has only one-byte lookahead).
+  //   - even if the lookahead were possible, the Progs would be huge.
+  // This crude approximation is the same one PCRE uses.
+  static bool IsWordChar(uint8_t c) {
+    if (c == '_')
+      return true;
+    if ('0' <= c && c <= '9')
+      return true;
+    if ('a' <= c && c <= 'z')
+      return true;
+    return 'A' <= c && c <= 'Z';
+  }
+
+  // Execution engines.  They all search for the regexp (run the prog)
+  // in text, which is in the larger context (used for ^ $ \b etc).
+  // Anchor and kind control the kind of search.
+  // Returns true if match found, false if not.
+  // If match found, fills match[0..nmatch-1] with submatch info.
+  // match[0] is overall match, match[1] is first set of parens, etc.
+  // If a particular submatch is not matched during the regexp match,
+  // it is set to NULL.
+  //
+  // Matching text == StringPiece(NULL, 0) is treated as any other empty
+  // string, but note that on return, it will not be possible to distinguish
+  // submatches that matched that empty string from submatches that didn't
+  // match anything.  Either way, match[i] == NULL.
+
+  // Search using NFA: can find submatches but kind of slow.
+  bool SearchNFA(const StringPiece& text, const StringPiece& context,
+                 Anchor anchor, MatchKind kind,
+                 StringPiece* match, int nmatch);
+
+  // Search using DFA: much faster than NFA but only finds
+  // end of match and can use a lot more memory.
+  // Returns whether a match was found.
+  // If the DFA runs out of memory, sets *failed to true and returns false.
+  // If matches != NULL and kind == kManyMatch and there is a match,
+  // SearchDFA fills matches with the match IDs of the final matching state.
+  bool SearchDFA(const StringPiece& text, const StringPiece& context,
+                 Anchor anchor, MatchKind kind, StringPiece* match0,
+                 bool* failed, SparseSet* matches);
+
+  // The callback issued after building each DFA state with BuildEntireDFA().
+  // If next is null, then the memory budget has been exhausted and building
+  // will halt. Otherwise, the state has been built and next points to an array
+  // of bytemap_range()+1 slots holding the next states as per the bytemap and
+  // kByteEndText. The number of the state is implied by the callback sequence:
+  // the first callback is for state 0, the second callback is for state 1, ...
+  // match indicates whether the state is a matching state.
+  using DFAStateCallback = std::function<void(const int* next, bool match)>;
+
+  // Build the entire DFA for the given match kind.
+  // Usually the DFA is built out incrementally, as needed, which
+  // avoids lots of unnecessary work.
+  // If cb is not empty, it receives one callback per state built.
+  // Returns the number of states built.
+  // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
+  int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
+
+  // Compute bytemap.
+  void ComputeByteMap();
+
+  // Run peep-hole optimizer on program.
+  void Optimize();
+
+  // One-pass NFA: only correct if IsOnePass() is true,
+  // but much faster than NFA (competitive with PCRE)
+  // for those expressions.
+  bool IsOnePass();
+  bool SearchOnePass(const StringPiece& text, const StringPiece& context,
+                     Anchor anchor, MatchKind kind,
+                     StringPiece* match, int nmatch);
+
+  // Bit-state backtracking.  Fast on small cases but uses memory
+  // proportional to the product of the list count and the text size.
+  bool CanBitState() { return list_heads_.data() != NULL; }
+  bool SearchBitState(const StringPiece& text, const StringPiece& context,
+                      Anchor anchor, MatchKind kind,
+                      StringPiece* match, int nmatch);
+
+  static const int kMaxOnePassCapture = 5;  // $0 through $4
+
+  // Backtracking search: the gold standard against which the other
+  // implementations are checked.  FOR TESTING ONLY.
+  // It allocates a ton of memory to avoid running forever.
+  // It is also recursive, so can't use in production (will overflow stacks).
+  // The name "Unsafe" here is supposed to be a flag that
+  // you should not be using this function.
+  bool UnsafeSearchBacktrack(const StringPiece& text,
+                             const StringPiece& context,
+                             Anchor anchor, MatchKind kind,
+                             StringPiece* match, int nmatch);
+
+  // Computes range for any strings matching regexp. The min and max can in
+  // some cases be arbitrarily precise, so the caller gets to specify the
+  // maximum desired length of string returned.
+  //
+  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
+  // string s that is an anchored match for this regexp satisfies
+  //   min <= s && s <= max.
+  //
+  // Note that PossibleMatchRange() will only consider the first copy of an
+  // infinitely repeated element (i.e., any regexp element followed by a '*' or
+  // '+' operator). Regexps with "{N}" constructions are not affected, as those
+  // do not compile down to infinite repetitions.
+  //
+  // Returns true on success, false on error.
+  bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
+
+  // Outputs the program fanout into the given sparse array.
+  void Fanout(SparseArray<int>* fanout);
+
+  // Compiles a collection of regexps to Prog.  Each regexp will have
+  // its own Match instruction recording the index in the output vector.
+  static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
+
+  // Flattens the Prog from "tree" form to "list" form. This is an in-place
+  // operation in the sense that the old instructions are lost.
+  void Flatten();
+
+  // Walks the Prog; the "successor roots" or predecessors of the reachable
+  // instructions are marked in rootmap or predmap/predvec, respectively.
+  // reachable and stk are preallocated scratch structures.
+  void MarkSuccessors(SparseArray<int>* rootmap,
+                      SparseArray<int>* predmap,
+                      std::vector<std::vector<int>>* predvec,
+                      SparseSet* reachable, std::vector<int>* stk);
+
+  // Walks the Prog from the given "root" instruction; the "dominator root"
+  // of the reachable instructions (if such exists) is marked in rootmap.
+  // reachable and stk are preallocated scratch structures.
+  void MarkDominator(int root, SparseArray<int>* rootmap,
+                     SparseArray<int>* predmap,
+                     std::vector<std::vector<int>>* predvec,
+                     SparseSet* reachable, std::vector<int>* stk);
+
+  // Walks the Prog from the given "root" instruction; the reachable
+  // instructions are emitted in "list" form and appended to flat.
+  // reachable and stk are preallocated scratch structures.
+  void EmitList(int root, SparseArray<int>* rootmap,
+                std::vector<Inst>* flat,
+                SparseSet* reachable, std::vector<int>* stk);
+
+  // Computes hints for ByteRange instructions in [begin, end).
+  void ComputeHints(std::vector<Inst>* flat, int begin, int end);
+
+  // Controls whether the DFA should bail out early if the NFA would be faster.
+  // FOR TESTING ONLY.
+  static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b);
+
+ private:
+  friend class Compiler;
+
+  DFA* GetDFA(MatchKind kind);
+  void DeleteDFA(DFA* dfa);
+
+  bool anchor_start_;       // regexp has explicit start anchor
+  bool anchor_end_;         // regexp has explicit end anchor
+  bool reversed_;           // whether program runs backward over input
+  bool did_flatten_;        // has Flatten been called?
+  bool did_onepass_;        // has IsOnePass been called?
+
+  int start_;               // entry point for program
+  int start_unanchored_;    // unanchored entry point for program
+  int size_;                // number of instructions
+  int bytemap_range_;       // bytemap_[x] < bytemap_range_
+
+  bool prefix_foldcase_;    // whether prefix is case-insensitive
+  size_t prefix_size_;      // size of prefix (0 if no prefix)
+  union {
+    uint64_t* prefix_dfa_;  // "Shift DFA" for prefix
+    struct {
+      int prefix_front_;    // first byte of prefix
+      int prefix_back_;     // last byte of prefix
+    } prefix_front_back;
+  };
+
+  int list_count_;                  // count of lists (see above)
+  int inst_count_[kNumInst];        // count of instructions by opcode
+  PODArray<uint16_t> list_heads_;   // sparse array enumerating list heads
+                                    // not populated if size_ is overly large
+  size_t bit_state_text_max_size_;  // upper bound (inclusive) on text.size()
+
+  PODArray<Inst> inst_;              // pointer to instruction array
+  PODArray<uint8_t> onepass_nodes_;  // data for OnePass nodes
+
+  int64_t dfa_mem_;         // Maximum memory for DFAs.
+  DFA* dfa_first_;          // DFA cached for kFirstMatch/kManyMatch
+  DFA* dfa_longest_;        // DFA cached for kLongestMatch/kFullMatch
+
+  uint8_t bytemap_[256];    // map from input bytes to byte classes
+
+  std::once_flag dfa_first_once_;
+  std::once_flag dfa_longest_once_;
+
+  Prog(const Prog&) = delete;
+  Prog& operator=(const Prog&) = delete;
+};
+
+// std::string_view in MSVC has iterators that aren't just pointers and
+// that don't allow comparisons between different objects - not even if
+// those objects are views into the same string! Thus, we provide these
+// conversion functions for convenience.
+// Returns s.data(), i.e. a raw pointer to the start of the viewed text.
+static inline const char* BeginPtr(const StringPiece& s) {
+  return s.data();
+}
+// Returns a raw pointer one past the last character viewed by s
+// (s.data() advanced by s.size()).
+static inline const char* EndPtr(const StringPiece& s) {
+  const char* const first = s.data();
+  return first + s.size();
+}
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_PROG_H_
--- a/external/duckdb/third_party/re2/re2/re2.cc
+++ b/external/duckdb/third_party/re2/re2/re2.cc
--- a/external/duckdb/third_party/re2/re2/re2.h
+++ b/external/duckdb/third_party/re2/re2/re2.h
--- a/external/duckdb/third_party/re2/re2/regexp.cc
+++ b/external/duckdb/third_party/re2/re2/regexp.cc
--- a/external/duckdb/third_party/re2/re2/regexp.h
+++ b/external/duckdb/third_party/re2/re2/regexp.h
@@ -0,0 +1,665 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_REGEXP_H_
+#define RE2_REGEXP_H_
+
+// --- SPONSORED LINK --------------------------------------------------
+// If you want to use this library for regular expression matching,
+// you should use re2/re2.h, which provides a class RE2 that
+// mimics the PCRE interface provided by PCRE's C++ wrappers.
+// This header describes the low-level interface used to implement RE2
+// and may change in backwards-incompatible ways from time to time.
+// In contrast, RE2's interface will not.
+// ---------------------------------------------------------------------
+
+// Regular expression library: parsing, execution, and manipulation
+// of regular expressions.
+//
+// Any operation that traverses the Regexp structures should be written
+// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
+// regular expressions such as x++++++++++++++++++++... might cause recursive
+// traversals to overflow the stack.
+//
+// It is the caller's responsibility to provide appropriate mutual exclusion
+// around manipulation of the regexps.  RE2 does this.
+//
+// PARSING
+//
+// Regexp::Parse parses regular expressions encoded in UTF-8.
+// The default syntax is POSIX extended regular expressions,
+// with the following changes:
+//
+//   1.  Backreferences (optional in POSIX EREs) are not supported.
+//         (Supporting them precludes the use of DFA-based
+//          matching engines.)
+//
+//   2.  Collating elements and collation classes are not supported.
+//         (No one has needed or wanted them.)
+//
+// The exact syntax accepted can be modified by passing flags to
+// Regexp::Parse.  In particular, many of the basic Perl additions
+// are available.  The flags are documented below (search for LikePerl).
+//
+// If parsed with the flag Regexp::Latin1, both the regular expression
+// and the input to the matching routines are assumed to be encoded in
+// Latin-1, not UTF-8.
+//
+// EXECUTION
+//
+// Once Regexp has parsed a regular expression, it provides methods
+// to search text using that regular expression.  These methods are
+// implemented via calling out to other regular expression libraries.
+// (Let's call them the sublibraries.)
+//
+// To call a sublibrary, Regexp does not simply prepare a
+// string version of the regular expression and hand it to the
+// sublibrary.  Instead, Regexp prepares, from its own parsed form, the
+// corresponding internal representation used by the sublibrary.
+// This has the drawback of needing to know the internal representation
+// used by the sublibrary, but it has two important benefits:
+//
+//   1. The syntax and meaning of regular expressions is guaranteed
+//      to be that used by Regexp's parser, not the syntax expected
+//      by the sublibrary.  Regexp might accept a restricted or
+//      expanded syntax for regular expressions as compared with
+//      the sublibrary.  As long as Regexp can translate from its
+//      internal form into the sublibrary's, clients need not know
+//      exactly which sublibrary they are using.
+//
+//   2. The sublibrary parsers are bypassed.  For whatever reason,
+//      sublibrary regular expression parsers often have security
+//      problems.  For example, plan9grep's regular expression parser
+//      has a buffer overflow in its handling of large character
+//      classes, and PCRE's parser has had buffer overflow problems
+//      in the past.  Security-team requires sandboxing of sublibrary
+//      regular expression parsers.  Avoiding the sublibrary parsers
+//      avoids the sandbox.
+//
+// The execution methods we use now are provided by the compiled form,
+// Prog, described in prog.h
+//
+// MANIPULATION
+//
+// Unlike other regular expression libraries, Regexp makes its parsed
+// form accessible to clients, so that client code can analyze the
+// parsed regular expressions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <map>
+#include <set>
+#include <string>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/utf.h"
+#include "re2/stringpiece.h"
+
+namespace duckdb_re2 {
+
+// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
+//
+// Operator tag of a Regexp node, as returned by Regexp::op().
+// Numbering starts at 1 and each enumerator below takes the next value,
+// so inserting or reordering entries changes every later value.
+enum RegexpOp {
+  // Matches no strings.
+  kRegexpNoMatch = 1,
+
+  // Matches empty string.
+  kRegexpEmptyMatch,
+
+  // Matches rune_.
+  kRegexpLiteral,
+
+  // Matches runes_.
+  kRegexpLiteralString,
+
+  // Matches concatenation of sub_[0..nsub-1].
+  kRegexpConcat,
+  // Matches union of sub_[0..nsub-1].
+  kRegexpAlternate,
+
+  // Matches sub_[0] zero or more times.
+  kRegexpStar,
+  // Matches sub_[0] one or more times.
+  kRegexpPlus,
+  // Matches sub_[0] zero or one times.
+  kRegexpQuest,
+
+  // Matches sub_[0] at least min_ times, at most max_ times.
+  // max_ == -1 means no upper limit.
+  kRegexpRepeat,
+
+  // Parenthesized (capturing) subexpression.  Index is cap_.
+  // Optionally, capturing name is name_.
+  kRegexpCapture,
+
+  // Matches any character.
+  kRegexpAnyChar,
+
+  // Matches any byte [sic].
+  kRegexpAnyByte,
+
+  // Matches empty string at beginning of line.
+  kRegexpBeginLine,
+  // Matches empty string at end of line.
+  kRegexpEndLine,
+
+  // Matches word boundary "\b".
+  kRegexpWordBoundary,
+  // Matches not-a-word boundary "\B".
+  kRegexpNoWordBoundary,
+
+  // Matches empty string at beginning of text.
+  kRegexpBeginText,
+  // Matches empty string at end of text.
+  kRegexpEndText,
+
+  // Matches character class given by cc_.
+  kRegexpCharClass,
+
+  // Forces match of entire expression right now,
+  // with match ID match_id_ (used by RE2::Set).
+  kRegexpHaveMatch,
+
+  // Alias for the last operator above; it must be re-pointed whenever a new
+  // operator is appended after kRegexpHaveMatch.
+  kMaxRegexpOp = kRegexpHaveMatch,
+};
+
+// Keep in sync with string list in regexp.cc
+//
+// Result code carried by RegexpStatus.  kRegexpSuccess is 0 and is the
+// only value for which RegexpStatus::ok() returns true; every other
+// enumerator takes the next consecutive value in declaration order.
+enum RegexpStatusCode {
+  // No error
+  kRegexpSuccess = 0,
+
+  // Unexpected error
+  kRegexpInternalError,
+
+  // Parse errors
+  kRegexpBadEscape,          // bad escape sequence
+  kRegexpBadCharClass,       // bad character class
+  kRegexpBadCharRange,       // bad character class range
+  kRegexpMissingBracket,     // missing closing ]
+  kRegexpMissingParen,       // missing closing )
+  kRegexpUnexpectedParen,    // unexpected closing )
+  kRegexpTrailingBackslash,  // at end of regexp
+  kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"
+  kRegexpRepeatSize,         // bad repetition argument
+  kRegexpRepeatOp,           // bad repetition operator
+  kRegexpBadPerlOp,          // bad perl operator
+  kRegexpBadUTF8,            // invalid UTF-8 in regexp
+  kRegexpBadNamedCapture,    // bad named capture
+};
+
+// Error status for certain operations.
+// Bundles a status code, the piece of the regexp the error refers to, and
+// an optional heap-allocated scratch string that this object owns.
+// Not copyable; use Copy() to transfer state between instances.
+class RegexpStatus {
+ public:
+  // Starts out as success with no scratch string.
+  RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
+  // Frees the scratch string installed via set_tmp(), if any.
+  ~RegexpStatus() { delete tmp_; }
+
+  void set_code(RegexpStatusCode code) { code_ = code; }
+  // Stores the view only; the bytes it refers to are not copied here.
+  void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
+  // Takes ownership of tmp and deletes the previously held string.
+  // NOTE(review): passing the pointer that is already held would leave
+  // tmp_ dangling, since it is deleted before being stored again.
+  void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
+  RegexpStatusCode code() const { return code_; }
+  const StringPiece& error_arg() const { return error_arg_; }
+  // True exactly when the code is kRegexpSuccess.
+  bool ok() const { return code() == kRegexpSuccess; }
+
+  // Copies state from status.
+  void Copy(const RegexpStatus& status);
+
+  // Returns text equivalent of code, e.g.:
+  //   "Bad character class"
+  static std::string CodeText(RegexpStatusCode code);
+
+  // Returns text describing error, e.g.:
+  //   "Bad character class: [z-a]"
+  std::string Text() const;
+
+ private:
+  RegexpStatusCode code_;  // Kind of error
+  StringPiece error_arg_;  // Piece of regexp containing syntax error.
+  std::string* tmp_;       // Temporary storage, possibly where error_arg_ is.
+
+  RegexpStatus(const RegexpStatus&) = delete;
+  RegexpStatus& operator=(const RegexpStatus&) = delete;
+};
+
+// Compiled form; see prog.h
+class Prog;
+
+struct RuneRange {
+  // Bounds of the range; a default-constructed range is [0, 0].
+  Rune lo;
+  Rune hi;
+
+  RuneRange() : lo(0), hi(0) { }
+  RuneRange(int l, int h) : lo(l), hi(h) { }
+};
+
+// Ordering on RuneRanges in which a precedes b only when a ends before b
+// begins; ranges that overlap at all therefore compare as equivalent.
+// A set ordered this way can be searched for the range covering a
+// particular Rune.
+struct RuneRangeLess {
+  bool operator()(const RuneRange& a, const RuneRange& b) const {
+    const bool a_ends_before_b_starts = b.lo > a.hi;
+    return a_ends_before_b_starts;
+  }
+};
+
+class CharClassBuilder;
+
+// A set of runes stored as an array of RuneRanges plus a cached rune count.
+// The constructor, destructor and New() are private and CharClassBuilder is
+// a friend, so client code cannot construct or delete one directly and
+// disposes of it through Delete().
+class CharClass {
+ public:
+  // Disposes of this object (defined out of line; the destructor is private).
+  void Delete();
+
+  // Iteration over the stored ranges, i.e. ranges_[0 .. nranges_).
+  typedef RuneRange* iterator;
+  iterator begin() { return ranges_; }
+  iterator end() { return ranges_ + nranges_; }
+
+  // Number of runes in the class (not the number of ranges).
+  int size() { return nrunes_; }
+  bool empty() { return nrunes_ == 0; }
+  // True when the class holds all Runemax+1 possible runes.
+  bool full() { return nrunes_ == Runemax+1; }
+  bool FoldsASCII() { return folds_ascii_; }
+
+  bool Contains(Rune r) const;
+  // Returns a CharClass*; presumably a newly allocated complement that the
+  // caller must Delete() - confirm against the definition in regexp.cc.
+  CharClass* Negate();
+
+ private:
+  CharClass();  // not implemented
+  ~CharClass();  // not implemented
+  // Factory taking the maximum number of ranges the instance may hold.
+  static CharClass* New(size_t maxranges);
+
+  friend class CharClassBuilder;
+
+  bool folds_ascii_;
+  int nrunes_;         // value reported by size()
+  RuneRange *ranges_;  // first range; begin() returns this pointer
+  int nranges_;        // number of ranges reachable from ranges_
+
+  CharClass(const CharClass&) = delete;
+  CharClass& operator=(const CharClass&) = delete;
+};
+
+class Regexp {
+ public:
+
+  // Flags for parsing.  Can be ORed together.
+  enum ParseFlags {
+    NoParseFlags  = 0,
+    FoldCase      = 1<<0,   // Fold case during matching (case-insensitive).
+    Literal       = 1<<1,   // Treat s as literal string instead of a regexp.
+    ClassNL       = 1<<2,   // Allow char classes like [^a-z] and \D and \s
+                            // and [[:space:]] to match newline.
+    DotNL         = 1<<3,   // Allow . to match newline.
+    MatchNL       = ClassNL | DotNL,
+    OneLine       = 1<<4,   // Treat ^ and $ as only matching at beginning and
+                            // end of text, not around embedded newlines.
+                            // (Perl's default)
+    Latin1        = 1<<5,   // Regexp and text are in Latin1, not UTF-8.
+    NonGreedy     = 1<<6,   // Repetition operators are non-greedy by default.
+    PerlClasses   = 1<<7,   // Allow Perl character classes like \d.
+    PerlB         = 1<<8,   // Allow Perl's \b and \B.
+    PerlX         = 1<<9,   // Perl extensions:
+                            //   non-capturing parens - (?: )
+                            //   non-greedy operators - *? +? ?? {}?
+                            //   flag edits - (?i) (?-i) (?i: )
+                            //     i - FoldCase
+                            //     m - !OneLine
+                            //     s - DotNL
+                            //     U - NonGreedy
+                            //   line ends: \A \z
+                            //   \Q and \E to disable/enable metacharacters
+                            //   (?P<name>expr) for named captures
+                            //   \C to match any single byte
+    UnicodeGroups = 1<<10,  // Allow \p{Han} for Unicode Han group
+                            //   and \P{Han} for its negation.
+    NeverNL       = 1<<11,  // Never match NL, even if the regexp mentions
+                            //   it explicitly.
+    NeverCapture  = 1<<12,  // Parse all parens as non-capturing.
+
+    // As close to Perl as we can get.
+    LikePerl      = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
+                    UnicodeGroups,
+
+    // Internal use only.
+    WasDollar     = 1<<13,  // on kRegexpEndText: was $ in regexp text
+    AllParseFlags = (1<<14)-1,
+  };
+
+  // Get.  No set, Regexps are logically immutable once created.
+  RegexpOp op() { return static_cast<RegexpOp>(op_); }
+  int nsub() { return nsub_; }
+  bool simple() { return simple_ != 0; }
+  ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
+  int Ref();  // For testing.
+
+  Regexp** sub() {
+    // With zero or one child the pointer lives inline in subone_;
+    // with more, submany_ points at the array created by AllocSub().
+    return (nsub_ > 1) ? submany_ : &subone_;
+  }
+
+  int min() { DCHECK_EQ(op_, kRegexpRepeat); return arguments.repeat.min_; }
+  int max() { DCHECK_EQ(op_, kRegexpRepeat); return arguments.repeat.max_; }
+  Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return arguments.rune_; }
+  CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return arguments.char_class.cc_; }
+  int cap() { DCHECK_EQ(op_, kRegexpCapture); return arguments.capture.cap_; }
+  const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return arguments.capture.name_; }
+  Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return arguments.literal_string.runes_; }
+  int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return arguments.literal_string.nrunes_; }
+  int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return arguments.match_id_; }
+
+  // Increments reference count, returns object as convenience.
+  Regexp* Incref();
+
+  // Decrements reference count and deletes this object if count reaches 0.
+  void Decref();
+
+  // Parses string s to produce regular expression, returned.
+  // Caller must release return value with re->Decref().
+  // On failure, sets *status (if status != NULL) and returns NULL.
+  static Regexp* Parse(const StringPiece& s, ParseFlags flags,
+                       RegexpStatus* status);
+
+  // Returns a _new_ simplified version of the current regexp.
+  // Does not edit the current regexp.
+  // Caller must release return value with re->Decref().
+  // Simplified means that counted repetition has been rewritten
+  // into simpler terms and all Perl/POSIX features have been
+  // removed.  The result will capture exactly the same
+  // subexpressions the original did, unless formatted with ToString.
+  Regexp* Simplify();
+  friend class CoalesceWalker;
+  friend class SimplifyWalker;
+
+  // Parses the regexp src and then simplifies it and sets *dst to the
+  // string representation of the simplified form.  Returns true on success.
+  // Returns false and sets *status (if status != NULL) on parse error.
+  static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
+                             std::string* dst, RegexpStatus* status);
+
+  // Returns the number of capturing groups in the regexp.
+  int NumCaptures();
+  friend class NumCapturesWalker;
+
+  // Returns a map from names to capturing group indices,
+  // or NULL if the regexp contains no named capture groups.
+  // The caller is responsible for deleting the map.
+  std::map<std::string, int>* NamedCaptures();
+
+  // Returns a map from capturing group indices to capturing group
+  // names or NULL if the regexp contains no named capture groups. The
+  // caller is responsible for deleting the map.
+  std::map<int, std::string>* CaptureNames();
+
+  // Returns a string representation of the current regexp,
+  // using as few parentheses as possible.
+  std::string ToString();
+
+  // Convenience functions.  They consume the passed reference,
+  // so in many cases you should use, e.g., Plus(re->Incref(), flags).
+  // They do not consume allocated arrays like subs or runes.
+  static Regexp* Plus(Regexp* sub, ParseFlags flags);
+  static Regexp* Star(Regexp* sub, ParseFlags flags);
+  static Regexp* Quest(Regexp* sub, ParseFlags flags);
+  static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
+  static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
+  static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
+  static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
+  static Regexp* NewLiteral(Rune rune, ParseFlags flags);
+  static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
+  static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
+  static Regexp* HaveMatch(int match_id, ParseFlags flags);
+
+  // Like Alternate but does not factor out common prefixes.
+  static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
+
+  // Debugging function.  Returns string format for regexp
+  // that makes structure clear.  Does NOT use regexp syntax.
+  std::string Dump();
+
+  // Helper traversal class, defined fully in walker-inl.h.
+  template<typename T> class Walker;
+
+  // Compile to Prog.  See prog.h
+  // Reverse prog expects to be run over text backward.
+  // Construction and execution of prog will
+  // stay within approximately max_mem bytes of memory.
+  // If max_mem <= 0, a reasonable default is used.
+  Prog* CompileToProg(int64_t max_mem);
+  Prog* CompileToReverseProg(int64_t max_mem);
+
+  // Whether to expect this library to find exactly the same answer as PCRE
+  // when running this regexp.  Most regexps do mimic PCRE exactly, but a few
+  // obscure cases behave differently.  Technically this is more a property
+  // of the Prog than the Regexp, but the computation is much easier to do
+  // on the Regexp.  See mimics_pcre.cc for the exact conditions.
+  bool MimicsPCRE();
+
+  // Benchmarking function.
+  void NullWalk();
+
+  // Whether every match of this regexp must be anchored and
+  // begin with a non-empty fixed string (perhaps after ASCII
+  // case-folding).  If so, returns the prefix and the sub-regexp that
+  // follows it.
+  // Callers should expect *prefix, *foldcase and *suffix to be "zeroed"
+  // regardless of the return value.
+  bool RequiredPrefix(std::string* prefix, bool* foldcase,
+                      Regexp** suffix);
+
+  // Whether every match of this regexp must be unanchored and
+  // begin with a non-empty fixed string (perhaps after ASCII
+  // case-folding).  If so, returns the prefix.
+  // Callers should expect *prefix and *foldcase to be "zeroed"
+  // regardless of the return value.
+  bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
+
+  // Controls the maximum repeat count permitted by the parser.
+  // FOR FUZZING ONLY.
+  static void FUZZING_ONLY_set_maximum_repeat_count(int i);
+
+ private:
+  // Constructor allocates vectors as appropriate for operator.
+  explicit Regexp(RegexpOp op, ParseFlags parse_flags);
+
+  // Use Decref() instead of delete to release Regexps.
+  // This is private to catch deletes at compile time.
+  ~Regexp();
+  void Destroy();
+  bool QuickDestroy();
+
+  // Helpers for Parse.  Listed here so they can edit Regexps.
+  class ParseState;
+
+  friend class ParseState;
+  friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
+                             RegexpStatus* status);
+
+  // Helper for testing [sic].
+  friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);
+
+  // Computes whether Regexp is already simple.
+  bool ComputeSimple();
+
+  // Constructor that generates a Star, Plus or Quest,
+  // squashing the pair if sub is also a Star, Plus or Quest.
+  static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags);
+
+  // Constructor that generates a concatenation or alternation,
+  // enforcing the limit on the number of subexpressions for
+  // a particular Regexp.
+  static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
+                                   ParseFlags flags, bool can_factor);
+
+  // Returns the leading string that re starts with.
+  // The returned Rune* points into a piece of re,
+  // so it must not be used after the caller calls re->Decref().
+  static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
+
+  // Removes the first n leading runes from the beginning of re.
+  // Edits re in place.
+  static void RemoveLeadingString(Regexp* re, int n);
+
+  // Returns the leading regexp in re's top-level concatenation.
+  // The returned Regexp* points at re or a sub-expression of re,
+  // so it must not be used after the caller calls re->Decref().
+  static Regexp* LeadingRegexp(Regexp* re);
+
+  // Removes LeadingRegexp(re) from re and returns the remainder.
+  // Might edit re in place.
+  static Regexp* RemoveLeadingRegexp(Regexp* re);
+
+  // Simplifies an alternation of literal strings by factoring out
+  // common prefixes.
+  static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
+  friend class FactorAlternationImpl;
+
+  // Is a == b?  Only efficient on regexps that have not been through
+  // Simplify yet - the expansion of a kRegexpRepeat will make this
+  // take a long time.  Do not call on such regexps, hence private.
+  static bool Equal(Regexp* a, Regexp* b);
+
+  // Records n as the sub-regexp count, allocating the submany_ array only
+  // when more than one slot is needed (a single child uses subone_).
+  // n must be non-negative and representable as uint16_t.
+  void AllocSub(int n) {
+    DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
+    const bool needs_array = n > 1;
+    if (needs_array) {
+      submany_ = new Regexp*[n];
+    }
+    nsub_ = static_cast<uint16_t>(n);
+  }
+
+  // Add Rune to LiteralString
+  void AddRuneToString(Rune r);
+
+  // Swaps this with that, in place.
+  void Swap(Regexp *that);
+
+  // Operator.  See description of operators above.
+  // uint8_t instead of RegexpOp to control space usage.
+  uint8_t op_;
+
+  // Is this regexp structure already simple
+  // (has it been returned by Simplify)?
+  // uint8_t instead of bool to control space usage.
+  uint8_t simple_;
+
+  // Flags saved from parsing and used during execution.
+  // (Only FoldCase is used.)
+  // uint16_t instead of ParseFlags to control space usage.
+  uint16_t parse_flags_;
+
+  // Reference count.  Exists so that SimplifyRegexp can build
+  // regexp structures that are dags rather than trees to avoid
+  // exponential blowup in space requirements.
+  // uint16_t to control space usage.
+  // The standard regexp routines will never generate a
+  // ref greater than the maximum repeat count (kMaxRepeat),
+  // but even so, Incref and Decref consult an overflow map
+  // when ref_ reaches kMaxRef.
+  uint16_t ref_;
+  static const uint16_t kMaxRef = 0xffff;
+
+  // Subexpressions.
+  // uint16_t to control space usage.
+  // Concat and Alternate handle larger numbers of subexpressions
+  // by building concatenation or alternation trees.
+  // Other routines should call Concat or Alternate instead of
+  // filling in sub() by hand.
+  uint16_t nsub_;
+  static const uint16_t kMaxNsub = 0xffff;
+  union {
+    Regexp** submany_;  // if nsub_ > 1
+    Regexp* subone_;  // if nsub_ == 1
+  };
+
+  // Extra space for parse and teardown stacks.
+  Regexp* down_;
+
+  // Arguments to operator.  See description of operators above.
+  union {
+    struct {  // Repeat
+      int max_;
+      int min_;
+    } repeat;
+    struct {  // Capture
+      int cap_;
+      std::string* name_;
+    } capture;
+    struct {  // LiteralString
+      int nrunes_;
+      Rune* runes_;
+    } literal_string;
+    struct {  // CharClass
+      // These two could be in separate union members,
+      // but it wouldn't save any space (there are other two-word structs)
+      // and keeping them separate avoids confusion during parsing.
+      CharClass* cc_;
+      CharClassBuilder* ccb_;
+    } char_class;
+    Rune rune_;  // Literal
+    int match_id_;  // HaveMatch
+    void *the_union_[2];  // as big as any other element, for memset
+  } arguments;
+
+  Regexp(const Regexp&) = delete;
+  Regexp& operator=(const Regexp&) = delete;
+};
+
+// Character class set: contains non-overlapping, non-abutting RuneRanges.
+typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet;
+
+// Mutable rune set kept as a RuneRangeSet, together with a cached rune
+// count and bitmaps recording which ASCII letters are present.
+// GetCharClass() produces a CharClass* from the current contents.
+// Not copyable; use Copy() to obtain another builder.
+class CharClassBuilder {
+ public:
+  CharClassBuilder();
+
+  // Iteration over the stored ranges, in the order kept by RuneRangeSet.
+  typedef RuneRangeSet::iterator iterator;
+  iterator begin() { return ranges_.begin(); }
+  iterator end() { return ranges_.end(); }
+
+  // Number of runes in the set (not the number of ranges).
+  int size() { return nrunes_; }
+  bool empty() { return nrunes_ == 0; }
+  // True when the set holds all Runemax+1 possible runes.
+  bool full() { return nrunes_ == Runemax+1; }
+
+  bool Contains(Rune r);
+  bool FoldsASCII();
+  bool AddRange(Rune lo, Rune hi);  // returns whether class changed
+  CharClassBuilder* Copy();
+  void AddCharClass(CharClassBuilder* cc);
+  void Negate();
+  void RemoveAbove(Rune r);
+  CharClass* GetCharClass();
+  void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
+
+ private:
+  // Low 26 bits set: one bit per letter for upper_ and lower_.
+  static const uint32_t AlphaMask = (1<<26) - 1;
+  uint32_t upper_;  // bitmap of A-Z
+  uint32_t lower_;  // bitmap of a-z
+  int nrunes_;      // value reported by size()
+  RuneRangeSet ranges_;
+
+  CharClassBuilder(const CharClassBuilder&) = delete;
+  CharClassBuilder& operator=(const CharClassBuilder&) = delete;
+};
+
+// Bitwise ops on ParseFlags produce ParseFlags.
+inline Regexp::ParseFlags operator|(Regexp::ParseFlags a,
+                                    Regexp::ParseFlags b) {
+  // Combine in int space, then convert back to the enum type.
+  const int lhs = static_cast<int>(a);
+  const int rhs = static_cast<int>(b);
+  return static_cast<Regexp::ParseFlags>(lhs | rhs);
+}
+
+inline Regexp::ParseFlags operator^(Regexp::ParseFlags a,
+                                    Regexp::ParseFlags b) {
+  // Toggle in int space, then convert back to the enum type.
+  const int lhs = static_cast<int>(a);
+  const int rhs = static_cast<int>(b);
+  return static_cast<Regexp::ParseFlags>(lhs ^ rhs);
+}
+
+inline Regexp::ParseFlags operator&(Regexp::ParseFlags a,
+                                    Regexp::ParseFlags b) {
+  // Intersect in int space, then convert back to the enum type.
+  const int lhs = static_cast<int>(a);
+  const int rhs = static_cast<int>(b);
+  return static_cast<Regexp::ParseFlags>(lhs & rhs);
+}
+
+inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) {
+  // Producing a value outside the enum's range is undefined behaviour, so
+  // the complement is masked down to the bits covered by AllParseFlags.
+  const int all_bits = static_cast<int>(Regexp::AllParseFlags);
+  const int inverted = ~static_cast<int>(a);
+  return static_cast<Regexp::ParseFlags>(inverted & all_bits);
+}
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_REGEXP_H_
--- a/external/duckdb/third_party/re2/re2/set.cc
+++ b/external/duckdb/third_party/re2/re2/set.cc
@@ -0,0 +1,176 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/set.h"
+
+#include <stddef.h>
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/pod_array.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/stringpiece.h"
+
+namespace duckdb_re2 {
+
+// Creates an empty, not-yet-compiled set that keeps its own copy of options.
+RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
+    : options_(options), anchor_(anchor), compiled_(false), size_(0) {
+  // Force never_capture on the private copy of the options; this
+  // might unblock some optimisations.
+  options_.set_never_capture(true);
+}
+
+RE2::Set::~Set() {
+  // Drop the reference held on every Regexp still stored in elem_.
+  for (const Elem& entry : elem_)
+    entry.second->Decref();
+}
+
+// Move constructor: copies the options and anchor, takes over the pending
+// elements and the compiled program, and resets other to an empty,
+// uncompiled set.
+RE2::Set::Set(Set&& other)
+    : options_(other.options_),
+      anchor_(other.anchor_),
+      elem_(std::move(other.elem_)),
+      compiled_(other.compiled_),
+      size_(other.size_),
+      prog_(std::move(other.prog_)) {
+  // Explicitly put the moved-from members into their empty state.
+  other.prog_.reset();
+  other.size_ = 0;
+  other.compiled_ = false;
+  other.elem_.clear();
+  other.elem_.shrink_to_fit();
+}
+
+// Move assignment, implemented as destroy-then-move-construct in place.
+// Returns *this.  Assigning a set to itself leaves it unchanged.
+RE2::Set& RE2::Set::operator=(Set&& other) {
+  // Without this guard a self-move would run the destructor on *this and
+  // then move-construct from that same, already-destroyed object.
+  if (this != &other) {
+    this->~Set();
+    (void) new (this) Set(std::move(other));
+  }
+  return *this;
+}
+
+// Parses pattern with the set's options and queues it for Compile().
+// Returns the index assigned to the pattern, or -1 if the set has already
+// been compiled or the pattern does not parse; in the latter case *error
+// (if error is not NULL) receives the parser's message.
+int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
+  if (compiled_) {
+    LOG(DFATAL) << "RE2::Set::Add() called after compiling";
+    return -1;
+  }
+
+  const Regexp::ParseFlags flags =
+      static_cast<Regexp::ParseFlags>(options_.ParseFlags());
+  RegexpStatus status;
+  duckdb_re2::Regexp* parsed = Regexp::Parse(pattern, flags, &status);
+  if (parsed == NULL) {
+    if (error != NULL)
+      *error = status.Text();
+    if (options_.log_errors())
+      LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
+    return -1;
+  }
+
+  // The index of the new element doubles as its match ID; append a
+  // HaveMatch node carrying it to the end of the parsed expression.
+  const int index = static_cast<int>(elem_.size());
+  duckdb_re2::Regexp* marker = duckdb_re2::Regexp::HaveMatch(index, flags);
+  duckdb_re2::Regexp* combined;
+  if (parsed->op() != kRegexpConcat) {
+    // Concat() consumes the references to both parsed and marker.
+    duckdb_re2::Regexp* pieces[2];
+    pieces[0] = parsed;
+    pieces[1] = marker;
+    combined = duckdb_re2::Regexp::Concat(pieces, 2, flags);
+  } else {
+    // Already a concatenation: build a new one from its children plus the
+    // marker, taking a fresh reference on each child before releasing the
+    // original node.
+    const int nsub = parsed->nsub();
+    PODArray<duckdb_re2::Regexp*> pieces(nsub + 1);
+    for (int k = 0; k < nsub; ++k)
+      pieces[k] = parsed->sub()[k]->Incref();
+    pieces[nsub] = marker;
+    parsed->Decref();
+    combined = duckdb_re2::Regexp::Concat(pieces.data(), nsub + 1, flags);
+  }
+  elem_.emplace_back(std::string(pattern), combined);
+  return index;
+}
+
+// Builds the program for all queued patterns.  May be called only once.
+// Returns true when Prog::CompileSet() produced a program, false otherwise.
+bool RE2::Set::Compile() {
+  if (compiled_) {
+    LOG(DFATAL) << "RE2::Set::Compile() called more than once";
+    return false;
+  }
+  // The set counts as compiled from here on, whatever the outcome below.
+  compiled_ = true;
+  size_ = static_cast<int>(elem_.size());
+
+  // Sort the elements by their patterns. This is good enough for now
+  // until we have a Regexp comparison function. (Maybe someday...)
+  auto by_pattern = [](const Elem& lhs, const Elem& rhs) -> bool {
+    return lhs.first < rhs.first;
+  };
+  std::sort(elem_.begin(), elem_.end(), by_pattern);
+
+  // Move the Regexp pointers into a flat array and release elem_'s storage;
+  // the references are handed on to Alternate() below.
+  PODArray<duckdb_re2::Regexp*> alternatives(size_);
+  for (int k = 0; k < size_; ++k) {
+    alternatives[k] = elem_[k].second;
+  }
+  elem_.clear();
+  elem_.shrink_to_fit();
+
+  const Regexp::ParseFlags flags =
+      static_cast<Regexp::ParseFlags>(options_.ParseFlags());
+  duckdb_re2::Regexp* combined =
+      duckdb_re2::Regexp::Alternate(alternatives.data(), size_, flags);
+
+  prog_.reset(Prog::CompileSet(combined, anchor_, options_.max_mem()));
+  combined->Decref();
+  return prog_ != nullptr;
+}
+
+// Convenience overload: same as the three-argument Match() with no
+// ErrorInfo requested.
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
+  ErrorInfo* const no_error_info = NULL;
+  return Match(text, v, no_error_info);
+}
+
+// Runs the compiled set over text.
+//   text        input to search.
+//   v           if not NULL, cleared and then filled with the indices of the
+//               matching regexps (in no particular order).
+//   error_info  if not NULL, receives the reason whenever false is returned
+//               (kNoError when the text simply did not match) and kNoError
+//               on success.
+// Returns true if at least one regexp in the set matched.
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
+                     ErrorInfo* error_info) const {
+  if (!compiled_) {
+    if (error_info != NULL)
+      error_info->kind = kNotCompiled;
+    LOG(DFATAL) << "RE2::Set::Match() called before compiling";
+    return false;
+  }
+  // Compile() marks the set as compiled before building the program, so a
+  // failed Compile() leaves compiled_ true with prog_ still null.  Report
+  // that here instead of dereferencing a null program below.
+  if (prog_ == nullptr) {
+    if (error_info != NULL)
+      error_info->kind = kNotCompiled;
+    LOG(DFATAL) << "RE2::Set::Match() called after Compile() failed";
+    return false;
+  }
+#ifdef RE2_HAVE_THREAD_LOCAL
+  hooks::context = NULL;
+#endif
+  bool dfa_failed = false;
+  // Match indices are only collected when the caller asked for them.
+  duckdb_base_std::unique_ptr<SparseSet> matches;
+  if (v != NULL) {
+    matches.reset(new SparseSet(size_));
+    v->clear();
+  }
+  bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
+                              NULL, &dfa_failed, matches.get());
+  if (dfa_failed) {
+    if (options_.log_errors())
+      LOG(ERROR) << "DFA out of memory: "
+                 << "program size " << prog_->size() << ", "
+                 << "list count " << prog_->list_count() << ", "
+                 << "bytemap range " << prog_->bytemap_range();
+    if (error_info != NULL)
+      error_info->kind = kOutOfMemory;
+    return false;
+  }
+  if (ret == false) {
+    // A plain non-match is not an error.
+    if (error_info != NULL)
+      error_info->kind = kNoError;
+    return false;
+  }
+  if (v != NULL) {
+    // The DFA reported a match, so at least one index must be recorded.
+    if (matches->empty()) {
+      if (error_info != NULL)
+        error_info->kind = kInconsistent;
+      LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
+      return false;
+    }
+    v->assign(matches->begin(), matches->end());
+  }
+  if (error_info != NULL)
+    error_info->kind = kNoError;
+  return true;
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/set.h
+++ b/external/duckdb/third_party/re2/re2/set.h
@@ -0,0 +1,91 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_SET_H_
+#define RE2_SET_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "re2/re2.h"
+
+#ifndef DUCKDB_BASE_STD
+namespace duckdb_base_std {
+	using ::std::unique_ptr;
+} // namespace duckdb_base_std
+#endif
+
+namespace duckdb_re2 {
+class Prog;
+class Regexp;
+}  // namespace duckdb_re2
+
+namespace duckdb_re2 {
+
+// An RE2::Set represents a collection of regexps that can
+// be searched for simultaneously.
+//
+// Usage follows a fixed sequence: construct, Add() each pattern, call
+// Compile() once, then call Match() as often as needed.
+class RE2::Set {
+ public:
+  // Reason reported through ErrorInfo by the three-argument Match().
+  enum ErrorKind {
+    kNoError = 0,
+    kNotCompiled,   // The set is not compiled.
+    kOutOfMemory,   // The DFA ran out of memory.
+    kInconsistent,  // The result is inconsistent. This should never happen.
+  };
+
+  struct ErrorInfo {
+    ErrorKind kind;
+  };
+
+  // Keeps a private copy of options (with never_capture forced on) and the
+  // anchoring mode to use when compiling.
+  Set(const RE2::Options& options, RE2::Anchor anchor);
+  ~Set();
+
+  // Not copyable.
+  Set(const Set&) = delete;
+  Set& operator=(const Set&) = delete;
+  // Movable.
+  Set(Set&& other);
+  Set& operator=(Set&& other);
+
+  // Adds pattern to the set using the options passed to the constructor.
+  // Returns the index that will identify the regexp in the output of Match(),
+  // or -1 if the regexp cannot be parsed.
+  // Indices are assigned in sequential order starting from 0.
+  // Errors do not increment the index; if error is not NULL, *error will hold
+  // the error message from the parser.
+  int Add(const StringPiece& pattern, std::string* error);
+
+  // Compiles the set in preparation for matching.
+  // Returns false if the compiler runs out of memory.
+  // Add() must not be called again after Compile().
+  // Compile() must be called before Match().
+  bool Compile();
+
+  // Returns true if text matches at least one of the regexps in the set.
+  // Fills v (if not NULL) with the indices of the matching regexps.
+  // Callers must not expect v to be sorted.
+  bool Match(const StringPiece& text, std::vector<int>* v) const;
+
+  // As above, but populates error_info (if not NULL) when none of the regexps
+  // in the set matched. This can inform callers when DFA execution fails, for
+  // example, because they might wish to handle that case differently.
+  bool Match(const StringPiece& text, std::vector<int>* v,
+             ErrorInfo* error_info) const;
+
+ private:
+  // A pattern's text paired with its parsed form.
+  typedef std::pair<std::string, duckdb_re2::Regexp*> Elem;
+
+  RE2::Options options_;    // private copy of the constructor's options
+  RE2::Anchor anchor_;      // passed to Prog::CompileSet() by Compile()
+  std::vector<Elem> elem_;  // patterns added so far; emptied by Compile()
+  bool compiled_;           // set by Compile(), even if compilation fails
+  int size_;                // number of patterns at the time of Compile()
+  // Compiled program; null until Compile() has produced one.
+  duckdb_base_std::unique_ptr<duckdb_re2::Prog> prog_;
+};
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_SET_H_
--- a/external/duckdb/third_party/re2/re2/simplify.cc
+++ b/external/duckdb/third_party/re2/re2/simplify.cc
@@ -0,0 +1,665 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Rewrite POSIX and other features in re
+// to use simple extended regular expression features.
+// Also sort and simplify character classes.
+
+#include <string>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/utf.h"
+#include "re2/pod_array.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace duckdb_re2 {
+
+// Parses src under the given flags, simplifies the resulting regexp and
+// stores the string form of the simplified regexp in *dst.
+// Returns true on success.  Returns false if parsing fails (status is handed
+// to Parse()) or if simplification fails; in the latter case *status (if
+// status != NULL) is set to kRegexpInternalError with src as the error
+// argument.
+bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
+                            std::string* dst, RegexpStatus* status) {
+  Regexp* parsed = Parse(src, flags, status);
+  if (parsed == NULL)
+    return false;
+
+  // The simplified tree is returned with its own reference, so the parse
+  // tree can be released right away.
+  Regexp* simplified = parsed->Simplify();
+  parsed->Decref();
+
+  if (simplified != NULL) {
+    *dst = simplified->ToString();
+    simplified->Decref();
+    return true;
+  }
+
+  if (status != NULL) {
+    status->set_code(kRegexpInternalError);
+    status->set_error_arg(src);
+  }
+  return false;
+}
+
+// Decides whether this Regexp is "simple", trusting the simple_ flags that
+// have already been computed for its children.
+bool Regexp::ComputeSimple() {
+  switch (op_) {
+    // Leaf operators are always simple.
+    case kRegexpNoMatch:
+    case kRegexpEmptyMatch:
+    case kRegexpLiteral:
+    case kRegexpLiteralString:
+    case kRegexpBeginLine:
+    case kRegexpEndLine:
+    case kRegexpBeginText:
+    case kRegexpWordBoundary:
+    case kRegexpNoWordBoundary:
+    case kRegexpEndText:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpHaveMatch:
+      return true;
+
+    // Simple exactly when every subpiece is simple.
+    case kRegexpConcat:
+    case kRegexpAlternate: {
+      Regexp** children = sub();
+      for (int k = 0; k < nsub_; k++) {
+        if (!children[k]->simple())
+          return false;
+      }
+      return true;
+    }
+
+    // Simple unless the class is empty or full.  The builder form is
+    // consulted when it is present, otherwise the finished class.
+    case kRegexpCharClass: {
+      if (arguments.char_class.ccb_ != NULL) {
+        const bool degenerate = arguments.char_class.ccb_->empty() ||
+                                arguments.char_class.ccb_->full();
+        return !degenerate;
+      }
+      const bool degenerate = arguments.char_class.cc_->empty() ||
+                              arguments.char_class.cc_->full();
+      return !degenerate;
+    }
+
+    case kRegexpCapture:
+      return sub()[0]->simple();
+
+    // A star/plus/quest is simple when its operand is simple and is not
+    // itself a star/plus/quest, an empty match or a no-match.
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest: {
+      Regexp* operand = sub()[0];
+      if (!operand->simple())
+        return false;
+      const bool redundant = operand->op_ == kRegexpStar ||
+                             operand->op_ == kRegexpPlus ||
+                             operand->op_ == kRegexpQuest ||
+                             operand->op_ == kRegexpEmptyMatch ||
+                             operand->op_ == kRegexpNoMatch;
+      return !redundant;
+    }
+
+    // Counted repetition is never simple.
+    case kRegexpRepeat:
+      return false;
+  }
+  LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
+  return false;
+}
+
+// Walker subclass used by Simplify.
+// Coalesces runs of star/plus/quest/repeat of the same literal along with any
+// occurrences of that literal into repeats of that literal. It also works for
+// char classes, any char and any byte.
+// PostVisit creates the coalesced result, which should then be simplified.
+// This is the first of the two passes run by Regexp::Simplify(); its output
+// is fed to SimplifyWalker.
+class CoalesceWalker : public Regexp::Walker<Regexp*> {
+ public:
+  CoalesceWalker() {}
+  // Builds the (possibly coalesced) replacement for re from child_args.
+  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
+                            Regexp** child_args, int nchild_args);
+  // Returns a new reference to re.
+  virtual Regexp* Copy(Regexp* re);
+  // Not expected to be called; returns a new reference to re.
+  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
+
+ private:
+  // These functions are declared inside CoalesceWalker so that
+  // they can edit the private fields of the Regexps they construct.
+
+  // Returns true if r1 and r2 can be coalesced. In particular, ensures that
+  // the parse flags are consistent. (They will not be checked again later.)
+  static bool CanCoalesce(Regexp* r1, Regexp* r2);
+
+  // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
+  // will be empty match and the coalesced op. In other cases, where part of a
+  // literal string was removed to be coalesced, the array elements afterwards
+  // will be the coalesced op and the remainder of the literal string.
+  static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
+
+  // Not copyable.
+  CoalesceWalker(const CoalesceWalker&) = delete;
+  CoalesceWalker& operator=(const CoalesceWalker&) = delete;
+};
+
+// Walker subclass used by Simplify.
+// The simplify walk is purely post-recursive: given the simplified children,
+// PostVisit creates the simplified result.
+// The child_args are simplified Regexp*s.
+// This is the second of the two passes run by Regexp::Simplify(); it runs on
+// the output of CoalesceWalker.
+class SimplifyWalker : public Regexp::Walker<Regexp*> {
+ public:
+  SimplifyWalker() {}
+  // Stops the descent at subtrees that are already marked simple.
+  virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
+  // Builds the simplified replacement for re from its simplified children.
+  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
+                            Regexp** child_args, int nchild_args);
+  // Returns a new reference to re.
+  virtual Regexp* Copy(Regexp* re);
+  // Not expected to be called; returns a new reference to re.
+  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
+
+ private:
+  // These functions are declared inside SimplifyWalker so that
+  // they can edit the private fields of the Regexps they construct.
+
+  // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
+  // Caller must Decref return value when done with it.
+  static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
+
+  // Simplifies the expression re{min,max} in terms of *, +, and ?.
+  // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
+  // Caller must Decref return value when done with it.
+  static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
+                                Regexp::ParseFlags parse_flags);
+
+  // Simplifies a character class by expanding any named classes
+  // into rune ranges.  Does not edit re.  Does not consume ref to re.
+  // Caller must Decref return value when done with it.
+  static Regexp* SimplifyCharClass(Regexp* re);
+
+  // Not copyable.
+  SimplifyWalker(const SimplifyWalker&) = delete;
+  SimplifyWalker& operator=(const SimplifyWalker&) = delete;
+};
+
+// Returns a simplified copy of this regular expression.
+// The result is restricted to traditional Unix egrep features plus the Perl
+// (?:) non-capturing parentheses; no other POSIX or Perl additions remain.
+// It captures exactly the same subexpressions (with the same indices) as the
+// original.
+// The current object is not edited.  Returns NULL if either walk produces no
+// result or stops early.
+// Caller must Decref() return value when done with it.
+Regexp* Regexp::Simplify() {
+  // Pass 1: coalesce adjacent repetitions of the same operand.
+  CoalesceWalker coalescer;
+  Regexp* coalesced = coalescer.Walk(this, NULL);
+  if (coalesced == NULL)
+    return NULL;
+  if (coalescer.stopped_early()) {
+    coalesced->Decref();
+    return NULL;
+  }
+
+  // Pass 2: simplify the coalesced tree.  The intermediate tree is no longer
+  // needed once the second walk has finished.
+  SimplifyWalker simplifier;
+  Regexp* simplified = simplifier.Walk(coalesced, NULL);
+  coalesced->Decref();
+  if (simplified != NULL && simplifier.stopped_early()) {
+    simplified->Decref();
+    simplified = NULL;
+  }
+  return simplified;
+}
+
+#define Simplify DontCallSimplify  // Avoid accidental recursion
+
+// Helper for PostVisit implementations: reports whether any entry of
+// child_args differs from the corresponding entry of re->sub().
+// Returns true as soon as a difference is found, leaving child_args untouched.
+// In the common case where nothing changed, calls Decref() on every entry of
+// child_args and returns false, so PostVisit must return re->Incref().
+static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
+  const int count = re->nsub();
+  Regexp** current = re->sub();
+
+  for (int k = 0; k < count; k++) {
+    if (current[k] != child_args[k])
+      return true;
+  }
+
+  // Identical children: drop the references held through child_args.
+  for (int k = 0; k < count; k++)
+    child_args[k]->Decref();
+  return false;
+}
+
+// Copying a node just takes another reference to it.
+Regexp* CoalesceWalker::Copy(Regexp* re) {
+  Regexp* shared = re->Incref();
+  return shared;
+}
+
+// Should never be called: we use Walk(), not WalkExponential().
+// Logs (outside fuzzing builds) and hands back a new reference to re.
+Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
+#endif
+  Regexp* unchanged = re->Incref();
+  return unchanged;
+}
+
+// Produces the replacement for re given its already-walked children in
+// child_args.  Nodes without children are returned as-is.  For a
+// concatenation, adjacent children that CanCoalesce() accepts are merged by
+// DoCoalesce() and the empty matches left behind are dropped.  Every other
+// node is either returned unchanged or rebuilt around the new children.
+// The returned Regexp carries a reference owned by the caller.
+Regexp* CoalesceWalker::PostVisit(Regexp* re,
+                                  Regexp* parent_arg,
+                                  Regexp* pre_arg,
+                                  Regexp** child_args,
+                                  int nchild_args) {
+  const int count = re->nsub();
+  if (count == 0)
+    return re->Incref();
+
+  // Only a concatenation has adjacent pieces that might be merged.
+  bool mergeable = false;
+  if (re->op() == kRegexpConcat) {
+    for (int i = 0; i + 1 < count && !mergeable; i++)
+      mergeable = CanCoalesce(child_args[i], child_args[i + 1]);
+  }
+
+  if (!mergeable) {
+    if (!ChildArgsChanged(re, child_args))
+      return re->Incref();
+
+    // Something changed. Build a new op that takes over the child_args
+    // references.
+    Regexp* rebuilt = new Regexp(re->op(), re->parse_flags());
+    rebuilt->AllocSub(count);
+    Regexp** slots = rebuilt->sub();
+    for (int i = 0; i < count; i++)
+      slots[i] = child_args[i];
+    // Repeats and Captures have additional data that must be copied.
+    switch (re->op()) {
+      case kRegexpRepeat:
+        rebuilt->arguments.repeat.min_ = re->min();
+        rebuilt->arguments.repeat.max_ = re->max();
+        break;
+      case kRegexpCapture:
+        rebuilt->arguments.capture.cap_ = re->cap();
+        break;
+      default:
+        break;
+    }
+    return rebuilt;
+  }
+
+  // Merge each adjacent pair that qualifies, working left to right so that a
+  // merged result can take part in the next comparison.
+  for (int i = 0; i + 1 < count; i++) {
+    if (CanCoalesce(child_args[i], child_args[i + 1]))
+      DoCoalesce(&child_args[i], &child_args[i + 1]);
+  }
+
+  // Determine how many empty matches were left by DoCoalesce.
+  int empties = 0;
+  for (int i = 0; i < count; i++) {
+    if (child_args[i]->op() == kRegexpEmptyMatch)
+      empties++;
+  }
+
+  // Build a new op holding only the children that are not empty matches.
+  Regexp* merged = new Regexp(re->op(), re->parse_flags());
+  merged->AllocSub(count - empties);
+  Regexp** out = merged->sub();
+  int next = 0;
+  for (int i = 0; i < count; i++) {
+    Regexp* piece = child_args[i];
+    if (piece->op() == kRegexpEmptyMatch)
+      piece->Decref();
+    else
+      out[next++] = piece;
+  }
+  return merged;
+}
+
+// Reports whether the adjacent pieces r1 and r2 of a concatenation can be
+// merged into a single repeat.
+bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
+  // r1 must be a star/plus/quest/repeat ...
+  switch (r1->op()) {
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+    case kRegexpRepeat:
+      break;
+    default:
+      return false;
+  }
+
+  // ... of a literal, char class, any char or any byte.
+  Regexp* operand = r1->sub()[0];
+  switch (operand->op()) {
+    case kRegexpLiteral:
+    case kRegexpCharClass:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+      break;
+    default:
+      return false;
+  }
+
+  // r2 may be a star/plus/quest/repeat of the same operand, provided the
+  // greediness of the two repetitions agrees.
+  const bool r2_repeats = r2->op() == kRegexpStar ||
+                          r2->op() == kRegexpPlus ||
+                          r2->op() == kRegexpQuest ||
+                          r2->op() == kRegexpRepeat;
+  if (r2_repeats &&
+      Regexp::Equal(operand, r2->sub()[0]) &&
+      ((r1->parse_flags() & Regexp::NonGreedy) ==
+       (r2->parse_flags() & Regexp::NonGreedy)))
+    return true;
+
+  // ... OR a plain occurrence of that same operand.
+  if (Regexp::Equal(operand, r2))
+    return true;
+
+  // ... OR a literal string that begins with that literal, provided the
+  // case folding of the two agrees.
+  return operand->op() == kRegexpLiteral &&
+         r2->op() == kRegexpLiteralString &&
+         r2->runes()[0] == operand->rune() &&
+         ((operand->parse_flags() & Regexp::FoldCase) ==
+          (r2->parse_flags() & Regexp::FoldCase));
+}
+
+// Merges the adjacent pieces *r1ptr and *r2ptr into one repeat node.
+// The repeat starts with the bounds implied by r1 and is then widened by
+// whatever r2 contributes; a max of -1 stands for "no upper bound".
+// On success the two array slots are overwritten (see the cases below) and
+// the references to the original r1 and r2 are released.  If either op is
+// unexpected, the slots are left untouched and the failure is logged.
+void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
+  Regexp* r1 = *r1ptr;
+  Regexp* r2 = *r2ptr;
+
+  // The new repeat shares r1's operand; its bounds are filled in below.
+  Regexp* nre = Regexp::Repeat(
+      r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
+
+  // Seed the bounds from r1.
+  switch (r1->op()) {
+    case kRegexpStar:
+      nre->arguments.repeat.min_ = 0;
+      nre->arguments.repeat.max_ = -1;
+      break;
+
+    case kRegexpPlus:
+      nre->arguments.repeat.min_ = 1;
+      nre->arguments.repeat.max_ = -1;
+      break;
+
+    case kRegexpQuest:
+      nre->arguments.repeat.min_ = 0;
+      nre->arguments.repeat.max_ = 1;
+      break;
+
+    case kRegexpRepeat:
+      nre->arguments.repeat.min_ = r1->min();
+      nre->arguments.repeat.max_ = r1->max();
+      break;
+
+    default:
+      nre->Decref();
+      LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
+      return;
+  }
+
+  // Fold r2's contribution into the bounds.
+  switch (r2->op()) {
+    case kRegexpStar:
+      nre->arguments.repeat.max_ = -1;
+      goto LeaveEmpty;
+
+    case kRegexpPlus:
+      nre->arguments.repeat.min_++;
+      nre->arguments.repeat.max_ = -1;
+      goto LeaveEmpty;
+
+    case kRegexpQuest:
+      if (nre->max() != -1)
+        nre->arguments.repeat.max_++;
+      goto LeaveEmpty;
+
+    case kRegexpRepeat:
+      nre->arguments.repeat.min_ += r2->min();
+      if (r2->max() == -1)
+        nre->arguments.repeat.max_ = -1;
+      else if (nre->max() != -1)
+        nre->arguments.repeat.max_ += r2->max();
+      goto LeaveEmpty;
+
+    // A single occurrence of the operand adds exactly one repetition.
+    case kRegexpLiteral:
+    case kRegexpCharClass:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+      nre->arguments.repeat.min_++;
+      if (nre->max() != -1)
+        nre->arguments.repeat.max_++;
+      goto LeaveEmpty;
+
+    // r2 was absorbed completely: the first slot becomes an empty match and
+    // the second slot holds the merged repeat.
+    LeaveEmpty:
+      *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
+      *r2ptr = nre;
+      break;
+
+    case kRegexpLiteralString: {
+      Rune r = r1->sub()[0]->rune();
+      // Determine how much of the literal string is removed.
+      // We know that we have at least one rune. :)
+      int n = 1;
+      while (n < r2->nrunes() && r2->runes()[n] == r)
+        n++;
+      nre->arguments.repeat.min_ += n;
+      if (nre->max() != -1)
+        nre->arguments.repeat.max_ += n;
+      // The whole string consisted of the repeated rune.
+      if (n == r2->nrunes())
+        goto LeaveEmpty;
+      // Otherwise the repeat goes first, followed by the rest of the string.
+      *r1ptr = nre;
+      *r2ptr = Regexp::LiteralString(
+          &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
+      break;
+    }
+
+    default:
+      nre->Decref();
+      LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
+      return;
+  }
+
+  // The slots now hold the replacements; release the originals.
+  r1->Decref();
+  r2->Decref();
+}
+
+// Copying a node just takes another reference to it.
+Regexp* SimplifyWalker::Copy(Regexp* re) {
+  Regexp* shared = re->Incref();
+  return shared;
+}
+
+// Should never be called: we use Walk(), not WalkExponential().
+// Logs (outside fuzzing builds) and hands back a new reference to re.
+Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
+#endif
+  Regexp* unchanged = re->Incref();
+  return unchanged;
+}
+
+// Cuts the walk short at subtrees already marked simple: sets *stop and
+// returns a new reference to re.  Otherwise returns NULL and leaves *stop
+// untouched so the walk continues into the children.
+Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
+  if (!re->simple())
+    return NULL;
+
+  *stop = true;
+  return re->Incref();
+}
+
+// Builds the simplified form of re from its simplified children in
+// child_args.  Each branch either returns a new reference to re (marking it
+// simple), hands one of the child references straight back, or wraps the
+// children in a freshly allocated node.  The returned Regexp carries a
+// reference owned by the caller.
+Regexp* SimplifyWalker::PostVisit(Regexp* re,
+                                  Regexp* parent_arg,
+                                  Regexp* pre_arg,
+                                  Regexp** child_args,
+                                  int nchild_args) {
+  switch (re->op()) {
+    case kRegexpNoMatch:
+    case kRegexpEmptyMatch:
+    case kRegexpLiteral:
+    case kRegexpLiteralString:
+    case kRegexpBeginLine:
+    case kRegexpEndLine:
+    case kRegexpBeginText:
+    case kRegexpWordBoundary:
+    case kRegexpNoWordBoundary:
+    case kRegexpEndText:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpHaveMatch:
+      // All these are always simple.
+      re->simple_ = true;
+      return re->Incref();
+
+    case kRegexpConcat:
+    case kRegexpAlternate: {
+      // These are simple as long as the subpieces are simple.
+      // ChildArgsChanged() has already released child_args when it
+      // returns false.
+      if (!ChildArgsChanged(re, child_args)) {
+        re->simple_ = true;
+        return re->Incref();
+      }
+      // The new node takes over the child_args references.
+      Regexp* nre = new Regexp(re->op(), re->parse_flags());
+      nre->AllocSub(re->nsub());
+      Regexp** nre_subs = nre->sub();
+      for (int i = 0; i < re->nsub(); i++)
+        nre_subs[i] = child_args[i];
+      nre->simple_ = true;
+      return nre;
+    }
+
+    case kRegexpCapture: {
+      Regexp* newsub = child_args[0];
+      // Unchanged child: drop the extra reference and reuse re.
+      if (newsub == re->sub()[0]) {
+        newsub->Decref();
+        re->simple_ = true;
+        return re->Incref();
+      }
+      // Rebuild the capture around the new child, keeping its index.
+      Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
+      nre->AllocSub(1);
+      nre->sub()[0] = newsub;
+      nre->arguments.capture.cap_ = re->cap();
+      nre->simple_ = true;
+      return nre;
+    }
+
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest: {
+      Regexp* newsub = child_args[0];
+      // Special case: repeat the empty string as much as
+      // you want, but it's still the empty string.
+      if (newsub->op() == kRegexpEmptyMatch)
+        return newsub;
+
+      // These are simple as long as the subpiece is simple.
+      if (newsub == re->sub()[0]) {
+        newsub->Decref();
+        re->simple_ = true;
+        return re->Incref();
+      }
+
+      // These are also idempotent if flags are constant.
+      if (re->op() == newsub->op() &&
+          re->parse_flags() == newsub->parse_flags())
+        return newsub;
+
+      Regexp* nre = new Regexp(re->op(), re->parse_flags());
+      nre->AllocSub(1);
+      nre->sub()[0] = newsub;
+      nre->simple_ = true;
+      return nre;
+    }
+
+    case kRegexpRepeat: {
+      Regexp* newsub = child_args[0];
+      // Special case: repeat the empty string as much as
+      // you want, but it's still the empty string.
+      if (newsub->op() == kRegexpEmptyMatch)
+        return newsub;
+
+      // SimplifyRepeat() does not consume newsub, so release it afterwards.
+      Regexp* nre = SimplifyRepeat(newsub, re->arguments.repeat.min_, re->arguments.repeat.max_,
+                                   re->parse_flags());
+      newsub->Decref();
+      nre->simple_ = true;
+      return nre;
+    }
+
+    case kRegexpCharClass: {
+      Regexp* nre = SimplifyCharClass(re);
+      nre->simple_ = true;
+      return nre;
+    }
+  }
+
+  // Reached only for an op value not listed above.
+  LOG(ERROR) << "Simplify case not handled: " << re->op();
+  return re->Incref();
+}
+
+// Builds the two-element concatenation "re1 re2" with the given parse flags.
+// The references to re1 and re2 are consumed; the reference to the returned
+// Regexp belongs to the caller.
+Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
+                                Regexp::ParseFlags parse_flags) {
+  Regexp* pair = new Regexp(kRegexpConcat, parse_flags);
+  pair->AllocSub(2);
+  pair->sub()[0] = re1;
+  pair->sub()[1] = re2;
+  return pair;
+}
+
+// Rewrites the counted repetition re{min,max} using only *, + and ?.
+// A max of -1 means there is no upper bound.
+// Returns a new regexp.  Does not edit re.  Does not consume reference to re.
+// Caller must Decref return value when done with it.
+// The result will *not* necessarily have the right capturing parens
+// if you call ToString() and re-parse it: (x){2} becomes (x)(x),
+// but in the Regexp* representation, both (x) are marked as $1.
+Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
+                                       Regexp::ParseFlags f) {
+  // Unbounded: x{n,} means at least n matches of x.
+  if (max == -1) {
+    // x{0,} is x*
+    if (min == 0)
+      return Regexp::Star(re->Incref(), f);
+
+    // x{1,} is x+
+    if (min == 1)
+      return Regexp::Plus(re->Incref(), f);
+
+    // Otherwise emit min-1 plain copies followed by x+, so x{4,} is xxxx+
+    const int last = min - 1;
+    PODArray<Regexp*> pieces(min);
+    for (int k = 0; k < last; k++)
+      pieces[k] = re->Incref();
+    pieces[last] = Regexp::Plus(re->Incref(), f);
+    return Regexp::Concat(pieces.data(), min, f);
+  }
+
+  // (x){0} matches only the empty string.
+  if (max == 0 && min == 0)
+    return new Regexp(kRegexpEmptyMatch, f);
+
+  // x{1} is just x.
+  if (max == 1 && min == 1)
+    return re->Incref();
+
+  // General case: x{n,m} means n copies of x and m copies of x?.
+  // The machine will do less work if we nest the final m copies,
+  // so that x{2,5} = xx(x(x(x)?)?)?
+
+  // Mandatory part: min plain copies of x.
+  Regexp* result = NULL;
+  if (min > 0) {
+    PODArray<Regexp*> copies(min);
+    for (int k = 0; k < min; k++)
+      copies[k] = re->Incref();
+    result = Regexp::Concat(copies.data(), min, f);
+  }
+
+  // Optional part: (x(x(x)?)?)?, built from the innermost x? outwards and
+  // appended to the mandatory part when there is one.
+  if (max > min) {
+    Regexp* tail = Regexp::Quest(re->Incref(), f);
+    for (int k = min + 1; k < max; k++)
+      tail = Regexp::Quest(Concat2(re->Incref(), tail, f), f);
+    result = (result == NULL) ? tail : Concat2(result, tail, f);
+  }
+
+  if (result != NULL)
+    return result;
+
+  // Some degenerate case, like min > max, or min < max < 0.
+  // This shouldn't happen, because the parser rejects such regexps.
+  LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
+  return new Regexp(kRegexpNoMatch, f);
+}
+
+// Replaces a degenerate character class by an equivalent simpler op: an
+// empty class becomes a no-match and a full class becomes any-char, both
+// with re's parse flags.  Any other class is returned unchanged with a new
+// reference.
+// Caller must Decref return value when done with it.
+Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
+  CharClass* cls = re->cc();
+
+  if (cls->empty())
+    return new Regexp(kRegexpNoMatch, re->parse_flags());
+
+  if (cls->full())
+    return new Regexp(kRegexpAnyChar, re->parse_flags());
+
+  // Nothing special about this class.
+  return re->Incref();
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/sparse_array.h
+++ b/external/duckdb/third_party/re2/re2/sparse_array.h
@@ -0,0 +1,392 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_SPARSE_ARRAY_H_
+#define RE2_SPARSE_ARRAY_H_
+
+// DESCRIPTION
+//
+// SparseArray<T>(m) is a map from integers in [0, m) to T values.
+// It requires (sizeof(T)+sizeof(int))*m memory, but it provides
+// fast iteration through the elements in the array and fast clearing
+// of the array.  The array has a concept of certain elements being
+// uninitialized (having no value).
+//
+// Insertion and deletion are constant time operations.
+//
+// Allocating the array is a constant time operation
+// when memory allocation is a constant time operation.
+//
+// Clearing the array is a constant time operation (unusual!).
+//
+// Iterating through the array is an O(n) operation, where n
+// is the number of items in the array (not O(m)).
+//
+// The array iterator visits entries in the order they were first
+// inserted into the array.  It is safe to add items to the array while
+// using an iterator: the iterator will visit indices added to the array
+// during the iteration, but will not re-visit indices whose values
+// change after visiting.  Thus SparseArray can be a convenient
+// implementation of a work queue.
+//
+// The SparseArray implementation is NOT thread-safe.  It is up to the
+// caller to make sure only one thread is accessing the array.  (Typically
+// these arrays are temporary values and used in situations where speed is
+// important.)
+//
+// The SparseArray interface does not present all the usual STL bells and
+// whistles.
+//
+// Implemented with reference to Briggs & Torczon, An Efficient
+// Representation for Sparse Sets, ACM Letters on Programming Languages
+// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
+//
+// Briggs & Torczon popularized this technique, but it had been known
+// long before their paper.  They point out that Aho, Hopcroft, and
+// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's
+// 1986 Programming Pearls both hint at the technique in exercises to the
+// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1
+// exercise 8).
+//
+// Briggs & Torczon describe a sparse set implementation.  I have
+// trivially generalized it to create a sparse array (actually the original
+// target of the AHU and Bentley exercises).
+
+// IMPLEMENTATION
+//
+// SparseArray is an array dense_ and an array sparse_ of identical size.
+// At any point, the number of elements in the sparse array is size_.
+//
+// The array dense_ contains the size_ elements in the sparse array (with
+// their indices),
+// in the order that the elements were first inserted.  This array is dense:
+// the size_ pairs are dense_[0] through dense_[size_-1].
+//
+// The array sparse_ maps from indices in [0,m) to indices in [0,size_).
+// For indices present in the array, dense_[sparse_[i]].index_ == i.
+// For indices not present in the array, sparse_ can contain any value at all,
+// perhaps outside the range [0, size_) but perhaps not.
+//
+// The lax requirement on sparse_ values makes clearing the array very easy:
+// set size_ to 0.  Lookups are slightly more complicated.
+// An index i has a value in the array if and only if:
+//   sparse_[i] is in [0, size_) AND
+//   dense_[sparse_[i]].index_ == i.
+// If both these properties hold, only then it is safe to refer to
+//   dense_[sparse_[i]].value_
+// as the value associated with index i.
+//
+// To insert a new entry, set sparse_[i] to size_,
+// initialize dense_[size_], and then increment size_.
+//
+// To make the sparse array as efficient as possible for non-primitive types,
+// elements may or may not be destroyed when they are deleted from the sparse
+// array through a call to resize(). They immediately become inaccessible, but
+// they are only guaranteed to be destroyed when the SparseArray destructor is
+// called.
+//
+// A moved-from SparseArray will be empty.
+
+// Doing this simplifies the logic below.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "re2/pod_array.h"
+
+namespace duckdb_re2 {
+
+// Map from integer indices in [0, max_size()) to Value, backed by a sparse_
+// array of positions and a dense_ array of (index, value) pairs.
+// Not thread-safe.
+template<typename Value>
+class SparseArray {
+ public:
+  SparseArray();
+  explicit SparseArray(int max_size);
+  ~SparseArray();
+
+  // IndexValue pairs: exposed in SparseArray::iterator.
+  class IndexValue;
+
+  using iterator = IndexValue*;
+  using const_iterator = const IndexValue*;
+
+  SparseArray(const SparseArray& src);
+  SparseArray(SparseArray&& src);
+
+  SparseArray& operator=(const SparseArray& src);
+  SparseArray& operator=(SparseArray&& src);
+
+  // Number of entries currently stored.
+  int size() const { return size_; }
+
+  // Nonzero when no entries are stored.
+  int empty() const { return size_ == 0; }
+
+  // Iteration covers the first size() slots of the dense array.
+  iterator begin() { return dense_.data(); }
+  iterator end() { return begin() + size_; }
+
+  const_iterator begin() const { return dense_.data(); }
+  const_iterator end() const { return begin() + size_; }
+
+  // Change the maximum size of the array.
+  // Invalidates all iterators.
+  void resize(int new_max_size);
+
+  // Return the maximum size of the array, or 0 when no storage is held.
+  // Indices can be in the range [0, max_size).
+  int max_size() const {
+    if (dense_.data() == NULL)
+      return 0;
+    return dense_.size();
+  }
+
+  // Clear the array.  Only the entry count is reset.
+  void clear() { size_ = 0; }
+
+  // Check whether index i is in the array.
+  bool has_index(int i) const;
+
+  // Comparison function for sorting.
+  // Can sort the sparse array so that future iterations
+  // will visit indices in increasing order using
+  // std::sort(arr.begin(), arr.end(), arr.less);
+  static bool less(const IndexValue& a, const IndexValue& b);
+
+  // Set the value at index i to v, creating the entry if needed.
+  iterator set(int i, const Value& v) {
+    return SetInternal(true, i, v);
+  }
+
+  // Set the value at new index i to v.
+  // Fast but unsafe: only use if has_index(i) is false.
+  iterator set_new(int i, const Value& v) {
+    return SetInternal(false, i, v);
+  }
+
+  // Set the value at index i to v.
+  // Fast but unsafe: only use if has_index(i) is true.
+  iterator set_existing(int i, const Value& v) {
+    return SetExistingInternal(i, v);
+  }
+
+  // Get the value at index i.
+  // Fast but unsafe: only use if has_index(i) is true.
+  Value& get_existing(int i) {
+    assert(has_index(i));
+    IndexValue& entry = dense_[sparse_[i]];
+    return entry.value_;
+  }
+  const Value& get_existing(int i) const {
+    assert(has_index(i));
+    const IndexValue& entry = dense_[sparse_[i]];
+    return entry.value_;
+  }
+
+ private:
+  // Shared implementation of set() and set_new().  When allow_existing is
+  // false the index is created unconditionally (and asserted to be absent).
+  iterator SetInternal(bool allow_existing, int i, const Value& v) {
+    DebugCheckInvariants();
+    // The unsigned comparison also rejects negative indices.
+    const uint32_t index = static_cast<uint32_t>(i);
+    const uint32_t limit = static_cast<uint32_t>(max_size());
+    if (index >= limit) {
+      assert(false && "illegal index");
+      // Semantically, end() would be better here, but we already know
+      // the user did something stupid, so begin() insulates them from
+      // dereferencing an invalid pointer.
+      return begin();
+    }
+    if (allow_existing) {
+      if (!has_index(i))
+        create_index(i);
+    } else {
+      assert(!has_index(i));
+      create_index(i);
+    }
+    return SetExistingInternal(i, v);
+  }
+
+  // Stores v in the entry for index i, which must already exist, and
+  // returns an iterator to that entry.
+  iterator SetExistingInternal(int i, const Value& v) {
+    DebugCheckInvariants();
+    assert(has_index(i));
+    const int slot = sparse_[i];
+    dense_[slot].value_ = v;
+    DebugCheckInvariants();
+    return dense_.data() + slot;
+  }
+
+  // Add the index i to the array.
+  // Only use if has_index(i) is known to be false.
+  // Since it doesn't set the value associated with i,
+  // this function is private, only intended as a helper
+  // for other methods.
+  void create_index(int i);
+
+  // In debug mode, verify that some invariant properties of the class
+  // are being maintained. This is called at the end of the constructor
+  // and at the beginning and end of all public non-const member functions.
+  void DebugCheckInvariants() const;
+
+  // Initializes memory for elements [min, max) of sparse_ when building
+  // under MemorySanitizer or Valgrind; otherwise does nothing.
+  void MaybeInitializeMemory(int min, int max) {
+#if __has_feature(memory_sanitizer)
+    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
+#elif defined(RE2_ON_VALGRIND)
+    for (int k = min; k < max; k++) {
+      sparse_[k] = 0xababababU;
+    }
+#endif
+  }
+
+  int size_ = 0;
+  PODArray<int> sparse_;
+  PODArray<IndexValue> dense_;
+};
+
+// Default constructor: relies on the default member initializers, so size_
+// starts at 0.
+template<typename Value>
+SparseArray<Value>::SparseArray() = default;
+
+// Copy constructor: allocates arrays of src's maximum size and copies all
+// src.max_size() slots of both arrays, not just the first src.size_.
+template<typename Value>
+SparseArray<Value>::SparseArray(const SparseArray& src)
+    : size_(src.size_),
+      sparse_(src.max_size()),
+      dense_(src.max_size()) {
+  const int capacity = src.max_size();
+  std::copy_n(src.sparse_.data(), capacity, sparse_.data());
+  std::copy_n(src.dense_.data(), capacity, dense_.data());
+}
+
+// Move constructor: takes over src's arrays and entry count, then resets
+// src.size_ to 0 so that the moved-from array is empty.
+template<typename Value>
+SparseArray<Value>::SparseArray(SparseArray&& src)
+    : size_(src.size_),
+      sparse_(std::move(src.sparse_)),
+      dense_(std::move(src.dense_)) {
+  src.size_ = 0;
+}
+
+// Copy assignment: makes *this an independent copy of src, with the same
+// maximum size, the same entry count and the same slot contents.
+// Self-assignment is a no-op.  The copies are made into temporaries before
+// *this is touched, so *this is left unmodified if allocating or copying
+// throws.
+template<typename Value>
+SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
+  // Without this guard, replacing our own storage below would discard the
+  // very data we are about to copy from.
+  if (this == &src)
+    return *this;
+
+  const int capacity = src.max_size();
+
+  // Construct and fill these first for exception safety.
+  PODArray<int> a(capacity);
+  PODArray<IndexValue> b(capacity);
+  std::copy_n(src.sparse_.data(), capacity, a.data());
+  std::copy_n(src.dense_.data(), capacity, b.data());
+
+  // Commit.
+  size_ = src.size_;
+  sparse_ = std::move(a);
+  dense_ = std::move(b);
+  return *this;
+}
+
+// Move assignment: takes over src's arrays and entry count, then resets
+// src.size_ to 0 so that the moved-from array is empty.
+// NOTE(review): when src is *this, the final statement leaves size_ at 0.
+template<typename Value>
+SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
+  size_ = src.size_;
+  sparse_ = std::move(src.sparse_);
+  dense_ = std::move(src.dense_);
+  src.size_ = 0;
+  return *this;
+}
+
+// IndexValue pairs: exposed in SparseArray::iterator.
+// Each pair is one slot of the dense array: the index it belongs to and the
+// value stored for that index.
+template<typename Value>
+class SparseArray<Value>::IndexValue {
+ public:
+  // The index is read-only for users; the value can be modified in place.
+  int index() const { return index_; }
+  Value& value() { return value_; }
+  const Value& value() const { return value_; }
+
+ private:
+  // SparseArray writes both fields directly.
+  friend class SparseArray;
+  int index_;
+  Value value_;
+};
+
+// Change the maximum size of the array.
+// Growing reallocates both arrays and carries over every existing slot.
+// Shrinking keeps the storage and only caps the entry count at
+// new_max_size.
+// Invalidates all iterators.
+template<typename Value>
+void SparseArray<Value>::resize(int new_max_size) {
+  DebugCheckInvariants();
+  const int old_max_size = max_size();
+  if (old_max_size < new_max_size) {
+    // Construct these first for exception safety.
+    PODArray<int> grown_sparse(new_max_size);
+    PODArray<IndexValue> grown_dense(new_max_size);
+
+    std::copy_n(sparse_.data(), old_max_size, grown_sparse.data());
+    std::copy_n(dense_.data(), old_max_size, grown_dense.data());
+
+    sparse_ = std::move(grown_sparse);
+    dense_ = std::move(grown_dense);
+
+    // Only the newly added tail may need initializing.
+    MaybeInitializeMemory(old_max_size, new_max_size);
+  }
+  size_ = std::min(size_, new_max_size);
+  DebugCheckInvariants();
+}
+
+// Check whether index i is in the array.
+// Out-of-range indices trip an assert in debug builds and otherwise yield
+// false.
+template<typename Value>
+bool SparseArray<Value>::has_index(int i) const {
+  assert(i >= 0);
+  assert(i < max_size());
+  // One unsigned comparison rejects both negative and too-large indices.
+  const uint32_t index = static_cast<uint32_t>(i);
+  if (index >= static_cast<uint32_t>(max_size()))
+    return false;
+
+  // sparse_[i] may hold anything for an absent index, so it must first be
+  // shown to point into the live part of dense_.
+  // Unsigned comparison avoids checking sparse_[i] < 0.
+  const uint32_t slot = (uint32_t)sparse_[i];
+  if (slot >= (uint32_t)size_)
+    return false;
+
+  // The slot is live; it belongs to i only if it points back at i.
+  return dense_[sparse_[i]].index_ == i;
+}
+
+// Appends an entry for index i at the end of the dense array and points
+// sparse_[i] at it.  The entry's value is left for the caller to set.
+template<typename Value>
+void SparseArray<Value>::create_index(int i) {
+  assert(!has_index(i));
+  assert(size_ < max_size());
+  const int slot = size_;
+  sparse_[i] = slot;
+  dense_[slot].index_ = i;
+  size_ = slot + 1;
+}
+
+// Creates an empty array able to hold indices in [0, max_size).
+template<typename Value>
+SparseArray<Value>::SparseArray(int max_size)
+    : sparse_(max_size), dense_(max_size) {
+  // size_ is 0 here (default member initializer), so the whole of sparse_
+  // is covered.
+  MaybeInitializeMemory(0, max_size);
+  DebugCheckInvariants();
+}
+
+// Destructor: only re-checks the invariants; the member arrays release
+// their own storage.
+template<typename Value> SparseArray<Value>::~SparseArray() {
+  DebugCheckInvariants();
+}
+
+// Asserts that the entry count lies within [0, max_size()].
+template<typename Value>
+void SparseArray<Value>::DebugCheckInvariants() const {
+  assert(size_ >= 0);
+  assert(max_size() >= size_);
+}
+
+// Comparison function for sorting: orders entries by increasing index.
+template<typename Value>
+bool SparseArray<Value>::less(const IndexValue& a, const IndexValue& b) {
+  return b.index_ > a.index_;
+}
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_SPARSE_ARRAY_H_
--- a/external/duckdb/third_party/re2/re2/sparse_set.h
+++ b/external/duckdb/third_party/re2/re2/sparse_set.h
@@ -0,0 +1,264 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_SPARSE_SET_H_
+#define RE2_SPARSE_SET_H_
+
+// DESCRIPTION
+//
+// SparseSet(m) is a set of integers in [0, m).
+// It requires 2*sizeof(int)*m memory (a sparse and a dense array of m ints
+// each), but it provides fast iteration through the elements in the set and
+// fast clearing of the set.
+//
+// Insertion and deletion are constant time operations.
+//
+// Allocating the set is a constant time operation
+// when memory allocation is a constant time operation.
+//
+// Clearing the set is a constant time operation (unusual!).
+//
+// Iterating through the set is an O(n) operation, where n
+// is the number of items in the set (not O(m)).
+//
+// The set iterator visits entries in the order they were first
+// inserted into the set.  It is safe to add items to the set while
+// using an iterator: the iterator will visit indices added to the set
+// during the iteration, but will not re-visit indices that are
+// inserted again after being visited.  Thus SparseSet can be a
+// convenient implementation of a work queue.
+//
+// The SparseSet implementation is NOT thread-safe.  It is up to the
+// caller to make sure only one thread is accessing the set.  (Typically
+// these sets are temporary values and used in situations where speed is
+// important.)
+//
+// The SparseSet interface does not present all the usual STL bells and
+// whistles.
+//
+// Implemented with reference to Briggs & Torczon, An Efficient
+// Representation for Sparse Sets, ACM Letters on Programming Languages
+// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
+//
+// This is a specialization of sparse array; see sparse_array.h.
+
+// IMPLEMENTATION
+//
+// See sparse_array.h for implementation details.
+
+// Doing this simplifies the logic below.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "re2/pod_array.h"
+
+namespace duckdb_re2 {
+
+// Set of ints in [0, max_size()); Value is not used by the class body.
+template<typename Value> class SparseSetT {
+ public:
+  SparseSetT();
+  explicit SparseSetT(int max_size);
+  ~SparseSetT();
+
+  typedef int* iterator;              // points into dense_
+  typedef const int* const_iterator;  // points into dense_
+
+  // Return the number of entries in the set.
+  int size() const {
+    return size_;
+  }
+
+  // Indicate whether the set is empty.
+  bool empty() const {
+    return size_ == 0;
+  }
+
+  // Iterate over the set: the members are dense_[0, size_).
+  iterator begin() {
+    return dense_.data();
+  }
+  iterator end() {
+    return dense_.data() + size_;
+  }
+
+  const_iterator begin() const {
+    return dense_.data();
+  }
+  const_iterator end() const {
+    return dense_.data() + size_;
+  }
+
+  // Change the maximum size of the set.
+  // Invalidates all iterators.
+  void resize(int new_max_size);
+
+  // Return the maximum size of the set (0 while dense_ has no storage).
+  // Indices can be in the range [0, max_size).
+  int max_size() const {
+    if (dense_.data() != NULL)
+      return dense_.size();
+    else
+      return 0;
+  }
+
+  // Clear the set.  Only size_ is reset; the arrays are left untouched.
+  void clear() {
+    size_ = 0;
+  }
+
+  // Check whether index i is in the set.
+  bool contains(int i) const;
+
+  // Comparison function for sorting.
+  // Can sort the sparse set so that future iterations
+  // will visit indices in increasing order using
+  // std::sort(arr.begin(), arr.end(), arr.less);
+  static bool less(int a, int b);
+
+ public:
+  // Insert index i into the set; a no-op if i is already present.
+  iterator insert(int i) {
+    return InsertInternal(true, i);
+  }
+
+  // Insert index i into the set.
+  // Fast but unsafe: only use if contains(i) is false.
+  iterator insert_new(int i) {
+    return InsertInternal(false, i);
+  }
+
+ private:
+  iterator InsertInternal(bool allow_existing, int i) {
+    DebugCheckInvariants();
+    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
+      assert(false && "illegal index");
+      // Semantically, end() would be better here, but the index is
+      // already known to be invalid, so begin() insulates the caller
+      // from dereferencing an invalid pointer.
+      return begin();
+    }
+    if (!allow_existing) {
+      assert(!contains(i));
+      create_index(i);
+    } else {
+      if (!contains(i))
+        create_index(i);
+    }
+    DebugCheckInvariants();
+    return dense_.data() + sparse_[i];
+  }
+
+  // Add the index i to the set.
+  // Only use if contains(i) is known to be false.
+  // This function is private, only intended as a helper
+  // for other methods.
+  void create_index(int i);
+
+  // In debug mode, verify that 0 <= size_ <= max_size().  Called by the
+  // sized constructor and the destructor, and on entry to and exit from
+  // resize() and InsertInternal().
+  void DebugCheckInvariants() const;
+
+  // For MSan/Valgrind builds only: marks sparse_[min, max) as initialized.
+  void MaybeInitializeMemory(int min, int max) {
+#if __has_feature(memory_sanitizer)
+    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
+#elif defined(RE2_ON_VALGRIND)
+    for (int i = min; i < max; i++) {
+      sparse_[i] = 0xababababU;
+    }
+#endif
+  }
+
+  int size_ = 0;          // number of members; they occupy dense_[0, size_)
+  PODArray<int> sparse_;  // sparse_[i] is i's position in dense_, if present
+  PODArray<int> dense_;   // members in insertion order
+};
+
+// Default construction relies on the in-class initializer size_ = 0.
+template<typename Value> SparseSetT<Value>::SparseSetT() = default;
+
+// Change the maximum size of the set.
+// Invalidates all iterators.
+template<typename Value>
+void SparseSetT<Value>::resize(int new_max_size) {
+  DebugCheckInvariants();
+  const int old_max_size = max_size();
+  if (old_max_size < new_max_size) {
+    // Allocate both replacements before touching any member
+    // (exception safety).
+    PODArray<int> grown_sparse(new_max_size);
+    PODArray<int> grown_dense(new_max_size);
+
+    // Carry over every old slot, whether it is in use or not.
+    std::copy_n(sparse_.data(), old_max_size, grown_sparse.data());
+    std::copy_n(dense_.data(), old_max_size, grown_dense.data());
+
+    sparse_ = std::move(grown_sparse);
+    dense_ = std::move(grown_dense);
+    MaybeInitializeMemory(old_max_size, new_max_size);
+  }
+  // Nothing is reallocated when not growing; size_ is merely clamped.
+  size_ = std::min(size_, new_max_size);
+  DebugCheckInvariants();
+}
+
+// Check whether index i is in the set.
+template<typename Value> bool SparseSetT<Value>::contains(int i) const {
+  assert(i >= 0);
+  assert(i < max_size());
+  // A single unsigned compare rejects negative and too-large indices alike.
+  const uint32_t bound = static_cast<uint32_t>(max_size());
+  if (static_cast<uint32_t>(i) >= bound) return false;
+  // sparse_[i] counts only if it names a live dense_ slot that holds i;
+  // the unsigned compare also rules out a negative sparse_[i].
+  const uint32_t slot = static_cast<uint32_t>(sparse_[i]);
+  return slot < static_cast<uint32_t>(size_) && dense_[sparse_[i]] == i;
+}
+
+// Appends index i to the dense list; the asserts require i to be absent.
+template<typename Value> void SparseSetT<Value>::create_index(int i) {
+  assert(!contains(i));
+  assert(size_ < max_size());
+  const int slot = size_++;  // next free dense position
+  sparse_[i] = slot;
+  dense_[slot] = i;
+}
+
+template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
+    sparse_(max_size), dense_(max_size) {  // both arrays built with max_size
+  MaybeInitializeMemory(size_, max_size);  // size_ is 0 here (initializer)
+  DebugCheckInvariants();
+}
+
+template<typename Value> SparseSetT<Value>::~SparseSetT() {
+  DebugCheckInvariants();  // assert-only sanity check of size_ on teardown
+}
+
+template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
+  assert(0 <= size_);           // the entry count is never negative
+  assert(size_ <= max_size());  // and never exceeds the capacity
+}
+
+// Comparison function for sorting.
+template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
+  return a < b;  // plain ascending order of indices
+}
+
+typedef SparseSetT<void> SparseSet;
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_SPARSE_SET_H_
--- a/external/duckdb/third_party/re2/re2/stringpiece.cc
+++ b/external/duckdb/third_party/re2/re2/stringpiece.cc
@@ -0,0 +1,65 @@
+// Copyright 2004 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/stringpiece.h"
+
+#include <ostream>
+
+#include "util/util.h"
+
+namespace duckdb_re2 {
+
+const StringPiece::size_type StringPiece::npos;  // initialized in stringpiece.h
+
+StringPiece::size_type StringPiece::copy(char* buf, size_type n,
+                                         size_type pos) const {
+  const size_type ret = pos < size_ ? std::min(size_ - pos, n) : 0;
+  if (ret != 0) memcpy(buf, data_ + pos, ret);  // pos > size_ copies nothing
+  return ret;  // bytes written to buf; size_ - pos can no longer wrap
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+  // Clamp the start to the end, then the length to what remains after it.
+  const size_type start = std::min(pos, size_);
+  return StringPiece(data_ + start, std::min(n, size_ - start));
+}
+
+StringPiece::size_type StringPiece::find(const StringPiece& s,
+                                         size_type pos) const {
+  if (pos > size_) return npos;
+  const_pointer const lim = data_ + size_;
+  const_pointer hit = std::search(data_ + pos, lim, s.data_, s.data_ + s.size_);
+  // std::search signals a miss with `lim`; only an empty needle matches there.
+  return (hit != lim || s.size_ == 0) ? size_type(hit - data_) : npos;
+}
+
+StringPiece::size_type StringPiece::find(char c, size_type pos) const {
+  if (pos >= size_) return npos;  // also covers the empty piece
+  const void* hit = memchr(data_ + pos, c, size_ - pos);
+  return hit != NULL ? static_cast<const_pointer>(hit) - data_ : npos;
+}
+
+StringPiece::size_type StringPiece::rfind(const StringPiece& s,
+                                          size_type pos) const {
+  if (s.size_ > size_) return npos;
+  if (s.empty()) return std::min(pos, size_);  // empty needle matches at pos
+  // A match may start no later than min(pos, size_ - s.size_).
+  const_pointer stop = data_ + std::min(pos, size_ - s.size_) + s.size_;
+  const_pointer hit = std::find_end(data_, stop, s.data_, s.data_ + s.size_);
+  return hit == stop ? npos : hit - data_; }
+
+StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
+  // Scan backwards from min(pos, size_ - 1); pos + 1 is formed only when
+  // pos < size_, so the default pos == npos no longer wraps around to 0.
+  for (size_type i = pos < size_ ? pos + 1 : size_; i != 0;)
+    if (data_[--i] == c) return i;
+  return npos;  // not found, or the piece is empty
+}
+
+std::ostream& operator<<(std::ostream& o, const StringPiece& p) {
+  // Unformatted write of exactly p.size() bytes starting at p.data().
+  return o.write(p.data(), p.size());
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/stringpiece.h
+++ b/external/duckdb/third_party/re2/re2/stringpiece.h
@@ -0,0 +1,217 @@
+// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_STRINGPIECE_H_
+#define RE2_STRINGPIECE_H_
+
+#ifdef min
+#undef min
+#endif
+
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece.  The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+//
+// Arghh!  I wish C++ literals were "string".
+
+#include <stddef.h>
+#include <string.h>
+#include <algorithm>
+#include <iosfwd>
+#include <iterator>
+#include <string>
+#ifdef __cpp_lib_string_view
+#include <string_view>
+#endif
+
+namespace duckdb_re2 {
+
+class StringPiece {
+ public:
+  typedef std::char_traits<char> traits_type;
+  typedef char value_type;
+  typedef char* pointer;
+  typedef const char* const_pointer;
+  typedef char& reference;
+  typedef const char& const_reference;
+  typedef const char* const_iterator;
+  typedef const_iterator iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef const_reverse_iterator reverse_iterator;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  static const size_type npos = static_cast<size_type>(-1);
+
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected.  The bytes are referenced, never copied.
+  StringPiece()
+      : data_(NULL), size_(0) {}
+#ifdef __cpp_lib_string_view
+  StringPiece(const std::string_view& str)
+      : data_(str.data()), size_(str.size()) {}
+#endif
+  StringPiece(const std::string& str)
+      : data_(str.data()), size_(str.size()) {}
+  StringPiece(const char* str)
+      : data_(str), size_(str == NULL ? 0 : strlen(str)) {}
+  StringPiece(const char* str, size_type len)
+      : data_(str), size_(len) {}
+
+  const_iterator begin() const { return data_; }
+  const_iterator end() const { return data_ + size_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(data_ + size_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(data_);
+  }
+
+  size_type size() const { return size_; }
+  size_type length() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  const_reference operator[](size_type i) const { return data_[i]; }
+  const_pointer data() const { return data_; }
+
+  void remove_prefix(size_type n) {  // no bounds check on n
+    data_ += n;
+    size_ -= n;
+  }
+
+  void remove_suffix(size_type n) {  // no bounds check on n
+    size_ -= n;
+  }
+
+  void set(const char* str) {
+    data_ = str;
+    size_ = str == NULL ? 0 : strlen(str);
+  }
+
+  void set(const char* str, size_type len) {
+    data_ = str;
+    size_ = len;
+  }
+
+#ifdef __cpp_lib_string_view
+  // Converts to `std::basic_string_view`.
+  operator std::basic_string_view<char, traits_type>() const {
+    if (!data_) return {};
+    return std::basic_string_view<char, traits_type>(data_, size_);
+  }
+#endif
+
+  // Converts to `std::basic_string`.
+  template <typename A>
+  explicit operator std::basic_string<char, traits_type, A>() const {
+    if (!data_) return {};
+    return std::basic_string<char, traits_type, A>(data_, size_);
+  }
+
+  std::string as_string() const {
+    return data_ ? std::string(data_, size_) : std::string();
+  }
+
+  // We also define ToString() here, since many other string-like
+  // interfaces name the routine that converts to a C++ string
+  // "ToString"; "as_string()" stays for existing code.  Like the
+  // conversion operators above, both return an empty string when
+  // data_ is null instead of handing null to std::string.
+  std::string ToString() const {
+    return data_ ? std::string(data_, size_) : std::string();
+  }
+
+  void CopyToString(std::string* target) const {
+    if (data_) target->assign(data_, size_); else target->clear();
+  }
+
+  void AppendToString(std::string* target) const {
+    if (data_) target->append(data_, size_);
+  }
+
+  size_type copy(char* buf, size_type n, size_type pos = 0) const;
+  StringPiece substr(size_type pos = 0, size_type n = npos) const;
+
+  int compare(const StringPiece& x) const {
+    size_type min_size = std::min(size(), x.size());
+    if (min_size > 0) {
+      int r = memcmp(data(), x.data(), min_size);
+      if (r < 0) return -1;
+      if (r > 0) return 1;
+    }
+    if (size() < x.size()) return -1;
+    if (size() > x.size()) return 1;
+    return 0;
+  }
+
+  // Does "this" start with "x"?
+  bool starts_with(const StringPiece& x) const {
+    return x.empty() ||
+           (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
+  }
+
+  // Does "this" end with "x"?
+  bool ends_with(const StringPiece& x) const {
+    return x.empty() ||
+           (size() >= x.size() &&
+            memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
+  }
+
+  bool contains(const StringPiece& s) const {
+    return find(s) != npos;
+  }
+
+  size_type find(const StringPiece& s, size_type pos = 0) const;
+  size_type find(char c, size_type pos = 0) const;
+  size_type rfind(const StringPiece& s, size_type pos = npos) const;
+  size_type rfind(char c, size_type pos = npos) const;
+
+ private:
+  const_pointer data_;  // first byte of the piece; may be NULL
+  size_type size_;      // number of bytes in the piece
+};
+
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+  if (x.size() != y.size()) return false;
+  // Same buffer, or nothing to compare: equal without calling memcmp.
+  if (x.data() == y.data() || x.empty()) return true;
+  return memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+  return !(x == y);  // exact negation of operator==
+}
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  // compare() orders by the common prefix first and by length second,
+  // and skips memcmp when the common prefix is empty.
+  return x.compare(y) < 0;
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+  return x.compare(y) > 0;  // same ordering as y < x
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+  return x.compare(y) <= 0;  // same ordering as !(x > y)
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+  return x.compare(y) >= 0;  // same ordering as !(x < y)
+}
+
+// Allow StringPiece to be logged.
+std::ostream& operator<<(std::ostream& o, const StringPiece& p);
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_STRINGPIECE_H_
--- a/external/duckdb/third_party/re2/re2/tostring.cc
+++ b/external/duckdb/third_party/re2/re2/tostring.cc
@@ -0,0 +1,351 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Format a regular expression structure as a string.
+// Tested by parse_test.cc
+
+#include <string.h>
+#include <string>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace duckdb_re2 {
+
+enum {
+  PrecAtom,       // context handed to the operand of * + ? {n,m}
+  PrecUnary,      // below this, * + ? {n,m} are wrapped in (?:...)
+  PrecConcat,     // below this, concatenations are wrapped in (?:...)
+  PrecAlternate,  // below this, alternations are wrapped in (?:...)
+  PrecEmpty,      // below this, an empty match is printed as (?:)
+  PrecParen,      // context handed to the child of a capture group
+  PrecToplevel,   // context of the whole expression in Regexp::ToString
+};
+
+// Helper function.  See description below.
+static void AppendCCRange(std::string* t, Rune lo, Rune hi);
+
+// Walker to generate string in s_.
+// The arg pointers are actually integers giving the
+// context precedence.
+// The child_args are always NULL.
+class ToStringWalker : public Regexp::Walker<int> {
+ public:
+  explicit ToStringWalker(std::string* t) : t_(t) {}  // t: output string
+  // Walker hooks; PreVisit and PostVisit are defined later in this file.
+  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
+  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
+                        int* child_args, int nchild_args);
+  virtual int ShortVisit(Regexp* re, int parent_arg) {
+    return 0;  // appends nothing
+  }
+
+ private:
+  std::string* t_;  // The string the walker appends to.
+  // Copying is disabled.
+  ToStringWalker(const ToStringWalker&) = delete;
+  ToStringWalker& operator=(const ToStringWalker&) = delete;
+};
+
+std::string Regexp::ToString() {
+  std::string out;
+  ToStringWalker walker(&out);
+  // 100000 presumably caps the walk's visit count -- confirm in walker-inl.h.
+  walker.WalkExponential(this, PrecToplevel, 100000);
+  if (walker.stopped_early()) out.append(" [truncated]");
+  return out;
+}
+
+#define ToString DontCallToString  // Avoid accidental recursion.
+
+// Visits re before children are processed.
+// Appends ( if needed and passes new precedence to children.
+int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
+  // parent_arg is the precedence of the enclosing context; an operator whose
+  // own precedence is above that context opens a non-capturing group here
+  // (PostVisit closes it).  The return value is the context for children.
+  const int context = parent_arg;
+
+  switch (re->op()) {
+    case kRegexpConcat:
+    case kRegexpLiteralString:
+      if (context < PrecConcat)
+        t_->append("(?:");
+      return PrecConcat;
+
+    case kRegexpAlternate:
+      if (context < PrecAlternate)
+        t_->append("(?:");
+      return PrecAlternate;
+
+    case kRegexpCapture:
+      // Capturing groups always print their own parenthesis, followed by
+      // ?P<name> when the group is named.
+      t_->append("(");
+      if (re->cap() == 0)
+        LOG(DFATAL) << "kRegexpCapture cap() == 0";
+      if (re->name()) {
+        t_->append("?P<");
+        t_->append(*re->name());
+        t_->append(">");
+      }
+      return PrecParen;
+
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+    case kRegexpRepeat:
+      if (context < PrecUnary)
+        t_->append("(?:");
+      // The operand is printed at PrecAtom rather than PrecUnary because
+      // PCRE treats two unary ops in a row as a parse error.
+      return PrecAtom;
+
+    case kRegexpNoMatch:
+    case kRegexpEmptyMatch:
+    case kRegexpLiteral:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpBeginLine:
+    case kRegexpEndLine:
+    case kRegexpBeginText:
+    case kRegexpEndText:
+    case kRegexpWordBoundary:
+    case kRegexpNoWordBoundary:
+    case kRegexpCharClass:
+    case kRegexpHaveMatch:
+      // Nothing to open here; PostVisit prints these.
+      break;
+  }
+
+  return PrecAtom;
+}
+
+static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
+  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
+    // Regexp metacharacter: emit it behind a backslash.
+    *t += '\\';
+    *t += static_cast<char>(r);
+  } else if (foldcase && 'a' <= r && r <= 'z') {
+    // Case-folded ASCII letter: spell it as the two-member class [Xx].
+    const char upper = static_cast<char>(r - ('a' - 'A'));
+    const char cls[] = {'[', upper, static_cast<char>(r), ']'};
+    t->append(cls, sizeof cls);
+  } else {
+    AppendCCRange(t, r, r);
+  }
+}
+
+// Visits re after children are processed.
+// For childless regexps, all the work is done here.
+// For regexps with children, append any unary suffixes or ).
+int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
+                              int* child_args, int nchild_args) {
+  int prec = parent_arg;  // precedence of the enclosing context
+  switch (re->op()) {
+    case kRegexpNoMatch:
+      // There's no simple symbol for "no match", but
+      // [^0-Runemax] excludes everything.
+      t_->append("[^\\x00-\\x{10ffff}]");
+      break;
+
+    case kRegexpEmptyMatch:
+      // Append (?:) to make empty string visible,
+      // unless this is already being parenthesized.
+      if (prec < PrecEmpty)
+        t_->append("(?:)");
+      break;
+
+    case kRegexpLiteral:
+      AppendLiteral(t_, re->rune(),
+                    (re->parse_flags() & Regexp::FoldCase) != 0);
+      break;
+
+    case kRegexpLiteralString:
+      for (int i = 0; i < re->nrunes(); i++)
+        AppendLiteral(t_, re->runes()[i],
+                      (re->parse_flags() & Regexp::FoldCase) != 0);
+      if (prec < PrecConcat)
+        t_->append(")");
+      break;
+
+    case kRegexpConcat:
+      if (prec < PrecConcat)
+        t_->append(")");
+      break;
+
+    case kRegexpAlternate:
+      // Every child appended a trailing |, so drop the last one.  An empty
+      // or differently-terminated string is reported, printing its text.
+      if (!t_->empty() && t_->back() == '|')
+        t_->pop_back();
+      else
+        LOG(DFATAL) << "Bad final char: " << *t_;
+      if (prec < PrecAlternate)
+        t_->append(")");
+      break;
+
+    case kRegexpStar:
+      t_->append("*");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpPlus:
+      t_->append("+");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpQuest:
+      t_->append("?");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpRepeat:
+      if (re->max() == -1)
+        t_->append(StringPrintf("{%d,}", re->min()));
+      else if (re->min() == re->max())
+        t_->append(StringPrintf("{%d}", re->min()));
+      else
+        t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpAnyChar:
+      t_->append(".");
+      break;
+
+    case kRegexpAnyByte:
+      t_->append("\\C");
+      break;
+
+    case kRegexpBeginLine:
+      t_->append("^");
+      break;
+
+    case kRegexpEndLine:
+      t_->append("$");
+      break;
+
+    case kRegexpBeginText:
+      t_->append("(?-m:^)");
+      break;
+
+    case kRegexpEndText:
+      if (re->parse_flags() & Regexp::WasDollar)
+        t_->append("(?-m:$)");
+      else
+        t_->append("\\z");
+      break;
+
+    case kRegexpWordBoundary:
+      t_->append("\\b");
+      break;
+
+    case kRegexpNoWordBoundary:
+      t_->append("\\B");
+      break;
+
+    case kRegexpCharClass: {
+      if (re->cc()->size() == 0) {
+        t_->append("[^\\x00-\\x{10ffff}]");
+        break;
+      }
+      t_->append("[");
+      // Heuristic: show class as negated if it contains the
+      // non-character 0xFFFE and yet somehow isn't full.
+      CharClass* cc = re->cc();
+      if (cc->Contains(0xFFFE) && !cc->full()) {
+        cc = cc->Negate();
+        t_->append("^");
+      }
+      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
+        AppendCCRange(t_, i->lo, i->hi);
+      if (cc != re->cc())
+        cc->Delete();
+      t_->append("]");
+      break;
+    }
+
+    case kRegexpCapture:
+      t_->append(")");
+      break;
+
+    case kRegexpHaveMatch:
+      // There's no syntax accepted by the parser to generate
+      // this node (it is generated by RE2::Set) so make something
+      // up that is readable but won't compile.
+      t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id()));
+      break;
+  }
+
+  // If the parent is an alternation, append the | for it.
+  if (prec == PrecAlternate)
+    t_->append("|");
+
+  return 0;
+}
+
+// Appends a rune for use in a character class to the string t.
+static void AppendCCChar(std::string* t, Rune r) {
+  // Printable ASCII is written as itself, behind a backslash when the
+  // character is one of [ ] ^ - \.
+  if (0x20 <= r && r <= 0x7E) {
+    if (strchr("[]^-\\", r) != NULL)
+      t->push_back('\\');
+    t->push_back(static_cast<char>(r));
+    return;
+  }
+
+  // Four control characters have a mnemonic escape of their own:
+  // carriage return, tab, newline and form feed.
+  const char* mnemonic = NULL;
+  if (r == '\r')
+    mnemonic = "\\r";
+  else if (r == '\t')
+    mnemonic = "\\t";
+  else if (r == '\n')
+    mnemonic = "\\n";
+  else if (r == '\f')
+    mnemonic = "\\f";
+  if (mnemonic != NULL) {
+    t->append(mnemonic);
+    return;
+  }
+
+  // Anything else is hexadecimal: \xNN below 0x100, \x{N...} otherwise.
+  const int code = static_cast<int>(r);
+  if (r < 0x100) {
+    *t += StringPrintf("\\x%02x", code);
+  } else {
+    *t += StringPrintf("\\x{%x}", code);
+  }
+}
+
+// Emits lo alone when lo == hi, "lo-hi" when lo < hi, and nothing when
+// lo > hi; both ends are written through AppendCCChar.
+static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
+  if (hi < lo) return;  // inverted range: nothing to emit
+  AppendCCChar(t, lo);
+  if (lo == hi) return;  // single rune: no "-hi" part
+  t->append("-");
+  AppendCCChar(t, hi);
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/re2/unicode_casefold.cc
+++ b/external/duckdb/third_party/re2/re2/unicode_casefold.cc
@@ -0,0 +1,596 @@
+
+// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
+// make_unicode_casefold.py >unicode_casefold.cc
+
+#include "re2/unicode_casefold.h"
+
+namespace duckdb_re2 {
+
+
+// 1424 groups, 2878 pairs, 367 ranges
+const CaseFold unicode_casefold[] = {
+	{ 65, 90, 32 },
+	{ 97, 106, -32 },
+	{ 107, 107, 8383 },
+	{ 108, 114, -32 },
+	{ 115, 115, 268 },
+	{ 116, 122, -32 },
+	{ 181, 181, 743 },
+	{ 192, 214, 32 },
+	{ 216, 222, 32 },
+	{ 223, 223, 7615 },
+	{ 224, 228, -32 },
+	{ 229, 229, 8262 },
+	{ 230, 246, -32 },
+	{ 248, 254, -32 },
+	{ 255, 255, 121 },
+	{ 256, 303, EvenOdd },
+	{ 306, 311, EvenOdd },
+	{ 313, 328, OddEven },
+	{ 330, 375, EvenOdd },
+	{ 376, 376, -121 },
+	{ 377, 382, OddEven },
+	{ 383, 383, -300 },
+	{ 384, 384, 195 },
+	{ 385, 385, 210 },
+	{ 386, 389, EvenOdd },
+	{ 390, 390, 206 },
+	{ 391, 392, OddEven },
+	{ 393, 394, 205 },
+	{ 395, 396, OddEven },
+	{ 398, 398, 79 },
+	{ 399, 399, 202 },
+	{ 400, 400, 203 },
+	{ 401, 402, OddEven },
+	{ 403, 403, 205 },
+	{ 404, 404, 207 },
+	{ 405, 405, 97 },
+	{ 406, 406, 211 },
+	{ 407, 407, 209 },
+	{ 408, 409, EvenOdd },
+	{ 410, 410, 163 },
+	{ 412, 412, 211 },
+	{ 413, 413, 213 },
+	{ 414, 414, 130 },
+	{ 415, 415, 214 },
+	{ 416, 421, EvenOdd },
+	{ 422, 422, 218 },
+	{ 423, 424, OddEven },
+	{ 425, 425, 218 },
+	{ 428, 429, EvenOdd },
+	{ 430, 430, 218 },
+	{ 431, 432, OddEven },
+	{ 433, 434, 217 },
+	{ 435, 438, OddEven },
+	{ 439, 439, 219 },
+	{ 440, 441, EvenOdd },
+	{ 444, 445, EvenOdd },
+	{ 447, 447, 56 },
+	{ 452, 452, EvenOdd },
+	{ 453, 453, OddEven },
+	{ 454, 454, -2 },
+	{ 455, 455, OddEven },
+	{ 456, 456, EvenOdd },
+	{ 457, 457, -2 },
+	{ 458, 458, EvenOdd },
+	{ 459, 459, OddEven },
+	{ 460, 460, -2 },
+	{ 461, 476, OddEven },
+	{ 477, 477, -79 },
+	{ 478, 495, EvenOdd },
+	{ 497, 497, OddEven },
+	{ 498, 498, EvenOdd },
+	{ 499, 499, -2 },
+	{ 500, 501, EvenOdd },
+	{ 502, 502, -97 },
+	{ 503, 503, -56 },
+	{ 504, 543, EvenOdd },
+	{ 544, 544, -130 },
+	{ 546, 563, EvenOdd },
+	{ 570, 570, 10795 },
+	{ 571, 572, OddEven },
+	{ 573, 573, -163 },
+	{ 574, 574, 10792 },
+	{ 575, 576, 10815 },
+	{ 577, 578, OddEven },
+	{ 579, 579, -195 },
+	{ 580, 580, 69 },
+	{ 581, 581, 71 },
+	{ 582, 591, EvenOdd },
+	{ 592, 592, 10783 },
+	{ 593, 593, 10780 },
+	{ 594, 594, 10782 },
+	{ 595, 595, -210 },
+	{ 596, 596, -206 },
+	{ 598, 599, -205 },
+	{ 601, 601, -202 },
+	{ 603, 603, -203 },
+	{ 604, 604, 42319 },
+	{ 608, 608, -205 },
+	{ 609, 609, 42315 },
+	{ 611, 611, -207 },
+	{ 613, 613, 42280 },
+	{ 614, 614, 42308 },
+	{ 616, 616, -209 },
+	{ 617, 617, -211 },
+	{ 618, 618, 42308 },
+	{ 619, 619, 10743 },
+	{ 620, 620, 42305 },
+	{ 623, 623, -211 },
+	{ 625, 625, 10749 },
+	{ 626, 626, -213 },
+	{ 629, 629, -214 },
+	{ 637, 637, 10727 },
+	{ 640, 640, -218 },
+	{ 642, 642, 42307 },
+	{ 643, 643, -218 },
+	{ 647, 647, 42282 },
+	{ 648, 648, -218 },
+	{ 649, 649, -69 },
+	{ 650, 651, -217 },
+	{ 652, 652, -71 },
+	{ 658, 658, -219 },
+	{ 669, 669, 42261 },
+	{ 670, 670, 42258 },
+	{ 837, 837, 84 },
+	{ 880, 883, EvenOdd },
+	{ 886, 887, EvenOdd },
+	{ 891, 893, 130 },
+	{ 895, 895, 116 },
+	{ 902, 902, 38 },
+	{ 904, 906, 37 },
+	{ 908, 908, 64 },
+	{ 910, 911, 63 },
+	{ 913, 929, 32 },
+	{ 931, 931, 31 },
+	{ 932, 939, 32 },
+	{ 940, 940, -38 },
+	{ 941, 943, -37 },
+	{ 945, 945, -32 },
+	{ 946, 946, 30 },
+	{ 947, 948, -32 },
+	{ 949, 949, 64 },
+	{ 950, 951, -32 },
+	{ 952, 952, 25 },
+	{ 953, 953, 7173 },
+	{ 954, 954, 54 },
+	{ 955, 955, -32 },
+	{ 956, 956, -775 },
+	{ 957, 959, -32 },
+	{ 960, 960, 22 },
+	{ 961, 961, 48 },
+	{ 962, 962, EvenOdd },
+	{ 963, 965, -32 },
+	{ 966, 966, 15 },
+	{ 967, 968, -32 },
+	{ 969, 969, 7517 },
+	{ 970, 971, -32 },
+	{ 972, 972, -64 },
+	{ 973, 974, -63 },
+	{ 975, 975, 8 },
+	{ 976, 976, -62 },
+	{ 977, 977, 35 },
+	{ 981, 981, -47 },
+	{ 982, 982, -54 },
+	{ 983, 983, -8 },
+	{ 984, 1007, EvenOdd },
+	{ 1008, 1008, -86 },
+	{ 1009, 1009, -80 },
+	{ 1010, 1010, 7 },
+	{ 1011, 1011, -116 },
+	{ 1012, 1012, -92 },
+	{ 1013, 1013, -96 },
+	{ 1015, 1016, OddEven },
+	{ 1017, 1017, -7 },
+	{ 1018, 1019, EvenOdd },
+	{ 1021, 1023, -130 },
+	{ 1024, 1039, 80 },
+	{ 1040, 1071, 32 },
+	{ 1072, 1073, -32 },
+	{ 1074, 1074, 6222 },
+	{ 1075, 1075, -32 },
+	{ 1076, 1076, 6221 },
+	{ 1077, 1085, -32 },
+	{ 1086, 1086, 6212 },
+	{ 1087, 1088, -32 },
+	{ 1089, 1090, 6210 },
+	{ 1091, 1097, -32 },
+	{ 1098, 1098, 6204 },
+	{ 1099, 1103, -32 },
+	{ 1104, 1119, -80 },
+	{ 1120, 1122, EvenOdd },
+	{ 1123, 1123, 6180 },
+	{ 1124, 1153, EvenOdd },
+	{ 1162, 1215, EvenOdd },
+	{ 1216, 1216, 15 },
+	{ 1217, 1230, OddEven },
+	{ 1231, 1231, -15 },
+	{ 1232, 1327, EvenOdd },
+	{ 1329, 1366, 48 },
+	{ 1377, 1414, -48 },
+	{ 4256, 4293, 7264 },
+	{ 4295, 4295, 7264 },
+	{ 4301, 4301, 7264 },
+	{ 4304, 4346, 3008 },
+	{ 4349, 4351, 3008 },
+	{ 5024, 5103, 38864 },
+	{ 5104, 5109, 8 },
+	{ 5112, 5117, -8 },
+	{ 7296, 7296, -6254 },
+	{ 7297, 7297, -6253 },
+	{ 7298, 7298, -6244 },
+	{ 7299, 7299, -6242 },
+	{ 7300, 7300, EvenOdd },
+	{ 7301, 7301, -6243 },
+	{ 7302, 7302, -6236 },
+	{ 7303, 7303, -6181 },
+	{ 7304, 7304, 35266 },
+	{ 7312, 7354, -3008 },
+	{ 7357, 7359, -3008 },
+	{ 7545, 7545, 35332 },
+	{ 7549, 7549, 3814 },
+	{ 7566, 7566, 35384 },
+	{ 7680, 7776, EvenOdd },
+	{ 7777, 7777, 58 },
+	{ 7778, 7829, EvenOdd },
+	{ 7835, 7835, -59 },
+	{ 7838, 7838, -7615 },
+	{ 7840, 7935, EvenOdd },
+	{ 7936, 7943, 8 },
+	{ 7944, 7951, -8 },
+	{ 7952, 7957, 8 },
+	{ 7960, 7965, -8 },
+	{ 7968, 7975, 8 },
+	{ 7976, 7983, -8 },
+	{ 7984, 7991, 8 },
+	{ 7992, 7999, -8 },
+	{ 8000, 8005, 8 },
+	{ 8008, 8013, -8 },
+	{ 8017, 8017, 8 },
+	{ 8019, 8019, 8 },
+	{ 8021, 8021, 8 },
+	{ 8023, 8023, 8 },
+	{ 8025, 8025, -8 },
+	{ 8027, 8027, -8 },
+	{ 8029, 8029, -8 },
+	{ 8031, 8031, -8 },
+	{ 8032, 8039, 8 },
+	{ 8040, 8047, -8 },
+	{ 8048, 8049, 74 },
+	{ 8050, 8053, 86 },
+	{ 8054, 8055, 100 },
+	{ 8056, 8057, 128 },
+	{ 8058, 8059, 112 },
+	{ 8060, 8061, 126 },
+	{ 8064, 8071, 8 },
+	{ 8072, 8079, -8 },
+	{ 8080, 8087, 8 },
+	{ 8088, 8095, -8 },
+	{ 8096, 8103, 8 },
+	{ 8104, 8111, -8 },
+	{ 8112, 8113, 8 },
+	{ 8115, 8115, 9 },
+	{ 8120, 8121, -8 },
+	{ 8122, 8123, -74 },
+	{ 8124, 8124, -9 },
+	{ 8126, 8126, -7289 },
+	{ 8131, 8131, 9 },
+	{ 8136, 8139, -86 },
+	{ 8140, 8140, -9 },
+	{ 8144, 8145, 8 },
+	{ 8152, 8153, -8 },
+	{ 8154, 8155, -100 },
+	{ 8160, 8161, 8 },
+	{ 8165, 8165, 7 },
+	{ 8168, 8169, -8 },
+	{ 8170, 8171, -112 },
+	{ 8172, 8172, -7 },
+	{ 8179, 8179, 9 },
+	{ 8184, 8185, -128 },
+	{ 8186, 8187, -126 },
+	{ 8188, 8188, -9 },
+	{ 8486, 8486, -7549 },
+	{ 8490, 8490, -8415 },
+	{ 8491, 8491, -8294 },
+	{ 8498, 8498, 28 },
+	{ 8526, 8526, -28 },
+	{ 8544, 8559, 16 },
+	{ 8560, 8575, -16 },
+	{ 8579, 8580, OddEven },
+	{ 9398, 9423, 26 },
+	{ 9424, 9449, -26 },
+	{ 11264, 11311, 48 },
+	{ 11312, 11359, -48 },
+	{ 11360, 11361, EvenOdd },
+	{ 11362, 11362, -10743 },
+	{ 11363, 11363, -3814 },
+	{ 11364, 11364, -10727 },
+	{ 11365, 11365, -10795 },
+	{ 11366, 11366, -10792 },
+	{ 11367, 11372, OddEven },
+	{ 11373, 11373, -10780 },
+	{ 11374, 11374, -10749 },
+	{ 11375, 11375, -10783 },
+	{ 11376, 11376, -10782 },
+	{ 11378, 11379, EvenOdd },
+	{ 11381, 11382, OddEven },
+	{ 11390, 11391, -10815 },
+	{ 11392, 11491, EvenOdd },
+	{ 11499, 11502, OddEven },
+	{ 11506, 11507, EvenOdd },
+	{ 11520, 11557, -7264 },
+	{ 11559, 11559, -7264 },
+	{ 11565, 11565, -7264 },
+	{ 42560, 42570, EvenOdd },
+	{ 42571, 42571, -35267 },
+	{ 42572, 42605, EvenOdd },
+	{ 42624, 42651, EvenOdd },
+	{ 42786, 42799, EvenOdd },
+	{ 42802, 42863, EvenOdd },
+	{ 42873, 42876, OddEven },
+	{ 42877, 42877, -35332 },
+	{ 42878, 42887, EvenOdd },
+	{ 42891, 42892, OddEven },
+	{ 42893, 42893, -42280 },
+	{ 42896, 42899, EvenOdd },
+	{ 42900, 42900, 48 },
+	{ 42902, 42921, EvenOdd },
+	{ 42922, 42922, -42308 },
+	{ 42923, 42923, -42319 },
+	{ 42924, 42924, -42315 },
+	{ 42925, 42925, -42305 },
+	{ 42926, 42926, -42308 },
+	{ 42928, 42928, -42258 },
+	{ 42929, 42929, -42282 },
+	{ 42930, 42930, -42261 },
+	{ 42931, 42931, 928 },
+	{ 42932, 42947, EvenOdd },
+	{ 42948, 42948, -48 },
+	{ 42949, 42949, -42307 },
+	{ 42950, 42950, -35384 },
+	{ 42951, 42954, OddEven },
+	{ 42960, 42961, EvenOdd },
+	{ 42966, 42969, EvenOdd },
+	{ 42997, 42998, OddEven },
+	{ 43859, 43859, -928 },
+	{ 43888, 43967, -38864 },
+	{ 65313, 65338, 32 },
+	{ 65345, 65370, -32 },
+	{ 66560, 66599, 40 },
+	{ 66600, 66639, -40 },
+	{ 66736, 66771, 40 },
+	{ 66776, 66811, -40 },
+	{ 66928, 66938, 39 },
+	{ 66940, 66954, 39 },
+	{ 66956, 66962, 39 },
+	{ 66964, 66965, 39 },
+	{ 66967, 66977, -39 },
+	{ 66979, 66993, -39 },
+	{ 66995, 67001, -39 },
+	{ 67003, 67004, -39 },
+	{ 68736, 68786, 64 },
+	{ 68800, 68850, -64 },
+	{ 71840, 71871, 32 },
+	{ 71872, 71903, -32 },
+	{ 93760, 93791, 32 },
+	{ 93792, 93823, -32 },
+	{ 125184, 125217, 34 },
+	{ 125218, 125251, -34 },
+};
+const int num_unicode_casefold = 367;
+
+// 1424 groups, 1454 pairs, 205 ranges
+const CaseFold unicode_tolower[] = {
+	{ 65, 90, 32 },
+	{ 181, 181, 775 },
+	{ 192, 214, 32 },
+	{ 216, 222, 32 },
+	{ 256, 302, EvenOddSkip },
+	{ 306, 310, EvenOddSkip },
+	{ 313, 327, OddEvenSkip },
+	{ 330, 374, EvenOddSkip },
+	{ 376, 376, -121 },
+	{ 377, 381, OddEvenSkip },
+	{ 383, 383, -268 },
+	{ 385, 385, 210 },
+	{ 386, 388, EvenOddSkip },
+	{ 390, 390, 206 },
+	{ 391, 391, OddEven },
+	{ 393, 394, 205 },
+	{ 395, 395, OddEven },
+	{ 398, 398, 79 },
+	{ 399, 399, 202 },
+	{ 400, 400, 203 },
+	{ 401, 401, OddEven },
+	{ 403, 403, 205 },
+	{ 404, 404, 207 },
+	{ 406, 406, 211 },
+	{ 407, 407, 209 },
+	{ 408, 408, EvenOdd },
+	{ 412, 412, 211 },
+	{ 413, 413, 213 },
+	{ 415, 415, 214 },
+	{ 416, 420, EvenOddSkip },
+	{ 422, 422, 218 },
+	{ 423, 423, OddEven },
+	{ 425, 425, 218 },
+	{ 428, 428, EvenOdd },
+	{ 430, 430, 218 },
+	{ 431, 431, OddEven },
+	{ 433, 434, 217 },
+	{ 435, 437, OddEvenSkip },
+	{ 439, 439, 219 },
+	{ 440, 440, EvenOdd },
+	{ 444, 444, EvenOdd },
+	{ 452, 452, 2 },
+	{ 453, 453, OddEven },
+	{ 455, 455, 2 },
+	{ 456, 456, EvenOdd },
+	{ 458, 458, 2 },
+	{ 459, 475, OddEvenSkip },
+	{ 478, 494, EvenOddSkip },
+	{ 497, 497, 2 },
+	{ 498, 500, EvenOddSkip },
+	{ 502, 502, -97 },
+	{ 503, 503, -56 },
+	{ 504, 542, EvenOddSkip },
+	{ 544, 544, -130 },
+	{ 546, 562, EvenOddSkip },
+	{ 570, 570, 10795 },
+	{ 571, 571, OddEven },
+	{ 573, 573, -163 },
+	{ 574, 574, 10792 },
+	{ 577, 577, OddEven },
+	{ 579, 579, -195 },
+	{ 580, 580, 69 },
+	{ 581, 581, 71 },
+	{ 582, 590, EvenOddSkip },
+	{ 837, 837, 116 },
+	{ 880, 882, EvenOddSkip },
+	{ 886, 886, EvenOdd },
+	{ 895, 895, 116 },
+	{ 902, 902, 38 },
+	{ 904, 906, 37 },
+	{ 908, 908, 64 },
+	{ 910, 911, 63 },
+	{ 913, 929, 32 },
+	{ 931, 939, 32 },
+	{ 962, 962, EvenOdd },
+	{ 975, 975, 8 },
+	{ 976, 976, -30 },
+	{ 977, 977, -25 },
+	{ 981, 981, -15 },
+	{ 982, 982, -22 },
+	{ 984, 1006, EvenOddSkip },
+	{ 1008, 1008, -54 },
+	{ 1009, 1009, -48 },
+	{ 1012, 1012, -60 },
+	{ 1013, 1013, -64 },
+	{ 1015, 1015, OddEven },
+	{ 1017, 1017, -7 },
+	{ 1018, 1018, EvenOdd },
+	{ 1021, 1023, -130 },
+	{ 1024, 1039, 80 },
+	{ 1040, 1071, 32 },
+	{ 1120, 1152, EvenOddSkip },
+	{ 1162, 1214, EvenOddSkip },
+	{ 1216, 1216, 15 },
+	{ 1217, 1229, OddEvenSkip },
+	{ 1232, 1326, EvenOddSkip },
+	{ 1329, 1366, 48 },
+	{ 4256, 4293, 7264 },
+	{ 4295, 4295, 7264 },
+	{ 4301, 4301, 7264 },
+	{ 5112, 5117, -8 },
+	{ 7296, 7296, -6222 },
+	{ 7297, 7297, -6221 },
+	{ 7298, 7298, -6212 },
+	{ 7299, 7300, -6210 },
+	{ 7301, 7301, -6211 },
+	{ 7302, 7302, -6204 },
+	{ 7303, 7303, -6180 },
+	{ 7304, 7304, 35267 },
+	{ 7312, 7354, -3008 },
+	{ 7357, 7359, -3008 },
+	{ 7680, 7828, EvenOddSkip },
+	{ 7835, 7835, -58 },
+	{ 7838, 7838, -7615 },
+	{ 7840, 7934, EvenOddSkip },
+	{ 7944, 7951, -8 },
+	{ 7960, 7965, -8 },
+	{ 7976, 7983, -8 },
+	{ 7992, 7999, -8 },
+	{ 8008, 8013, -8 },
+	{ 8025, 8025, -8 },
+	{ 8027, 8027, -8 },
+	{ 8029, 8029, -8 },
+	{ 8031, 8031, -8 },
+	{ 8040, 8047, -8 },
+	{ 8072, 8079, -8 },
+	{ 8088, 8095, -8 },
+	{ 8104, 8111, -8 },
+	{ 8120, 8121, -8 },
+	{ 8122, 8123, -74 },
+	{ 8124, 8124, -9 },
+	{ 8126, 8126, -7173 },
+	{ 8136, 8139, -86 },
+	{ 8140, 8140, -9 },
+	{ 8152, 8153, -8 },
+	{ 8154, 8155, -100 },
+	{ 8168, 8169, -8 },
+	{ 8170, 8171, -112 },
+	{ 8172, 8172, -7 },
+	{ 8184, 8185, -128 },
+	{ 8186, 8187, -126 },
+	{ 8188, 8188, -9 },
+	{ 8486, 8486, -7517 },
+	{ 8490, 8490, -8383 },
+	{ 8491, 8491, -8262 },
+	{ 8498, 8498, 28 },
+	{ 8544, 8559, 16 },
+	{ 8579, 8579, OddEven },
+	{ 9398, 9423, 26 },
+	{ 11264, 11311, 48 },
+	{ 11360, 11360, EvenOdd },
+	{ 11362, 11362, -10743 },
+	{ 11363, 11363, -3814 },
+	{ 11364, 11364, -10727 },
+	{ 11367, 11371, OddEvenSkip },
+	{ 11373, 11373, -10780 },
+	{ 11374, 11374, -10749 },
+	{ 11375, 11375, -10783 },
+	{ 11376, 11376, -10782 },
+	{ 11378, 11378, EvenOdd },
+	{ 11381, 11381, OddEven },
+	{ 11390, 11391, -10815 },
+	{ 11392, 11490, EvenOddSkip },
+	{ 11499, 11501, OddEvenSkip },
+	{ 11506, 11506, EvenOdd },
+	{ 42560, 42604, EvenOddSkip },
+	{ 42624, 42650, EvenOddSkip },
+	{ 42786, 42798, EvenOddSkip },
+	{ 42802, 42862, EvenOddSkip },
+	{ 42873, 42875, OddEvenSkip },
+	{ 42877, 42877, -35332 },
+	{ 42878, 42886, EvenOddSkip },
+	{ 42891, 42891, OddEven },
+	{ 42893, 42893, -42280 },
+	{ 42896, 42898, EvenOddSkip },
+	{ 42902, 42920, EvenOddSkip },
+	{ 42922, 42922, -42308 },
+	{ 42923, 42923, -42319 },
+	{ 42924, 42924, -42315 },
+	{ 42925, 42925, -42305 },
+	{ 42926, 42926, -42308 },
+	{ 42928, 42928, -42258 },
+	{ 42929, 42929, -42282 },
+	{ 42930, 42930, -42261 },
+	{ 42931, 42931, 928 },
+	{ 42932, 42946, EvenOddSkip },
+	{ 42948, 42948, -48 },
+	{ 42949, 42949, -42307 },
+	{ 42950, 42950, -35384 },
+	{ 42951, 42953, OddEvenSkip },
+	{ 42960, 42960, EvenOdd },
+	{ 42966, 42968, EvenOddSkip },
+	{ 42997, 42997, OddEven },
+	{ 43888, 43967, -38864 },
+	{ 65313, 65338, 32 },
+	{ 66560, 66599, 40 },
+	{ 66736, 66771, 40 },
+	{ 66928, 66938, 39 },
+	{ 66940, 66954, 39 },
+	{ 66956, 66962, 39 },
+	{ 66964, 66965, 39 },
+	{ 68736, 68786, 64 },
+	{ 71840, 71871, 32 },
+	{ 93760, 93791, 32 },
+	{ 125184, 125217, 34 },
+};
+const int num_unicode_tolower = 205;
+
+
+
+} // namespace duckdb_re2
+
+
--- a/external/duckdb/third_party/re2/re2/unicode_casefold.h
+++ b/external/duckdb/third_party/re2/re2/unicode_casefold.h
@@ -0,0 +1,78 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_UNICODE_CASEFOLD_H_
+#define RE2_UNICODE_CASEFOLD_H_
+
+// Unicode case folding tables.
+
+// The Unicode case folding tables encode the mapping from one Unicode point
+// to the next largest Unicode point with equivalent folding.  The largest
+// point wraps back to the first.  For example, the tables map:
+//
+//     'A' -> 'a'
+//     'a' -> 'A'
+//
+//     'K' -> 'k'
+//     'k' -> 'K'  (Kelvin symbol)
+//     'K' -> 'K'
+//
+// Like everything Unicode, these tables are big.  If we represent the table
+// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB.
+// Most table entries look like the ones around them:
+// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc.
+// Instead of listing all the pairs explicitly, we make a list of ranges
+// and deltas, so that the table entries for 'A' through 'Z' can be represented
+// as a single entry { 'A', 'Z', +32 }.
+//
+// In addition to blocks that map to each other (A-Z mapping to a-z)
+// there are blocks of pairs that individually map to each other
+// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...).
+// For those, the special delta value EvenOdd marks even/odd pairs
+// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs.
+//
+// In this form, the table has 274 entries, about 3kB.  If we were to split
+// the table into one for 16-bit codes and an overflow table for larger ones,
+// we could get it down to about 1.5kB, but that's not worth the complexity.
+//
+// The grouped form also allows for efficient fold range calculations
+// rather than looping one character at a time.
+
+#include <stdint.h>
+
+#include "util/util.h"
+#include "util/utf.h"
+
+namespace duckdb_re2 {
+
+enum {
+  EvenOdd = 1,          // delta marker for even/odd pairs: if even, add 1; if odd, subtract 1
+  OddEven = -1,         // delta marker for odd/even pairs
+  EvenOddSkip = 1<<30,  // NOTE(review): presumably EvenOdd applied to every other rune of the range -- confirm in ApplyFold
+  OddEvenSkip,          // no initializer, so its value is (1<<30)+1; presumably the OddEven analogue of EvenOddSkip
+};
+
+struct CaseFold {
+  Rune lo;        // first code point of the range
+  Rune hi;        // last code point of the range (single-rune table entries have lo == hi)
+  int32_t delta;  // offset to add when folding (e.g. +32 for 'A'..'Z'), or one of the EvenOdd/OddEven markers
+};
+
+extern const CaseFold unicode_casefold[];
+extern const int num_unicode_casefold;
+
+extern const CaseFold unicode_tolower[];
+extern const int num_unicode_tolower;
+
+// Returns the CaseFold* in the tables that contains rune.
+// If rune is not in the tables, returns the first CaseFold* after rune.
+// If rune is larger than any value in the tables, returns NULL.
+extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune);
+
+// Returns the result of applying the fold f to the rune r.
+extern Rune ApplyFold(const CaseFold *f, Rune r);
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_UNICODE_CASEFOLD_H_
--- a/external/duckdb/third_party/re2/re2/unicode_groups.cc
+++ b/external/duckdb/third_party/re2/re2/unicode_groups.cc
--- a/external/duckdb/third_party/re2/re2/unicode_groups.h
+++ b/external/duckdb/third_party/re2/re2/unicode_groups.h
@@ -0,0 +1,67 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_UNICODE_GROUPS_H_
+#define RE2_UNICODE_GROUPS_H_
+
+// Unicode character groups.
+
+// The codes get split into ranges of 16-bit codes
+// and ranges of 32-bit codes.  It would be simpler
+// to use only 32-bit ranges, but these tables are large
+// enough to warrant extra care.
+//
+// Using just 32-bit ranges gives 27 kB of data.
+// Adding 16-bit ranges gives 18 kB of data.
+// Adding an extra table of 16-bit singletons would reduce
+// to 16.5 kB of data but make the data harder to use;
+// we don't bother.
+
+#include <stdint.h>
+
+#include "util/util.h"
+#include "util/utf.h"
+
+namespace duckdb_re2 {
+
+struct URange16
+{
+  uint16_t lo;  // first code point of a range of 16-bit codes
+  uint16_t hi;  // last code point of the range -- presumably inclusive; confirm against the table users
+};
+
+struct URange32
+{
+  Rune lo;  // first code point of a range of 32-bit codes
+  Rune hi;  // last code point of the range -- presumably inclusive; confirm against the table users
+};
+
+struct UGroup
+{
+  const char *name;     // lookup name, e.g. "Nd", "[:alpha:]" or "\\d" depending on which table holds the group
+  int sign;  // +1 for [abc], -1 for [^abc]
+  const URange16 *r16;  // ranges of 16-bit codes
+  int nr16;             // presumably the number of entries in r16 -- TODO confirm
+  const URange32 *r32;  // ranges of 32-bit codes
+  int nr32;             // presumably the number of entries in r32 -- TODO confirm
+};
+
+// Named by property or script name (e.g., "Nd", "N", "Han").
+// Negated groups are not included.
+extern const UGroup unicode_groups[];
+extern const int num_unicode_groups;
+
+// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
+// Negated groups are included.
+extern const UGroup posix_groups[];
+extern const int num_posix_groups;
+
+// Named by Perl name (e.g., "\\d", "\\D").
+// Negated groups are included.
+extern const UGroup perl_groups[];
+extern const int num_perl_groups;
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_UNICODE_GROUPS_H_
--- a/external/duckdb/third_party/re2/re2/walker-inl.h
+++ b/external/duckdb/third_party/re2/re2/walker-inl.h
@@ -0,0 +1,247 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_WALKER_INL_H_
+#define RE2_WALKER_INL_H_
+
+// Helper class for traversing Regexps without recursion.
+// Clients should declare their own subclasses that override
+// the PreVisit and PostVisit methods, which are called before
+// and after visiting the subexpressions.
+
+// Not quite the Visitor pattern, because (among other things)
+// the Visitor pattern is recursive.
+
+#include <stack>
+
+#include "util/logging.h"
+#include "re2/regexp.h"
+
+namespace duckdb_re2 {
+
+template<typename T> struct WalkState;
+
+template<typename T> class Regexp::Walker {
+ public:
+  Walker();
+  virtual ~Walker();
+
+  // Virtual method called before visiting re's children.
+  // PreVisit passes ownership of its return value to its caller.
+  // The T that PreVisit returns will be passed to PostVisit as pre_arg
+  // and passed to the child PreVisits and PostVisits as parent_arg.
+  // At the top-most Regexp, parent_arg is the top_arg passed to Walk.
+  // If PreVisit sets *stop to true, the walk does not recurse
+  // into the children.  Instead it behaves as though the return
+  // value from PreVisit is the return value from PostVisit.
+  // The default PreVisit returns parent_arg.
+  virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
+
+  // Virtual method called after visiting re's children.
+  // The pre_arg is the T that PreVisit returned.
+  // The child_args is an array of the Ts that the child PostVisits returned.
+  // PostVisit takes ownership of pre_arg.
+  // PostVisit takes ownership of the Ts
+  // in *child_args, but not the array itself.
+  // PostVisit passes ownership of its return value
+  // to its caller.
+  // The default PostVisit simply returns pre_arg.
+  virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
+                      T* child_args, int nchild_args);
+
+  // Virtual method called to copy a T,
+  // when Walk notices that more than one child is the same re.
+  virtual T Copy(T arg);
+
+  // Virtual method called to do a "quick visit" of the re,
+  // but not its children.  Only called once the visit budget
+  // has been used up and we're trying to abort the walk
+  // as quickly as possible.  Should return a value that
+  // makes sense for the parent PostVisits still to be run.
+  // This function is (hopefully) only called by
+  // WalkExponential, but must be implemented by all clients,
+  // just in case.
+  virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
+
+  // Walks over a regular expression.
+  // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
+  // Returns the T returned by PostVisit on re.
+  T Walk(Regexp* re, T top_arg);
+
+  // Like Walk, but doesn't use Copy.  This can lead to
+  // exponential runtimes on cross-linked Regexps like the
+  // ones generated by Simplify.  To help limit this,
+  // at most max_visits nodes will be visited and then
+  // the walk will be cut off early.
+  // If the walk *is* cut off early, ShortVisit(re)
+  // will be called on regexps that cannot be fully
+  // visited rather than calling PreVisit/PostVisit.
+  T WalkExponential(Regexp* re, T top_arg, int max_visits);
+
+  // Clears the stack.  Should never be necessary, since
+  // Walk always enters and exits with an empty stack.
+  // Logs DFATAL if stack is not already clear.
+  void Reset();
+
+  // Returns whether walk was cut off.
+  bool stopped_early() { return stopped_early_; }
+
+ private:
+  // Walk state for the entire traversal.
+  std::stack<WalkState<T>> stack_;
+  bool stopped_early_;  // set to true once the visit budget ran out and ShortVisit was used
+  int max_visits_;      // remaining visit budget; assigned by Walk/WalkExponential, decremented per visit
+
+  T WalkInternal(Regexp* re, T top_arg, bool use_copy);
+
+  Walker(const Walker&) = delete;
+  Walker& operator=(const Walker&) = delete;
+};
+
+template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
+                                                   T parent_arg,
+                                                   bool* stop) {
+  return parent_arg;  // default: pass the parent's value through; re is unused and *stop is left untouched
+}
+
+template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
+                                                    T parent_arg,
+                                                    T pre_arg,
+                                                    T* child_args,
+                                                    int nchild_args) {
+  return pre_arg;  // default: ignore the children's results and hand back what PreVisit produced
+}
+
+template<typename T> T Regexp::Walker<T>::Copy(T arg) {
+  return arg;  // default: plain by-value copy; subclasses override this virtual when T needs more than that
+}
+
+// State about a single level in the traversal.
+template<typename T> struct WalkState {
+  // pre_arg and child_arg are value-initialized so that copying a freshly
+  // built state onto the walker's stack never reads an indeterminate T.
+  WalkState(Regexp* re, T parent)
+    : re(re), n(-1), parent_arg(parent), pre_arg(), child_arg(),
+      child_args(NULL) { }
+
+  Regexp* re;  // The regexp
+  int n;  // The index of the next child to process; -1 means need to PreVisit
+  T parent_arg;  // Accumulated arguments.
+  T pre_arg;  // Value returned by PreVisit for re.
+  T child_arg;  // One-element buffer for child_args.
+  T* child_args;  // Children's results: NULL, &child_arg, or a new[] array.
+};
+
+template<typename T> Regexp::Walker<T>::Walker() {
+  stopped_early_ = false;  // max_visits_ is not set here; Walk/WalkExponential assign it before every walk
+}
+
+template<typename T> Regexp::Walker<T>::~Walker() {
+  Reset();  // releases any WalkState entries (and their child_args arrays) still on the stack
+}
+
+// Clears the stack.  Should never be necessary, since
+// Walk always enters and exits with an empty stack.
+// Logs DFATAL if the stack was not clear, after draining it (so nothing leaks if the log throws).
+template<typename T> void Regexp::Walker<T>::Reset() {
+  if (stack_.empty())
+    return;
+  while (!stack_.empty()) {
+    if (stack_.top().re->nsub_ > 1)  // only states with more than one child own a new[] array
+      delete[] stack_.top().child_args;
+    stack_.pop();
+  }
+  LOG(DFATAL) << "Stack not empty.";
+}
+
+template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
+                                                       bool use_copy) {
+  Reset();  // start from an empty stack
+
+  if (re == NULL) {
+    LOG(DFATAL) << "Walk NULL";
+    return top_arg;
+  }
+
+  stack_.push(WalkState<T>(re, top_arg));
+
+  WalkState<T>* s;
+  for (;;) {
+    T t;  // result of the node that finishes in this iteration
+    s = &stack_.top();  // re-fetched every iteration because pushes/pops change the top
+    re = s->re;
+    switch (s->n) {
+      case -1: {  // first time this node is seen: PreVisit (or ShortVisit once over budget)
+        if (--max_visits_ < 0) {
+          stopped_early_ = true;
+          t = ShortVisit(re, s->parent_arg);
+          break;
+        }
+        bool stop = false;
+        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
+        if (stop) {
+          t = s->pre_arg;  // PreVisit's value stands in for PostVisit's; children are not visited
+          break;
+        }
+        s->n = 0;
+        s->child_args = NULL;
+        if (re->nsub_ == 1)
+          s->child_args = &s->child_arg;  // single child: use the inline one-element buffer
+        else if (re->nsub_ > 1)
+          s->child_args = new T[re->nsub_];  // freed after PostVisit below (or by Reset)
+        FALLTHROUGH_INTENDED;
+      }
+      default: {  // s->n children are done so far
+        if (re->nsub_ > 0) {
+          Regexp** sub = re->sub();
+          if (s->n < re->nsub_) {
+            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
+              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);  // same child as the previous one: reuse its result
+              s->n++;
+            } else {
+              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));  // descend; this node's pre_arg becomes the child's parent_arg
+            }
+            continue;
+          }
+        }
+
+        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
+        if (re->nsub_ > 1)
+          delete[] s->child_args;
+        break;
+      }
+    }
+
+    // We've finished stack_.top().
+    // Update next guy down.
+    stack_.pop();
+    if (stack_.empty())
+      return t;  // the root finished: t is the result of the whole walk
+    s = &stack_.top();
+    if (s->child_args != NULL)
+      s->child_args[s->n] = t;
+    else
+      s->child_arg = t;
+    s->n++;
+  }
+}
+
+template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
+  // Without the exponential walking behavior,
+  // this budget should be more than enough for any
+  // regexp, and yet not enough to get us in trouble
+  // as far as CPU time.
+  max_visits_ = 1000000;
+  return WalkInternal(re, top_arg, true);  // use_copy=true: repeated adjacent children go through Copy
+}
+
+template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
+                                                          int max_visits) {
+  max_visits_ = max_visits;  // caller-supplied budget; once exhausted, ShortVisit replaces Pre/PostVisit
+  return WalkInternal(re, top_arg, false);  // use_copy=false: every child is walked, even repeated ones
+}
+
+}  // namespace duckdb_re2
+
+#endif  // RE2_WALKER_INL_H_
--- a/external/duckdb/third_party/re2/util/logging.h
+++ b/external/duckdb/third_party/re2/util/logging.h
@@ -0,0 +1,111 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_LOGGING_H_
+#define UTIL_LOGGING_H_
+
+// Simplified version of Google's logging.
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ostream>
+#include <sstream>
+#include <stdexcept>
+
+#include "util/util.h"
+
+// Debug-only checking.
+#define DCHECK(condition) assert(condition)
+#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
+#define DCHECK_NE(val1, val2) assert((val1) != (val2))
+#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
+#define DCHECK_LT(val1, val2) assert((val1) < (val2))
+#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
+#define DCHECK_GT(val1, val2) assert((val1) > (val2))
+
+// Always-on checking
+#define CHECK(x)	if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
+#define CHECK_LT(x, y)	CHECK((x) < (y))
+#define CHECK_GT(x, y)	CHECK((x) > (y))
+#define CHECK_LE(x, y)	CHECK((x) <= (y))
+#define CHECK_GE(x, y)	CHECK((x) >= (y))
+#define CHECK_EQ(x, y)	CHECK((x) == (y))
+#define CHECK_NE(x, y)	CHECK((x) != (y))
+
+#define RE2_LOG_INFO LogMessage(__FILE__, __LINE__)
+#define RE2_LOG_WARNING LogMessage(__FILE__, __LINE__)
+#define RE2_LOG_ERROR LogMessage(__FILE__, __LINE__)
+#define RE2_LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
+#define RE2_LOG_QFATAL RE2_LOG_FATAL
+
+// It seems that one of the Windows header files defines ERROR as 0.
+#ifdef _WIN32
+#define LOG_0 RE2_LOG_INFO
+#endif
+
+#ifdef NDEBUG
+#define RE2_LOG_DFATAL RE2_LOG_ERROR
+#else
+#define RE2_LOG_DFATAL RE2_LOG_FATAL
+#endif
+
+#define LOG(severity) RE2_LOG_ ## severity.stream()
+
+#define VLOG(x) if((x)>0){}else RE2_LOG_INFO.stream()
+
+class LogMessage {
+ public:
+  LogMessage(const char* file, int line)  // file/line are unused while the prefix line below stays commented out
+      : flushed_(false) {
+//    stream() << file << ":" << line << ": ";
+  }
+  void Flush() {  // currently a no-op: the stderr write is commented out, so buffered text is simply dropped
+//    stream() << "\n";
+//    std::string s = str_.str();
+//    size_t n = s.size();
+//    if (fwrite(s.data(), 1, n, stderr) < n) {}  // shut up gcc
+//    flushed_ = true;
+  }
+  ~LogMessage() {
+    if (!flushed_) {  // always true at present, since nothing sets flushed_
+      Flush();
+    }
+  }
+  std::ostream& stream() { return str_; }  // sink for the LOG(...) << ... expressions
+
+ private:
+  bool flushed_;
+  std::ostringstream str_;
+
+  LogMessage(const LogMessage&) = delete;
+  LogMessage& operator=(const LogMessage&) = delete;
+};
+
+// Silence "destructor never returns" warning for ~LogMessageFatal().
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4722)
+#endif
+
+class LogMessageFatal : public LogMessage {
+ public:
+  LogMessageFatal(const char* file, int line)
+      : LogMessage(file, line) {
+	  throw std::runtime_error("RE2 Fatal Error");  // thrown before the caller can stream any message text
+  }
+  ~LogMessageFatal() {
+    Flush();  // never runs: the constructor always throws, so no LogMessageFatal is ever fully constructed
+  }
+ private:
+  LogMessageFatal(const LogMessageFatal&) = delete;
+  LogMessageFatal& operator=(const LogMessageFatal&) = delete;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif  // UTIL_LOGGING_H_
--- a/external/duckdb/third_party/re2/util/mix.h
+++ b/external/duckdb/third_party/re2/util/mix.h
@@ -0,0 +1,41 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MIX_H_
+#define UTIL_MIX_H_
+
+#include <stddef.h>
+#include <limits>
+
+namespace duckdb_re2 {
+
+// Silence "truncation of constant value" warning for kMul in 32-bit mode.
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4309)
+#endif
+
+class HashMix {
+ public:
+  HashMix() : hash_(1) {}
+  explicit HashMix(size_t val) : hash_(val + 83) {}
+  void Mix(size_t val) {  // folds val into the running hash: multiply, rotate left by 19 bits, add val
+    static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
+    hash_ *= kMul;
+    hash_ = ((hash_ << 19) |
+             (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val;
+  }
+  size_t get() const { return hash_; }  // current accumulated hash value
+ private:
+  size_t hash_;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+}  // namespace duckdb_re2
+
+#endif  // UTIL_MIX_H_
--- a/external/duckdb/third_party/re2/util/mutex.h
+++ b/external/duckdb/third_party/re2/util/mutex.h
@@ -0,0 +1,165 @@
+// Copyright 2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MUTEX_H_
+#define UTIL_MUTEX_H_
+
+/*
+ * A simple mutex wrapper, supporting locks and read-write locks.
+ * You should assume the locks are *not* re-entrant.
+ */
+
+#ifdef RE2_NO_THREADS
+#include <assert.h>
+#define MUTEX_IS_LOCK_COUNTER
+#else
+#ifdef _WIN32
+// Requires Windows Vista or Windows Server 2008 at minimum.
+#include <windows.h>
+#if defined(WINVER) && WINVER >= 0x0600
+#define MUTEX_IS_WIN32_SRWLOCK
+#endif
+#else
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+#include <unistd.h>
+#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
+#define MUTEX_IS_PTHREAD_RWLOCK
+#endif
+#endif
+#endif
+
+#if defined(MUTEX_IS_LOCK_COUNTER)
+typedef int MutexType;
+#elif defined(MUTEX_IS_WIN32_SRWLOCK)
+typedef SRWLOCK MutexType;
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+#include <stdexcept>
+#include <pthread.h>
+#include <stdlib.h>
+typedef pthread_rwlock_t MutexType;
+#else
+#include <shared_mutex>
+typedef std::shared_mutex MutexType;
+#endif
+
+namespace duckdb_re2 {
+
+class Mutex {
+ public:
+  inline Mutex();
+  inline ~Mutex();
+  inline void Lock();    // Block if needed until free then acquire exclusively
+  inline void Unlock();  // Release a lock acquired via Lock()
+  // Note that on systems that don't support read-write locks, these may
+  // be implemented as synonyms to Lock() and Unlock().  So you can use
+  // these for efficiency, but don't use them anyplace where being able
+  // to do shared reads is necessary to avoid deadlock.
+  inline void ReaderLock();   // Block until free or shared then acquire a share
+  inline void ReaderUnlock(); // Release a read share of this Mutex
+  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
+  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+  MutexType mutex_;  // int counter, SRWLOCK, pthread_rwlock_t or std::shared_mutex, per the MUTEX_IS_* macros
+
+  // Catch the error of writing Mutex when intending MutexLock.
+  Mutex(Mutex *ignored);  // declared only; private so that Mutex(&mu) fails to compile outside the class
+
+  Mutex(const Mutex&) = delete;
+  Mutex& operator=(const Mutex&) = delete;
+};
+
+#if defined(MUTEX_IS_LOCK_COUNTER)
+
+Mutex::Mutex()             : mutex_(0) { }  // 0 means unlocked
+Mutex::~Mutex()            { assert(mutex_ == 0); }
+void Mutex::Lock()         { assert(--mutex_ == -1); }  // -1 means held exclusively; NOTE: the update is inside assert(), so it compiles away under NDEBUG
+void Mutex::Unlock()       { assert(mutex_++ == -1); }
+void Mutex::ReaderLock()   { assert(++mutex_ > 0); }  // positive values count the readers
+void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
+
+#elif defined(MUTEX_IS_WIN32_SRWLOCK)
+
+Mutex::Mutex()             : mutex_(SRWLOCK_INIT) { }
+Mutex::~Mutex()            { }
+void Mutex::Lock()         { AcquireSRWLockExclusive(&mutex_); }
+void Mutex::Unlock()       { ReleaseSRWLockExclusive(&mutex_); }
+void Mutex::ReaderLock()   { AcquireSRWLockShared(&mutex_); }
+void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+
+#define SAFE_PTHREAD(fncall)    \
+  do { /* throws if the pthread call fails; no ';' after while (0) -- the call site supplies it */ \
+    if ((fncall) != 0) throw std::runtime_error("RE2 pthread failure"); \
+  } while (0)
+
+Mutex::Mutex()             { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
+Mutex::~Mutex()            { pthread_rwlock_destroy(&mutex_); }  // result is not checked here, unlike the SAFE_PTHREAD-wrapped calls
+void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
+void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
+void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }  // same unlock call as the exclusive case
+
+#undef SAFE_PTHREAD
+
+#else
+
+Mutex::Mutex()             { }
+Mutex::~Mutex()            { }
+void Mutex::Lock()         { mutex_.lock(); }
+void Mutex::Unlock()       { mutex_.unlock(); }
+void Mutex::ReaderLock()   { mutex_.lock_shared(); }
+void Mutex::ReaderUnlock() { mutex_.unlock_shared(); }
+
+#endif
+
+// --------------------------------------------------------------------------
+// Some helper classes
+
+// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
+class MutexLock {
+ public:
+  explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }  // mu is dereferenced immediately, so it must not be NULL
+  ~MutexLock() { mu_->Unlock(); }
+ private:
+  Mutex * const mu_;  // not owned; dereferenced again in the destructor
+
+  MutexLock(const MutexLock&) = delete;
+  MutexLock& operator=(const MutexLock&) = delete;
+};
+
+// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
+class ReaderMutexLock {
+ public:
+  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }  // takes a shared (read) lock on construction
+  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
+ private:
+  Mutex * const mu_;  // not owned; dereferenced again in the destructor
+
+  ReaderMutexLock(const ReaderMutexLock&) = delete;
+  ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
+};
+
+class WriterMutexLock {
+ public:
+  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }  // WriterLock() forwards to Lock(): exclusive
+  ~WriterMutexLock() { mu_->WriterUnlock(); }
+ private:
+  Mutex * const mu_;  // not owned; dereferenced again in the destructor
+
+  WriterMutexLock(const WriterMutexLock&) = delete;
+  WriterMutexLock& operator=(const WriterMutexLock&) = delete;
+};
+
+// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
+#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name")
+#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name")
+#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name")
+
+}  // namespace duckdb_re2
+
+#endif  // UTIL_MUTEX_H_
--- a/external/duckdb/third_party/re2/util/rune.cc
+++ b/external/duckdb/third_party/re2/util/rune.cc
@@ -0,0 +1,260 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "util/utf.h"
+
+namespace duckdb_re2 {
+
+/*
+ * Bit-layout constants shared by the UTF-8 routines in this file.
+ *
+ *	Bit1..Bit5, Bitx	bit counts used to derive the values below
+ *				(Bitx = 6 is the shift applied per
+ *				continuation byte).
+ *	T1..T5, Tx		tag bit patterns; the per-line comments show
+ *				the resulting byte value in binary.
+ *	Rune1..Rune4		largest value that fits in a 1/2/3/4-byte
+ *				sequence; also used as masks when decoding.
+ *	Maskx, Testx		low six bits / top two bits of a byte.
+ *	Bad			value stored by chartorune on malformed input.
+ */
+enum
+{
+	Bit1	= 7,
+	Bitx	= 6,
+	Bit2	= 5,
+	Bit3	= 4,
+	Bit4	= 3,
+	Bit5	= 2, 
+
+	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
+	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
+	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
+	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
+	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
+	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
+
+	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
+	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
+	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,
+                                        /* 0001 1111 1111 1111 1111 1111 */
+
+	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
+	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
+
+	Bad	= Runeerror,
+};
+
+/*
+ * Decode one rune from the UTF-8 bytes at str into *rune and return the
+ * number of bytes consumed (1 to 4).  On a malformed or overlong sequence,
+ * *rune is set to Bad and 1 is returned so the caller can resynchronise.
+ */
+int
+chartorune(Rune *rune, const char *str)
+{
+	const unsigned char *bytes = reinterpret_cast<const unsigned char*>(str);
+	const int lead = bytes[0];
+
+	/*
+	 * one character sequence
+	 *	00000-0007F => T1
+	 */
+	if(lead < Tx) {
+		*rune = lead;
+		return 1;
+	}
+
+	/*
+	 * Multi-byte sequences.  A continuation byte is only read once all
+	 * bytes before it have been accepted; any failure breaks out of this
+	 * block to the shared error return at the bottom.
+	 */
+	do {
+		/*
+		 * two character sequence
+		 *	0080-07FF => T2 Tx
+		 */
+		const int cont1 = bytes[1] ^ Tx;
+		if(cont1 & Testx)
+			break;
+		if(lead < T3) {
+			if(lead < T2)
+				break;		/* lead byte is itself a continuation byte */
+			const Rune two = ((lead << Bitx) | cont1) & Rune2;
+			if(two <= Rune1)
+				break;		/* overlong encoding */
+			*rune = two;
+			return 2;
+		}
+
+		/*
+		 * three character sequence
+		 *	0800-FFFF => T3 Tx Tx
+		 */
+		const int cont2 = bytes[2] ^ Tx;
+		if(cont2 & Testx)
+			break;
+		if(lead < T4) {
+			const Rune three = ((((lead << Bitx) | cont1) << Bitx) | cont2) & Rune3;
+			if(three <= Rune2)
+				break;		/* overlong encoding */
+			*rune = three;
+			return 3;
+		}
+
+		/*
+		 * four character sequence (21-bit value)
+		 *	10000-1FFFFF => T4 Tx Tx Tx
+		 */
+		const int cont3 = bytes[3] ^ Tx;
+		if(cont3 & Testx)
+			break;
+		if(lead < T5) {
+			const Rune four = ((((((lead << Bitx) | cont1) << Bitx) | cont2) << Bitx) | cont3) & Rune4;
+			if(four <= Rune3)
+				break;		/* overlong encoding */
+			*rune = four;
+			return 4;
+		}
+
+		/*
+		 * Sequences of five or more bytes are not supported; they
+		 * end up in the error return as well.
+		 */
+	} while(0);
+
+	/*
+	 * bad decoding
+	 */
+	*rune = Bad;
+	return 1;
+}
+
+/*
+ * Encode *rune as UTF-8 into str and return the number of bytes written
+ * (1 to 4).  No terminating NUL is written.  Values above Runemax
+ * (including negative Runes) are encoded as Runeerror.
+ */
+int
+runetochar(char *str, const Rune *rune)
+{
+	/* Unsigned copy: a negative Rune then compares above Runemax. */
+	unsigned int code = *rune;
+
+	/*
+	 * one character sequence
+	 *	00000-0007F => 00-7F
+	 */
+	if(code <= Rune1) {
+		str[0] = static_cast<char>(code);
+		return 1;
+	}
+
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	if(code <= Rune2) {
+		const unsigned int top = code >> 1*Bitx;
+		const unsigned int low = code & Maskx;
+		str[0] = T2 | static_cast<char>(top);
+		str[1] = Tx | low;
+		return 2;
+	}
+
+	/*
+	 * The range check sits here, after the short forms, because an
+	 * out-of-range value can never fit in one or two bytes and the
+	 * replacement (Runeerror) takes the three-byte path below.
+	 */
+	if(code > Runemax)
+		code = Runeerror;
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	if(code <= Rune3) {
+		const unsigned int top = code >> 2*Bitx;
+		const unsigned int mid = (code >> 1*Bitx) & Maskx;
+		const unsigned int low = code & Maskx;
+		str[0] = T3 | static_cast<char>(top);
+		str[1] = Tx | mid;
+		str[2] = Tx | low;
+		return 3;
+	}
+
+	/*
+	 * four character sequence (21-bit value)
+	 *     10000-1FFFFF => T4 Tx Tx Tx
+	 */
+	const unsigned int top = code >> 3*Bitx;
+	const unsigned int hi = (code >> 2*Bitx) & Maskx;
+	const unsigned int mid = (code >> 1*Bitx) & Maskx;
+	const unsigned int low = code & Maskx;
+	str[0] = T4 | static_cast<char>(top);
+	str[1] = Tx | hi;
+	str[2] = Tx | mid;
+	str[3] = Tx | low;
+	return 4;
+}
+
+/*
+ * Return the number of bytes runetochar produces for rune.
+ */
+int
+runelen(Rune rune)
+{
+	/* Encode into a throwaway buffer; only the byte count is of interest. */
+	char scratch[10];
+	const int nbytes = runetochar(scratch, &rune);
+
+	return nbytes;
+}
+
+/*
+ * Return 1 if the first n bytes at str are enough to hold the sequence
+ * announced by the lead byte str[0], otherwise 0.  Only the lead byte is
+ * examined; continuation bytes are not validated.
+ */
+int
+fullrune(const char *str, int n)
+{
+	/* Nothing available: str is not read at all. */
+	if (n <= 0)
+		return 0;
+
+	/* Length of the sequence that the lead byte calls for. */
+	const int lead = *reinterpret_cast<const unsigned char*>(str);
+	int need;
+	if (lead < Tx)
+		need = 1;
+	else if (lead < T3)
+		need = 2;
+	else if (lead < T4)
+		need = 3;
+	else
+		need = 4;
+
+	return n >= need ? 1 : 0;
+}
+
+
+/*
+ * Count the runes in the NUL-terminated string s.  Bytes below Runeself
+ * count as one rune each; anything else is advanced by whatever chartorune
+ * reports, so a malformed byte counts as one rune.
+ */
+int
+utflen(const char *s)
+{
+	int count = 0;
+
+	while(true) {
+		const int byte = *reinterpret_cast<const unsigned char*>(s);
+		if(byte == 0)
+			return count;
+
+		if(byte < Runeself) {
+			s++;
+		} else {
+			Rune ignored;
+			s += chartorune(&ignored, s);
+		}
+		count++;
+	}
+}
+
+/*
+ * Return a pointer to the first occurrence of rune c in the NUL-terminated
+ * UTF-8 string s, or 0 if it does not occur.  Runes below Runesync are
+ * looked up with a plain strchr.
+ */
+char*
+utfrune(const char *s, Rune c)
+{
+	/* not part of utf sequence */
+	if(c < Runesync)
+		return strchr(const_cast<char*>(s), c);
+
+	while(true) {
+		const int byte = *reinterpret_cast<const unsigned char*>(s);
+
+		if(byte >= Runeself) {
+			/* multi-byte rune: decode it and compare */
+			Rune decoded;
+			const int width = chartorune(&decoded, s);
+			if(decoded == c)
+				return const_cast<char*>(s);
+			s += width;
+			continue;
+		}
+
+		/* one byte rune */
+		if(byte == 0)
+			return 0;
+		if(byte == c)
+			return const_cast<char*>(s);
+		s++;
+	}
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/util/strutil.cc
+++ b/external/duckdb/third_party/re2/util/strutil.cc
@@ -0,0 +1,149 @@
+// Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "util/strutil.h"
+
+// When _WIN32 is defined, every later use of snprintf/vsnprintf in this file
+// is redirected to the underscore-prefixed functions.
+// NOTE(review): those variants may report truncation differently from the
+// C99 functions -- confirm the mapping is still wanted on current toolchains.
+#ifdef _WIN32
+#define snprintf _snprintf
+#define vsnprintf _vsnprintf
+#endif
+
+namespace duckdb_re2 {
+
+// ----------------------------------------------------------------------
+// CEscapeString()
+//    Copies 'src' to 'dest', escaping dangerous characters using
+//    C-style escape sequences.  'src' and 'dest' should not overlap.
+//    Returns the number of bytes written to 'dest' (not including the \0)
+//    or (size_t)-1 if there was insufficient space.
+// ----------------------------------------------------------------------
+static size_t CEscapeString(const char* src, size_t src_len,
+                            char* dest, size_t dest_len) {
+  const size_t kNoSpace = (size_t)-1;
+  size_t written = 0;
+
+  for (size_t i = 0; i < src_len; i++) {
+    // Every iteration needs room for at least a two-character escape.
+    if (dest_len - written < 2)
+      return kNoSpace;
+
+    const unsigned char ch = src[i];
+
+    // Characters with a dedicated backslash escape map to the letter that
+    // follows the backslash; 0 means "no short form".
+    char short_form = 0;
+    switch (ch) {
+      case '\n': short_form = 'n';  break;
+      case '\r': short_form = 'r';  break;
+      case '\t': short_form = 't';  break;
+      case '\"': short_form = '\"'; break;
+      case '\'': short_form = '\''; break;
+      case '\\': short_form = '\\'; break;
+      default:                      break;
+    }
+
+    if (short_form != 0) {
+      dest[written++] = '\\';
+      dest[written++] = short_form;
+    } else if (ch < ' ' || ch > '~') {
+      // Outside printable ASCII: emit a fixed-width three-digit octal
+      // escape.  The width is always three digits, so a digit that follows
+      // in the source cannot be absorbed into the escape.
+      if (dest_len - written < 5)   // four-character escape + \0
+        return kNoSpace;
+      snprintf(dest + written, 5, "\\%03o", ch);
+      written += 4;
+    } else {
+      // Printable ASCII is copied through unchanged.
+      dest[written++] = ch;
+    }
+  }
+
+  // The terminator needs one more byte; it is not part of the return value.
+  if (dest_len - written < 1)
+    return kNoSpace;
+
+  dest[written] = '\0';
+  return written;
+}
+
+// ----------------------------------------------------------------------
+// CEscape()
+//    Copies 'src' to result, escaping dangerous characters using
+//    C-style escape sequences.  'src' and 'dest' should not overlap.
+// ----------------------------------------------------------------------
+std::string CEscape(const StringPiece& src) {
+  // Worst case: every input byte becomes a four-character octal escape, plus
+  // one byte for the terminating NUL that CEscapeString writes.
+  const size_t dest_len = src.size() * 4 + 1;
+
+  // A std::string owns the scratch buffer so that it is released on every
+  // path, including when building the result below throws.  dest_len is at
+  // least 1, so &scratch[0] is always a valid writable pointer.
+  std::string scratch(dest_len, '\0');
+  const size_t used = CEscapeString(src.data(), src.size(),
+                                    &scratch[0], dest_len);
+
+  // Copy out only the bytes actually produced; the oversized scratch buffer
+  // is dropped when this function returns.
+  return std::string(scratch.data(), used);
+}
+
+// Rewrites *prefix in place: trailing bytes equal to 255 are removed, then
+// the last remaining byte (if any) is incremented.  An empty string, or one
+// made up solely of 255 bytes, ends up empty.
+void PrefixSuccessor(std::string* prefix) {
+  // Find how many leading bytes survive once the trailing 255s are dropped.
+  std::string::size_type keep = prefix->size();
+  while (keep > 0 && (*prefix)[keep - 1] == '\xff') {  // char literal avoids signed/unsigned.
+    keep--;
+  }
+  prefix->resize(keep);
+
+  // Bump the last surviving byte, if there is one.
+  if (keep > 0) {
+    ++(*prefix)[keep - 1];
+  }
+}
+
+// Formats (format, ap) printf-style and appends the result to *dst.
+//
+// A 1024-byte stack buffer is tried first.  If the output does not fit, a
+// heap buffer is retried with either the exact size reported by vsnprintf or,
+// when vsnprintf only reports failure (negative result), a doubled size.
+// If vsnprintf keeps failing until the retry size would exceed 1 GiB, the
+// function gives up and leaves *dst unchanged.
+static void StringAppendV(std::string* dst, const char* format, va_list ap) {
+  // First try with a small fixed size buffer
+  char space[1024];
+
+  // It's possible for methods that use a va_list to invalidate
+  // the data in it upon use.  The fix is to make a copy
+  // of the structure before using it and use that copy instead.
+  va_list backup_ap;
+  va_copy(backup_ap, ap);
+  int result = vsnprintf(space, sizeof(space), format, backup_ap);
+  va_end(backup_ap);
+
+  if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
+    // It fit
+    dst->append(space, result);
+    return;
+  }
+
+  // Upper bound for the doubling path.  Without it, a vsnprintf that keeps
+  // returning a negative value would make "length *= 2" overflow int.
+  const int kMaxLength = 1 << 30;
+
+  // Repeatedly increase buffer size until it fits
+  int length = sizeof(space);
+  while (true) {
+    if (result < 0) {
+      // Older behavior: just try doubling the buffer size
+      if (length > kMaxLength / 2) {
+        // Persistent failure; stop rather than overflow.
+        return;
+      }
+      length *= 2;
+    } else {
+      // We need exactly "result+1" characters
+      length = result+1;
+    }
+
+    // The buffer is owned by a std::string so it is released even if the
+    // append below throws.  length is always at least 1 here.
+    std::string buf(static_cast<size_t>(length), '\0');
+
+    // Restore the va_list before we use it again
+    va_copy(backup_ap, ap);
+    result = vsnprintf(&buf[0], length, format, backup_ap);
+    va_end(backup_ap);
+
+    if ((result >= 0) && (result < length)) {
+      // It fit
+      dst->append(buf.data(), result);
+      return;
+    }
+  }
+}
+
+// printf-style formatting into a freshly created std::string.
+std::string StringPrintf(const char* format, ...) {
+  std::string formatted;
+
+  va_list args;
+  va_start(args, format);
+  StringAppendV(&formatted, format, args);
+  va_end(args);
+
+  return formatted;
+}
+
+}  // namespace duckdb_re2
--- a/external/duckdb/third_party/re2/util/strutil.h
+++ b/external/duckdb/third_party/re2/util/strutil.h
@@ -0,0 +1,21 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_STRUTIL_H_
+#define UTIL_STRUTIL_H_
+
+#include <string>
+
+#include "re2/stringpiece.h"
+#include "util/util.h"
+
+namespace duckdb_re2 {
+
+// Returns a copy of src with dangerous characters replaced by C-style
+// escape sequences.
+std::string CEscape(const StringPiece& src);
+// Modifies *prefix in place: trailing 255 bytes are removed and the last
+// remaining byte is incremented.  An empty or all-255 string becomes empty.
+void PrefixSuccessor(std::string* prefix);
+// Returns the printf-style expansion of format and its arguments.
+std::string StringPrintf(const char* format, ...);
+
+}  // namespace duckdb_re2
+
+#endif  // UTIL_STRUTIL_H_
--- a/external/duckdb/third_party/re2/util/utf.h
+++ b/external/duckdb/third_party/re2/util/utf.h
@@ -0,0 +1,44 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ *
+ * This file and rune.cc have been converted to compile as C++ code
+ * in namespace duckdb_re2.
+ */
+
+#ifndef UTIL_UTF_H_
+#define UTIL_UTF_H_
+
+#include <stdint.h>
+
+namespace duckdb_re2 {
+
+/* Rune holds one decoded code point. */
+typedef signed int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
+
+/* Limits and sentinel values used by the rune routines declared below. */
+enum
+{
+  UTFmax	= 4,		/* maximum bytes per rune */
+  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
+  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
+  Runeerror	= 0xFFFD,	/* decoding error in UTF */
+  Runemax	= 0x10FFFF,	/* maximum rune value */
+};
+
+/* Encodes *r as UTF-8 into s; returns the number of bytes written (1-4). */
+int runetochar(char* s, const Rune* r);
+/*
+ * Decodes one rune from s into *r; returns the number of bytes consumed.
+ * Malformed input yields Runeerror and a return value of 1.
+ */
+int chartorune(Rune* r, const char* s);
+/*
+ * Returns 1 if the first n bytes of s are enough for the sequence announced
+ * by the lead byte, 0 otherwise.
+ */
+int fullrune(const char* s, int n);
+/* Returns the number of runes in the NUL-terminated string s. */
+int utflen(const char* s);
+/*
+ * Returns a pointer to the first occurrence of the given rune in the
+ * NUL-terminated string, or 0 if it does not occur.
+ */
+char* utfrune(const char*, Rune);
+
+}  // namespace duckdb_re2
+
+#endif  // UTIL_UTF_H_
--- a/external/duckdb/third_party/re2/util/util.h
+++ b/external/duckdb/third_party/re2/util/util.h
@@ -0,0 +1,42 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_UTIL_H_
+#define UTIL_UTIL_H_
+
+// Element count of a built-in array: the array's total size divided by the
+// size of its first element.  The argument must be a real array; with a
+// pointer the expression still compiles but does not give an element count.
+#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
+
+// ATTRIBUTE_NORETURN: compiler-specific "does not return" annotation.
+// An existing definition is respected; otherwise the GCC-style attribute is
+// used when __GNUC__ is defined, the __declspec form when _MSC_VER is
+// defined, and the macro expands to nothing elsewhere.
+#ifndef ATTRIBUTE_NORETURN
+#if defined(__GNUC__)
+#define ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+#endif
+
+// ATTRIBUTE_UNUSED: marks an entity as intentionally unused.  An existing
+// definition is respected; otherwise it expands to the GCC-style attribute
+// when __GNUC__ is defined and to nothing elsewhere.
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
+
+// FALLTHROUGH_INTENDED: statement that documents a deliberate fall-through
+// between switch cases.  An existing definition is respected; otherwise it
+// maps to the clang attribute under __clang__, to the GNU attribute for
+// __GNUC__ >= 7, and to an empty do/while statement elsewhere so that the
+// usage "FALLTHROUGH_INTENDED;" stays valid.
+#ifndef FALLTHROUGH_INTENDED
+#if defined(__clang__)
+#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
+#elif defined(__GNUC__) && __GNUC__ >= 7
+#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
+#else
+#define FALLTHROUGH_INTENDED do {} while (0)
+#endif
+#endif
+
+// NO_THREAD_SAFETY_ANALYSIS expands to nothing unless something defined it
+// before this header was included.
+#ifndef NO_THREAD_SAFETY_ANALYSIS
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
+#endif  // UTIL_UTIL_H_