chroma-markdown/vendor/github.com/dlclark/regexp2/regexp_test.go

916 lines
26 KiB
Go

package regexp2
import (
"reflect"
"strings"
"testing"
"time"
"github.com/dlclark/regexp2/syntax"
)
func TestBacktrack_CatastrophicTimeout(t *testing.T) {
r, err := Compile("(.+)*\\?", 0)
r.MatchTimeout = time.Millisecond * 1
t.Logf("code dump: %v", r.code.Dump())
m, err := r.FindStringMatch("Do you think you found the problem string!")
if err == nil {
t.Errorf("expected timeout err")
}
if m != nil {
t.Errorf("Expected no match")
}
}
func TestSetPrefix(t *testing.T) {
r := MustCompile(`^\s*-TEST`, 0)
if r.code.FcPrefix == nil {
t.Fatalf("Expected prefix set [-\\s] but was nil")
}
if r.code.FcPrefix.PrefixSet.String() != "[-\\s]" {
t.Fatalf("Expected prefix set [\\s-] but was %v", r.code.FcPrefix.PrefixSet.String())
}
}
func TestSetInCode(t *testing.T) {
r := MustCompile(`(?<body>\s*(?<name>.+))`, 0)
t.Logf("code dump: %v", r.code.Dump())
if want, got := 1, len(r.code.Sets); want != got {
t.Fatalf("r.code.Sets wanted %v, got %v", want, got)
}
if want, got := "[\\s]", r.code.Sets[0].String(); want != got {
t.Fatalf("first set wanted %v, got %v", want, got)
}
}
func TestRegexp_Basic(t *testing.T) {
r, err := Compile("test(?<named>ing)?", 0)
//t.Logf("code dump: %v", r.code.Dump())
if err != nil {
t.Errorf("unexpected compile err: %v", err)
}
m, err := r.FindStringMatch("this is a testing stuff")
if err != nil {
t.Errorf("unexpected match err: %v", err)
}
if m == nil {
t.Error("Nil match, expected success")
} else {
//t.Logf("Match: %v", m.dump())
}
}
// check all our functions and properties around basic capture groups and referential for Group 0
func TestCapture_Basic(t *testing.T) {
r := MustCompile(`.*\B(SUCCESS)\B.*`, 0)
m, err := r.FindStringMatch("adfadsfSUCCESSadsfadsf")
if err != nil {
t.Fatalf("Unexpected match error: %v", err)
}
if m == nil {
t.Fatalf("Should have matched")
}
if want, got := "adfadsfSUCCESSadsfadsf", m.String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 0, m.Index; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 22, m.Length; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 1, len(m.Captures); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := m.String(), m.Captures[0].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 0, m.Captures[0].Index; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 22, m.Captures[0].Length; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
g := m.Groups()
if want, got := 2, len(g); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
// group 0 is always the match
if want, got := m.String(), g[0].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 1, len(g[0].Captures); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
// group 0's capture is always the match
if want, got := m.Captures[0].String(), g[0].Captures[0].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
// group 1 is our first explicit group (unnamed)
if want, got := 7, g[1].Index; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := 7, g[1].Length; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := "SUCCESS", g[1].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
}
func TestEscapeUnescape_Basic(t *testing.T) {
s1 := "#$^*+(){}<>\\|. "
s2 := Escape(s1)
s3, err := Unescape(s2)
if err != nil {
t.Fatalf("Unexpected error during unescape: %v", err)
}
//confirm one way
if want, got := `\#\$\^\*\+\(\)\{\}<>\\\|\.\ `, s2; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
//confirm round-trip
if want, got := s1, s3; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
}
func TestGroups_Basic(t *testing.T) {
type d struct {
p string
s string
name []string
num []int
strs []string
}
data := []d{
d{"(?<first_name>\\S+)\\s(?<last_name>\\S+)", // example
"Ryan Byington",
[]string{"0", "first_name", "last_name"},
[]int{0, 1, 2},
[]string{"Ryan Byington", "Ryan", "Byington"}},
d{"((?<One>abc)\\d+)?(?<Two>xyz)(.*)", // example
"abc208923xyzanqnakl",
[]string{"0", "1", "2", "One", "Two"},
[]int{0, 1, 2, 3, 4},
[]string{"abc208923xyzanqnakl", "abc208923", "anqnakl", "abc", "xyz"}},
d{"((?<256>abc)\\d+)?(?<16>xyz)(.*)", // numeric names
"0272saasdabc8978xyz][]12_+-",
[]string{"0", "1", "2", "16", "256"},
[]int{0, 1, 2, 16, 256},
[]string{"abc8978xyz][]12_+-", "abc8978", "][]12_+-", "xyz", "abc"}},
d{"((?<4>abc)(?<digits>\\d+))?(?<2>xyz)(?<everything_else>.*)", // mix numeric and string names
"0272saasdabc8978xyz][]12_+-",
[]string{"0", "1", "2", "digits", "4", "everything_else"},
[]int{0, 1, 2, 3, 4, 5},
[]string{"abc8978xyz][]12_+-", "abc8978", "xyz", "8978", "abc", "][]12_+-"}},
d{"(?<first_name>\\S+)\\s(?<first_name>\\S+)", // dupe string names
"Ryan Byington",
[]string{"0", "first_name"},
[]int{0, 1},
[]string{"Ryan Byington", "Byington"}},
d{"(?<15>\\S+)\\s(?<15>\\S+)", // dupe numeric names
"Ryan Byington",
[]string{"0", "15"},
[]int{0, 15},
[]string{"Ryan Byington", "Byington"}},
// *** repeated from above, but with alt cap syntax ***
d{"(?'first_name'\\S+)\\s(?'last_name'\\S+)", //example
"Ryan Byington",
[]string{"0", "first_name", "last_name"},
[]int{0, 1, 2},
[]string{"Ryan Byington", "Ryan", "Byington"}},
d{"((?'One'abc)\\d+)?(?'Two'xyz)(.*)", // example
"abc208923xyzanqnakl",
[]string{"0", "1", "2", "One", "Two"},
[]int{0, 1, 2, 3, 4},
[]string{"abc208923xyzanqnakl", "abc208923", "anqnakl", "abc", "xyz"}},
d{"((?'256'abc)\\d+)?(?'16'xyz)(.*)", // numeric names
"0272saasdabc8978xyz][]12_+-",
[]string{"0", "1", "2", "16", "256"},
[]int{0, 1, 2, 16, 256},
[]string{"abc8978xyz][]12_+-", "abc8978", "][]12_+-", "xyz", "abc"}},
d{"((?'4'abc)(?'digits'\\d+))?(?'2'xyz)(?'everything_else'.*)", // mix numeric and string names
"0272saasdabc8978xyz][]12_+-",
[]string{"0", "1", "2", "digits", "4", "everything_else"},
[]int{0, 1, 2, 3, 4, 5},
[]string{"abc8978xyz][]12_+-", "abc8978", "xyz", "8978", "abc", "][]12_+-"}},
d{"(?'first_name'\\S+)\\s(?'first_name'\\S+)", // dupe string names
"Ryan Byington",
[]string{"0", "first_name"},
[]int{0, 1},
[]string{"Ryan Byington", "Byington"}},
d{"(?'15'\\S+)\\s(?'15'\\S+)", // dupe numeric names
"Ryan Byington",
[]string{"0", "15"},
[]int{0, 15},
[]string{"Ryan Byington", "Byington"}},
}
fatalf := func(re *Regexp, v d, format string, args ...interface{}) {
args = append(args, v, re.code.Dump())
t.Fatalf(format+" using test data: %#v\ndump:%v", args...)
}
validateGroupNamesNumbers := func(re *Regexp, v d) {
if len(v.name) != len(v.num) {
fatalf(re, v, "Invalid data, group name count and number count must match")
}
groupNames := re.GetGroupNames()
if !reflect.DeepEqual(groupNames, v.name) {
fatalf(re, v, "group names expected: %v, actual: %v", v.name, groupNames)
}
groupNums := re.GetGroupNumbers()
if !reflect.DeepEqual(groupNums, v.num) {
fatalf(re, v, "group numbers expected: %v, actual: %v", v.num, groupNums)
}
// make sure we can freely get names and numbers from eachother
for i := range groupNums {
if want, got := groupNums[i], re.GroupNumberFromName(groupNames[i]); want != got {
fatalf(re, v, "group num from name Wanted '%v'\nGot '%v'", want, got)
}
if want, got := groupNames[i], re.GroupNameFromNumber(groupNums[i]); want != got {
fatalf(re, v, "group name from num Wanted '%v'\nGot '%v'", want, got)
}
}
}
for _, v := range data {
// compile the regex
re := MustCompile(v.p, 0)
// validate our group name/num info before execute
validateGroupNamesNumbers(re, v)
m, err := re.FindStringMatch(v.s)
if err != nil {
fatalf(re, v, "Unexpected error in match: %v", err)
}
if m == nil {
fatalf(re, v, "Match is nil")
}
if want, got := len(v.strs), m.GroupCount(); want != got {
fatalf(re, v, "GroupCount() Wanted '%v'\nGot '%v'", want, got)
}
g := m.Groups()
if want, got := len(v.strs), len(g); want != got {
fatalf(re, v, "len(m.Groups()) Wanted '%v'\nGot '%v'", want, got)
}
// validate each group's value from the execute
for i := range v.name {
grp1 := m.GroupByName(v.name[i])
grp2 := m.GroupByNumber(v.num[i])
// should be identical reference
if grp1 != grp2 {
fatalf(re, v, "Expected GroupByName and GroupByNumber to return same result for %v, %v", v.name[i], v.num[i])
}
if want, got := v.strs[i], grp1.String(); want != got {
fatalf(re, v, "Value[%v] Wanted '%v'\nGot '%v'", i, want, got)
}
}
// validate our group name/num info after execute
validateGroupNamesNumbers(re, v)
}
}
func TestErr_GroupName(t *testing.T) {
// group 0 is off limits
if _, err := Compile("foo(?<0>bar)", 0); err == nil {
t.Fatalf("zero group, expected error during compile")
} else if want, got := "error parsing regexp: capture number cannot be zero in `foo(?<0>bar)`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
if _, err := Compile("foo(?'0'bar)", 0); err == nil {
t.Fatalf("zero group, expected error during compile")
} else if want, got := "error parsing regexp: capture number cannot be zero in `foo(?'0'bar)`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
// group tag can't start with a num
if _, err := Compile("foo(?<1bar>)", 0); err == nil {
t.Fatalf("invalid group name, expected error during compile")
} else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?<1bar>)`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
if _, err := Compile("foo(?'1bar')", 0); err == nil {
t.Fatalf("invalid group name, expected error during compile")
} else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?'1bar')`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
// missing closing group tag
if _, err := Compile("foo(?<bar)", 0); err == nil {
t.Fatalf("invalid group name, expected error during compile")
} else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?<bar)`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
if _, err := Compile("foo(?'bar)", 0); err == nil {
t.Fatalf("invalid group name, expected error during compile")
} else if want, got := "error parsing regexp: invalid group name: group names must begin with a word character and have a matching terminator in `foo(?'bar)`", err.Error(); want != got {
t.Fatalf("invalid error text, want '%v', got '%v'", want, got)
}
}
func TestConstantUneffected(t *testing.T) {
// had a bug where "constant" sets would get modified with alternations and be broken in memory until restart
// this meant that if you used a known-set (like \s) in a larger set it would "poison" \s for the process
re := MustCompile(`(\s|\*)test\s`, 0)
if want, got := 2, len(re.code.Sets); want != got {
t.Fatalf("wanted %v sets, got %v", want, got)
}
if want, got := "[\\*\\s]", re.code.Sets[0].String(); want != got {
t.Fatalf("wanted set 0 %v, got %v", want, got)
}
if want, got := "[\\s]", re.code.Sets[1].String(); want != got {
t.Fatalf("wanted set 1 %v, got %v", want, got)
}
}
func TestAlternationConstAndEscape(t *testing.T) {
re := MustCompile(`\:|\s`, 0)
if want, got := 1, len(re.code.Sets); want != got {
t.Fatalf("wanted %v sets, got %v", want, got)
}
if want, got := "[:\\s]", re.code.Sets[0].String(); want != got {
t.Fatalf("wanted set 0 %v, got %v", want, got)
}
}
func TestStartingCharsOptionalNegate(t *testing.T) {
// to maintain matching with the corefx we've made the negative char classes be negative and the
// categories they contain positive. This means they're not combinable or suitable for prefixes.
// In general this could be a fine thing since negatives are extremely wide groups and not
// missing much on prefix optimizations.
// the below expression *could* have a prefix of [\S\d] but
// this requires a change in charclass.go when setting
// NotSpaceClass = getCharSetFromCategoryString()
// to negate the individual categories rather than the CharSet itself
// this would deviate from corefx
re := MustCompile(`(^(\S{2} )?\S{2}(\d+|/) *\S{3}\S{3} ?\d{2,4}[A-Z] ?\d{2}[A-Z]{3}|(\S{2} )?\d{2,4})`, 0)
if re.code.FcPrefix != nil {
t.Fatalf("FcPrefix wanted nil, got %v", re.code.FcPrefix)
}
}
func TestParseNegativeDigit(t *testing.T) {
re := MustCompile(`\D`, 0)
if want, got := 1, len(re.code.Sets); want != got {
t.Fatalf("wanted %v sets, got %v", want, got)
}
if want, got := "[\\P{Nd}]", re.code.Sets[0].String(); want != got {
t.Fatalf("wanted set 0 %v, got %v", want, got)
}
}
func TestRunNegativeDigit(t *testing.T) {
re := MustCompile(`\D`, 0)
m, err := re.MatchString("this is a test")
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if !m {
t.Fatalf("Expected match")
}
}
func TestCancellingClasses(t *testing.T) {
// [\w\W\s] should become "." because it means "anything"
re := MustCompile(`[\w\W\s]`, 0)
if want, got := 1, len(re.code.Sets); want != got {
t.Fatalf("wanted %v sets, got %v", want, got)
}
if want, got := syntax.AnyClass().String(), re.code.Sets[0].String(); want != got {
t.Fatalf("wanted set 0 %v, got %v", want, got)
}
}
func TestConcatLoopCaptureSet(t *testing.T) {
//(A|B)*?CD different Concat/Loop/Capture/Set (had [A-Z] should be [AB])
// we were not copying the Sets in the prefix FC stack, so the underlying sets were unexpectedly mutating
// so set [AB] becomes [ABC] when we see the the static C in FC stack generation (which are the valid start chars),
// but that was mutating the tree node's original set [AB] because even though we copied the slie header,
// the two header's pointed to the same underlying byte array...which was mutated.
re := MustCompile(`(A|B)*CD`, 0)
if want, got := 1, len(re.code.Sets); want != got {
t.Fatalf("wanted %v sets, got %v", want, got)
}
if want, got := "[AB]", re.code.Sets[0].String(); want != got {
t.Fatalf("wanted set 0 %v, got %v", want, got)
}
}
func TestFirstcharsIgnoreCase(t *testing.T) {
//((?i)AB(?-i)C|D)E different Firstchars (had [da] should be [ad])
// we were not canonicalizing when converting the prefix set to lower case
// so our set's were potentially not searching properly
re := MustCompile(`((?i)AB(?-i)C|D)E`, 0)
if re.code.FcPrefix == nil {
t.Fatalf("wanted prefix, got nil")
}
if want, got := "[ad]", re.code.FcPrefix.PrefixSet.String(); want != got {
t.Fatalf("wanted prefix %v, got %v", want, got)
}
}
func TestRepeatingGroup(t *testing.T) {
re := MustCompile(`(data?)+`, 0)
m, err := re.FindStringMatch("datadat")
if err != nil {
t.Fatalf("Unexpected err: %v", err)
}
if m == nil {
t.Fatalf("Expected match")
}
g := m.GroupByNumber(1)
if g == nil {
t.Fatalf("Expected group")
}
if want, got := 2, len(g.Captures); want != got {
t.Fatalf("wanted cap count %v, got %v", want, got)
}
if want, got := g.Captures[1].String(), g.Capture.String(); want != got {
t.Fatalf("expected last capture of the group to be embedded")
}
if want, got := "data", g.Captures[0].String(); want != got {
t.Fatalf("expected cap 0 to be %v, got %v", want, got)
}
if want, got := "dat", g.Captures[1].String(); want != got {
t.Fatalf("expected cap 1 to be %v, got %v", want, got)
}
}
func TestFindNextMatch_Basic(t *testing.T) {
re := MustCompile(`(T|E)(?=h|E|S|$)`, 0)
m, err := re.FindStringMatch(`This is a TEST`)
if err != nil {
t.Fatalf("Unexpected err 0: %v", err)
}
if m == nil {
t.Fatalf("Expected match 0")
}
if want, got := 0, m.Index; want != got {
t.Fatalf("expected match 0 to start at %v, got %v", want, got)
}
m, err = re.FindNextMatch(m)
if err != nil {
t.Fatalf("Unexpected err 1: %v", err)
}
if m == nil {
t.Fatalf("Expected match 1")
}
if want, got := 10, m.Index; want != got {
t.Fatalf("expected match 1 to start at %v, got %v", want, got)
}
m, err = re.FindNextMatch(m)
if err != nil {
t.Fatalf("Unexpected err 2: %v", err)
}
if m == nil {
t.Fatalf("Expected match 2")
}
if want, got := 11, m.Index; want != got {
t.Fatalf("expected match 2 to start at %v, got %v", want, got)
}
m, err = re.FindNextMatch(m)
if err != nil {
t.Fatalf("Unexpected err 3: %v", err)
}
if m == nil {
t.Fatalf("Expected match 3")
}
if want, got := 13, m.Index; want != got {
t.Fatalf("expected match 3 to start at %v, got %v", want, got)
}
}
func TestUnicodeSupplementaryCharSetMatch(t *testing.T) {
//0x2070E 0x20731 𠜱 0x20779 𠝹
re := MustCompile("[𠜎-𠝹]", 0)
if m, err := re.MatchString("\u2070"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if m {
t.Fatalf("Unexpected match")
}
if m, err := re.MatchString("𠜱"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
}
func TestUnicodeSupplementaryCharInRange(t *testing.T) {
//0x2070E 0x20731 𠜱 0x20779 𠝹
re := MustCompile(".", 0)
if m, err := re.MatchString("\u2070"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
if m, err := re.MatchString("𠜱"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
}
func TestUnicodeScriptSets(t *testing.T) {
re := MustCompile(`\p{Katakana}+`, 0)
if m, err := re.MatchString("\u30A0\u30FF"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
}
func TestHexadecimalCurlyBraces(t *testing.T) {
re := MustCompile(`\x20`, 0)
if m, err := re.MatchString(" "); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{C4}`, 0)
if m, err := re.MatchString("Ä"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{0C5}`, 0)
if m, err := re.MatchString("Å"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{00C6}`, 0)
if m, err := re.MatchString("Æ"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{1FF}`, 0)
if m, err := re.MatchString("ǿ"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{02FF}`, 0)
if m, err := re.MatchString("˿"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{1392}`, 0)
if m, err := re.MatchString("᎒"); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
re = MustCompile(`\x{0010ffff}`, 0)
if m, err := re.MatchString(string(0x10ffff)); err != nil {
t.Fatalf("Unexpected err: %v", err)
} else if !m {
t.Fatalf("Expected match")
}
if _, err := Compile(`\x2R`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x0`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{2`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{2R`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{2R}`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{}`, 0); err == nil {
t.Fatalf("Expected error")
}
if _, err := Compile(`\x{10000`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{1234`, 0); err == nil {
t.Fatal("Expected error")
}
if _, err := Compile(`\x{123456789}`, 0); err == nil {
t.Fatal("Expected error")
}
}
func TestEmptyCharClass(t *testing.T) {
if _, err := Compile("[]", 0); err == nil {
t.Fatal("Empty char class isn't valid outside of ECMAScript mode")
}
}
func TestECMAEmptyCharClass(t *testing.T) {
re := MustCompile("[]", ECMAScript)
if m, err := re.MatchString("a"); err != nil {
t.Fatal(err)
} else if m {
t.Fatal("Expected no match")
}
}
func TestDot(t *testing.T) {
re := MustCompile(".", 0)
if m, err := re.MatchString("\r"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestECMADot(t *testing.T) {
re := MustCompile(".", ECMAScript)
if m, err := re.MatchString("\r"); err != nil {
t.Fatal(err)
} else if m {
t.Fatal("Expected no match")
}
}
func TestDecimalLookahead(t *testing.T) {
re := MustCompile(`\1(A)`, 0)
m, err := re.FindStringMatch("AA")
if err != nil {
t.Fatal(err)
} else if m != nil {
t.Fatal("Expected no match")
}
}
func TestECMADecimalLookahead(t *testing.T) {
re := MustCompile(`\1(A)`, ECMAScript)
m, err := re.FindStringMatch("AA")
if err != nil {
t.Fatal(err)
}
if c := m.GroupCount(); c != 2 {
t.Fatalf("Group count !=2 (%d)", c)
}
if s := m.GroupByNumber(0).String(); s != "A" {
t.Fatalf("Group0 != 'A' ('%s')", s)
}
if s := m.GroupByNumber(1).String(); s != "A" {
t.Fatalf("Group1 != 'A' ('%s')", s)
}
}
func TestECMAOctal(t *testing.T) {
re := MustCompile(`\100`, ECMAScript)
if m, err := re.MatchString("@"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestNegateRange(t *testing.T) {
re := MustCompile(`[\D]`, 0)
if m, err := re.MatchString("A"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestECMANegateRange(t *testing.T) {
re := MustCompile(`[\D]`, ECMAScript)
if m, err := re.MatchString("A"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestThreeByteUnicode_InputOnly(t *testing.T) {
// confirm the bmprefix properly ignores 3-byte unicode in the input value
// this used to panic
re := MustCompile("高", 0)
if m, err := re.MatchString("📍Test高"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestMultibyteUnicode_MatchPartialPattern(t *testing.T) {
re := MustCompile("猟な", 0)
if m, err := re.MatchString("なあ🍺な"); err != nil {
t.Fatal(err)
} else if m {
t.Fatal("Expected no match")
}
}
func TestMultibyteUnicode_Match(t *testing.T) {
re := MustCompile("猟な", 0)
if m, err := re.MatchString("なあ🍺猟な"); err != nil {
t.Fatal(err)
} else if !m {
t.Fatal("Expected match")
}
}
func TestAlternationNamedOptions_Errors(t *testing.T) {
// all of these should give an error "error parsing regexp:"
data := []string{
"(?(?e))", "(?(?a)", "(?(?", "(?(", "?(a:b)", "?(a)", "?(a|b)", "?((a)", "?((a)a", "?((a)a|", "?((a)a|b",
"(?(?i))", "(?(?I))", "(?(?m))", "(?(?M))", "(?(?s))", "(?(?S))", "(?(?x))", "(?(?X))", "(?(?n))", "(?(?N))", " (?(?n))",
}
for _, p := range data {
re, err := Compile(p, 0)
if err == nil {
t.Fatal("Expected error, got nil")
}
if re != nil {
t.Fatal("Expected unparsed regexp, got non-nil")
}
if !strings.HasPrefix(err.Error(), "error parsing regexp: ") {
t.Fatalf("Wanted parse error, got '%v'", err)
}
}
}
func TestAlternationNamedOptions_Success(t *testing.T) {
data := []struct {
pattern string
input string
expectSuccess bool
matchVal string
}{
{"(?(cat)|dog)", "cat", true, ""},
{"(?(cat)|dog)", "catdog", true, ""},
{"(?(cat)dog1|dog2)", "catdog1", false, ""},
{"(?(cat)dog1|dog2)", "catdog2", true, "dog2"},
{"(?(cat)dog1|dog2)", "catdog1dog2", true, "dog2"},
{"(?(dog2))", "dog2", true, ""},
{"(?(cat)|dog)", "oof", false, ""},
{"(?(a:b))", "a", true, ""},
{"(?(a:))", "a", true, ""},
}
for _, p := range data {
re := MustCompile(p.pattern, 0)
m, err := re.FindStringMatch(p.input)
if err != nil {
t.Fatalf("Unexpected error during match: %v", err)
}
if want, got := p.expectSuccess, m != nil; want != got {
t.Fatalf("Success mismatch for %v, wanted %v, got %v", p.pattern, want, got)
}
if m != nil {
if want, got := p.matchVal, m.String(); want != got {
t.Fatalf("Match val mismatch for %v, wanted %v, got %v", p.pattern, want, got)
}
}
}
}
func TestAlternationConstruct_Matches(t *testing.T) {
re := MustCompile("(?(A)A123|C789)", 0)
m, err := re.FindStringMatch("A123 B456 C789")
if err != nil {
t.Fatalf("Unexpected err: %v", err)
}
if m == nil {
t.Fatal("Expected match, got nil")
}
if want, got := "A123", m.String(); want != got {
t.Fatalf("Wanted %v, got %v", want, got)
}
m, err = re.FindNextMatch(m)
if err != nil {
t.Fatalf("Unexpected err in second match: %v", err)
}
if m == nil {
t.Fatal("Expected second match, got nil")
}
if want, got := "C789", m.String(); want != got {
t.Fatalf("Wanted %v, got %v", want, got)
}
m, err = re.FindNextMatch(m)
if err != nil {
t.Fatalf("Unexpected err in third match: %v", err)
}
if m != nil {
t.Fatal("Did not expect third match")
}
}
func TestParserFuzzCrashes(t *testing.T) {
var crashes = []string{
"(?'-", "(\\c0)", "(\\00(?())", "[\\p{0}", "(\x00?.*.()?(()?)?)*.x\xcb?&(\\s\x80)", "\\p{0}", "[0-[\\p{0}",
}
for _, c := range crashes {
t.Log(c)
Compile(c, 0)
}
}
func TestParserFuzzHangs(t *testing.T) {
var hangs = []string{
"\r{865720113}z\xd5{\r{861o", "\r{915355}\r{9153}", "\r{525005}", "\x01{19765625}", "(\r{068828256})", "\r{677525005}",
}
for _, c := range hangs {
t.Log(c)
Compile(c, 0)
}
}
func BenchmarkParserPrefixLongLen(b *testing.B) {
re := MustCompile("\r{100001}T+", 0)
inp := strings.Repeat("testing", 10000) + strings.Repeat("\r", 100000) + "TTTT"
b.ResetTimer()
for i := 0; i < b.N; i++ {
if m, err := re.MatchString(inp); err != nil {
b.Fatalf("Unexpected err: %v", err)
} else if m {
b.Fatalf("Expected no match")
}
}
}
/*
func TestPcreStuff(t *testing.T) {
re := MustCompile(`(?(?=(a))a)`, Debug)
inp := unEscapeToMatch(`a`)
fmt.Printf("Inp %q\n", inp)
m, err := re.FindStringMatch(inp)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if m == nil {
t.Fatalf("Expected match")
}
fmt.Printf("Match %s\n", m.dump())
fmt.Printf("Text: %v\n", unEscapeGroup(m.String()))
}
*/
//(.*)(\d+) different FirstChars ([\x00-\t\v-\x08] OR [\x00-\t\v-\uffff\p{Nd}]