chroma-markdown/vendor/github.com/dlclark/regexp2/regexp_pcre_test.go

package regexp2

import (
	"bufio"
	"bytes"
	"fmt"
	"log"
	"os"
	"regexp"
	"strconv"
	"strings"
	"testing"
	"time"
)

// Process the file "testoutput1" from PCRE2 v10.21 (public domain)
//
// totalCount is incremented once for every pattern read from the fixture;
// failCount is incremented once for every failure reported through problem.
// Both are package-level so the helpers below can update them.
var totalCount, failCount = 0, 0

// TestPcre_Basics replays the "testoutput1" fixture: every pattern in the file
// is compiled, every subject line below it is matched, and every expected
// result line is compared against what the engine produced. Failures are
// reported through problem so the run continues; only a malformed fixture
// aborts the test.
func TestPcre_Basics(t *testing.T) {
	defer func() {
		if failCount > 0 {
			t.Logf("%v of %v patterns failed", failCount, totalCount)
		}
	}()

	// open our test patterns file and run through it
	// validating results as we go
	file, err := os.Open("testoutput1")
	if err != nil {
		// NOTE(review): log.Fatal exits the whole test binary and skips the
		// deferred summary above; t.Fatal would be the usual choice here.
		log.Fatal(err)
	}
	defer file.Close()

	// the high level structure of the file:
	//		#comments - ignore only outside of the pattern
	//		pattern (could be multi-line, could be surrounded by "" or //) after the / there are the options some we understand, some we don't
	//		    test case
	//		 0: success case
	//		\= Expect no match (ignored)
	//		    another test case
	//		No match
	//
	//		another pattern ...etc

	scanner := bufio.NewScanner(file)
	// main pattern loop: each iteration consumes one pattern and its test cases
	for scanner.Scan() {
		line := scanner.Text()

		if trim := strings.TrimSpace(line); trim == "" || strings.HasPrefix(trim, "#") {
			// skip blanks and comments between patterns
			continue
		}

		// line is non-blank here, so indexing the first byte is safe
		patternStart := line[0]
		if patternStart != '/' && patternStart != '"' {
			// an error!  expected a pattern but we didn't understand what was in the file
			t.Fatalf("Unknown file format, expected line to start with '/' or '\"', line in: %v", line)
		}

		// start building our pattern, handling multi-line patterns
		pattern := line
		totalCount++

		// keep appending lines until one contains the closing delimiter. On
		// the first line the delimiter at column 0 is the opener and must not
		// count; on continuation lines it may.
		allowFirst := false
		for !containsEnder(line, patternStart, allowFirst) {
			if !scanner.Scan() {
				// an error!  expected more pattern, but got eof
				t.Fatalf("Unknown file format, expected more pattern text, but got EOF, pattern so far: %v", pattern)
			}
			line = scanner.Text()
			pattern += "\n" + line
			allowFirst = true
		}

		// we have our raw pattern! -- we need to convert this to a compiled regex
		re := compileRawPattern(t, pattern)

		// capsIdx tracks, per group number, how many expected-result lines
		// have been seen for the current subject. It starts out as an empty
		// (non-nil) map so that a result line appearing before any subject
		// line is reported as a failed expectation instead of panicking on a
		// write to a nil map.
		capsIdx := make(map[int]int)
		var (
			m       *Match
			toMatch string
		)

		// parse the test cases, if any, up to the blank line that ends this
		// pattern's section
		for scanner.Scan() {
			line = scanner.Text()

			// blank line is our separator for a new pattern
			if strings.TrimSpace(line) == "" {
				break
			}

			switch {
			case strings.HasPrefix(line, "\\= Expect"):
				// "\= Expect no match" marker: informational only

			case strings.HasPrefix(line, "    "):
				// a subject line: drop the 4-space indent and trailing spaces
				toMatch = strings.TrimRight(line[4:], " ")
				m = matchString(t, re, toMatch)
				// a new subject starts a fresh set of expectations
				capsIdx = make(map[int]int)

			case strings.HasPrefix(line, "No match"):
				validateNoMatch(t, re, m)

			default:
				subs := matchGroup.FindStringSubmatch(line)
				if len(subs) != 3 {
					// no match -- problem
					t.Fatalf("Unknown file format, expected match or match group but got '%v'", line)
				}
				// subs[1] matched \d+, so Atoi can only fail on overflow
				gIdx, err := strconv.Atoi(subs[1])
				if err != nil {
					t.Fatalf("Unknown file format, bad group number in '%v': %v", line, err)
				}
				// a missing key reads as 0, i.e. the first capture of the group
				capIdx := capsIdx[gIdx]
				validateMatch(t, re, m, toMatch, subs[2], gIdx, capIdx)
				capsIdx[gIdx] = capIdx + 1
			}
		}
	}

	if err := scanner.Err(); err != nil {
		// NOTE(review): same caveat as the os.Open failure above.
		log.Fatal(err)
	}
}

// matchGroup recognises an expected-result line: optional leading whitespace,
// a decimal group number, a colon and a single space, then the expected text.
// Submatch 1 is the group number and submatch 2 is the expected text.
var matchGroup = regexp.MustCompile(`^\s*(\d+): (.*)`)

// problem records one failed expectation: it bumps the package-level failCount
// and reports the formatted message through t.Errorf, so the test keeps
// running after the failure.
//
// input is a Printf-style format string and args are its operands.
func problem(t *testing.T, input string, args ...interface{}) {
	// Mark this function as a helper so the failure is attributed to the
	// caller that detected it rather than to the Errorf line below.
	t.Helper()
	failCount++
	t.Errorf(input, args...)
}

// validateNoMatch handles a "No match" expectation. It reports a problem when
// the pattern compiled (re is non-nil) and the last attempt produced a match
// (m is non-nil); in every other case it does nothing.
func validateNoMatch(t *testing.T, re *Regexp, m *Match) {
	if re != nil && m != nil {
		problem(t, "Expected no match for pattern '%v', but got '%v'", re.pattern, m.String())
	}
}

// validateMatch checks one expected-result line against the match m.
//
//   - re is the compiled pattern; nil means there is nothing to check.
//   - m is the match for toMatch; nil means the engine found no match.
//   - value is the expected text, or the literal "<unset>" when the group is
//     expected to have captured nothing.
//   - idx is the group number and capIdx counts how many expectations for that
//     group preceded this one for the current subject.
//
// Every mismatch is reported through problem; the function never aborts the
// test itself.
func validateMatch(t *testing.T, re *Regexp, m *Match, toMatch, value string, idx, capIdx int) {
	if re == nil {
		// already error'd earlier up stream
		return
	}

	if m == nil {
		// we didn't match, but should have
		problem(t, "Expected match for pattern '%v' with input '%v', but got no match", re.pattern, toMatch)
		return
	}

	g := m.Groups()
	if len(g) <= idx {
		problem(t, "Expected group %v does not exist in pattern '%v' with input '%v'", idx, re.pattern, toMatch)
		return
	}

	if value == "<unset>" {
		// this means we shouldn't have a cap for this group
		if n := len(g[idx].Captures); n > 0 {
			// capIdx may be past the captures actually recorded; clamp it so
			// that building the failure message cannot index out of range.
			shown := capIdx
			if shown >= n {
				shown = n - 1
			}
			problem(t, "Expected no cap %v in group %v in pattern '%v' with input '%v'", g[idx].Captures[shown].String(), idx, re.pattern, toMatch)
		}

		return
	}

	if len(g[idx].Captures) <= capIdx {
		problem(t, "Expected cap %v does not exist in group %v in pattern '%v' with input '%v'", capIdx, idx, re.pattern, toMatch)
		return
	}

	// The comparison uses the group's own string value, not the capIdx-th
	// capture; the per-capture variant is kept below for reference.
	escp := unEscapeGroup(g[idx].String())
	//escp := unEscapeGroup(g[idx].Captures[capIdx].String())
	if escp != value {
		problem(t, "Expected '%v' but got '%v' for cap %v, group %v for pattern '%v' with input '%v'", value, escp, capIdx, idx, re.pattern, toMatch)
	}
}

// compileRawPattern turns a raw fixture pattern (delimiters and trailing
// option text included) into a compiled Regexp.
//
// Everything after the last '/' or '"' is treated as a comma-separated option
// list. An option equal to "dupnames" is skipped; every other option is
// searched for the letters i, s, m and x, each of which switches on the
// corresponding RegexOptions flag. The surrounding delimiters are then
// stripped and the remainder is compiled.
//
// A compile error or a panic raised while compiling is reported through
// problem. On a panic the result is nil; otherwise whatever Compile returned
// is handed back.
func compileRawPattern(t *testing.T, pattern string) *Regexp {
	//
	// Append "= Debug" to compare details between corefx and regexp2 on the PCRE test suite
	//
	var opts RegexOptions

	// split the option text off the end of the pattern
	if optStart := strings.LastIndexAny(pattern, "/\"") + 1; optStart < len(pattern) {
		optText := pattern[optStart:]
		pattern = pattern[:optStart]

		flagLetters := []struct {
			letter string
			flag   RegexOptions
		}{
			{"i", IgnoreCase},
			{"s", Singleline},
			{"m", Multiline},
			{"x", IgnorePatternWhitespace},
		}

		// there are lots of complex options here
		for _, name := range strings.Split(optText, ",") {
			if name == "dupnames" {
				// we don't know how to handle this...
				continue
			}
			for _, fl := range flagLetters {
				if strings.Contains(name, fl.letter) {
					opts |= fl.flag
				}
			}
		}
	}

	// drop the opening and closing delimiter
	pattern = pattern[1 : len(pattern)-1]

	// registered only now, so it covers the Compile call below
	defer func() {
		if rec := recover(); rec != nil {
			problem(t, "PANIC in compiling \"%v\": %v", pattern, rec)
		}
	}()

	re, err := Compile(pattern, opts)
	if err != nil {
		problem(t, "Error parsing \"%v\": %v", pattern, err)
	}
	return re
}

// matchString runs re against one subject line from the fixture and returns
// the resulting match, or nil when re itself is nil.
//
// The subject is unescaped with unEscapeToMatch first, except that a subject
// consisting of a single backslash is matched as the empty string. The match
// timeout on re is set to one second before matching. A matching error is
// reported through problem and whatever FindStringMatch returned is passed
// back unchanged.
func matchString(t *testing.T, re *Regexp, toMatch string) *Match {
	if re == nil {
		return nil
	}

	re.MatchTimeout = 1 * time.Second

	var subject string
	if toMatch != "\\" {
		subject = unEscapeToMatch(toMatch)
	}

	found, err := re.FindStringMatch(subject)
	if err != nil {
		problem(t, "Error matching \"%v\" in pattern \"%v\": %v", toMatch, re.pattern, err)
	}
	return found
}

// containsEnder reports whether line holds the closing delimiter ender.
// An occurrence past the first byte always counts; an occurrence only at the
// very first byte counts when allowFirst is true.
func containsEnder(line string, ender byte, allowFirst bool) bool {
	switch last := strings.LastIndexByte(line, ender); {
	case last > 0:
		return true
	case last == 0:
		return allowFirst
	default:
		return false
	}
}

// unEscapeToMatch converts the backslash escapes in a fixture subject line
// into the bytes they stand for.
//
// Handled escapes: \xHH (via scanHex), octal sequences starting with a digit
// 0-7 (via scanOctal), \a \b \e \f \n \r \t \v, and \\ for a literal
// backslash. A backslash followed by any other byte yields that byte as-is.
// A single backslash at the very end of the line is dropped. A line without
// any backslash is returned unchanged.
func unEscapeToMatch(line string) string {
	first := strings.IndexByte(line, '\\')
	if first < 0 {
		// nothing to unescape
		return line
	}

	// everything before the first backslash is copied verbatim
	var out bytes.Buffer
	out.WriteString(line[:first])

	escaped := false // true when the previous byte was an unconsumed backslash
	for pos := first; pos < len(line); pos++ {
		c := line[pos]

		if !escaped {
			if c == '\\' {
				escaped = true
			} else {
				out.WriteByte(c)
			}
			continue
		}

		// c is the byte right after a backslash
		escaped = false
		switch c {
		case 'x':
			// scanHex advances pos past the two hex digits
			out.WriteByte(scanHex(line, &pos))
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// scanOctal leaves pos on the last octal digit it consumed
			out.WriteByte(scanOctal(line, &pos))
		case 'a':
			out.WriteByte(0x07)
		case 'b':
			out.WriteByte('\b')
		case 'e':
			out.WriteByte(0x1b)
		case 'f':
			out.WriteByte('\f')
		case 'n':
			out.WriteByte('\n')
		case 'r':
			out.WriteByte('\r')
		case 't':
			out.WriteByte('\t')
		case 'v':
			out.WriteByte(0x0b)
		default:
			// includes the escaped backslash itself
			out.WriteByte(c)
		}
	}

	return out.String()
}

// unEscapeGroup renders val byte by byte: bytes in the range 0x20-0x7e are
// copied through, and every other byte (0x00-0x1f and 0x7f-0xff) is written
// as a backslash, an 'x' and two lowercase hex digits.
func unEscapeGroup(val string) string {
	var out bytes.Buffer

	for _, b := range []byte(val) {
		if b > 0x1f && b < 0x7f {
			// printable ASCII stays as-is
			out.WriteByte(b)
			continue
		}
		// everything else becomes \xHH
		fmt.Fprintf(&out, "\\x%.2x", b)
	}

	return out.String()
}

// scanHex decodes the two hex digits that follow position *idx in line and
// returns the byte they encode. On return *idx has been advanced by two, so it
// rests on the second digit.
//
// It panics when fewer than two bytes follow *idx, or when either of them is
// not a hex digit.
func scanHex(line string, idx *int) byte {
	start := *idx
	if start+2 >= len(line) {
		panic(fmt.Sprintf("not enough hex chars in %v at %v", line, start))
	}

	hi := hexDigit(line[start+1])
	lo := hexDigit(line[start+2])
	*idx = start + 2

	if hi < 0 || lo < 0 {
		panic("bad hex chars")
	}

	return byte(hi<<4 | lo)
}

// hexDigit returns the value 0-15 of the hex digit ch (upper or lower case),
// or -1 when ch is not a hex digit.
func hexDigit(ch byte) int {
	switch {
	case ch >= '0' && ch <= '9':
		return int(ch - '0')
	case ch >= 'a' && ch <= 'f':
		return int(ch-'a') + 0xa
	case ch >= 'A' && ch <= 'F':
		return int(ch-'A') + 0xa
	}
	return -1
}

// scanOctal reads up to three consecutive octal digits from line starting at
// position *idx and returns their value reduced to its low eight bits (so
// values above 0377 are truncated, not rejected). Scanning stops early at the
// end of the line or at the first byte that is not an octal digit.
//
// On return *idx is one less than the position of the first byte that was not
// consumed, i.e. it rests on the last digit read.
func scanOctal(line string, idx *int) byte {
	pos := *idx
	_ = line[pos] // an out-of-range start position panics right away

	// octals can be 3, 2, or 1 digit long
	stop := pos + 3
	if stop > len(line) {
		stop = len(line)
	}

	val := 0
	for ; pos < stop; pos++ {
		digit := line[pos] - '0' // wraps to a large value for bytes below '0'
		if digit > 7 {
			break
		}
		val = val*8 + int(digit)
	}
	*idx = pos - 1

	// Octal codes only go up to 255.  Any larger and the behavior that Perl
	// follows is simply to truncate the high bits.
	return byte(val & 0xFF)
}
Initial commit 2017-10-22 05:37:38 +02:00			`package regexp2`

			`import (`
			`"bufio"`
			`"bytes"`
			`"fmt"`
			`"log"`
			`"os"`
			`"regexp"`
			`"strconv"`
			`"strings"`
			`"testing"`
			`"time"`
			`)`

			`// Process the file "testoutput1" from PCRE2 v10.21 (public domain)`
			`var totalCount, failCount = 0, 0`

			`func TestPcre_Basics(t *testing.T) {`
			`defer func() {`
			`if failCount > 0 {`
			`t.Logf("%v of %v patterns failed", failCount, totalCount)`
			`}`
			`}()`
			`// open our test patterns file and run through it`
			`// validating results as we go`
			`file, err := os.Open("testoutput1")`
			`if err != nil {`
			`log.Fatal(err)`
			`}`
			`defer file.Close()`

			`// the high level structure of the file:`
			`// #comments - ignore only outside of the pattern`
			`// pattern (could be multi-line, could be surrounded by "" or //) after the / there are the options some we understand, some we don't`
			`// test case`
			`// 0: success case`
			`// \= Expect no match (ignored)`
			`// another test case`
			`// No Match`
			`//`
			`// another pattern ...etc`

			`scanner := bufio.NewScanner(file)`
			`// main pattern loop`
			`for scanner.Scan() {`
			`// reading the file a line at a time`
			`line := scanner.Text()`

			`if trim := strings.TrimSpace(line); trim == "" \|\| strings.HasPrefix(trim, "#") {`
			`// skip blanks and comments`
			`continue`
			`}`

			`patternStart := line[0]`
			`if patternStart != '/' && patternStart != '"' {`
			`// an error! expected a pattern but we didn't understand what was in the file`
			`t.Fatalf("Unknown file format, expected line to start with '/' or '\"', line in: %v", line)`
			`}`

			`// start building our pattern, handling multi-line patterns`
			`pattern := line`
			`totalCount++`

			`// keep appending the lines to our pattern string until we`
			`// find our closing tag, don't allow the first char to match on the`
			`// line start, but subsequent lines could end on the first char`
			`allowFirst := false`
			`for !containsEnder(line, patternStart, allowFirst) {`
			`if !scanner.Scan() {`
			`// an error! expected more pattern, but got eof`
			`t.Fatalf("Unknown file format, expected more pattern text, but got EOF, pattern so far: %v", pattern)`
			`}`
			`line = scanner.Text()`
			`pattern += fmt.Sprintf("\n%s", line)`
			`allowFirst = true`
			`}`

			`// we have our raw pattern! -- we need to convert this to a compiled regex`
			`re := compileRawPattern(t, pattern)`

			`var (`
			`capsIdx map[int]int`
			`m *Match`
			`toMatch string`
			`)`
			`// now we need to parse the test cases if there are any`
			`// they start with 4 spaces -- if we don't get a 4-space start then`
			`// we're back out to our next pattern`
			`for scanner.Scan() {`
			`line = scanner.Text()`

			`// blank line is our separator for a new pattern`
			`if strings.TrimSpace(line) == "" {`
			`break`
			`}`

			`// could be either " " or "\= Expect"`
			`if strings.HasPrefix(line, "\\= Expect") {`
			`continue`
			`} else if strings.HasPrefix(line, " ") {`
			`// trim off leading spaces for our text to match`
			`toMatch = line[4:]`
			`// trim off trailing spaces too`
			`toMatch = strings.TrimRight(toMatch, " ")`

			`m = matchString(t, re, toMatch)`

			`capsIdx = make(map[int]int)`
			`continue`
			`//t.Fatalf("Expected match text to start with 4 spaces, instead got: '%v'", line)`
			`} else if strings.HasPrefix(line, "No match") {`
			`validateNoMatch(t, re, m)`
			`// no match means we're done`
			`continue`
			`} else if subs := matchGroup.FindStringSubmatch(line); len(subs) == 3 {`
			`gIdx, _ := strconv.Atoi(subs[1])`
			`if _, ok := capsIdx[gIdx]; !ok {`
			`capsIdx[gIdx] = 0`
			`}`
			`validateMatch(t, re, m, toMatch, subs[2], gIdx, capsIdx[gIdx])`
			`capsIdx[gIdx]++`
			`continue`
			`} else {`
			`// no match -- problem`
			`t.Fatalf("Unknown file format, expected match or match group but got '%v'", line)`
			`}`
			`}`

			`}`

			`if err := scanner.Err(); err != nil {`
			`log.Fatal(err)`
			`}`
			`}`

			var matchGroup = regexp.MustCompile(`^\s(\d+): (.)`)

			`func problem(t *testing.T, input string, args ...interface{}) {`
			`failCount++`
			`t.Errorf(input, args...)`
			`}`

			`func validateNoMatch(t testing.T, re Regexp, m *Match) {`
			`if re == nil \|\| m == nil {`
			`return`
			`}`

			`problem(t, "Expected no match for pattern '%v', but got '%v'", re.pattern, m.String())`
			`}`

			`func validateMatch(t testing.T, re Regexp, m *Match, toMatch, value string, idx, capIdx int) {`
			`if re == nil {`
			`// already error'd earlier up stream`
			`return`
			`}`

			`if m == nil {`
			`// we didn't match, but should have`
			`problem(t, "Expected match for pattern '%v' with input '%v', but got no match", re.pattern, toMatch)`
			`return`
			`}`

			`g := m.Groups()`
			`if len(g) <= idx {`
			`problem(t, "Expected group %v does not exist in pattern '%v' with input '%v'", idx, re.pattern, toMatch)`
			`return`
			`}`

			`if value == "<unset>" {`
			`// this means we shouldn't have a cap for this group`
			`if len(g[idx].Captures) > 0 {`
			`problem(t, "Expected no cap %v in group %v in pattern '%v' with input '%v'", g[idx].Captures[capIdx].String(), idx, re.pattern, toMatch)`
			`}`

			`return`
			`}`

			`if len(g[idx].Captures) <= capIdx {`
			`problem(t, "Expected cap %v does not exist in group %v in pattern '%v' with input '%v'", capIdx, idx, re.pattern, toMatch)`
			`return`
			`}`

			`escp := unEscapeGroup(g[idx].String())`
			`//escp := unEscapeGroup(g[idx].Captures[capIdx].String())`
			`if escp != value {`
			`problem(t, "Expected '%v' but got '%v' for cap %v, group %v for pattern '%v' with input '%v'", value, escp, capIdx, idx, re.pattern, toMatch)`
			`return`
			`}`
			`}`

			`func compileRawPattern(t testing.T, pattern string) Regexp {`
			`// check our end for RegexOptions -trim them off`
			`index := strings.LastIndexAny(pattern, "/\"")`
			`//`
			`// Append "= Debug" to compare details between corefx and regexp2 on the PCRE test suite`
			`//`
			`var opts RegexOptions`

			`if index+1 < len(pattern) {`
			`textOptions := pattern[index+1:]`
			`pattern = pattern[:index+1]`
			`// there are lots of complex options here`
			`for _, textOpt := range strings.Split(textOptions, ",") {`
			`switch textOpt {`
			`case "dupnames":`
			`// we don't know how to handle this...`
			`default:`
			`if strings.Contains(textOpt, "i") {`
			`opts \|= IgnoreCase`
			`}`
			`if strings.Contains(textOpt, "s") {`
			`opts \|= Singleline`
			`}`
			`if strings.Contains(textOpt, "m") {`
			`opts \|= Multiline`
			`}`
			`if strings.Contains(textOpt, "x") {`
			`opts \|= IgnorePatternWhitespace`
			`}`
			`}`
			`}`

			`}`

			`// trim off first and last char`
			`pattern = pattern[1 : len(pattern)-1]`

			`defer func() {`
			`if rec := recover(); rec != nil {`
			`problem(t, "PANIC in compiling \"%v\": %v", pattern, rec)`
			`}`
			`}()`
			`re, err := Compile(pattern, opts)`
			`if err != nil {`
			`problem(t, "Error parsing \"%v\": %v", pattern, err)`
			`}`
			`return re`
			`}`

			`func matchString(t testing.T, re Regexp, toMatch string) *Match {`
			`if re == nil {`
			`return nil`
			`}`

			`re.MatchTimeout = time.Second * 1`

			`escp := ""`
			`var err error`
			`if toMatch != "\\" {`
			`escp = unEscapeToMatch(toMatch)`
			`}`
			`m, err := re.FindStringMatch(escp)`
			`if err != nil {`
			`problem(t, "Error matching \"%v\" in pattern \"%v\": %v", toMatch, re.pattern, err)`
			`}`
			`return m`
			`}`

			`func containsEnder(line string, ender byte, allowFirst bool) bool {`
			`index := strings.LastIndexByte(line, ender)`
			`if index > 0 {`
			`return true`
			`} else if index == 0 && allowFirst {`
			`return true`
			`}`
			`return false`
			`}`

			`func unEscapeToMatch(line string) string {`
			`idx := strings.IndexRune(line, '\\')`
			`// no slashes means no unescape needed`
			`if idx == -1 {`
			`return line`
			`}`

			`buf := bytes.NewBufferString(line[:idx])`
			`// get the runes for the rest of the string -- we're going full parser scan on this`

			`inEscape := false`
			`// take any \'s and convert them`
			`for i := idx; i < len(line); i++ {`
			`ch := line[i]`
			`if ch == '\\' {`
			`if inEscape {`
			`buf.WriteByte(ch)`
			`}`
			`inEscape = !inEscape`
			`continue`
			`}`
			`if inEscape {`
			`switch ch {`
			`case 'x':`
			`buf.WriteByte(scanHex(line, &i))`
			`case 'a':`
			`buf.WriteByte(0x07)`
			`case 'b':`
			`buf.WriteByte('\b')`
			`case 'e':`
			`buf.WriteByte(0x1b)`
			`case 'f':`
			`buf.WriteByte('\f')`
			`case 'n':`
			`buf.WriteByte('\n')`
			`case 'r':`
			`buf.WriteByte('\r')`
			`case 't':`
			`buf.WriteByte('\t')`
			`case 'v':`
			`buf.WriteByte(0x0b)`
			`default:`
			`if ch >= '0' && ch <= '7' {`
			`buf.WriteByte(scanOctal(line, &i))`
			`} else {`
			`buf.WriteByte(ch)`
			`//panic(fmt.Sprintf("unexpected escape '%v' in %v", string(ch), line))`
			`}`
			`}`
			`inEscape = false`
			`} else {`
			`buf.WriteByte(ch)`
			`}`
			`}`

			`return buf.String()`
			`}`

			`func unEscapeGroup(val string) string {`
			`// use hex for chars 0x00-0x1f, 0x7f-0xff`
			`buf := &bytes.Buffer{}`

			`for i := 0; i < len(val); i++ {`
			`ch := val[i]`
			`if ch <= 0x1f \|\| ch >= 0x7f {`
			`//write it as a \x00`
			`fmt.Fprintf(buf, "\\x%.2x", ch)`
			`} else {`
			`// write as-is`
			`buf.WriteByte(ch)`
			`}`
			`}`

			`return buf.String()`
			`}`

			`func scanHex(line string, idx *int) byte {`
			`if *idx >= len(line)-2 {`
			`panic(fmt.Sprintf("not enough hex chars in %v at %v", line, *idx))`
			`}`
			`(*idx)++`
			`d1 := hexDigit(line[*idx])`
			`(*idx)++`
			`d2 := hexDigit(line[*idx])`
			`if d1 < 0 \|\| d2 < 0 {`
			`panic("bad hex chars")`
			`}`

			`return byte(d1*0x10 + d2)`
			`}`

			`// Returns n <= 0xF for a hex digit.`
			`func hexDigit(ch byte) int {`

			`if d := uint(ch - '0'); d <= 9 {`
			`return int(d)`
			`}`

			`if d := uint(ch - 'a'); d <= 5 {`
			`return int(d + 0xa)`
			`}`

			`if d := uint(ch - 'A'); d <= 5 {`
			`return int(d + 0xa)`
			`}`

			`return -1`
			`}`

			`// Scans up to three octal digits (stops before exceeding 0377).`
			`func scanOctal(line string, idx *int) byte {`
			`// Consume octal chars only up to 3 digits and value 0377`

			`// octals can be 3,2, or 1 digit`
			`c := 3`

			`if diff := len(line) - *idx; c > diff {`
			`c = diff`
			`}`

			`i := 0`
			`d := int(line[*idx] - '0')`
			`for c > 0 && d <= 7 {`
			`i *= 8`
			`i += d`

			`c--`
			`(*idx)++`
			`if *idx < len(line) {`
			`d = int(line[*idx] - '0')`
			`}`
			`}`
			`(*idx)--`

			`// Octal codes only go up to 255. Any larger and the behavior that Perl follows`
			`// is simply to truncate the high bits.`
			`i &= 0xFF`

			`return byte(i)`
			`}`