feat: query_parser + stringcase.

2025-06-23 23:26:59 +08:00
parent de36ab695d
commit 55c80024c2
22 changed files with 1362 additions and 148 deletions
--- a/stringcase/split.go
+++ b/stringcase/split.go
@@ -0,0 +1,144 @@
+package stringcase
+
+import (
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+type runeInfo struct {
+	r rune
+}
+
+// Checks whether or not the rune represented by rInfo is a digit.
+func (rInfo *runeInfo) isDigit() bool {
+	return unicode.IsDigit(rInfo.r)
+}
+
+// Checks whether or not the rune represented by rInfo is an uppercase rune.
+func (rInfo *runeInfo) isUppercase() bool {
+	return unicode.IsUpper(rInfo.r)
+}
+
+// A reader designed for reading "CamelCase" strings.
+type rdr struct {
+	input       string   // The data this reader operates on.
+	pos         int      // The position of this reader.
+	hasNextRune bool     // A flag indicating if there's a next rune.
+	rdRune      runeInfo // Information about the last rune that was read.
+	nxtRune     runeInfo // Information about the next rune that's about to be read.
+}
+
+// Read the next rune from r.
+func (r *rdr) readRune() {
+	r.rdRune = runeInfo{rune(r.input[r.pos])}
+	r.pos = r.pos + 1
+	r.hasNextRune = r.pos < len(r.input)
+
+	if r.hasNextRune {
+		r.nxtRune = runeInfo{rune(r.input[r.pos])}
+	}
+}
+
+// Undo the last rune from r.
+func (r *rdr) unreadRune() {
+	r.pos = r.pos - 1
+	r.nxtRune = r.rdRune
+	r.rdRune = runeInfo{rune(r.input[r.pos])}
+	r.hasNextRune = true // NOTE: An undo operation means that there will be always a next rune.
+}
+
+// Verify if the word that's currently read by r is a word that should NOT be split.
+// If noSplit contains a word that starts with the word that's currently read by r, this function returns true, false
+// otherwise.
+func (r *rdr) isNoSplitWord(sIdx int, noSplit []string) bool {
+	return ContainsFn(noSplit, r.input[sIdx:r.pos+1], func(got, want string) bool {
+		return strings.HasPrefix(got, want)
+	})
+}
+
+// Read the next part from r.
+// Each word in noSplit (if provided) is treated as a word that shouldn't be split.
+func (r *rdr) readNextPart(noSplit []string) string {
+	sIdx := r.pos
+
+	r.readRune()
+
+	if r.rdRune.isDigit() {
+		return r.readNumber(sIdx, noSplit)
+	}
+
+	return r.readWord(sIdx, noSplit)
+}
+
+// Read and return a number from r.
+func (r *rdr) readNumber(sIdx int, noSplit []string) string {
+	if r.hasNextRune && r.nxtRune.isDigit() {
+		for r.hasNextRune && (r.nxtRune.isDigit() || r.isNoSplitWord(sIdx, noSplit)) {
+			r.readRune()
+		}
+
+		return r.input[sIdx:r.pos]
+	}
+
+	return r.input[sIdx:r.pos]
+}
+
+// Read and return a word from r.
+func (r *rdr) readWord(sIdx int, noSplit []string) string {
+	if r.hasNextRune && r.nxtRune.isUppercase() {
+		for r.hasNextRune && (r.nxtRune.isUppercase() || r.isNoSplitWord(sIdx, noSplit)) {
+			r.readRune()
+		}
+
+		if r.hasNextRune && (!r.nxtRune.isUppercase() && !r.nxtRune.isDigit()) {
+			r.unreadRune()
+		}
+
+		return r.input[sIdx:r.pos]
+	}
+
+	for r.hasNextRune && (r.isNoSplitWord(sIdx, noSplit) || (!r.nxtRune.isUppercase() && !r.nxtRune.isDigit())) {
+		r.readRune()
+	}
+
+	return r.input[sIdx:r.pos]
+}
+
+// Split reads v treating it as a "CamelCase" and returns the different words.
+// If v isn't a valid UTF-8 string, or when v is an empty string, a slice with one element (v) is returned.
+// Each word in noSplit (if provided) is treated as a word that shouldn't be split.
+func Split(input string, noSplit ...string) []string {
+	if !utf8.ValidString(input) || len(input) == 0 {
+		return []string{input}
+	}
+
+	output := make([]string, 0)
+
+	inputs := SplitByNonAlphanumeric(input)
+	for _, v := range inputs {
+		v = strings.TrimSpace(v)
+		if v == "" {
+			continue
+		}
+		output = append(output, split(v, noSplit...)...)
+	}
+
+	return output
+}
+
+func split(input string, noSplit ...string) []string {
+	if !utf8.ValidString(input) || len(input) == 0 {
+		return []string{input}
+	}
+
+	vRdr := &rdr{input: input}
+	output := make([]string, 0)
+
+	for vRdr.pos < len(input) {
+		part := vRdr.readNextPart(noSplit)
+		output = append(output, part)
+	}
+
+	return output
+}