common
: add partial regex support (#12808)
* move string_find_partial_stop & string_ends_with to common * add common_regex (supports partial matches) Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update common/regex-partial.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update common/regex-partial.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update common/regex-partial.h Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * partial regex: add missing iterator end checks * string utils: use string_views * direct throw to avoid ggml.h include * regex-partial: replace missed ggml_asserts --------- Co-authored-by: ochafik <ochafik@google.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
f5170c1d7a
commit
3198405e98
8 changed files with 575 additions and 5 deletions
56
common/regex-partial.h
Normal file
56
common/regex-partial.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
#pragma once
|
||||
|
||||
#include <regex>
|
||||
#include <string>
|
||||
|
||||
enum common_regex_match_type {
|
||||
COMMON_REGEX_MATCH_TYPE_NONE,
|
||||
COMMON_REGEX_MATCH_TYPE_PARTIAL,
|
||||
COMMON_REGEX_MATCH_TYPE_FULL,
|
||||
};
|
||||
|
||||
struct common_string_range {
|
||||
size_t begin;
|
||||
size_t end;
|
||||
common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
|
||||
if (begin > end) {
|
||||
throw std::runtime_error("Invalid range");
|
||||
}
|
||||
}
|
||||
// prevent default ctor
|
||||
common_string_range() = delete;
|
||||
bool empty() const {
|
||||
return begin == end;
|
||||
}
|
||||
bool operator==(const common_string_range & other) const {
|
||||
return begin == other.begin && end == other.end;
|
||||
}
|
||||
};
|
||||
|
||||
struct common_regex_match {
|
||||
common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
|
||||
std::vector<common_string_range> groups;
|
||||
|
||||
bool operator==(const common_regex_match & other) const {
|
||||
return type == other.type && groups == other.groups;
|
||||
}
|
||||
bool operator!=(const common_regex_match & other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
||||
class common_regex {
|
||||
std::string pattern;
|
||||
std::regex rx;
|
||||
std::regex rx_reversed_partial;
|
||||
|
||||
public:
|
||||
explicit common_regex(const std::string & pattern);
|
||||
|
||||
common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
|
||||
|
||||
const std::string & str() const { return pattern; }
|
||||
};
|
||||
|
||||
// For testing only (pretty print of failures).
|
||||
std::string regex_to_reversed_partial_regex(const std::string & pattern);
|
Loading…
Add table
Add a link
Reference in a new issue