llama : fix whitespace escaping in tokenizer (#2724)

This commit is contained in:
goerch 2023-08-22 23:10:42 +02:00 committed by GitHub
parent c63bb1d16a
commit 46ef5b5fcf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 21 deletions

View file

@ -11,18 +11,11 @@
#include <locale>
static std::string escape_whitespace(const std::string& text) {
std::string result;
bool escaping = false;
result += "\xe2\x96\x81";
std::string result = "\xe2\x96\x81";
for (size_t offs = 0; offs < text.length(); ++offs) {
if (text[offs] == ' ') {
if (!escaping) {
result += "\xe2\x96\x81";
escaping = true;
}
}
else {
escaping = false;
result += "\xe2\x96\x81";
} else {
result += text[offs];
}
}