llama : move end-user examples to tools directory (#13249)
* llama : move end-user examples to tools directory --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
parent
b34443923c
commit
1d36b3670b
213 changed files with 226 additions and 190 deletions
|
@ -1,77 +0,0 @@
|
|||
import pytest
|
||||
from utils import *
|
||||
|
||||
server = ServerPreset.tinyllama_infill()
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def create_server():
|
||||
global server
|
||||
server = ServerPreset.tinyllama_infill()
|
||||
|
||||
|
||||
def test_infill_without_input_extra():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/infill", data={
|
||||
"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
||||
"prompt": " int n_threads = llama_",
|
||||
"input_suffix": "}\n",
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert match_regex("(Ann|small|shiny|Daddy)+", res.body["content"])
|
||||
|
||||
|
||||
def test_infill_with_input_extra():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/infill", data={
|
||||
"input_extra": [{
|
||||
"filename": "llama.h",
|
||||
"text": "LLAMA_API int32_t llama_n_threads();\n"
|
||||
}],
|
||||
"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
||||
"prompt": " int n_threads = llama_",
|
||||
"input_suffix": "}\n",
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert match_regex("(Dad|excited|park)+", res.body["content"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_extra", [
|
||||
{},
|
||||
{"filename": "ok"},
|
||||
{"filename": 123},
|
||||
{"filename": 123, "text": "abc"},
|
||||
{"filename": 123, "text": 456},
|
||||
])
|
||||
def test_invalid_input_extra_req(input_extra):
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/infill", data={
|
||||
"input_extra": [input_extra],
|
||||
"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
||||
"prompt": " int n_threads = llama_",
|
||||
"input_suffix": "}\n",
|
||||
})
|
||||
assert res.status_code == 400
|
||||
assert "error" in res.body
|
||||
|
||||
|
||||
@pytest.mark.skipif(not is_slow_test_allowed(), reason="skipping slow test")
|
||||
def test_with_qwen_model():
|
||||
global server
|
||||
server.model_file = None
|
||||
server.model_hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-IQ3_XXS-GGUF"
|
||||
server.model_hf_file = "qwen2.5-coder-1.5b-iq3_xxs-imat.gguf"
|
||||
server.start(timeout_seconds=600)
|
||||
res = server.make_request("POST", "/infill", data={
|
||||
"input_extra": [{
|
||||
"filename": "llama.h",
|
||||
"text": "LLAMA_API int32_t llama_n_threads();\n"
|
||||
}],
|
||||
"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
||||
"prompt": " int n_threads = llama_",
|
||||
"input_suffix": "}\n",
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert res.body["content"] == "n_threads();\n printf(\"Number of threads: %d\\n\", n_threads);\n return 0;\n"
|
Loading…
Add table
Add a link
Reference in a new issue