In the latest version of sglang, passing response_format=None in a chat completion request
leads to a validation error. The server was launched with:
python -m sglang.launch_server --model-path xxxx/Qwen/Qwen2.5-72B-Instruct --tp 8 --host 0.0.0.0 --port 8100 --served-model-name dsrdq32
After updating sglang to v0.4.4, the request fails with an error like:
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/middleware/base.py", line 163, in call_next
raise app_exc
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/middleware/base.py", line 149, in coro
await self.app(scope, receive_or_disconnect, send_no_error)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/sglang/srt/entrypoints/http_server.py", line 495, in openai_v1_chat_completions
return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/sglang/srt/openai_api/adapter.py", line 1229, in v1_chat_completions
all_requests = [ChatCompletionRequest(**request_json)]
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/pydantic/main.py", line 214, in __init__
validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
pydantic_core._pydantic_core.ValidationError: 2 validation errors for ChatCompletionRequest
response_format.ResponseFormat
Input should be a valid dictionary or instance of ResponseFormat [type=model_type, input_value=None, input_type=NoneType]
For further information visit https://errors.pydantic.dev/2.10/v/model_type
response_format.StructuralTagResponseFormat
Input should be a valid dictionary or instance of StructuralTagResponseFormat [type=model_type, input_value=None, input_type=NoneType]
For further information visit https://errors.pydantic.dev/2.10/v/model_type
Reproduction
import openai
client = openai.Client()
response = client.chat.completions.create(
model="xxx",
messages=[
{"role": "user", "content": "List 3 countries and their capitals."},
],
temperature=0,
max_tokens=64,
response_format=None
)
print_highlight(response)
If we comment out (i.e. omit) the response_format argument, or use response_format={"type": "json_object"},
the request succeeds. I may check the repo code to find out whether something changed there.
python3 -m sglang.check_env
INFO 03-19 15:58:34 __init__.py:190] Automatically detected platform cuda.
/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py:361: UserWarning:
!! WARNING !!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler (c++) is not compatible with the compiler Pytorch was
built with for this platform, which is g++ on linux. Please
use g++ to to compile your extension. Alternatively, you may
compile PyTorch from source using c++, and then you can also use
c++ to compile your extension.
See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help
with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!! WARNING !!
warnings.warn(WRONG_COMPILER_WARNING.format(
/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1964: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Traceback (most recent call last):
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2104, in _run_ninja_build
subprocess.run(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/subprocess.py", line 526, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/sglang/check_env.py", line 306, in <module>
check_env()
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/sglang/check_env.py", line 285, in check_env
env_info.update(get_package_versions(PACKAGE_LIST))
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/sglang/check_env.py", line 62, in get_package_versions
module = importlib.import_module(package_name)
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/__init__.py", line 1, in <module>
from . import testing
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/testing.py", line 11, in <module>
from .matcher import GrammarMatcher, bitmask_dtype
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/matcher.py", line 13, in <module>
from .kernels import apply_token_bitmask_inplace_kernels
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/kernels/__init__.py", line 12, in <module>
from .apply_token_bitmask_inplace_cuda import apply_token_bitmask_inplace_cuda
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cuda.py", line 54, in <module>
_load_torch_ops()
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cuda.py", line 42, in _load_torch_ops
torch.utils.cpp_extension.load_inline(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1646, in load_inline
return _jit_compile(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1721, in _jit_compile
_write_ninja_file_and_build_library(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1833, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2120, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'xgrammar': [1/3] c++ -MMD -MF main.o.d -DTORCH_EXTENSION_NAME=xgrammar -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/TH -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -Wno-switch-bool -c /data/home/fanliyang/.cache/torch_extensions/py310_cu124/xgrammar/main.cpp -o main.o
FAILED: main.o
c++ -MMD -MF main.o.d -DTORCH_EXTENSION_NAME=xgrammar -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/TH -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -Wno-switch-bool -c /data/home/fanliyang/.cache/torch_extensions/py310_cu124/xgrammar/main.cpp -o main.o
/bin/sh: 1: c++: not found
[2/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output cuda.cuda.o.d -DTORCH_EXTENSION_NAME=xgrammar -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/TH -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 -std=c++17 --threads 4 -use_fast_math -c /data/home/fanliyang/.cache/torch_extensions/py310_cu124/xgrammar/cuda.cu -o cuda.cuda.o
FAILED: cuda.cuda.o
/usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output cuda.cuda.o.d -DTORCH_EXTENSION_NAME=xgrammar -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/TH -isystem /data/home/fanliyang/miniforge3/envs/nlp310/lib/python3.10/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /data/home/fanliyang/miniforge3/envs/nlp310/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' -O3 -std=c++17 --threads 4 -use_fast_math -c /data/home/fanliyang/.cache/torch_extensions/py310_cu124/xgrammar/cuda.cu -o cuda.cuda.o
gcc: fatal error: cannot execute ‘cc1plus’: execvp: No such file or directory
compilation terminated.
nvcc fatal : Failed to preprocess host compiler properties.
ninja: build stopped: subcommand failed.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4