After that, simply run the pipeline and save the results.
# Example: run a lighteval evaluation pipeline from Python with a vLLM-backed
# model, then save, push and display the results.
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.utils import EnvConfig
from lighteval.utils.imports import is_accelerate_available

# accelerate is optional: only import it and build an Accelerator when the
# package is available; otherwise fall back to None.
if is_accelerate_available():
    from datetime import timedelta
    from accelerate import Accelerator, InitProcessGroupKwargs

    # Process-group initialisation timeout of 3000 seconds.
    accelerator = Accelerator(
        kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]
    )
else:
    accelerator = None
# NOTE(review): `accelerator` is not referenced by main() below; presumably
# creating it is enough to initialise the distributed state - confirm.


def main():
    """Configure the evaluation, run it, then save, push and show results."""
    # Results go to ./results with per-sample details; push_to_hub=True also
    # uploads them, so replace the hub_results_org placeholder with a real
    # user or organisation name before running.
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=True,
        hub_results_org="your user name",
    )

    # override_batch_size=1 and max_samples=10 keep the run tiny.
    # NOTE(review): these look like smoke-test settings - drop them for a
    # full evaluation once the configuration is confirmed to work.
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        env_config=EnvConfig(cache_dir="tmp/"),
        custom_task_directory=None,
        override_batch_size=1,
        max_samples=10,
    )

    model_config = VLLMModelConfig(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        dtype="float16",
        use_chat_template=True,
    )

    # Task spec string; presumably "suite|task|num_fewshot|truncate_fewshot"
    # - verify against the lighteval task documentation.
    task = "helm|mmlu|5|1"

    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    pipeline.evaluate()
    pipeline.save_and_push_results()
    pipeline.show_results()


if __name__ == "__main__":
    main()
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4