35 lines
941 B
Makefile
35 lines
941 B
Makefile
# Run every recipe line with `bash -uc`: `-u` makes expanding an unset variable
# an error. Neither `-e` nor `pipefail` is enabled here, so a recipe line only
# fails when its final command (or the last stage of a pipeline) fails.
set shell := ["bash", "-uc"]
|
|
|
|
# Run the full validation pass: schema check, then scoring, then per-file
# analysis. Expressed as dependencies so each step's command line is defined
# in exactly one recipe; just runs the dependencies in the order listed.
validate: schema score analyze
|
|
|
|
# Run the scoring script (dataset/score_data.py) through `uv run`.
score:
    uv run dataset/score_data.py
|
|
|
|
# Run the schema validation script (dataset/validate_schema.py) through `uv run`.
schema:
    uv run dataset/validate_schema.py
|
|
|
|
# Run dataset/analyze_data.py once per data/*.jsonl file, passing
# `--show-examples 0`.
# The recipe shell is started without `-e`, so a bare loop would report only
# the status of its last iteration; `|| exit $?` aborts on the first failing
# file and propagates that file's exit status to just.
analyze:
    for f in data/*.jsonl; do \
        uv run dataset/analyze_data.py --input "$f" --show-examples 0 || exit $?; \
    done
|
|
|
|
# Run dataset/prepare_data.py with QMD_BASE_MODEL set in its environment.
# Both values can be overridden positionally (`just prepare <model> <seed>`);
# a bare `just prepare` uses the defaults below, which are the values that
# were previously hard-coded in the command.
prepare base_model="Qwen/Qwen3-1.7B" seed="42":
    QMD_BASE_MODEL="{{base_model}}" uv run dataset/prepare_data.py --seed "{{seed}}"
|
|
|
|
# Launch local SFT training under torchrun, mirroring stdout and stderr to
# /tmp/qmd-sft-train.log.
# `prepare` is a real dependency, so just runs it first and stops if it fails,
# instead of spawning a nested `just` process from inside the recipe.
# `set -o pipefail` is needed because the recipe shell does not enable it:
# without it the pipeline's status is tee's, and a failed training run would
# be reported as success.
# With the file's `bash -u` shell, expanding ${HF_TOKEN} aborts the recipe
# when the variable is unset.
train-local: prepare
    set -o pipefail; \
    HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node auto \
        train.py sft --config configs/sft_local.yaml |& tee /tmp/qmd-sft-train.log
|
|
|
|
# Experimental GRPO training is in finetune/experiments/grpo and not part of
|
|
# the default pipeline.
|
|
#
|
|
# grpo-local:
|
|
# HF_TOKEN=${HF_TOKEN} uv run train.py grpo --config experiments/grpo/grpo.yaml |& tee /tmp/qmd-grpo-train.log
|
|
|